Added checker for the pid offset between two channels

2023-11-13 18:18:53 +01:00
parent ff87dbdd60
commit aa6190e9dd
2 changed files with 52 additions and 3 deletions
--- a/src/cristallina/utils.py
+++ b/src/cristallina/utils.py
@@ -16,7 +16,8 @@ import sfdata.errors
 from sfdata import SFDataFiles, sfdatafile, SFScanInfo, SFProcFile
 from xraydb import material_mu
 from joblib import Parallel, delayed, cpu_count
-
+from scipy.stats import pearsonr
+import matplotlib.pyplot as plt

 def collect_runs_metadata(pgroup_data_dir: str | os.PathLike):
    """
@@ -381,6 +382,49 @@ class ROI:
    def __ne__(self, other):
        return not self == other

+def check_pid_offset(step,ch1,ch2,offsets_to_try=[-2,-1,0,1,2],plot=False):
+    """Check pid offset between two channels. Offset is the value that should be added to the second channel. Returns the best offset.
+    ch1 should be something that can be trusted, from experience for example the SARFE10-PBPG050:HAMP-INTENSITY-CAL is reliable.  
+    """
+    # Take the first step if scan object is given
+    if type(step) == sfdata.sfscaninfo.SFScanInfo:
+        step = step[0]
+    
+    assert type(step) == sfdata.sfdatafiles.SFDataFiles, 'First parameter should be either a step or scan (if scan then first step is tested)'
+    assert type(ch1) == str, f'Channel {ch1} must be a string'
+    assert type(ch2) == str, f'Channel {ch2} must be a string'
+
+    corrs = []
+    if plot:
+        fig,ax = plt.subplots(1,len(offsets_to_try),figsize=(12,3))
+
+    for i,pid_offset in enumerate(offsets_to_try):
+        ss = step[ch1,ch2] # Make a subset with 2 channels
+        ss[ch2].offset = pid_offset  # Add offset to the second channel
+        ss.drop_missing()
+        x = ss[ch1].data
+        y = ss[ch2].data
+        if i == 0:
+            assert len(x.shape) == 1, f'Channel {ch1} has more than one dimension per pid'
+            assert len(y.shape) == 1, f'Channel {ch2} has more than one dimension per pid'
+            assert len(x) > 2, f'Channel {ch1} has less than 3 data points'
+            assert len(y) > 2, f'Channel {ch2} has less than 3 data points'
+        corr = pearsonr(x,y) # Calculate the correlation value
+        corrs.append(corr.correlation) # Save in the array
+        if plot:
+            if i == 0:
+                ax[i].set_ylabel(ch2)
+            ax[2].set_xlabel(ch1)
+            ax[i].plot(ss[ch1].data,ss[ch2].data,'ok',alpha=0.1)
+            ax[i].set_title(f"Corr = {np.round(corr.correlation,2)}")
+
+    best_offset = offsets_to_try[np.argmax(corrs)] # Best offest is where the correlation is the highest
+    
+    if plot:
+        plt.suptitle(f'Best correlation for pid offset {best_offset}') 
+        plt.tight_layout()
+    
+    return best_offset

 ######################## Setting up paths ########################

--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -2,8 +2,8 @@ import pytest
 import os
 import numpy as np
 import cristallina.utils as utils
-
-from cristallina.utils import ROI, print_run_info, heuristic_extract_pgroup, gauss, find_nearest, xray_transmission,stand_table
+from sfdata import SFDataFiles
+from cristallina.utils import ROI, print_run_info, heuristic_extract_pgroup, gauss, find_nearest, xray_transmission,stand_table,check_pid_offset


 def test_print(capsys):
@@ -30,8 +30,10 @@ def test_ROI():
    
    
 def test_extract_pgroup():
+    cwd = os.getcwd()
    os.chdir("tests/data/p20841/raw/")
    assert heuristic_extract_pgroup() == '20841'
+    os.chdir(cwd)


 def test_gauss():
@@ -47,6 +49,9 @@ def test_xray_transmission():
    T = xray_transmission(9000, 100e-6, material = 'Si')
    assert T == 0.342385039732607

+def test_check_pid_offset():
+    with SFDataFiles(f"tests/data/p20841/raw/run0205/data/acq*.h5") as data:
+        assert check_pid_offset(data,'SARFE10-PBPG050:HAMP-INTENSITY-CAL','SARFE10-PBIG050-EVR0:CALCI') == 0

 # This test can only be run localy (github has no access to /sf/cristallina), therefore it's commented out.