diff --git a/src/cristallina/utils.py b/src/cristallina/utils.py index 9e146f7..d6639dd 100644 --- a/src/cristallina/utils.py +++ b/src/cristallina/utils.py @@ -16,7 +16,8 @@ import sfdata.errors from sfdata import SFDataFiles, sfdatafile, SFScanInfo, SFProcFile from xraydb import material_mu from joblib import Parallel, delayed, cpu_count - +from scipy.stats import pearsonr +import matplotlib.pyplot as plt def collect_runs_metadata(pgroup_data_dir: str | os.PathLike): """ @@ -381,6 +382,49 @@ class ROI: def __ne__(self, other): return not self == other +def check_pid_offset(step,ch1,ch2,offsets_to_try=[-2,-1,0,1,2],plot=False): + """Check pid offset between two channels. Offset is the value that should be added to the second channel. Returns the best offset. + ch1 should be something that can be trusted, from experience for example the SARFE10-PBPG050:HAMP-INTENSITY-CAL is reliable. + """ + # Take the first step if scan object is given + if type(step) == sfdata.sfscaninfo.SFScanInfo: + step = step[0] + + assert type(step) == sfdata.sfdatafiles.SFDataFiles, 'First parameter should be either a step or scan (if scan then first step is tested)' + assert type(ch1) == str, f'Channel {ch1} must be a string' + assert type(ch2) == str, f'Channel {ch2} must be a string' + + corrs = [] + if plot: + fig,ax = plt.subplots(1,len(offsets_to_try),figsize=(12,3)) + + for i,pid_offset in enumerate(offsets_to_try): + ss = step[ch1,ch2] # Make a subset with 2 channels + ss[ch2].offset = pid_offset # Add offset to the second channel + ss.drop_missing() + x = ss[ch1].data + y = ss[ch2].data + if i == 0: + assert len(x.shape) == 1, f'Channel {ch1} has more than one dimension per pid' + assert len(y.shape) == 1, f'Channel {ch2} has more than one dimension per pid' + assert len(x) > 2, f'Channel {ch1} has less than 3 data points' + assert len(y) > 2, f'Channel {ch2} has less than 3 data points' + corr = pearsonr(x,y) # Calculate the correlation value + corrs.append(corr.correlation) # Save in the array + if plot: + if i == 0: + ax[i].set_ylabel(ch2) + ax[2].set_xlabel(ch1) + ax[i].plot(ss[ch1].data,ss[ch2].data,'ok',alpha=0.1) + ax[i].set_title(f"Corr = {np.round(corr.correlation,2)}") + + best_offset = offsets_to_try[np.argmax(corrs)] # Best offest is where the correlation is the highest + + if plot: + plt.suptitle(f'Best correlation for pid offset {best_offset}') + plt.tight_layout() + + return best_offset ######################## Setting up paths ######################## diff --git a/tests/test_utils.py b/tests/test_utils.py index d3e7c39..3565ae8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,8 +2,8 @@ import pytest import os import numpy as np import cristallina.utils as utils - -from cristallina.utils import ROI, print_run_info, heuristic_extract_pgroup, gauss, find_nearest, xray_transmission,stand_table +from sfdata import SFDataFiles +from cristallina.utils import ROI, print_run_info, heuristic_extract_pgroup, gauss, find_nearest, xray_transmission,stand_table,check_pid_offset def test_print(capsys): @@ -30,8 +30,10 @@ def test_ROI(): def test_extract_pgroup(): + cwd = os.getcwd() os.chdir("tests/data/p20841/raw/") assert heuristic_extract_pgroup() == '20841' + os.chdir(cwd) def test_gauss(): @@ -47,6 +49,9 @@ def test_xray_transmission(): T = xray_transmission(9000, 100e-6, material = 'Si') assert T == 0.342385039732607 +def test_check_pid_offset(): + with SFDataFiles(f"tests/data/p20841/raw/run0205/data/acq*.h5") as data: + assert check_pid_offset(data,'SARFE10-PBPG050:HAMP-INTENSITY-CAL','SARFE10-PBIG050-EVR0:CALCI') == 0 # This test can only be run localy (github has no access to /sf/cristallina), therefore it's commented out.