added ROI calculation including tests

2023-11-14 16:20:52 +01:00
parent 4d5263e620
commit be78d68819
2 changed files with 102 additions and 17 deletions
--- a/src/cristallina/analysis.py
+++ b/src/cristallina/analysis.py
@@ -1,5 +1,6 @@
 import re
-from collections import defaultdict
+from collections import defaultdict, deque
+from pathlib import Path
 from typing import Optional

 import numpy as np
@@ -10,7 +11,7 @@ import lmfit
 from sfdata import SFDataFiles, sfdatafile, SFScanInfo

 import joblib
-from joblib import Parallel, delayed, Memory
+from joblib import Memory

 from . import utils
 from .utils import ROI
@@ -34,7 +35,15 @@ def setup_cachedirs(pgroup=None, cachedir=None):
        else:
            parts = re.split(r"(\d.*)", pgroup)  # ['p', '2343', '']
            pgroup_no = parts[-2]
-        cachedir = f"/das/work/units/cristallina/p{pgroup_no}/cachedir"
+
+        candidates = [f"/das/work/units/cristallina/p{pgroup_no}", 
+                      f"/sf/cristallina/data/p{pgroup_no}/res",]
+        
+        for cache_parent_dir in candidates:
+            if Path(cache_parent_dir).exists():
+                cachedir = Path(cache_parent_dir) / "cachedir"
+                break
+        
    except KeyError as e:
        print(e)
        cachedir = "/das/work/units/cristallina/p19739/cachedir"
@@ -47,7 +56,6 @@ def setup_cachedirs(pgroup=None, cachedir=None):
    
    return memory

-memory = None
 memory = setup_cachedirs()


@@ -222,7 +230,7 @@ def perform_image_stack_sum(
        if roi is None:
            im_ROI = im[:]
        else:
-            im_ROI = im[:, roi.rows, roi.cols]
+            im_ROI = im[roi.rows, roi.cols]

        summed = np.zeros(im_ROI[0].shape)

@@ -249,6 +257,70 @@ def perform_image_stack_sum(
    return summed


+@memory.cache(ignore=["batch_size"])  # we ignore batch_size for caching purposes
+def perform_image_roi_crop(
+    filesets,
+    channel="JF16T03V01",
+    alignment_channels=None,
+    batch_size=10,
+    roi: ROI = None,
+    preview=False,
+    lower_cutoff=None,
+    upper_cutoff=np.inf,
+):
+    """
+    Cuts out a region of interest (ROI) for an image channel 
+    from a fileset (e.g. "run0352/data/acq0001.*.h5" or step.fnames from a SFScanInfo object).
+    
+    Drops missing data from output and allows alignment, 
+    i.e. reducing only to a common subset with other channels.
+
+    Calculations are performed in batches to reduce maximum memory requirements.
+
+    Lower- and upper cutoff allow to threshold the data.
+
+    Preview only applies calculation to first batch and returns.
+
+    Returns: an 1D array (along the pulses recorded without missing) of 2D images
+
+    TODO: should we create a complete channel here instead of returning `raw` data?
+
+    """
+
+    with SFDataFiles(*filesets) as data:
+        if alignment_channels is not None:
+            channels = [channel] + [ch for ch in alignment_channels]
+        else:
+            channels = [channel]
+
+        subset = data[channels]
+        subset.drop_missing()
+
+        Images = subset[channel]
+
+        rois_within_batch = list()
+
+        for image_slice in Images.in_batches(batch_size):
+            
+            index_slice, im = image_slice
+
+            if roi is None:
+                im_ROI = im[:]
+            else:
+                im_ROI = im[:, roi.rows, roi.cols]
+
+            if lower_cutoff is not None:
+                im_ROI = np.clip(im_ROI, lower_cutoff, upper_cutoff)
+
+            rois_within_batch.extend(im_ROI)
+
+            # only return first batch
+            if preview:
+                break
+        
+    return np.array(rois_within_batch)
+
+
 def fit_2d_gaussian(image, roi: Optional[ROI] = None, plot=False):
    """
    2D Gaussian fit using LMFit for a given image and an optional region of interest.
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -3,16 +3,29 @@ import numpy as np
 import unittest.mock

 import cristallina.analysis
+import cristallina.utils

 __author__ = "Alexander Steppke"


+def test_joblib_memory():
+     """ We need joblib for fast caching of intermediate results in all cases. So we check
+         if the basic function caching to disk works.
+     """
+     def calc_example(x):
+          return x**2
+     
+     calc_cached = cristallina.analysis.memory.cache(calc_example)
+
+     assert calc_cached(8) == 64
+     assert calc_cached.check_call_in_cache(8) == True
+    
@unittest.mock.patch("jungfrau_utils.file_adapter.locate_gain_file", lambda path, **kwargs: "tests/data/gains.h5")
@unittest.mock.patch("jungfrau_utils.file_adapter.locate_pedestal_file", lambda path, **kwargs: "tests/data/JF16T03V01.res.h5")
 def test_image_calculations():
    res = cristallina.analysis.perform_image_calculations(['tests/data/p20841/raw/run0185/data/acq0001.*.h5'])
    # these values are only correct when using the specific gain and pedestal files included in the test data
-    # they do not correspond to the gain and pedestal files used in the actual analysis (otherwise we need to include several here as test data)
+    # they do not correspond to the gain and pedestal files used in the actual analysis
    intensity = [1712858.6,
              693994.06,
              1766390.0,
@@ -27,18 +40,18 @@ def test_image_calculations():
              453842.6]
    assert np.allclose(res["JF16T03V01_intensity"], intensity)

-def test_joblib_memory():
-     """ We need joblib for fast caching of intermediate results in all cases. So we check
-         if the basic function caching to disk works.
-     """
-     def calc_example(x):
-          return x**2
-     
-     calc_cached = cristallina.analysis.memory.cache(calc_example)
-
-     assert calc_cached(8) == 64
-     assert calc_cached.check_call_in_cache(8) == True
+@unittest.mock.patch("jungfrau_utils.file_adapter.locate_gain_file", lambda path, **kwargs: "tests/data/gains.h5")
+@unittest.mock.patch("jungfrau_utils.file_adapter.locate_pedestal_file", lambda path, **kwargs: "tests/data/JF16T03V01.res.h5")
+def test_roi_calculation():
+    roi = cristallina.utils.ROI(left=575, right=750, top=750, bottom=600)
+    
+    cutouts = cristallina.analysis.perform_image_roi_crop(['tests/data/p20841/raw/run0205/data/acq0001.*.h5'], roi=roi)
+    
+    sum_roi, max_roi, min_roi = np.sum(cutouts[11]), np.max(cutouts[11]), np.min(cutouts[11])

+    # these values are only correct when using the specific gain and pedestal files included in the test data
+    # they do not correspond to the gain and pedestal files used in the actual analysis
+    assert np.allclose([3119071.8, 22381.547, -0.9425874 ], [sum_roi, max_roi, min_roi])

 def test_minimal_2d_gaussian():