diff --git a/src/cristallina/analysis.py b/src/cristallina/analysis.py index 447ea0f..210a50e 100644 --- a/src/cristallina/analysis.py +++ b/src/cristallina/analysis.py @@ -1,5 +1,6 @@ import re -from collections import defaultdict +from collections import defaultdict, deque +from pathlib import Path from typing import Optional import numpy as np @@ -10,7 +11,7 @@ import lmfit from sfdata import SFDataFiles, sfdatafile, SFScanInfo import joblib -from joblib import Parallel, delayed, Memory +from joblib import Memory from . import utils from .utils import ROI @@ -34,7 +35,15 @@ def setup_cachedirs(pgroup=None, cachedir=None): else: parts = re.split(r"(\d.*)", pgroup) # ['p', '2343', ''] pgroup_no = parts[-2] - cachedir = f"/das/work/units/cristallina/p{pgroup_no}/cachedir" + + candidates = [f"/das/work/units/cristallina/p{pgroup_no}", + f"/sf/cristallina/data/p{pgroup_no}/res",] + + for cache_parent_dir in candidates: + if Path(cache_parent_dir).exists(): + cachedir = Path(cache_parent_dir) / "cachedir" + break + except KeyError as e: print(e) cachedir = "/das/work/units/cristallina/p19739/cachedir" @@ -47,7 +56,6 @@ def setup_cachedirs(pgroup=None, cachedir=None): return memory -memory = None memory = setup_cachedirs() @@ -222,7 +230,7 @@ def perform_image_stack_sum( if roi is None: im_ROI = im[:] else: - im_ROI = im[:, roi.rows, roi.cols] + im_ROI = im[roi.rows, roi.cols] summed = np.zeros(im_ROI[0].shape) @@ -249,6 +257,70 @@ def perform_image_stack_sum( return summed +@memory.cache(ignore=["batch_size"]) # we ignore batch_size for caching purposes +def perform_image_roi_crop( + filesets, + channel="JF16T03V01", + alignment_channels=None, + batch_size=10, + roi: ROI = None, + preview=False, + lower_cutoff=None, + upper_cutoff=np.inf, +): + """ + Cuts out a region of interest (ROI) for an image channel + from a fileset (e.g. "run0352/data/acq0001.*.h5" or step.fnames from a SFScanInfo object). + + Drops missing data from output and allows alignment, + i.e. reducing only to a common subset with other channels. + + Calculations are performed in batches to reduce maximum memory requirements. + + Lower- and upper cutoff allow to threshold the data. + + Preview only applies calculation to first batch and returns. + + Returns: an 1D array (along the pulses recorded without missing) of 2D images + + TODO: should we create a complete channel here instead of returning `raw` data? + + """ + + with SFDataFiles(*filesets) as data: + if alignment_channels is not None: + channels = [channel] + [ch for ch in alignment_channels] + else: + channels = [channel] + + subset = data[channels] + subset.drop_missing() + + Images = subset[channel] + + rois_within_batch = list() + + for image_slice in Images.in_batches(batch_size): + + index_slice, im = image_slice + + if roi is None: + im_ROI = im[:] + else: + im_ROI = im[:, roi.rows, roi.cols] + + if lower_cutoff is not None: + im_ROI = np.clip(im_ROI, lower_cutoff, upper_cutoff) + + rois_within_batch.extend(im_ROI) + + # only return first batch + if preview: + break + + return np.array(rois_within_batch) + + def fit_2d_gaussian(image, roi: Optional[ROI] = None, plot=False): """ 2D Gaussian fit using LMFit for a given image and an optional region of interest. diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 9fd5f79..2bbae95 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -3,16 +3,29 @@ import numpy as np import unittest.mock import cristallina.analysis +import cristallina.utils __author__ = "Alexander Steppke" +def test_joblib_memory(): + """ We need joblib for fast caching of intermediate results in all cases. So we check + if the basic function caching to disk works. + """ + def calc_example(x): + return x**2 + + calc_cached = cristallina.analysis.memory.cache(calc_example) + + assert calc_cached(8) == 64 + assert calc_cached.check_call_in_cache(8) == True + @unittest.mock.patch("jungfrau_utils.file_adapter.locate_gain_file", lambda path, **kwargs: "tests/data/gains.h5") @unittest.mock.patch("jungfrau_utils.file_adapter.locate_pedestal_file", lambda path, **kwargs: "tests/data/JF16T03V01.res.h5") def test_image_calculations(): res = cristallina.analysis.perform_image_calculations(['tests/data/p20841/raw/run0185/data/acq0001.*.h5']) # these values are only correct when using the specific gain and pedestal files included in the test data - # they do not correspond to the gain and pedestal files used in the actual analysis (otherwise we need to include several here as test data) + # they do not correspond to the gain and pedestal files used in the actual analysis intensity = [1712858.6, 693994.06, 1766390.0, @@ -27,18 +40,18 @@ def test_image_calculations(): 453842.6] assert np.allclose(res["JF16T03V01_intensity"], intensity) -def test_joblib_memory(): - """ We need joblib for fast caching of intermediate results in all cases. So we check - if the basic function caching to disk works. - """ - def calc_example(x): - return x**2 - - calc_cached = cristallina.analysis.memory.cache(calc_example) - - assert calc_cached(8) == 64 - assert calc_cached.check_call_in_cache(8) == True +@unittest.mock.patch("jungfrau_utils.file_adapter.locate_gain_file", lambda path, **kwargs: "tests/data/gains.h5") +@unittest.mock.patch("jungfrau_utils.file_adapter.locate_pedestal_file", lambda path, **kwargs: "tests/data/JF16T03V01.res.h5") +def test_roi_calculation(): + roi = cristallina.utils.ROI(left=575, right=750, top=750, bottom=600) + + cutouts = cristallina.analysis.perform_image_roi_crop(['tests/data/p20841/raw/run0205/data/acq0001.*.h5'], roi=roi) + + sum_roi, max_roi, min_roi = np.sum(cutouts[11]), np.max(cutouts[11]), np.min(cutouts[11]) + # these values are only correct when using the specific gain and pedestal files included in the test data + # they do not correspond to the gain and pedestal files used in the actual analysis + assert np.allclose([3119071.8, 22381.547, -0.9425874 ], [sum_roi, max_roi, min_roi]) def test_minimal_2d_gaussian():