added ROI calculation including tests

This commit is contained in:
2023-11-14 16:20:52 +01:00
parent 4d5263e620
commit be78d68819
2 changed files with 102 additions and 17 deletions

View File

@@ -1,5 +1,6 @@
import re
from collections import defaultdict
from collections import defaultdict, deque
from pathlib import Path
from typing import Optional
import numpy as np
@@ -10,7 +11,7 @@ import lmfit
from sfdata import SFDataFiles, sfdatafile, SFScanInfo
import joblib
from joblib import Parallel, delayed, Memory
from joblib import Memory
from . import utils
from .utils import ROI
@@ -34,7 +35,15 @@ def setup_cachedirs(pgroup=None, cachedir=None):
else:
parts = re.split(r"(\d.*)", pgroup) # ['p', '2343', '']
pgroup_no = parts[-2]
cachedir = f"/das/work/units/cristallina/p{pgroup_no}/cachedir"
candidates = [f"/das/work/units/cristallina/p{pgroup_no}",
f"/sf/cristallina/data/p{pgroup_no}/res",]
for cache_parent_dir in candidates:
if Path(cache_parent_dir).exists():
cachedir = Path(cache_parent_dir) / "cachedir"
break
except KeyError as e:
print(e)
cachedir = "/das/work/units/cristallina/p19739/cachedir"
@@ -47,7 +56,6 @@ def setup_cachedirs(pgroup=None, cachedir=None):
return memory
memory = None
memory = setup_cachedirs()
@@ -222,7 +230,7 @@ def perform_image_stack_sum(
if roi is None:
im_ROI = im[:]
else:
im_ROI = im[:, roi.rows, roi.cols]
im_ROI = im[roi.rows, roi.cols]
summed = np.zeros(im_ROI[0].shape)
@@ -249,6 +257,70 @@ def perform_image_stack_sum(
return summed
@memory.cache(ignore=["batch_size"]) # we ignore batch_size for caching purposes
def perform_image_roi_crop(
filesets,
channel="JF16T03V01",
alignment_channels=None,
batch_size=10,
roi: ROI = None,
preview=False,
lower_cutoff=None,
upper_cutoff=np.inf,
):
"""
Cuts out a region of interest (ROI) for an image channel
from a fileset (e.g. "run0352/data/acq0001.*.h5" or step.fnames from a SFScanInfo object).
Drops missing data from output and allows alignment,
i.e. reducing only to a common subset with other channels.
Calculations are performed in batches to reduce maximum memory requirements.
Lower- and upper cutoff allow to threshold the data.
Preview only applies calculation to first batch and returns.
Returns: an 1D array (along the pulses recorded without missing) of 2D images
TODO: should we create a complete channel here instead of returning `raw` data?
"""
with SFDataFiles(*filesets) as data:
if alignment_channels is not None:
channels = [channel] + [ch for ch in alignment_channels]
else:
channels = [channel]
subset = data[channels]
subset.drop_missing()
Images = subset[channel]
rois_within_batch = list()
for image_slice in Images.in_batches(batch_size):
index_slice, im = image_slice
if roi is None:
im_ROI = im[:]
else:
im_ROI = im[:, roi.rows, roi.cols]
if lower_cutoff is not None:
im_ROI = np.clip(im_ROI, lower_cutoff, upper_cutoff)
rois_within_batch.extend(im_ROI)
# only return first batch
if preview:
break
return np.array(rois_within_batch)
def fit_2d_gaussian(image, roi: Optional[ROI] = None, plot=False):
"""
2D Gaussian fit using LMFit for a given image and an optional region of interest.

View File

@@ -3,16 +3,29 @@ import numpy as np
import unittest.mock
import cristallina.analysis
import cristallina.utils
__author__ = "Alexander Steppke"
def test_joblib_memory():
""" We need joblib for fast caching of intermediate results in all cases. So we check
if the basic function caching to disk works.
"""
def calc_example(x):
return x**2
calc_cached = cristallina.analysis.memory.cache(calc_example)
assert calc_cached(8) == 64
assert calc_cached.check_call_in_cache(8) == True
@unittest.mock.patch("jungfrau_utils.file_adapter.locate_gain_file", lambda path, **kwargs: "tests/data/gains.h5")
@unittest.mock.patch("jungfrau_utils.file_adapter.locate_pedestal_file", lambda path, **kwargs: "tests/data/JF16T03V01.res.h5")
def test_image_calculations():
res = cristallina.analysis.perform_image_calculations(['tests/data/p20841/raw/run0185/data/acq0001.*.h5'])
# these values are only correct when using the specific gain and pedestal files included in the test data
# they do not correspond to the gain and pedestal files used in the actual analysis (otherwise we need to include several here as test data)
# they do not correspond to the gain and pedestal files used in the actual analysis
intensity = [1712858.6,
693994.06,
1766390.0,
@@ -27,18 +40,18 @@ def test_image_calculations():
453842.6]
assert np.allclose(res["JF16T03V01_intensity"], intensity)
def test_joblib_memory():
""" We need joblib for fast caching of intermediate results in all cases. So we check
if the basic function caching to disk works.
"""
def calc_example(x):
return x**2
calc_cached = cristallina.analysis.memory.cache(calc_example)
assert calc_cached(8) == 64
assert calc_cached.check_call_in_cache(8) == True
@unittest.mock.patch("jungfrau_utils.file_adapter.locate_gain_file", lambda path, **kwargs: "tests/data/gains.h5")
@unittest.mock.patch("jungfrau_utils.file_adapter.locate_pedestal_file", lambda path, **kwargs: "tests/data/JF16T03V01.res.h5")
def test_roi_calculation():
roi = cristallina.utils.ROI(left=575, right=750, top=750, bottom=600)
cutouts = cristallina.analysis.perform_image_roi_crop(['tests/data/p20841/raw/run0205/data/acq0001.*.h5'], roi=roi)
sum_roi, max_roi, min_roi = np.sum(cutouts[11]), np.max(cutouts[11]), np.min(cutouts[11])
# these values are only correct when using the specific gain and pedestal files included in the test data
# they do not correspond to the gain and pedestal files used in the actual analysis
assert np.allclose([3119071.8, 22381.547, -0.9425874 ], [sum_roi, max_roi, min_roi])
def test_minimal_2d_gaussian():