Separate PathResolver and Normalisation, prepare different event treatment for normalisation and data files

This commit is contained in:
2025-10-02 18:03:19 +02:00
parent 2a89e58698
commit cb4415ad3d
6 changed files with 172 additions and 126 deletions

View File

@@ -12,6 +12,16 @@ from .options import ExperimentConfig, MonitorType
from .event_data_types import EventDatasetProtocol, EventDataAction, append_fields, EVENT_BITMASKS
from .helpers import merge_frames, extract_walltime
class ApplyPhaseOffset(EventDataAction):
    """Overwrite the chopper 1 trigger phase of a dataset with a configured offset."""

    def __init__(self, chopperPhaseOffset: float):
        # May be None/0 when no offset is configured; perform_action then
        # leaves the phase from the data file untouched (matches the old
        # `if config.chopperPhaseOffset:` guard this action replaced).
        self.chopperPhaseOffset = chopperPhaseOffset

    def perform_action(self, dataset: EventDatasetProtocol) -> None:
        """Replace dataset.timing.ch1TriggerPhase by the configured offset, if any."""
        if not self.chopperPhaseOffset:
            # nothing configured -> keep the value recorded in the file
            return
        logging.debug(
            f' replaced ch1TriggerPhase = {dataset.timing.ch1TriggerPhase} '
            f'with {self.chopperPhaseOffset}')
        dataset.timing.ch1TriggerPhase = self.chopperPhaseOffset
class ApplyParameterOverwrites(EventDataAction):
def __init__(self, config: ExperimentConfig):
self.config=config
@@ -26,11 +36,6 @@ class ApplyParameterOverwrites(EventDataAction):
if self.config.nu:
logging.debug(f' replaced nu = {dataset.geometry.nu} with {self.config.nu}')
dataset.geometry.nu = self.config.nu
if self.config.chopperPhaseOffset:
logging.debug(
f' replaced ch1TriggerPhase = {dataset.timing.ch1TriggerPhase} '
f'with {self.config.chopperPhaseOffset}')
dataset.timing.ch1TriggerPhase = self.config.chopperPhaseOffset
logging.info(f' mu = {dataset.geometry.mu:6.3f}, '
f'nu = {dataset.geometry.nu:6.3f}, '
f'kap = {dataset.geometry.kap:6.3f}, '

View File

@@ -18,6 +18,7 @@ from orsopy.fileio.model_language import SampleModel
from . import const, event_handling as eh, event_analysis as ea
from .header import Header
from .path_handling import PathResolver
from .event_data_types import AmorGeometry, AmorTiming, AmorEventStream, PACKET_TYPE, EVENT_TYPE, PULSE_TYPE, PC_TYPE
from .options import ExperimentConfig, IncidentAngle, ReaderConfig, MonitorType
@@ -37,55 +38,13 @@ if platform.node().startswith('amor'):
else:
NICOS_CACHE_DIR = None
class PathResolver:
    """Translate a short-notation string of file numbers into full paths
    of the corresponding amor NeXus data files."""

    def __init__(self, year, rawPath):
        self.year = year
        self.rawPath = rawPath

    def resolve(self, short_notation):
        """Return the resolved file path for every number in *short_notation*."""
        return [self.get_path(number) for number in self.expand_file_list(short_notation)]

    def expand_file_list(self, short_notation):
        """Evaluate string entry for file number lists"""
        numbers = []
        for token in short_notation.split(','):
            if '-' not in token:
                numbers.append(int(token))
                continue
            # range notation: "<start>-<stop>" with an optional ":<step>"
            start = int(token.split('-', 1)[0])
            tail = token.rsplit('-', 1)[1]
            if ':' in token:
                stop = tail.split(':', 1)[0]
                step = token.split(':', 1)[1]
                numbers.extend(range(start, int(stop) + 1, int(step)))
            else:
                numbers.extend(range(start, int(tail) + 1))
        return sorted(numbers)

    def get_path(self, number):
        """Locate the file for *number* in rawPath, falling back to the PSI archive."""
        fileName = f'amor{self.year}n{number:06d}.hdf'
        for rawd in self.rawPath:
            candidate = os.path.join(rawd, fileName)
            if os.path.exists(candidate):
                return candidate
        archive = f'/afs/psi.ch/project/sinqdata/{self.year}/amor/{int(number/1000)}'
        if os.path.exists(os.path.join(archive, fileName)):
            return os.path.join(archive, fileName)
        sys.exit(f'# ERROR: the file {fileName} can not be found in {self.rawPath}')
class AmorEventData:
"""
Read one amor NeXus datafile and extract relevant header information.
Implements EventDatasetProtocol
"""
fileName: str
file_list: List[str]
first_index: int
last_index: int = -1
EOF: bool = False
@@ -102,13 +61,13 @@ class AmorEventData:
def __init__(self, fileName:Union[str, h5py.File, BinaryIO], first_index:int=0, max_events:int=100_000_000):
if type(fileName) is str:
self.fileName = fileName
self.file_list = [fileName]
self.hdf = h5py.File(fileName, 'r', swmr=True)
elif type(fileName) is h5py.File:
self.fileName = fileName.filename
self.file_list = [fileName.filename]
self.hdf = fileName
else:
self.fileName = repr(fileName)
self.file_list = [repr(fileName)]
self.hdf = h5py.File(fileName, 'r')
self.first_index = first_index
self.max_events = max_events
@@ -257,7 +216,7 @@ class AmorEventData:
"""
Add dataset information into an existing header.
"""
logging.info(f' meta data from: {self.fileName}')
logging.info(f' meta data from: {self.file_list[0]}')
header.owner = self.owner
header.experiment = self.experiment
header.sample = self.sample
@@ -300,6 +259,10 @@ class AmorEventData:
current = self.read_proton_current_stream()
self.data = AmorEventStream(events, packets, pulses, current)
if self.first_index>0 and not self.EOF:
# label the file name if not all events were used
self.file_list[0] += f'[{self.first_index}:{self.last_index}]'
def read_chopper_trigger_stream(self):
chopper1TriggerTime = np.array(self.hdf['entry1/Amor/chopper/ch2_trigger/event_time_zero'][:-2], dtype=np.int64)
#self.chopper2TriggerTime = self.chopper1TriggerTime + np.array(self.hdf['entry1/Amor/chopper/ch2_trigger/event_time'][:-2], dtype=np.int64)
@@ -351,10 +314,10 @@ class AmorEventData:
self.data = AmorEventStream(new_events, new_packets, new_pulses, new_proton_current)
# Indicate that this is a modified dataset, basically counts number of appends as negative indices
self.last_index = min(self.last_index-1, -1)
self.file_list += other.file_list
def __repr__(self):
output = (f"AmorEventData({self.fileName!r}) # {self.data.events.shape[0]} events, "
output = (f"AmorEventData({self.file_list!r}) # {self.data.events.shape[0]} events, "
f"{self.data.pulses.shape[0]} pulses")
return output
@@ -405,7 +368,8 @@ class AmorData:
# setup all actions performed in origin AmorData, time correction requires first dataset start time
# The order of these corrections matter as some rely on parameters modified before
self.time_correction = eh.CorrectSeriesTime(0)
self.event_actions = eh.ApplyParameterOverwrites(self.config) # some actions use instrument parameters, change before that
self.event_actions = eh.ApplyPhaseOffset(self.config.chopperPhaseOffset)
self.event_actions |= eh.ApplyParameterOverwrites(self.config) # some actions use instrument parameters, change before that
self.event_actions |= eh.CorrectChopperPhase()
self.event_actions |= eh.ExtractWalltime()
self.event_actions |= self.time_correction

74
libeos/normalisation.py Normal file
View File

@@ -0,0 +1,74 @@
"""
Defines how to normalize a focusing reflectometry dataset by a reference measurement.
"""
import logging
import os
import numpy as np
from typing import List
from .event_data_types import EventDatasetProtocol
from .header import Header
from .options import NormalisationMethod
from .instrument import Grid
class Normalisation:
    """
    Normalisation map built from a reference measurement (supermirror or
    direct beam), used to normalise focusing reflectometry datasets.
    """
    # NOTE: these are type annotations (the original `normFileList = List[str]`
    # accidentally *assigned* the typing object as a class attribute).
    normFileList: List[str]
    normAngle: float
    normMonitor: float
    norm_lz: np.ndarray

    def __init__(self, reference: EventDatasetProtocol, normalisationMethod: NormalisationMethod, grid: Grid):
        """Build the lambda-z normalisation matrix from *reference* events."""
        self.normAngle = reference.geometry.nu - reference.geometry.mu
        lamda_e = reference.data.events.lamda
        detZ_e = reference.data.events.detZ
        self.normMonitor = np.sum(reference.data.pulses.monitor)
        norm_lz, _, _ = np.histogram2d(lamda_e, detZ_e, bins=(grid.lamda(), grid.z()))
        # mask bins with too few counts to avoid dividing by noise later on
        norm_lz = np.where(norm_lz > 2, norm_lz, np.nan)
        if normalisationMethod == NormalisationMethod.direct_beam:
            self.norm_lz = np.flip(norm_lz, 1)
        else:
            # correct for reference supermirror (sm) reflectivity
            lamda_l = grid.lamda()
            theta_z = self.normAngle + reference.geometry.delta_z
            lamda_lz = (grid.lz().T*lamda_l[:-1]).T
            theta_lz = grid.lz()*theta_z
            qz_lz = 4.0*np.pi*np.sin(np.deg2rad(theta_lz))/lamda_lz
            # TODO: introduce variable for `m` and probably for the slope
            Rsm_lz = np.ones(np.shape(qz_lz))
            Rsm_lz = np.where(qz_lz > 0.0217, 1-(qz_lz-0.0217)*(0.0625/0.0217), Rsm_lz)
            Rsm_lz = np.where(qz_lz > 0.0217*5, np.nan, Rsm_lz)
            self.norm_lz = norm_lz/Rsm_lz
        self.normFileList = [os.path.basename(entry) for entry in reference.file_list]

    @classmethod
    def from_file(cls, filename) -> 'Normalisation':
        """Restore a previously saved normalisation map (counterpart of `safe`)."""
        logging.warning(f'normalisation matrix: found and using {filename}')
        self = super().__new__(cls)
        with open(filename, 'rb') as fh:
            # allow_pickle is needed for the object/string arrays; only ever
            # load files written by `safe` (trusted, self-produced data).
            self.normFileList = np.load(fh, allow_pickle=True)
            self.normAngle = np.load(fh, allow_pickle=True)
            self.norm_lz = np.load(fh, allow_pickle=True)
            self.normMonitor = np.load(fh, allow_pickle=True)
        return self

    @classmethod
    def unity(cls, grid: Grid) -> 'Normalisation':
        """Return a neutral normalisation (all ones) for un-normalised reduction."""
        logging.warning('normalisation is unity')
        self = super().__new__(cls)
        self.norm_lz = grid.lz()
        self.normFileList = []
        self.normAngle = 1.
        self.normMonitor = 1.
        return self

    def safe(self, filename):
        """Write the normalisation map to *filename* (readable by `from_file`)."""
        with open(filename, 'wb') as fh:
            np.save(fh, np.array(self.normFileList), allow_pickle=False)
            np.save(fh, np.array(self.normAngle), allow_pickle=False)
            np.save(fh, self.norm_lz, allow_pickle=False)
            np.save(fh, self.normMonitor, allow_pickle=False)

    # correctly spelled alias; `safe` is kept for backward compatibility
    save = safe

    def update_header(self, header: Header):
        """Record the reference file names in the reduction header."""
        header.measurement_additional_files = self.normFileList

49
libeos/path_handling.py Normal file
View File

@@ -0,0 +1,49 @@
"""
Defines how file paths are resolved from short_notation, year and number to filename.
"""
import os
from typing import List
class PathResolver:
    """Resolve short-notation file number lists to full amor NeXus file paths."""

    def __init__(self, year: int, rawPath: List[str]):
        self.year = year
        self.rawPath = rawPath

    def resolve(self, short_notation: str) -> List[str]:
        """Return one resolved file path per number encoded in *short_notation*."""
        return list(map(self.get_path, self.expand_file_list(short_notation)))

    @staticmethod
    def expand_file_list(short_notation) -> List[int]:
        """Evaluate string entry for file number lists.

        Supports comma-separated numbers, ranges ("10-15") and stepped
        ranges ("10-20:2"); returns the numbers sorted ascending.
        """
        file_list: List[int] = []
        for item in short_notation.split(','):
            if '-' in item:
                start = int(item.split('-', 1)[0])
                tail = item.rsplit('-', 1)[1]
                if ':' in item:
                    stop = tail.split(':', 1)[0]
                    step = item.split(':', 1)[1]
                    file_list += range(start, int(stop) + 1, int(step))
                else:
                    file_list += range(start, int(tail) + 1)
            else:
                file_list.append(int(item))
        # sorted() already returns a fresh list; no extra list() needed
        return sorted(file_list)

    def get_path(self, number: int) -> str:
        """Return the full path of file *number*.

        Searches the configured rawPath directories first, then the PSI
        archive; raises FileNotFoundError when the file is found nowhere.
        """
        fileName = f'amor{self.year}n{number:06d}.hdf'
        for rawd in self.rawPath:
            candidate = os.path.join(rawd, fileName)
            if os.path.exists(candidate):
                return candidate
        # archive groups files in directories of 1000 (integer division)
        archive_dir = f'/afs/psi.ch/project/sinqdata/{self.year}/amor/{number // 1000}'
        if os.path.exists(os.path.join(archive_dir, fileName)):
            return os.path.join(archive_dir, fileName)
        raise FileNotFoundError(f'# ERROR: the file {fileName} can not be found in {self.rawPath}')

View File

@@ -7,8 +7,10 @@ from orsopy import fileio
from .file_reader import AmorData
from .header import Header
from .path_handling import PathResolver
from .options import EOSConfig, IncidentAngle, MonitorType, NormalisationMethod
from .instrument import Grid
from .normalisation import Normalisation
MONITOR_UNITS = {
MonitorType.neutron_monitor: 'cnts',
@@ -38,6 +40,8 @@ class AmorReduction:
# TODO: bad work-around, should make a better distinction of parameter usage
self.experiment_config.qzRange = self.reduction_config.qzRange
self.path_resolver = PathResolver(self.reader_config.year, self.reader_config.rawPath)
self.grid = Grid(self.reduction_config.qResolution, self.reduction_config.qzRange)
def reduce(self):
@@ -49,11 +53,7 @@ class AmorReduction:
if self.reduction_config.normalisationFileIdentifier:
self.create_normalisation_map(self.reduction_config.normalisationFileIdentifier[0])
else:
self.norm_lz = self.grid.lz()
self.normAngle = 1.
self.normMonitor = 1.
logging.warning('normalisation matrix: none requested')
self.norm = Normalisation.unity(self.grid)
# load R(q_z) curve to be subtracted:
if self.reduction_config.subtract:
@@ -97,7 +97,7 @@ class AmorReduction:
self.monitor = np.sum(self.file_reader.monitorPerPulse)
logging.warning(f' monitor = {self.monitor:8.2f} {MONITOR_UNITS[self.experiment_config.monitorType]}')
qz_lz, qx_lz, ref_lz, err_lz, res_lz, lamda_lz, theta_lz, int_lz, self.mask_lz = self.project_on_lz(
self.file_reader, self.norm_lz, self.normAngle, lamda_e, detZ_e)
self.file_reader, self.norm.norm_lz, self.norm.normAngle, lamda_e, detZ_e)
#if self.monitor>1 :
# ref_lz /= self.monitor
# err_lz /= self.monitor
@@ -116,7 +116,8 @@ class AmorReduction:
qz_lz *= -1
# projection on qz-grid
q_q, R_q, dR_q, dq_q = self.project_on_qz(qz_lz, ref_lz, err_lz, res_lz, self.norm_lz, self.mask_lz)
q_q, R_q, dR_q, dq_q = self.project_on_qz(qz_lz, ref_lz, err_lz, res_lz,
self.norm.norm_lz, self.mask_lz)
# The filtering is now done by restricting the qz-grid
#filter_q = np.where((self.experiment_config.qzRange[0]>q_q) & (q_q>self.experiment_config.qzRange[1]),
@@ -177,7 +178,7 @@ class AmorReduction:
lindex_lz.T,
tindex_lz.T,
int_lz.T,
self.norm_lz.T,
self.norm.norm_lz.T,
np.where(self.mask_lz, 1, 0).T,
qx_lz.T,
):
@@ -214,14 +215,14 @@ class AmorReduction:
logging.info(f' {ti:<4d} {time:6.0f} {self.monitor:7.2f} {MONITOR_UNITS[self.experiment_config.monitorType]}')
qz_lz, qx_lz, ref_lz, err_lz, res_lz, lamda_lz, theta_lz, int_lz, mask_lz = self.project_on_lz(
self.file_reader, self.norm_lz, self.normAngle, lamda_e, detZ_e)
self.file_reader, self.norm.norm_lz, self.norm.normAngle, lamda_e, detZ_e)
try:
ref_lz *= self.reduction_config.scale[i]
err_lz *= self.reduction_config.scale[i]
except IndexError:
ref_lz *= self.reduction_config.scale[-1]
err_lz *= self.reduction_config.scale[-1]
q_q, R_q, dR_q, dq_q = self.project_on_qz(qz_lz, ref_lz, err_lz, res_lz, self.norm_lz, mask_lz)
q_q, R_q, dR_q, dq_q = self.project_on_qz(qz_lz, ref_lz, err_lz, res_lz, self.norm.norm_lz, mask_lz)
filter_q = np.where((self.experiment_config.qzRange[0]<q_q) & (q_q<self.experiment_config.qzRange[1]),
True, False)
@@ -340,75 +341,28 @@ class AmorReduction:
return q_q, Sq_q, dS_q, fileName
def expand_file_list(self, short_notation):
"""Evaluate string entry for file number lists"""
#log().debug('Executing get_flist')
file_list=[]
for i in short_notation.split(','):
if '-' in i:
if ':' in i:
step = i.split(':', 1)[1]
file_list += range(int(i.split('-', 1)[0]), int((i.rsplit('-', 1)[1]).split(':', 1)[0])+1, int(step))
else:
step = 1
file_list += range(int(i.split('-', 1)[0]), int(i.split('-', 1)[1])+1, int(step))
else:
file_list += [int(i)]
return sorted(file_list)
def create_normalisation_map(self, short_notation):
outputPath = self.output_config.outputPath
normalisation_list = self.expand_file_list(short_notation)
normalisation_list = self.path_resolver.expand_file_list(short_notation)
name = str(normalisation_list[0])
for i in range(1, len(normalisation_list), 1):
name = f'{name}_{normalisation_list[i]}'
n_path = os.path.join(outputPath, f'{name}.norm')
for norm_i in normalisation_list[1:]:
name += f'_{norm_i}'
n_path = os.path.join(outputPath, f'{name}_{str(self.experiment_config.monitorType)}.norm')
if os.path.exists(n_path):
logging.warning(f'normalisation matrix: found and using {n_path}')
with open(n_path, 'rb') as fh:
self.normFileList = np.load(fh, allow_pickle=True)
self.normAngle = np.load(fh, allow_pickle=True)
self.norm_lz = np.load(fh, allow_pickle=True)
self.normMonitor = np.load(fh, allow_pickle=True)
for i, entry in enumerate(self.normFileList):
self.normFileList[i] = entry.split('/')[-1]
self.header.measurement_additional_files = self.normFileList
self.norm = Normalisation.from_file(n_path)
self.header.measurement_additional_files = self.norm.normFileList
else:
logging.warning(f'normalisation matrix: using the files {normalisation_list}')
fromHDF = AmorData(header=self.header,
reference = AmorData(header=self.header,
reader_config=self.reader_config,
config=self.experiment_config,
short_notation=short_notation, norm=True)
self.normAngle = fromHDF.nu - fromHDF.mu
lamda_e = fromHDF.lamda_e
detZ_e = fromHDF.detZ_e
self.normMonitor = np.sum(fromHDF.monitorPerPulse)
norm_lz, bins_l, bins_z = np.histogram2d(lamda_e, detZ_e, bins = (self.grid.lamda(), self.grid.z()))
norm_lz = np.where(norm_lz>2, norm_lz, np.nan)
if self.reduction_config.normalisationMethod == NormalisationMethod.direct_beam:
self.norm_lz = np.flip(norm_lz, 1)
else:
# correct for reference sm reflectivity
lamda_l = self.grid.lamda()
theta_z = self.normAngle + fromHDF.delta_z
lamda_lz = (self.grid.lz().T*lamda_l[:-1]).T
theta_lz = self.grid.lz()*theta_z
qz_lz = 4.0*np.pi * np.sin(np.deg2rad(theta_lz)) / lamda_lz
# TODO: introduce variable for `m` and probably for the slope
Rsm_lz = np.ones(np.shape(qz_lz))
Rsm_lz = np.where(qz_lz>0.0217, 1-(qz_lz-0.0217)*(0.0625/0.0217), Rsm_lz)
Rsm_lz = np.where(qz_lz>0.0217*5, np.nan, Rsm_lz)
self.norm_lz = norm_lz / Rsm_lz
short_notation=short_notation, norm=True).dataset
self.norm = Normalisation(reference, self.reduction_config.normalisationMethod, self.grid)
if reference.data.events.shape[0] > 1e6:
self.norm.safe(n_path)
if len(lamda_e) > 1e6:
with open(n_path, 'wb') as fh:
np.save(fh, np.array(fromHDF.file_list), allow_pickle=False)
np.save(fh, np.array(self.normAngle), allow_pickle=False)
np.save(fh, self.norm_lz, allow_pickle=False)
np.save(fh, self.normMonitor, allow_pickle=False)
self.normFileList = fromHDF.file_list
self.header.reduction.corrections.append('normalisation with \'additional files\'')
def project_on_lz(self, fromHDF, norm_lz, normAngle, lamda_e, detZ_e):
@@ -490,7 +444,7 @@ class AmorReduction:
logging.error('unknown normalisation method! Use [u]nder, [o]ver or [d]irect illumination')
ref_lz = (int_lz / norm_lz)
if self.monitor > 1e-6 :
ref_lz *= self.normMonitor / self.monitor
ref_lz *= self.norm.normMonitor / self.monitor
else:
logging.info(' low monitor -> nan output')
ref_lz *= np.nan

View File

@@ -37,7 +37,7 @@ class FullAmorTest(TestCase):
def tearDown(self):
self.pr.disable()
for fi in ['../test_results/test.Rqz.ort', '../test_results/5952.norm']:
for fi in ['../test_results/test.Rqz.ort', '../test_results/5952_a.norm']:
try:
os.unlink(os.path.join(self.reader_config.rawPath[0], fi))
except FileNotFoundError: