From 7f0e6f10262b6b98ebc9f5211364aaa3b4a348de Mon Sep 17 00:00:00 2001
From: Artur Glavic <artur.glavic@psi.ch>
Date: Fri, 27 Feb 2026 10:08:49 +0100
Subject: [PATCH] Allow option to filter pulses where a switch occured,
 implement updating of headerin information from filtered log-values for
 temp., filed and polarization, don't report empty sample environment values

---
 eos/const.py                | 16 ++++++-----
 eos/event_analysis.py       | 40 +++++++--------------------
 eos/event_handling.py       | 19 ++++++++++++-
 eos/file_reader.py          | 55 ++++++++++++++++++++++++++++---------
 eos/helpers.py              | 45 +++++++++++++++++++++++-------
 tests/test_full_analysis.py | 19 ++++++++++---
 6 files changed, 129 insertions(+), 65 deletions(-)

diff --git a/eos/const.py b/eos/const.py
index b7dc7ce..b5c5fb1 100644
--- a/eos/const.py
+++ b/eos/const.py
@@ -1,7 +1,9 @@
-"""
-Constants used in data reduction.
-"""
-
-hdm = 6.626176e-34/1.674928e-27  # h / m
-lamdaCut = 2.5  # Aa
-lamdaMax = 15.0 # Aa
\ No newline at end of file
+"""
+Constants used in data reduction.
+"""
+
+hdm = 6.626176e-34/1.674928e-27  # h / m
+lamdaCut = 2.5  # Aa
+lamdaMax = 15.0 # Aa
+
+polarizationConfigs = ['unpolarized', 'unpolarized', 'po', 'mo', 'op', 'pp', 'mp', 'om', 'pm', 'mm']
diff --git a/eos/event_analysis.py b/eos/event_analysis.py
index 0552572..8b65c8d 100644
--- a/eos/event_analysis.py
+++ b/eos/event_analysis.py
@@ -9,7 +9,7 @@ from typing import Tuple
 
 from . import const
 from .event_data_types import EventDataAction, EventDatasetProtocol, append_fields, EVENT_BITMASKS
-from .helpers import filter_project_x, merge_frames, extract_walltime
+from .helpers import filter_project_x, merge_frames, extract_walltime, add_log_to_pulses
 from .instrument import Detector
 from .options import IncidentAngle
 from .header import Header
@@ -126,11 +126,14 @@ class FilterQzRange(EventDataAction):
 
         d.events.mask += EVENT_BITMASKS["qRange"]*((self.qzRange[0]>d.events.qz) | (d.events.qz>self.qzRange[1]))
 
-
 class FilterByLog(EventDataAction):
 
-    def __init__(self, filter_string):
+    def __init__(self, filter_string, remove_sitchpulse=False):
+        if filter_string.startswith('!'):
+            filter_string = filter_string[1:]
+            remove_sitchpulse = True
         self.filter_string = filter_string
+        self.remove_sitchpulse = remove_sitchpulse
 
     def perform_action(self, dataset: EventDatasetProtocol) -> None:
         filter_variable = None
@@ -150,34 +153,11 @@ class FilterByLog(EventDataAction):
             EVENT_BITMASKS[filter_variable] = max(EVENT_BITMASKS.values())*2
         if not filter_variable in dataset.data.pulses.dtype.names:
             # interpolate the parameter values for all existing pulses
-            self.add_log_to_pulses(filter_variable, dataset)
+            add_log_to_pulses(filter_variable, dataset)
         fltr_pulses = eval(self.filter_string, {filter_variable: dataset.data.pulses[filter_variable]})
+        if self.remove_sitchpulse:
+            switched = fltr_pulses[:-1] & ~fltr_pulses[1:]
+            fltr_pulses[:-1] &= ~switched
         goodTimeS = dataset.data.pulses.time[fltr_pulses]
         filter_e = np.logical_not(np.isin(dataset.data.events.wallTime, goodTimeS))
         dataset.data.events.mask += EVENT_BITMASKS[filter_variable]*filter_e
-
-
-    def add_log_to_pulses(self, key, dataset: EventDatasetProtocol):
-        """
-        Add a log value for each pulse to the pulses array.
-        """
-        # TODO: perform some interpolation for the pulse where a change occured
-        pulses = dataset.data.pulses
-        log_data = dataset.data.device_logs[key]
-        if log_data.time[0]>0:
-            logTimeS = np.hstack([[0], log_data.time, [pulses.time[-1]+1]])
-            logValues = np.hstack([[log_data.value[0]], log_data.value])
-        else:
-            logTimeS = np.hstack([log_data.time, [pulses.time[-1]+1]])
-            logValues = log_data.value
-        pulseLogS = np.zeros(pulses.time.shape[0], dtype=log_data.value.dtype)
-        j = 0
-        for i, ti in enumerate(pulses.time):
-            # find the last current item that was before this pulse
-            while ti>=logTimeS[j+1]:
-                j += 1
-            pulseLogS[i] = logValues[j]
-        pulses = append_fields(pulses, [(key, pulseLogS.dtype)])
-        pulses[key] = pulseLogS
-        dataset.data.pulses = pulses
-
diff --git a/eos/event_handling.py b/eos/event_handling.py
index 7a610fe..5bd78b9 100644
--- a/eos/event_handling.py
+++ b/eos/event_handling.py
@@ -168,7 +168,7 @@ class ApplyMask(EventDataAction):
         # TODO: why is this action time consuming?
         d = dataset.data
         pre_filter = d.events.shape[0]
-        if logging.getLogger().level == logging.DEBUG:
+        if logging.getLogger().level <= logging.DEBUG:
             # only run this calculation if debug level is actually active
             filtered_by_mask = {}
             for key, value in EVENT_BITMASKS.items():
@@ -183,3 +183,20 @@ class ApplyMask(EventDataAction):
             d.events = d.events[fltr]
         post_filter = d.events.shape[0]
         logging.info(f'      number of events: total = {pre_filter:7d}, filtered = {post_filter:7d}')
+        if d.device_logs == {} or not hasattr(dataset, 'update_info_from_logs'):
+            return
+        # filter pulses and logs to allow update of header information
+        from .helpers import add_log_to_pulses
+        times = np.unique(d.events.wallTime)
+        # make sure all log variables are associated with pulses
+        for key, log in d.device_logs.items():
+            if not key in d.pulses.dtype.names:
+                # interpolate the parameter values for all existing pulses
+                add_log_to_pulses(key, dataset)
+        # remove all pulses that have no more events
+        d.pulses = d.pulses[np.isin(d.pulses.time, times)]
+        for key, log in d.device_logs.items():
+            d.device_logs[key] = np.recarray(d.pulses.shape, dtype = log.dtype)
+            d.device_logs[key].time = d.pulses.time
+            d.device_logs[key].value = d.pulses[key]
+        dataset.update_info_from_logs()
\ No newline at end of file
diff --git a/eos/file_reader.py b/eos/file_reader.py
index 0293a3a..f20a4c4 100644
--- a/eos/file_reader.py
+++ b/eos/file_reader.py
@@ -49,8 +49,8 @@ class AmorHeader:
             chopper_separation=('entry1/Amor/chopper/pair_separation', float),
             detector_distance=('entry1/Amor/detector/transformation/distance', float),
             chopper_distance=('entry1/Amor/chopper/distance', float),
-            sample_temperature=('entry1/sample/temperature', float, 'ignore'),
-            sample_magnetic_field=('entry1/sample/magnetic_field', float, 'ignore'),
+            sample_temperature=('entry1/sample/temperature', float),
+            sample_magnetic_field=('entry1/sample/magnetic_field', float),
 
             mu=('entry1/Amor/instrument_control_parameters/mu', float, 'mu'),
             nu=('entry1/Amor/instrument_control_parameters/nu', float, 'nu'),
@@ -96,7 +96,6 @@ class AmorHeader:
         try:
             hdfgrp = self.hdf[hdf_path]
             if hdfgrp.attrs.get('NX_class', None) == 'NXlog':
-                self._log_keys.append(key)
                 # use the last value that was recoreded before the count started
                 time_column = hdfgrp['time'][:]
                 try:
@@ -104,9 +103,12 @@ class AmorHeader:
                 except IndexError:
                     start_index = 0
                 if hdfgrp['value'].ndim==1:
-                    return dtype(hdfgrp['value'][start_index])
+                    output = dtype(hdfgrp['value'][start_index])
                 else:
-                    return dtype(hdfgrp['value'][start_index, 0])
+                    output = dtype(hdfgrp['value'][start_index, 0])
+                # make sure key is only appended if no exception was raised
+                self._log_keys.append(key)
+                return output
             elif dtype is str:
                 return self.read_string(hdf_path)
             else:
@@ -193,11 +195,17 @@ class AmorHeader:
                 )
         # while event times are not evaluated, use average_value reported in file for SEE
         if self.hdf['entry1/sample'].get('temperature', None) is not None:
-            sample_temperature = self.rv('sample_temperature')
-            self.sample.sample_parameters['temperature'] = fileio.Value(sample_temperature, unit='K')
+            try:
+                sample_temperature = self.rv('sample_temperature')
+            except IndexError: pass
+            else:
+                self.sample.sample_parameters['temperature'] = fileio.Value(sample_temperature, unit='K')
         if self.hdf['entry1/sample'].get('magnetic_field', None) is not None:
-            sample_magnetic_field = self.rv('sample_magnetic_field')
-            self.sample.sample_parameters['magnetic_field'] = fileio.Value(sample_magnetic_field, unit='T')
+            try:
+                sample_magnetic_field = self.rv('sample_magnetic_field')
+            except IndexError: pass
+            else:
+                self.sample.sample_parameters['magnetic_field'] = fileio.Value(sample_magnetic_field, unit='T')
 
     def read_instrument_configuration(self):
         chopperSeparation = self.rv('chopper_separation')
@@ -205,8 +213,6 @@ class AmorHeader:
         chopperDistance = self.rv('chopper_distance')
         chopperDetectorDistance = detectorDistance - chopperDistance
 
-        polarizationConfigs = ['unpolarized', 'unpolarized', 'po', 'mo', 'op', 'pp', 'mp', 'om', 'pm', 'mm']
-
         mu = self.rv('mu')
         nu = self.rv('nu')
         kap = self.rv('kap')
@@ -235,7 +241,7 @@ class AmorHeader:
         self.timing = AmorTiming(ch1TriggerPhase, ch2TriggerPhase, chopperSpeed, chopperPhase, tau)
 
         polarizationConfigLabel = self.rv('polarization_config_label')
-        polarizationConfig = fileio.Polarization(polarizationConfigs[polarizationConfigLabel])
+        polarizationConfig = fileio.Polarization(const.polarizationConfigs[polarizationConfigLabel])
         logging.debug(f'      polarization configuration: {polarizationConfig} (index {polarizationConfigLabel})')
 
 
@@ -395,7 +401,7 @@ class AmorEventData(AmorHeader):
             hdf_path, dtype, *_ = self.hdf_paths[key]
             hdfgroup = self.hdf[hdf_path]
             shape = hdfgroup['time'].shape
-            data = np.recarray(shape, dtype=LOG_TYPE)
+            data = np.recarray(shape, dtype=np.dtype([('value', self.hdf_paths[key][1]), ('time', np.int64)]))
             data.time = hdfgroup['time'][:]
             if len(hdfgroup['value'].shape)==1:
                 data.value = hdfgroup['value'][:]
@@ -403,6 +409,29 @@ class AmorEventData(AmorHeader):
                 data.value = hdfgroup['value'][:, 0]
             self.data.device_logs[key] = data
 
+    def update_info_from_logs(self):
+        RELEVANT_ITEMS = ['sample_temperature', 'sample_magnetic_field', 'polarization_config_label']
+        for key, log in self.data.device_logs.items():
+            if key not in RELEVANT_ITEMS:
+                continue
+            if log.value.dtype in [np.int8, np.int16, np.int32, np.int64]:
+                # for integer items (flags) report the most common one
+                value = np.bincount(log.value).argmax()
+                if logging.getLogger().getEffectiveLevel() <= logging.DEBUG \
+                        and np.unique(log.value).shape[0]>1:
+                    logging.debug(f'        filtered values for {key} not unique, '
+                                  f'has {np.unique(log.value).shape[0]} values')
+            else:
+                value = log.value.mean()
+            if key == 'polarization_config_label':
+                self.instrument_settings.polarization = fileio.Polarization(const.polarizationConfigs[value])
+            elif key == 'sample_temperature':
+                self.sample.sample_parameters['temperature'].magnitue = value
+            elif key == 'sample_magnetic_field':
+                self.sample.sample_parameters['magnetic_field'].magnitue = value
+
+
+
     def read_chopper_trigger_stream(self, packets):
         chopper1TriggerTime = np.array(self.hdf['entry1/Amor/chopper/ch2_trigger/event_time_zero'][:-2], dtype=np.int64)
         #self.chopper2TriggerTime = self.chopper1TriggerTime + np.array(self.hdf['entry1/Amor/chopper/ch2_trigger/event_time'][:-2], dtype=np.int64)
diff --git a/eos/helpers.py b/eos/helpers.py
index ee3486b..a93e280 100644
--- a/eos/helpers.py
+++ b/eos/helpers.py
@@ -1,10 +1,35 @@
-"""
-Helper functions used during calculations. Uses numba enhanced functions if available, otherwise numpy based
-fallback is imported.
-"""
-
-try:
-    from .helpers_numba import merge_frames, extract_walltime, filter_project_x, calculate_derived_properties_focussing
-except ImportError:
-    from .helpers_fallback import merge_frames, extract_walltime, filter_project_x, calculate_derived_properties_focussing
-
+"""
+Helper functions used during calculations. Uses numba enhanced functions if available, otherwise numpy based
+fallback is imported.
+"""
+import numpy as np
+from .event_data_types import EventDatasetProtocol, append_fields
+
+try:
+    from .helpers_numba import merge_frames, extract_walltime, filter_project_x, calculate_derived_properties_focussing
+except ImportError:
+    from .helpers_fallback import merge_frames, extract_walltime, filter_project_x, calculate_derived_properties_focussing
+
+def add_log_to_pulses(key, dataset: EventDatasetProtocol):
+    """
+    Add a log value for each pulse to the pulses array.
+    """
+    pulses = dataset.data.pulses
+    log_data = dataset.data.device_logs[key]
+    if log_data.time[0]>0:
+        logTimeS = np.hstack([[0], log_data.time, [pulses.time[-1]+1]])
+        logValues = np.hstack([[log_data.value[0]], log_data.value])
+    else:
+        logTimeS = np.hstack([log_data.time, [pulses.time[-1]+1]])
+        logValues = log_data.value
+    pulseLogS = np.zeros(pulses.time.shape[0], dtype=log_data.value.dtype)
+    j = 0
+    for i, ti in enumerate(pulses.time):
+        # find the last current item that was before this pulse
+        while ti>=logTimeS[j+1]:
+            j += 1
+        pulseLogS[i] = logValues[j]
+    pulses = append_fields(pulses, [(key, pulseLogS.dtype)])
+    pulses[key] = pulseLogS
+    dataset.data.pulses = pulses
+
diff --git a/tests/test_full_analysis.py b/tests/test_full_analysis.py
index ac6177b..3e23672 100644
--- a/tests/test_full_analysis.py
+++ b/tests/test_full_analysis.py
@@ -38,10 +38,10 @@ class FullAmorTest(TestCase):
     def tearDown(self):
         self.pr.disable()
         for fi in ['test_results/test.Rqz.ort', 'test_results/5952.norm']:
-            try:
-                os.unlink(fi)
-            except FileNotFoundError:
-                pass
+           try:
+               os.unlink(fi)
+           except FileNotFoundError:
+               pass
 
     def test_time_slicing(self):
         experiment_config = options.ExperimentConfig(
@@ -121,9 +121,20 @@ class FullAmorTest(TestCase):
         reducer = reduction_reflectivity.ReflectivityReduction(config)
         reducer.reduce()
         espin_up = reducer.dataset.data.events.shape[0]
+
         reduction_config.logfilter = ['polarization_config_label==3']
+        output_config.append = True
         reducer = reduction_reflectivity.ReflectivityReduction(config)
         reducer.reduce()
         espin_down = reducer.dataset.data.events.shape[0]
         # measurement should have about 2x as many counts in spin_down
         self.assertAlmostEqual(espin_down/espin_up, 2., 2)
+
+        # perform the same filter but remove pulses during which the switch occured
+        reduction_config.logfilter = ['!polarization_config_label==3']
+        output_config.append = True
+        reducer = reduction_reflectivity.ReflectivityReduction(config)
+        reducer.reduce()
+        espin_down2 = reducer.dataset.data.events.shape[0]
+        # measurement should have about 2x as many counts in spin_down
+        self.assertLess(espin_down2, espin_down)