Add numba implementation of some event functions, yielding x2 speed improvement, also some small general speedups

2024-03-05 15:49:15 +01:00
parent 43313841d6
commit 146743673c
3 changed files with 104 additions and 15 deletions
--- a/libeos/file_reader.py
+++ b/libeos/file_reader.py
@@ -14,6 +14,11 @@ from .header import Header
 from .instrument import Detector
 from .options import ExperimentConfig, ReaderConfig

+try:
+    from . import nb_helpers
+except Exception:
+    nb_helpers = None
+

 class AmorData:
    """read meta-data and event streams from .hdf file(s), apply filters and conversions"""
@@ -184,9 +189,16 @@ class AmorData:
        self.wallTime_e = self.wallTime_e[self.mask_e]

    def calculate_derived_properties(self):
-        # lambda
-        self.lamda_e = (1.e13*self.tof_e*const.hdm)/(self.chopperDetectorDistance+self.detXdist_e)
        self.lamdaMax = const.lamdaCut+1.e13*self.tau*const.hdm/(self.chopperDetectorDistance+124.)
+        if nb_helpers and not self.config.offSpecular:
+            self.lamda_e, self.qz_e, self.mask_e = nb_helpers.calculate_derived_properties_focussing(
+                    self.tof_e, self.detXdist_e, self.delta_e, self.mask_e,
+                    self.config.lambdaRange[0], self.config.lambdaRange[1], self.nu, self.mu,
+                    self.chopperDetectorDistance, const.hdm
+                    )
+            return
+        # lambda
+        self.lamda_e = (1.e13*const.hdm)*self.tof_e/(self.chopperDetectorDistance+self.detXdist_e)
        self.mask_e = np.logical_and(self.mask_e, (self.config.lambdaRange[0]<=self.lamda_e) & (
                    self.lamda_e<=self.config.lambdaRange[1]))
        # alpha_f
@@ -194,24 +206,32 @@ class AmorData:
        # q_z
        if self.config.offSpecular:
            alphaI = self.kap+self.kad+self.mu
-            self.qz_e = 2*np.pi*(np.sin(np.deg2rad(alphaF_e))+np.sin(np.deg2rad(alphaI)))/self.lamda_e
-            self.qx_e = 2*np.pi*(np.cos(np.deg2rad(alphaF_e))-np.cos(np.deg2rad(alphaI)))/self.lamda_e
+            self.qz_e = 2*np.pi*((np.sin(np.deg2rad(alphaF_e))+np.sin(np.deg2rad(alphaI)))/self.lamda_e)
+            self.qx_e = 2*np.pi*((np.cos(np.deg2rad(alphaF_e))-np.cos(np.deg2rad(alphaI)))/self.lamda_e)
            self.header.measurement_scheme = 'energy-dispersive',
        else:
-            self.qz_e = 4*np.pi*np.sin(np.deg2rad(alphaF_e))/self.lamda_e
+            self.qz_e = 4*np.pi*(np.sin(np.deg2rad(alphaF_e))/self.lamda_e)
            # qx_e = 0.
            self.header.measurement_scheme = 'angle- and energy-dispersive'

    def filter_project_x(self):
        pixelLookUp = self.resolve_pixels()
-        # resolve pixel ID into y and z indicees, x position and angle
-        (detY_e, self.detZ_e, self.detXdist_e, self.delta_e) = pixelLookUp[np.int_(self.pixelID_e)-1, :].T
-        # define mask and filter y range
-        self.mask_e = (self.config.yRange[0]<=detY_e) & (detY_e<=self.config.yRange[1])
+        if nb_helpers:
+            (self.detZ_e, self.detXdist_e, self.delta_e, self.mask_e) = nb_helpers.filter_project_x(
+                    pixelLookUp, self.pixelID_e.astype(np.int64), self.config.yRange[0], self.config.yRange[1]
+                    )
+        else:
+            # resolve pixel ID into y and z indicees, x position and angle
+            (detY_e, self.detZ_e, self.detXdist_e, self.delta_e) = pixelLookUp[np.int_(self.pixelID_e)-1, :].T
+            # define mask and filter y range
+            self.mask_e = (self.config.yRange[0]<=detY_e) & (detY_e<=self.config.yRange[1])

    def merge_frames(self):
        total_offset = self.tofCut+self.tau*self.config.chopperPhaseOffset/180.
-        self.tof_e = np.remainder(self.tof_e-(self.tofCut-self.tau), self.tau)+total_offset  # tof shifted to 1 frame
+        if nb_helpers:
+            self.tof_e = nb_helpers.merge_frames(self.tof_e, self.tofCut, self.tau, total_offset)
+        else:
+            self.tof_e = np.remainder(self.tof_e-(self.tofCut-self.tau), self.tau)+total_offset  # tof shifted to 1 frame

    def filter_strange_times(self):
        # filter 'strange' tof times > 2 tau
@@ -223,11 +243,14 @@ class AmorData:
            logging.warning(f'#    strange times: {np.shape(filter_e)[0]-np.shape(self.tof_e)[0]}')

    def extract_walltime(self, norm):
-        totalNumber = np.shape(self.tof_e)[0]
-        self.wallTime_e = np.empty(totalNumber)
-        for i in range(len(self.dataPacket_p)-1):
-            self.wallTime_e[self.dataPacket_p[i]:self.dataPacket_p[i+1]] = self.dataPacketTime_p[i]
-        self.wallTime_e[self.dataPacket_p[-1]:] = self.dataPacketTime_p[-1]
+        if nb_helpers:
+            self.wallTime_e = nb_helpers.extract_walltime(self.tof_e, self.dataPacket_p, self.dataPacketTime_p)
+        else:
+            totalNumber = np.shape(self.tof_e)[0]
+            self.wallTime_e = np.empty(totalNumber)
+            for i in range(len(self.dataPacket_p)-1):
+                self.wallTime_e[self.dataPacket_p[i]:self.dataPacket_p[i+1]] = self.dataPacketTime_p[i]
+            self.wallTime_e[self.dataPacket_p[-1]:] = self.dataPacketTime_p[-1]
        if not self.startTime and not norm:
            self.startTime = self.wallTime_e[0]
        self.wallTime_e -= self.startTime
--- a/libeos/nb_helpers.py
+++ b/libeos/nb_helpers.py
@@ -0,0 +1,65 @@
+import numba as nb
+import numpy as np
+
+@nb.jit(nb.float64[:](nb.float64[:], nb.float64, nb.float64, nb.float64),
+        nopython=True, parallel=True, cache=True)
+def merge_frames(tof_e, tofCut, tau, total_offset):
+    # fast implementation of the merging function used in file_reader.AmorData.merge_frames
+    tof_e_out = np.empty(tof_e.shape, dtype=np.float64)
+    dt = (tofCut-tau)
+    for ti in nb.prange(tof_e.shape[0]):
+        tof_e_out[ti] = ((tof_e[ti]-dt)%tau)+total_offset  # tof shifted to 1 frame
+    return tof_e
+
+@nb.jit(nb.float64[:](nb.float64[:], nb.uint64[:], nb.float64[:]),
+        nopython=True, parallel=True, cache=True)
+def extract_walltime(tof_e, dataPacket_p, dataPacketTime_p):
+    # assigning every event the wall time of the event packet (absolute time of pulse ?start?)
+    totalNumber = np.shape(tof_e)[0]
+    wallTime_e = np.empty(totalNumber, dtype=np.float64)
+    for i in nb.prange(len(dataPacket_p)-1):
+        for j in range(dataPacket_p[i], dataPacket_p[i+1]):
+            wallTime_e[j] = dataPacketTime_p[i]
+    for j in range(dataPacket_p[-1], totalNumber):
+        wallTime_e[j] = dataPacketTime_p[-1]
+    return wallTime_e
+
+@nb.jit(nb.types.Tuple((nb.int64[:], nb.float64[:], nb.float64[:], nb.boolean[:]))
+                (nb.float64[:, :], nb.int64[:], nb.int64, nb.int64),
+        nopython=True, parallel=True, cache=True)
+def filter_project_x(pixelLookUp, pixelID_e, ymin, ymax):
+    # project events on z-axis and create filter for events outside of y-range
+    events = pixelID_e.shape[0]
+    detY_e = np.empty(events, dtype=np.int64)
+    detZ_e = np.empty(events, dtype=np.int64)
+    detXdist_e = np.empty(events, dtype=np.float64)
+    delta_e = np.empty(events, dtype=np.float64)
+    mask_e = np.empty(events, dtype=nb.boolean)
+    for i in nb.prange(events):
+        # resolve pixel ID into y and z indicees, x position and angle
+        detY_e[i] = pixelLookUp[pixelID_e[i]-1, 0]
+        detZ_e[i] = pixelLookUp[pixelID_e[i]-1, 1]
+        detXdist_e[i] = pixelLookUp[pixelID_e[i]-1, 2]
+        delta_e[i] = pixelLookUp[pixelID_e[i]-1, 3]
+        # define mask and filter y range
+        mask_e[i] = (ymin<=detY_e[i]) & (detY_e[i]<=ymax)
+    return (detZ_e, detXdist_e, delta_e, mask_e)
+
+@nb.jit(nb.types.Tuple((nb.float64[:], nb.float64[:], nb.boolean[:]))
+                (nb.float64[:], nb.float64[:], nb.float64[:], nb.boolean[:],
+                 nb.float64, nb.float64, nb.float64, nb.float64, nb.float64, nb.float64),
+        nopython=True, parallel=True, cache=True)
+def calculate_derived_properties_focussing(tof_e, detXdist_e, delta_e, mask_e,
+                                           lmin, lmax, nu, mu, chopperDetectorDistance, hdm):
+    events = tof_e.shape[0]
+    alphaF_e = np.empty(events, dtype=np.float64)
+    lamda_e = np.empty(events, dtype=np.float64)
+    qz_e = np.empty(events, dtype=np.float64)
+    mask_e_out = np.empty(events, dtype=nb.boolean)
+    denom_f1 = 1.e13*hdm
+    for i in nb.prange(events):
+        lamda_e[i] = denom_f1*tof_e[i]/(chopperDetectorDistance+detXdist_e[i])
+        mask_e_out[i] = mask_e[i] & ((lmin<=lamda_e[i]) & (lamda_e[i]<=lmax))
+        alphaF_e[i] = nu-mu+delta_e[i]
+        qz_e[i] = 4*np.pi*np.sin(np.deg2rad(alphaF_e[i]))/lamda_e[i]
+    return lamda_e, qz_e, mask_e
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 numpy
 h5py
 orsopy
+numba