RawSubFile support multi file access (#173)

This PR is a fix/improvement for a problem that Jonathan had (#156). The original implementation opened all subfiles at once, which works for normal-sized datasets but fails at a certain point (thousands of files). - This solution uses RawSubFile to manage the different file indices and only opens the file we need - Added logger.h from slsDetectorPackage for debug printing (in production no messages should be visible)
2026-03-10 16:07:45 +01:00 · 2025-05-22 11:00:03 +02:00
parent a6eebbe9bd
commit 9e1b8731b0
16 changed files with 517 additions and 216 deletions
--- a/python/examples/play.py
+++ b/python/examples/play.py
@@ -1,79 +1,89 @@
 import sys
 sys.path.append('/home/l_msdetect/erik/aare/build')

-from aare._aare import ClusterVector_i, Interpolator

-import pickle 
-import numpy as np
-import matplotlib.pyplot as plt
-import boost_histogram as bh
-import torch
-import math
-import time
+from aare import RawSubFile, DetectorType, RawFile
+
+from pathlib import Path
+path = Path("/home/l_msdetect/erik/data/aare-test-data/raw/jungfrau/")
+f = RawSubFile(path/"jungfrau_single_d0_f0_0.raw", DetectorType.Jungfrau, 512, 1024, 16)
+
+# f = RawFile(path/"jungfrau_single_master_0.json")
+
+
+# from aare._aare import ClusterVector_i, Interpolator
+
+# import pickle 
+# import numpy as np
+# import matplotlib.pyplot as plt
+# import boost_histogram as bh
+# import torch
+# import math
+# import time



-def gaussian_2d(mx, my, sigma = 1, res=100, grid_size = 2):
-    """
-    Generate a 2D gaussian as position mx, my, with sigma=sigma. 
-    The gaussian is placed on a 2x2 pixel matrix with resolution 
-    res in one dimesion.
-    """
-    x = torch.linspace(0, pixel_size*grid_size, res)
-    x,y = torch.meshgrid(x,x, indexing="ij")
-    return 1 / (2*math.pi*sigma**2) * \
-      torch.exp(-((x - my)**2 / (2*sigma**2) + (y - mx)**2 / (2*sigma**2)))
+# def gaussian_2d(mx, my, sigma = 1, res=100, grid_size = 2):
+#     """
+#     Generate a 2D gaussian at position mx, my, with sigma=sigma. 
+#     The gaussian is placed on a 2x2 pixel matrix with resolution 
+#     res in one dimension.
+#     """
+#     x = torch.linspace(0, pixel_size*grid_size, res)
+#     x,y = torch.meshgrid(x,x, indexing="ij")
+#     return 1 / (2*math.pi*sigma**2) * \
+#       torch.exp(-((x - my)**2 / (2*sigma**2) + (y - mx)**2 / (2*sigma**2)))

-scale = 1000 #Scale factor when converting to integer
-pixel_size = 25 #um
-grid = 2
-resolution = 100
-sigma_um = 10
-xa = np.linspace(0,grid*pixel_size,resolution)
-ticks = [0, 25, 50]
+# scale = 1000 #Scale factor when converting to integer
+# pixel_size = 25 #um
+# grid = 2
+# resolution = 100
+# sigma_um = 10
+# xa = np.linspace(0,grid*pixel_size,resolution)
+# ticks = [0, 25, 50]

-hit = np.array((20,20))
-etahist_fname = "/home/l_msdetect/erik/tmp/test_hist.pkl"
+# hit = np.array((20,20))
+# etahist_fname = "/home/l_msdetect/erik/tmp/test_hist.pkl"

-local_resolution = 99
-grid_size = 3
-xaxis = np.linspace(0,grid_size*pixel_size, local_resolution)
-t = gaussian_2d(hit[0],hit[1], grid_size = grid_size, sigma = 10, res = local_resolution)
-pixels = t.reshape(grid_size, t.shape[0] // grid_size, grid_size, t.shape[1] // grid_size).sum(axis = 3).sum(axis = 1)
-pixels = pixels.numpy()
-pixels = (pixels*scale).astype(np.int32)
-v = ClusterVector_i(3,3)
-v.push_back(1,1, pixels)
+# local_resolution = 99
+# grid_size = 3
+# xaxis = np.linspace(0,grid_size*pixel_size, local_resolution)
+# t = gaussian_2d(hit[0],hit[1], grid_size = grid_size, sigma = 10, res = local_resolution)
+# pixels = t.reshape(grid_size, t.shape[0] // grid_size, grid_size, t.shape[1] // grid_size).sum(axis = 3).sum(axis = 1)
+# pixels = pixels.numpy()
+# pixels = (pixels*scale).astype(np.int32)
+# v = ClusterVector_i(3,3)
+# v.push_back(1,1, pixels)

-with open(etahist_fname, "rb") as f:
-        hist = pickle.load(f)
-eta = hist.view().copy()
-etabinsx = np.array(hist.axes.edges.T[0].flat)
-etabinsy = np.array(hist.axes.edges.T[1].flat)
-ebins = np.array(hist.axes.edges.T[2].flat)
-p = Interpolator(eta, etabinsx[0:-1], etabinsy[0:-1], ebins[0:-1])
+# with open(etahist_fname, "rb") as f:
+#         hist = pickle.load(f)
+# eta = hist.view().copy()
+# etabinsx = np.array(hist.axes.edges.T[0].flat)
+# etabinsy = np.array(hist.axes.edges.T[1].flat)
+# ebins = np.array(hist.axes.edges.T[2].flat)
+# p = Interpolator(eta, etabinsx[0:-1], etabinsy[0:-1], ebins[0:-1])




-#Generate the hit
+# #Generate the hit




-tmp = p.interpolate(v)
-print(f'tmp:{tmp}')
-pos = np.array((tmp['x'], tmp['y']))*25
+# tmp = p.interpolate(v)
+# print(f'tmp:{tmp}')
+# pos = np.array((tmp['x'], tmp['y']))*25


-print(pixels)
-fig, ax = plt.subplots(figsize = (7,7))
-ax.pcolormesh(xaxis, xaxis, t)
-ax.plot(*pos, 'o')
-ax.set_xticks([0,25,50,75])
-ax.set_yticks([0,25,50,75])
-ax.set_xlim(0,75)
-ax.set_ylim(0,75)
-ax.grid()
-print(f'{hit=}')
-print(f'{pos=}')
+# print(pixels)
+# fig, ax = plt.subplots(figsize = (7,7))
+# ax.pcolormesh(xaxis, xaxis, t)
+# ax.plot(*pos, 'o')
+# ax.set_xticks([0,25,50,75])
+# ax.set_yticks([0,25,50,75])
+# ax.set_xlim(0,75)
+# ax.set_ylim(0,75)
+# ax.grid()
+# print(f'{hit=}')
+# print(f'{pos=}')
--- a/python/src/raw_file.hpp
+++ b/python/src/raw_file.hpp
@@ -32,7 +32,7 @@ void define_raw_file_io_bindings(py::module &m) {
                 shape.push_back(self.cols());

                 // return headers from all subfiles
-                 py::array_t<DetectorHeader> header(self.n_mod());
+                 py::array_t<DetectorHeader> header(self.n_modules());

                 const uint8_t item_size = self.bytes_per_pixel();
                 if (item_size == 1) {
@@ -61,10 +61,10 @@ void define_raw_file_io_bindings(py::module &m) {
            
                // return headers from all subfiles
                py::array_t<DetectorHeader> header;
-                if (self.n_mod() == 1) {
+                if (self.n_modules() == 1) {
                    header = py::array_t<DetectorHeader>(n_frames);
                } else {
-                    header = py::array_t<DetectorHeader>({self.n_mod(), n_frames});
+                    header = py::array_t<DetectorHeader>({self.n_modules(), n_frames});
                }
                // py::array_t<DetectorHeader> header({self.n_mod(), n_frames});

@@ -100,7 +100,7 @@ void define_raw_file_io_bindings(py::module &m) {
        .def_property_readonly("cols", &RawFile::cols)
        .def_property_readonly("bitdepth", &RawFile::bitdepth)
        .def_property_readonly("geometry", &RawFile::geometry)
-        .def_property_readonly("n_mod", &RawFile::n_mod)
+        .def_property_readonly("n_modules", &RawFile::n_modules)
        .def_property_readonly("detector_type", &RawFile::detector_type)
        .def_property_readonly("master", &RawFile::master);
 }
--- a/python/tests/test_RawSubFile.py
+++ b/python/tests/test_RawSubFile.py
@@ -5,32 +5,35 @@ from aare import RawSubFile, DetectorType

@pytest.mark.files
 def test_read_a_jungfrau_RawSubFile(test_data_path):
+
+    # Starting with f1 there are now 7 frames left in the series of files
    with RawSubFile(test_data_path / "raw/jungfrau/jungfrau_single_d0_f1_0.raw", DetectorType.Jungfrau, 512, 1024, 16) as f:
-        assert f.frames_in_file == 3
+        assert f.frames_in_file == 7

        headers, frames = f.read()

-    assert headers.size == 3
-    assert frames.shape == (3, 512, 1024)
+    assert headers.size == 7
+    assert frames.shape == (7, 512, 1024)
    
-    # Frame numbers in this file should be 4, 5, 6
-    for i,h in zip(range(4,7,1), headers):
+
+    for i,h in zip(range(4,11,1), headers):
        assert h["frameNumber"] == i

    # Compare to canned data using numpy
    data = np.load(test_data_path / "raw/jungfrau/jungfrau_single_0.npy")
-    assert np.all(data[3:6] == frames)
+    assert np.all(data[3:] == frames)

@pytest.mark.files
 def test_iterate_over_a_jungfrau_RawSubFile(test_data_path):

    data = np.load(test_data_path / "raw/jungfrau/jungfrau_single_0.npy")

+    # Given the first subfile in a series we can read all frames from f0, f1, f2...fN
    with RawSubFile(test_data_path / "raw/jungfrau/jungfrau_single_d0_f0_0.raw", DetectorType.Jungfrau, 512, 1024, 16) as f:
        i = 0
        for header, frame in f:
            assert header["frameNumber"] == i+1
            assert np.all(frame == data[i])
            i += 1
-        assert i == 3
-        assert header["frameNumber"] == 3
+        assert i == 10
+        assert header["frameNumber"] == 10