RawSubFile support multi file access (#173)

This PR is a fix/improvement to a problem that Jonathan had. (#156) The
original implementation opened all subfiles at once witch works for
normal sized datasets but fails at a certain point (thousands of files).

- This solution uses RawSubFile to manage the different file indicies
and only opens the file we need
- Added logger.h from slsDetectorPackage for debug printing (in
production no messages should be visible)
This commit is contained in:
Erik Fröjdh
2025-05-22 11:00:03 +02:00
committed by GitHub
parent a6eebbe9bd
commit 9e1b8731b0
16 changed files with 517 additions and 216 deletions

View File

@ -1,79 +1,89 @@
import sys
sys.path.append('/home/l_msdetect/erik/aare/build')
from aare._aare import ClusterVector_i, Interpolator
import pickle
import numpy as np
import matplotlib.pyplot as plt
import boost_histogram as bh
import torch
import math
import time
from aare import RawSubFile, DetectorType, RawFile
from pathlib import Path
path = Path("/home/l_msdetect/erik/data/aare-test-data/raw/jungfrau/")
f = RawSubFile(path/"jungfrau_single_d0_f0_0.raw", DetectorType.Jungfrau, 512, 1024, 16)
# f = RawFile(path/"jungfrau_single_master_0.json")
# from aare._aare import ClusterVector_i, Interpolator
# import pickle
# import numpy as np
# import matplotlib.pyplot as plt
# import boost_histogram as bh
# import torch
# import math
# import time
def gaussian_2d(mx, my, sigma = 1, res=100, grid_size = 2):
"""
Generate a 2D gaussian as position mx, my, with sigma=sigma.
The gaussian is placed on a 2x2 pixel matrix with resolution
res in one dimesion.
"""
x = torch.linspace(0, pixel_size*grid_size, res)
x,y = torch.meshgrid(x,x, indexing="ij")
return 1 / (2*math.pi*sigma**2) * \
torch.exp(-((x - my)**2 / (2*sigma**2) + (y - mx)**2 / (2*sigma**2)))
# def gaussian_2d(mx, my, sigma = 1, res=100, grid_size = 2):
# """
# Generate a 2D gaussian as position mx, my, with sigma=sigma.
# The gaussian is placed on a 2x2 pixel matrix with resolution
# res in one dimesion.
# """
# x = torch.linspace(0, pixel_size*grid_size, res)
# x,y = torch.meshgrid(x,x, indexing="ij")
# return 1 / (2*math.pi*sigma**2) * \
# torch.exp(-((x - my)**2 / (2*sigma**2) + (y - mx)**2 / (2*sigma**2)))
scale = 1000 #Scale factor when converting to integer
pixel_size = 25 #um
grid = 2
resolution = 100
sigma_um = 10
xa = np.linspace(0,grid*pixel_size,resolution)
ticks = [0, 25, 50]
# scale = 1000 #Scale factor when converting to integer
# pixel_size = 25 #um
# grid = 2
# resolution = 100
# sigma_um = 10
# xa = np.linspace(0,grid*pixel_size,resolution)
# ticks = [0, 25, 50]
hit = np.array((20,20))
etahist_fname = "/home/l_msdetect/erik/tmp/test_hist.pkl"
# hit = np.array((20,20))
# etahist_fname = "/home/l_msdetect/erik/tmp/test_hist.pkl"
local_resolution = 99
grid_size = 3
xaxis = np.linspace(0,grid_size*pixel_size, local_resolution)
t = gaussian_2d(hit[0],hit[1], grid_size = grid_size, sigma = 10, res = local_resolution)
pixels = t.reshape(grid_size, t.shape[0] // grid_size, grid_size, t.shape[1] // grid_size).sum(axis = 3).sum(axis = 1)
pixels = pixels.numpy()
pixels = (pixels*scale).astype(np.int32)
v = ClusterVector_i(3,3)
v.push_back(1,1, pixels)
# local_resolution = 99
# grid_size = 3
# xaxis = np.linspace(0,grid_size*pixel_size, local_resolution)
# t = gaussian_2d(hit[0],hit[1], grid_size = grid_size, sigma = 10, res = local_resolution)
# pixels = t.reshape(grid_size, t.shape[0] // grid_size, grid_size, t.shape[1] // grid_size).sum(axis = 3).sum(axis = 1)
# pixels = pixels.numpy()
# pixels = (pixels*scale).astype(np.int32)
# v = ClusterVector_i(3,3)
# v.push_back(1,1, pixels)
with open(etahist_fname, "rb") as f:
hist = pickle.load(f)
eta = hist.view().copy()
etabinsx = np.array(hist.axes.edges.T[0].flat)
etabinsy = np.array(hist.axes.edges.T[1].flat)
ebins = np.array(hist.axes.edges.T[2].flat)
p = Interpolator(eta, etabinsx[0:-1], etabinsy[0:-1], ebins[0:-1])
# with open(etahist_fname, "rb") as f:
# hist = pickle.load(f)
# eta = hist.view().copy()
# etabinsx = np.array(hist.axes.edges.T[0].flat)
# etabinsy = np.array(hist.axes.edges.T[1].flat)
# ebins = np.array(hist.axes.edges.T[2].flat)
# p = Interpolator(eta, etabinsx[0:-1], etabinsy[0:-1], ebins[0:-1])
#Generate the hit
# #Generate the hit
tmp = p.interpolate(v)
print(f'tmp:{tmp}')
pos = np.array((tmp['x'], tmp['y']))*25
# tmp = p.interpolate(v)
# print(f'tmp:{tmp}')
# pos = np.array((tmp['x'], tmp['y']))*25
print(pixels)
fig, ax = plt.subplots(figsize = (7,7))
ax.pcolormesh(xaxis, xaxis, t)
ax.plot(*pos, 'o')
ax.set_xticks([0,25,50,75])
ax.set_yticks([0,25,50,75])
ax.set_xlim(0,75)
ax.set_ylim(0,75)
ax.grid()
print(f'{hit=}')
print(f'{pos=}')
# print(pixels)
# fig, ax = plt.subplots(figsize = (7,7))
# ax.pcolormesh(xaxis, xaxis, t)
# ax.plot(*pos, 'o')
# ax.set_xticks([0,25,50,75])
# ax.set_yticks([0,25,50,75])
# ax.set_xlim(0,75)
# ax.set_ylim(0,75)
# ax.grid()
# print(f'{hit=}')
# print(f'{pos=}')

View File

@ -32,7 +32,7 @@ void define_raw_file_io_bindings(py::module &m) {
shape.push_back(self.cols());
// return headers from all subfiles
py::array_t<DetectorHeader> header(self.n_mod());
py::array_t<DetectorHeader> header(self.n_modules());
const uint8_t item_size = self.bytes_per_pixel();
if (item_size == 1) {
@ -61,10 +61,10 @@ void define_raw_file_io_bindings(py::module &m) {
// return headers from all subfiles
py::array_t<DetectorHeader> header;
if (self.n_mod() == 1) {
if (self.n_modules() == 1) {
header = py::array_t<DetectorHeader>(n_frames);
} else {
header = py::array_t<DetectorHeader>({self.n_mod(), n_frames});
header = py::array_t<DetectorHeader>({self.n_modules(), n_frames});
}
// py::array_t<DetectorHeader> header({self.n_mod(), n_frames});
@ -100,7 +100,7 @@ void define_raw_file_io_bindings(py::module &m) {
.def_property_readonly("cols", &RawFile::cols)
.def_property_readonly("bitdepth", &RawFile::bitdepth)
.def_property_readonly("geometry", &RawFile::geometry)
.def_property_readonly("n_mod", &RawFile::n_mod)
.def_property_readonly("n_modules", &RawFile::n_modules)
.def_property_readonly("detector_type", &RawFile::detector_type)
.def_property_readonly("master", &RawFile::master);
}

View File

@ -5,32 +5,35 @@ from aare import RawSubFile, DetectorType
@pytest.mark.files
def test_read_a_jungfrau_RawSubFile(test_data_path):
# Starting with f1 there is now 7 frames left in the series of files
with RawSubFile(test_data_path / "raw/jungfrau/jungfrau_single_d0_f1_0.raw", DetectorType.Jungfrau, 512, 1024, 16) as f:
assert f.frames_in_file == 3
assert f.frames_in_file == 7
headers, frames = f.read()
assert headers.size == 3
assert frames.shape == (3, 512, 1024)
assert headers.size == 7
assert frames.shape == (7, 512, 1024)
# Frame numbers in this file should be 4, 5, 6
for i,h in zip(range(4,7,1), headers):
for i,h in zip(range(4,11,1), headers):
assert h["frameNumber"] == i
# Compare to canned data using numpy
data = np.load(test_data_path / "raw/jungfrau/jungfrau_single_0.npy")
assert np.all(data[3:6] == frames)
assert np.all(data[3:] == frames)
@pytest.mark.files
def test_iterate_over_a_jungfrau_RawSubFile(test_data_path):
data = np.load(test_data_path / "raw/jungfrau/jungfrau_single_0.npy")
# Given the first subfile in a series we can read all frames from f0, f1, f2...fN
with RawSubFile(test_data_path / "raw/jungfrau/jungfrau_single_d0_f0_0.raw", DetectorType.Jungfrau, 512, 1024, 16) as f:
i = 0
for header, frame in f:
assert header["frameNumber"] == i+1
assert np.all(frame == data[i])
i += 1
assert i == 3
assert header["frameNumber"] == 3
assert i == 10
assert header["frameNumber"] == 10