RawSubFile support multi file access (#173)

This PR is a fix/improvement to a problem that Jonathan had. (#156) The
original implementation opened all subfiles at once witch works for
normal sized datasets but fails at a certain point (thousands of files).

- This solution uses RawSubFile to manage the different file indicies
and only opens the file we need
- Added logger.h from slsDetectorPackage for debug printing (in
production no messages should be visible)
This commit is contained in:
Erik Fröjdh 2025-05-22 11:00:03 +02:00 committed by GitHub
parent a6eebbe9bd
commit 9e1b8731b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 517 additions and 216 deletions

View File

@ -79,6 +79,9 @@ endif()
if(AARE_VERBOSE)
add_compile_definitions(AARE_VERBOSE)
add_compile_definitions(AARE_LOG_LEVEL=aare::logDEBUG5)
else()
add_compile_definitions(AARE_LOG_LEVEL=aare::logERROR)
endif()
if(AARE_CUSTOM_ASSERT)
@ -90,6 +93,7 @@ if(AARE_BENCHMARKS)
endif()
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if(AARE_FETCH_LMFIT)
@ -452,6 +456,7 @@ if(AARE_TESTS)
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/RawFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/RawSubFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/utils/task.test.cpp
)

View File

@ -30,22 +30,11 @@ struct ModuleConfig {
* Consider using that unless you need raw file specific functionality.
*/
class RawFile : public FileInterface {
size_t n_subfiles{}; //f0,f1...fn
size_t n_subfile_parts{}; // d0,d1...dn
//TODO! move to vector of SubFile instead of pointers
std::vector<std::vector<RawSubFile *>> subfiles; //subfiles[f0,f1...fn][d0,d1...dn]
// std::vector<xy> positions;
std::vector<std::unique_ptr<RawSubFile>> m_subfiles;
ModuleConfig cfg{0, 0};
RawMasterFile m_master;
size_t m_current_frame{};
// std::vector<ModuleGeometry> m_module_pixel_0;
// size_t m_rows{};
// size_t m_cols{};
size_t m_current_subfile{};
DetectorGeometry m_geometry;
public:
@ -56,7 +45,7 @@ class RawFile : public FileInterface {
*/
RawFile(const std::filesystem::path &fname, const std::string &mode = "r");
virtual ~RawFile() override;
virtual ~RawFile() override = default;
Frame read_frame() override;
Frame read_frame(size_t frame_number) override;
@ -80,7 +69,7 @@ class RawFile : public FileInterface {
size_t cols() const override;
size_t bitdepth() const override;
xy geometry();
size_t n_mod() const;
size_t n_modules() const;
RawMasterFile master() const;
@ -115,9 +104,6 @@ class RawFile : public FileInterface {
*/
static DetectorHeader read_header(const std::filesystem::path &fname);
// void update_geometry_with_roi();
int find_number_of_subfiles();
void open_subfiles();
void find_geometry();
};

View File

@ -121,6 +121,7 @@ class RawMasterFile {
size_t total_frames_expected() const;
xy geometry() const;
size_t n_modules() const;
std::optional<size_t> analog_samples() const;
std::optional<size_t> digital_samples() const;

View File

@ -18,11 +18,20 @@ class RawSubFile {
std::ifstream m_file;
DetectorType m_detector_type;
size_t m_bitdepth;
std::filesystem::path m_fname;
std::filesystem::path m_path; //!< path to the subfile
std::string m_base_name; //!< base name used for formatting file names
size_t m_offset{}; //!< file index of the first file, allow starting at non zero file
size_t m_total_frames{}; //!< total number of frames in the series of files
size_t m_rows{};
size_t m_cols{};
size_t m_bytes_per_frame{};
size_t m_num_frames{};
int m_module_index{};
size_t m_current_file_index{}; //!< The index of the open file
size_t m_current_frame_index{}; //!< The index of the current frame (with reference to all files)
std::vector<size_t> m_last_frame_in_file{}; //!< Used for seeking to the correct file
uint32_t m_pos_row{};
uint32_t m_pos_col{};
@ -67,12 +76,17 @@ class RawSubFile {
size_t pixels_per_frame() const { return m_rows * m_cols; }
size_t bytes_per_pixel() const { return m_bitdepth / bits_per_byte; }
size_t frames_in_file() const { return m_num_frames; }
size_t frames_in_file() const { return m_total_frames; }
private:
template <typename T>
void read_with_map(std::byte *image_buf);
void parse_fname(const std::filesystem::path &fname);
void scan_files();
void open_file(size_t file_index);
std::filesystem::path fpath(size_t file_index) const;
};
} // namespace aare

View File

@ -107,5 +107,16 @@ std::vector<T> cumsum(const std::vector<T>& vec) {
}
template <typename Container> bool all_equal(const Container &c) {
if (!c.empty() &&
std::all_of(begin(c), end(c),
[c](const typename Container::value_type &element) {
return element == c.front();
}))
return true;
return false;
}
} // namespace aare

View File

@ -204,6 +204,8 @@ struct DetectorGeometry{
int module_gap_row{};
int module_gap_col{};
std::vector<ModuleGeometry> module_pixel_0;
auto size() const { return module_pixel_0.size(); }
};
struct ROI{

139
include/aare/logger.hpp Normal file
View File

@ -0,0 +1,139 @@
#pragma once
/*Utility to log to console*/
#include <iostream>
#include <sstream>
#include <sys/time.h>
namespace aare {
#define RED "\x1b[31m"
#define GREEN "\x1b[32m"
#define YELLOW "\x1b[33m"
#define BLUE "\x1b[34m"
#define MAGENTA "\x1b[35m"
#define CYAN "\x1b[36m"
#define GRAY "\x1b[37m"
#define DARKGRAY "\x1b[30m"
#define BG_BLACK "\x1b[48;5;232m"
#define BG_RED "\x1b[41m"
#define BG_GREEN "\x1b[42m"
#define BG_YELLOW "\x1b[43m"
#define BG_BLUE "\x1b[44m"
#define BG_MAGENTA "\x1b[45m"
#define BG_CYAN "\x1b[46m"
#define RESET "\x1b[0m"
#define BOLD "\x1b[1m"
enum TLogLevel {
logERROR,
logWARNING,
logINFOBLUE,
logINFOGREEN,
logINFORED,
logINFOCYAN,
logINFOMAGENTA,
logINFO,
logDEBUG,
logDEBUG1,
logDEBUG2,
logDEBUG3,
logDEBUG4,
logDEBUG5
};
// Compiler should optimize away anything below this value
#ifndef AARE_LOG_LEVEL
#define AARE_LOG_LEVEL "LOG LEVEL NOT SET IN CMAKE" //This is configured in the main CMakeLists.txt
#endif
#define __AT__ \
std::string(__FILE__) + std::string("::") + std::string(__func__) + \
std::string("(): ")
#define __SHORT_FORM_OF_FILE__ \
(strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
#define __SHORT_AT__ \
std::string(__SHORT_FORM_OF_FILE__) + std::string("::") + \
std::string(__func__) + std::string("(): ")
class Logger {
std::ostringstream os;
TLogLevel m_level = AARE_LOG_LEVEL;
public:
Logger() = default;
explicit Logger(TLogLevel level) : m_level(level){};
~Logger() {
// output in the destructor to allow for << syntax
os << RESET << '\n';
std::clog << os.str() << std::flush; // Single write
}
static TLogLevel &ReportingLevel() { // singelton eeh TODO! Do we need a runtime option?
static TLogLevel reportingLevel = logDEBUG5;
return reportingLevel;
}
// Danger this buffer need as many elements as TLogLevel
static const char *Color(TLogLevel level) noexcept {
static const char *const colors[] = {
RED BOLD, YELLOW BOLD, BLUE, GREEN, RED, CYAN, MAGENTA,
RESET, RESET, RESET, RESET, RESET, RESET, RESET};
// out of bounds
if (level < 0 || level >= sizeof(colors) / sizeof(colors[0])) {
return RESET;
}
return colors[level];
}
// Danger this buffer need as many elements as TLogLevel
static std::string ToString(TLogLevel level) {
static const char *const buffer[] = {
"ERROR", "WARNING", "INFO", "INFO", "INFO",
"INFO", "INFO", "INFO", "DEBUG", "DEBUG1",
"DEBUG2", "DEBUG3", "DEBUG4", "DEBUG5"};
// out of bounds
if (level < 0 || level >= sizeof(buffer) / sizeof(buffer[0])) {
return "UNKNOWN";
}
return buffer[level];
}
std::ostringstream &Get() {
os << Color(m_level) << "- " << Timestamp() << " " << ToString(m_level)
<< ": ";
return os;
}
static std::string Timestamp() {
constexpr size_t buffer_len = 12;
char buffer[buffer_len];
time_t t;
::time(&t);
tm r;
strftime(buffer, buffer_len, "%X", localtime_r(&t, &r));
buffer[buffer_len - 1] = '\0';
struct timeval tv;
gettimeofday(&tv, nullptr);
constexpr size_t result_len = 100;
char result[result_len];
snprintf(result, result_len, "%s.%03ld", buffer,
static_cast<long>(tv.tv_usec) / 1000);
result[result_len - 1] = '\0';
return result;
}
};
// TODO! Do we need to keep the runtime option?
#define LOG(level) \
if (level > AARE_LOG_LEVEL) \
; \
else if (level > aare::Logger::ReportingLevel()) \
; \
else \
aare::Logger(level).Get()
} // namespace aare

View File

@ -1,79 +1,89 @@
import sys
sys.path.append('/home/l_msdetect/erik/aare/build')
from aare._aare import ClusterVector_i, Interpolator
import pickle
import numpy as np
import matplotlib.pyplot as plt
import boost_histogram as bh
import torch
import math
import time
from aare import RawSubFile, DetectorType, RawFile
from pathlib import Path
path = Path("/home/l_msdetect/erik/data/aare-test-data/raw/jungfrau/")
f = RawSubFile(path/"jungfrau_single_d0_f0_0.raw", DetectorType.Jungfrau, 512, 1024, 16)
# f = RawFile(path/"jungfrau_single_master_0.json")
# from aare._aare import ClusterVector_i, Interpolator
# import pickle
# import numpy as np
# import matplotlib.pyplot as plt
# import boost_histogram as bh
# import torch
# import math
# import time
def gaussian_2d(mx, my, sigma = 1, res=100, grid_size = 2):
"""
Generate a 2D gaussian as position mx, my, with sigma=sigma.
The gaussian is placed on a 2x2 pixel matrix with resolution
res in one dimesion.
"""
x = torch.linspace(0, pixel_size*grid_size, res)
x,y = torch.meshgrid(x,x, indexing="ij")
return 1 / (2*math.pi*sigma**2) * \
torch.exp(-((x - my)**2 / (2*sigma**2) + (y - mx)**2 / (2*sigma**2)))
# def gaussian_2d(mx, my, sigma = 1, res=100, grid_size = 2):
# """
# Generate a 2D gaussian as position mx, my, with sigma=sigma.
# The gaussian is placed on a 2x2 pixel matrix with resolution
# res in one dimesion.
# """
# x = torch.linspace(0, pixel_size*grid_size, res)
# x,y = torch.meshgrid(x,x, indexing="ij")
# return 1 / (2*math.pi*sigma**2) * \
# torch.exp(-((x - my)**2 / (2*sigma**2) + (y - mx)**2 / (2*sigma**2)))
scale = 1000 #Scale factor when converting to integer
pixel_size = 25 #um
grid = 2
resolution = 100
sigma_um = 10
xa = np.linspace(0,grid*pixel_size,resolution)
ticks = [0, 25, 50]
# scale = 1000 #Scale factor when converting to integer
# pixel_size = 25 #um
# grid = 2
# resolution = 100
# sigma_um = 10
# xa = np.linspace(0,grid*pixel_size,resolution)
# ticks = [0, 25, 50]
hit = np.array((20,20))
etahist_fname = "/home/l_msdetect/erik/tmp/test_hist.pkl"
# hit = np.array((20,20))
# etahist_fname = "/home/l_msdetect/erik/tmp/test_hist.pkl"
local_resolution = 99
grid_size = 3
xaxis = np.linspace(0,grid_size*pixel_size, local_resolution)
t = gaussian_2d(hit[0],hit[1], grid_size = grid_size, sigma = 10, res = local_resolution)
pixels = t.reshape(grid_size, t.shape[0] // grid_size, grid_size, t.shape[1] // grid_size).sum(axis = 3).sum(axis = 1)
pixels = pixels.numpy()
pixels = (pixels*scale).astype(np.int32)
v = ClusterVector_i(3,3)
v.push_back(1,1, pixels)
# local_resolution = 99
# grid_size = 3
# xaxis = np.linspace(0,grid_size*pixel_size, local_resolution)
# t = gaussian_2d(hit[0],hit[1], grid_size = grid_size, sigma = 10, res = local_resolution)
# pixels = t.reshape(grid_size, t.shape[0] // grid_size, grid_size, t.shape[1] // grid_size).sum(axis = 3).sum(axis = 1)
# pixels = pixels.numpy()
# pixels = (pixels*scale).astype(np.int32)
# v = ClusterVector_i(3,3)
# v.push_back(1,1, pixels)
with open(etahist_fname, "rb") as f:
hist = pickle.load(f)
eta = hist.view().copy()
etabinsx = np.array(hist.axes.edges.T[0].flat)
etabinsy = np.array(hist.axes.edges.T[1].flat)
ebins = np.array(hist.axes.edges.T[2].flat)
p = Interpolator(eta, etabinsx[0:-1], etabinsy[0:-1], ebins[0:-1])
# with open(etahist_fname, "rb") as f:
# hist = pickle.load(f)
# eta = hist.view().copy()
# etabinsx = np.array(hist.axes.edges.T[0].flat)
# etabinsy = np.array(hist.axes.edges.T[1].flat)
# ebins = np.array(hist.axes.edges.T[2].flat)
# p = Interpolator(eta, etabinsx[0:-1], etabinsy[0:-1], ebins[0:-1])
#Generate the hit
# #Generate the hit
tmp = p.interpolate(v)
print(f'tmp:{tmp}')
pos = np.array((tmp['x'], tmp['y']))*25
# tmp = p.interpolate(v)
# print(f'tmp:{tmp}')
# pos = np.array((tmp['x'], tmp['y']))*25
print(pixels)
fig, ax = plt.subplots(figsize = (7,7))
ax.pcolormesh(xaxis, xaxis, t)
ax.plot(*pos, 'o')
ax.set_xticks([0,25,50,75])
ax.set_yticks([0,25,50,75])
ax.set_xlim(0,75)
ax.set_ylim(0,75)
ax.grid()
print(f'{hit=}')
print(f'{pos=}')
# print(pixels)
# fig, ax = plt.subplots(figsize = (7,7))
# ax.pcolormesh(xaxis, xaxis, t)
# ax.plot(*pos, 'o')
# ax.set_xticks([0,25,50,75])
# ax.set_yticks([0,25,50,75])
# ax.set_xlim(0,75)
# ax.set_ylim(0,75)
# ax.grid()
# print(f'{hit=}')
# print(f'{pos=}')

View File

@ -32,7 +32,7 @@ void define_raw_file_io_bindings(py::module &m) {
shape.push_back(self.cols());
// return headers from all subfiles
py::array_t<DetectorHeader> header(self.n_mod());
py::array_t<DetectorHeader> header(self.n_modules());
const uint8_t item_size = self.bytes_per_pixel();
if (item_size == 1) {
@ -61,10 +61,10 @@ void define_raw_file_io_bindings(py::module &m) {
// return headers from all subfiles
py::array_t<DetectorHeader> header;
if (self.n_mod() == 1) {
if (self.n_modules() == 1) {
header = py::array_t<DetectorHeader>(n_frames);
} else {
header = py::array_t<DetectorHeader>({self.n_mod(), n_frames});
header = py::array_t<DetectorHeader>({self.n_modules(), n_frames});
}
// py::array_t<DetectorHeader> header({self.n_mod(), n_frames});
@ -100,7 +100,7 @@ void define_raw_file_io_bindings(py::module &m) {
.def_property_readonly("cols", &RawFile::cols)
.def_property_readonly("bitdepth", &RawFile::bitdepth)
.def_property_readonly("geometry", &RawFile::geometry)
.def_property_readonly("n_mod", &RawFile::n_mod)
.def_property_readonly("n_modules", &RawFile::n_modules)
.def_property_readonly("detector_type", &RawFile::detector_type)
.def_property_readonly("master", &RawFile::master);
}

View File

@ -5,32 +5,35 @@ from aare import RawSubFile, DetectorType
@pytest.mark.files
def test_read_a_jungfrau_RawSubFile(test_data_path):
# Starting with f1 there is now 7 frames left in the series of files
with RawSubFile(test_data_path / "raw/jungfrau/jungfrau_single_d0_f1_0.raw", DetectorType.Jungfrau, 512, 1024, 16) as f:
assert f.frames_in_file == 3
assert f.frames_in_file == 7
headers, frames = f.read()
assert headers.size == 3
assert frames.shape == (3, 512, 1024)
assert headers.size == 7
assert frames.shape == (7, 512, 1024)
# Frame numbers in this file should be 4, 5, 6
for i,h in zip(range(4,7,1), headers):
for i,h in zip(range(4,11,1), headers):
assert h["frameNumber"] == i
# Compare to canned data using numpy
data = np.load(test_data_path / "raw/jungfrau/jungfrau_single_0.npy")
assert np.all(data[3:6] == frames)
assert np.all(data[3:] == frames)
@pytest.mark.files
def test_iterate_over_a_jungfrau_RawSubFile(test_data_path):
data = np.load(test_data_path / "raw/jungfrau/jungfrau_single_0.npy")
# Given the first subfile in a series we can read all frames from f0, f1, f2...fN
with RawSubFile(test_data_path / "raw/jungfrau/jungfrau_single_d0_f0_0.raw", DetectorType.Jungfrau, 512, 1024, 16) as f:
i = 0
for header, frame in f:
assert header["frameNumber"] == i+1
assert np.all(frame == data[i])
i += 1
assert i == 3
assert header["frameNumber"] == 3
assert i == 10
assert header["frameNumber"] == 10

View File

@ -1,6 +1,8 @@
#include "aare/RawFile.hpp"
#include "aare/algorithm.hpp"
#include "aare/PixelMap.hpp"
#include "aare/defs.hpp"
#include "aare/logger.hpp"
#include "aare/geo_helpers.hpp"
#include <fmt/format.h>
@ -14,23 +16,14 @@ RawFile::RawFile(const std::filesystem::path &fname, const std::string &mode)
: m_master(fname) {
m_mode = mode;
if (mode == "r") {
n_subfiles = find_number_of_subfiles(); // f0,f1...fn
n_subfile_parts =
m_master.geometry().col * m_master.geometry().row; // d0,d1...dn
find_geometry();
if (m_master.roi()){
m_geometry = update_geometry_with_roi(m_geometry, m_master.roi().value());
}
open_subfiles();
} else {
throw std::runtime_error(LOCATION +
"Unsupported mode. Can only read RawFiles.");
" Unsupported mode. Can only read RawFiles.");
}
}
@ -67,12 +60,12 @@ void RawFile::read_into(std::byte *image_buf, size_t n_frames, DetectorHeader *h
this->get_frame_into(m_current_frame++, image_buf, header);
image_buf += bytes_per_frame();
if(header)
header+=n_mod();
header+=n_modules();
}
}
size_t RawFile::n_mod() const { return n_subfile_parts; }
size_t RawFile::n_modules() const { return m_master.n_modules(); }
size_t RawFile::bytes_per_frame() {
@ -106,17 +99,11 @@ xy RawFile::geometry() { return m_master.geometry(); }
void RawFile::open_subfiles() {
if (m_mode == "r")
for (size_t i = 0; i != n_subfiles; ++i) {
auto v = std::vector<RawSubFile *>(n_subfile_parts);
for (size_t j = 0; j != n_subfile_parts; ++j) {
auto pos = m_geometry.module_pixel_0[j];
v[j] = new RawSubFile(m_master.data_fname(j, i),
m_master.detector_type(), pos.height,
pos.width, m_master.bitdepth(),
pos.row_index, pos.col_index);
}
subfiles.push_back(v);
for (size_t i = 0; i != n_modules(); ++i) {
auto pos = m_geometry.module_pixel_0[i];
m_subfiles.emplace_back(std::make_unique<RawSubFile>(
m_master.data_fname(i, 0), m_master.detector_type(), pos.height,
pos.width, m_master.bitdepth(), pos.row_index, pos.col_index));
}
else {
throw std::runtime_error(LOCATION +
@ -141,18 +128,6 @@ DetectorHeader RawFile::read_header(const std::filesystem::path &fname) {
return h;
}
int RawFile::find_number_of_subfiles() {
int n_files = 0;
// f0,f1...fn How many files is the data split into?
while (std::filesystem::exists(m_master.data_fname(0, n_files)))
n_files++; // increment after test
#ifdef AARE_VERBOSE
fmt::print("Found: {} subfiles\n", n_files);
#endif
return n_files;
}
RawMasterFile RawFile::master() const { return m_master; }
@ -168,7 +143,7 @@ void RawFile::find_geometry() {
uint16_t c{};
for (size_t i = 0; i < n_subfile_parts; i++) {
for (size_t i = 0; i < n_modules(); i++) {
auto h = read_header(m_master.data_fname(i, 0));
r = std::max(r, h.row);
c = std::max(c, h.column);
@ -210,70 +185,58 @@ size_t RawFile::bytes_per_pixel() const {
}
void RawFile::get_frame_into(size_t frame_index, std::byte *frame_buffer, DetectorHeader *header) {
LOG(logDEBUG) << "RawFile::get_frame_into(" << frame_index << ")";
if (frame_index >= total_frames()) {
throw std::runtime_error(LOCATION + "Frame number out of range");
}
std::vector<size_t> frame_numbers(n_subfile_parts);
std::vector<size_t> frame_indices(n_subfile_parts, frame_index);
std::vector<size_t> frame_numbers(n_modules());
std::vector<size_t> frame_indices(n_modules(), frame_index);
// sync the frame numbers
if (n_subfile_parts != 1) {
for (size_t part_idx = 0; part_idx != n_subfile_parts; ++part_idx) {
auto subfile_id = frame_index / m_master.max_frames_per_file();
if (subfile_id >= subfiles.size()) {
throw std::runtime_error(LOCATION +
" Subfile out of range. Possible missing data.");
}
frame_numbers[part_idx] =
subfiles[subfile_id][part_idx]->frame_number(
frame_index % m_master.max_frames_per_file());
if (n_modules() != 1) { //if we have more than one module
for (size_t part_idx = 0; part_idx != n_modules(); ++part_idx) {
frame_numbers[part_idx] = m_subfiles[part_idx]->frame_number(frame_index);
}
// 1. if frame number vector is the same break
while (std::adjacent_find(frame_numbers.begin(), frame_numbers.end(),
std::not_equal_to<>()) !=
frame_numbers.end()) {
while (!all_equal(frame_numbers)) {
// 2. find the index of the minimum frame number,
auto min_frame_idx = std::distance(
frame_numbers.begin(),
std::min_element(frame_numbers.begin(), frame_numbers.end()));
// 3. increase its index and update its respective frame number
frame_indices[min_frame_idx]++;
// 4. if we can't increase its index => throw error
if (frame_indices[min_frame_idx] >= total_frames()) {
throw std::runtime_error(LOCATION +
"Frame number out of range");
}
auto subfile_id =
frame_indices[min_frame_idx] / m_master.max_frames_per_file();
frame_numbers[min_frame_idx] =
subfiles[subfile_id][min_frame_idx]->frame_number(
frame_indices[min_frame_idx] %
m_master.max_frames_per_file());
m_subfiles[min_frame_idx]->frame_number(frame_indices[min_frame_idx]);
}
}
if (m_master.geometry().col == 1) {
// get the part from each subfile and copy it to the frame
for (size_t part_idx = 0; part_idx != n_subfile_parts; ++part_idx) {
for (size_t part_idx = 0; part_idx != n_modules(); ++part_idx) {
auto corrected_idx = frame_indices[part_idx];
auto subfile_id = corrected_idx / m_master.max_frames_per_file();
if (subfile_id >= subfiles.size()) {
throw std::runtime_error(LOCATION +
" Subfile out of range. Possible missing data.");
}
// This is where we start writing
auto offset = (m_geometry.module_pixel_0[part_idx].origin_y * m_geometry.pixels_x +
m_geometry.module_pixel_0[part_idx].origin_x)*m_master.bitdepth()/8;
if (m_geometry.module_pixel_0[part_idx].origin_x!=0)
throw std::runtime_error(LOCATION + "Implementation error. x pos not 0.");
throw std::runtime_error(LOCATION + " Implementation error. x pos not 0.");
//TODO! Risk for out of range access
subfiles[subfile_id][part_idx]->seek(corrected_idx % m_master.max_frames_per_file());
subfiles[subfile_id][part_idx]->read_into(frame_buffer + offset, header);
//TODO! What if the files don't match?
m_subfiles[part_idx]->seek(corrected_idx);
m_subfiles[part_idx]->read_into(frame_buffer + offset, header);
if (header)
++header;
}
@ -282,26 +245,21 @@ void RawFile::get_frame_into(size_t frame_index, std::byte *frame_buffer, Detect
//TODO! should we read row by row?
// create a buffer large enough to hold a full module
auto bytes_per_part = m_master.pixels_y() * m_master.pixels_x() *
m_master.bitdepth() /
8; // TODO! replace with image_size_in_bytes
auto *part_buffer = new std::byte[bytes_per_part];
// TODO! if we have many submodules we should reorder them on the module
// level
for (size_t part_idx = 0; part_idx != n_subfile_parts; ++part_idx) {
for (size_t part_idx = 0; part_idx != n_modules(); ++part_idx) {
auto pos = m_geometry.module_pixel_0[part_idx];
auto corrected_idx = frame_indices[part_idx];
auto subfile_id = corrected_idx / m_master.max_frames_per_file();
if (subfile_id >= subfiles.size()) {
throw std::runtime_error(LOCATION +
" Subfile out of range. Possible missing data.");
}
subfiles[subfile_id][part_idx]->seek(corrected_idx % m_master.max_frames_per_file());
subfiles[subfile_id][part_idx]->read_into(part_buffer, header);
m_subfiles[part_idx]->seek(corrected_idx);
m_subfiles[part_idx]->read_into(part_buffer, header);
if(header)
++header;
@ -321,6 +279,7 @@ void RawFile::get_frame_into(size_t frame_index, std::byte *frame_buffer, Detect
}
delete[] part_buffer;
}
}
std::vector<Frame> RawFile::read_n(size_t n_frames) {
@ -337,27 +296,8 @@ size_t RawFile::frame_number(size_t frame_index) {
if (frame_index >= m_master.frames_in_file()) {
throw std::runtime_error(LOCATION + " Frame number out of range");
}
size_t subfile_id = frame_index / m_master.max_frames_per_file();
if (subfile_id >= subfiles.size()) {
throw std::runtime_error(
LOCATION + " Subfile out of range. Possible missing data.");
}
return subfiles[subfile_id][0]->frame_number(
frame_index % m_master.max_frames_per_file());
return m_subfiles[0]->frame_number(frame_index);
}
RawFile::~RawFile() {
// TODO! Fix this, for file closing
for (auto &vec : subfiles) {
for (auto *subfile : vec) {
delete subfile;
}
}
}
} // namespace aare

View File

@ -99,11 +99,11 @@ TEST_CASE("Read frame numbers from a raw file", "[.integration]") {
}
}
TEST_CASE("Compare reading from a numpy file with a raw file", "[.integration]") {
auto fpath_raw = test_data_path() / "jungfrau" / "jungfrau_single_master_0.json";
TEST_CASE("Compare reading from a numpy file with a raw file", "[.files]") {
auto fpath_raw = test_data_path() / "raw/jungfrau" / "jungfrau_single_master_0.json";
REQUIRE(std::filesystem::exists(fpath_raw));
auto fpath_npy = test_data_path() / "jungfrau" / "jungfrau_single_0.npy";
auto fpath_npy = test_data_path() / "raw/jungfrau" / "jungfrau_single_0.npy";
REQUIRE(std::filesystem::exists(fpath_npy));
File raw(fpath_raw, "r");
@ -113,6 +113,7 @@ TEST_CASE("Compare reading from a numpy file with a raw file", "[.integration]")
CHECK(npy.total_frames() == 10);
for (size_t i = 0; i < 10; ++i) {
CHECK(raw.tell() == i);
auto raw_frame = raw.read_frame();
auto npy_frame = npy.read_frame();
CHECK((raw_frame.view<uint16_t>() == npy_frame.view<uint16_t>()));

View File

@ -140,6 +140,10 @@ std::optional<size_t> RawMasterFile::number_of_rows() const {
xy RawMasterFile::geometry() const { return m_geometry; }
size_t RawMasterFile::n_modules() const {
return m_geometry.row * m_geometry.col;
}
std::optional<uint8_t> RawMasterFile::quad() const { return m_quad; }
// optional values, these may or may not be present in the master file

View File

@ -1,10 +1,14 @@
#include "aare/RawSubFile.hpp"
#include "aare/PixelMap.hpp"
#include "aare/algorithm.hpp"
#include "aare/utils/ifstream_helpers.hpp"
#include "aare/logger.hpp"
#include <cstring> // memcpy
#include <fmt/core.h>
#include <iostream>
#include <regex>
namespace aare {
@ -12,51 +16,51 @@ namespace aare {
RawSubFile::RawSubFile(const std::filesystem::path &fname,
DetectorType detector, size_t rows, size_t cols,
size_t bitdepth, uint32_t pos_row, uint32_t pos_col)
: m_detector_type(detector), m_bitdepth(bitdepth), m_fname(fname),
: m_detector_type(detector), m_bitdepth(bitdepth),
m_rows(rows), m_cols(cols),
m_bytes_per_frame((m_bitdepth / 8) * m_rows * m_cols), m_pos_row(pos_row),
m_pos_col(pos_col) {
LOG(logDEBUG) << "RawSubFile::RawSubFile()";
if (m_detector_type == DetectorType::Moench03_old) {
m_pixel_map = GenerateMoench03PixelMap();
} else if (m_detector_type == DetectorType::Eiger && m_pos_row % 2 == 0) {
m_pixel_map = GenerateEigerFlipRowsPixelMap();
}
if (std::filesystem::exists(fname)) {
m_num_frames = std::filesystem::file_size(fname) /
(sizeof(DetectorHeader) + rows * cols * bitdepth / 8);
} else {
throw std::runtime_error(
LOCATION + fmt::format("File {} does not exist", m_fname.string()));
}
// fp = fopen(m_fname.string().c_str(), "rb");
m_file.open(m_fname, std::ios::binary);
if (!m_file.is_open()) {
throw std::runtime_error(
LOCATION + fmt::format("Could not open file {}", m_fname.string()));
}
#ifdef AARE_VERBOSE
fmt::print("Opened file: {} with {} frames\n", m_fname.string(), m_num_frames);
fmt::print("m_rows: {}, m_cols: {}, m_bitdepth: {}\n", m_rows, m_cols,
m_bitdepth);
fmt::print("file size: {}\n", std::filesystem::file_size(fname));
#endif
parse_fname(fname);
scan_files();
open_file(m_current_file_index); // open the first file
}
void RawSubFile::seek(size_t frame_index) {
if (frame_index >= m_num_frames) {
throw std::runtime_error(LOCATION + fmt::format("Frame index {} out of range in a file with {} frames", frame_index, m_num_frames));
LOG(logDEBUG) << "RawSubFile::seek(" << frame_index << ")";
if (frame_index >= m_total_frames) {
throw std::runtime_error(LOCATION + " Frame index out of range: " +
std::to_string(frame_index));
}
m_file.seekg((sizeof(DetectorHeader) + bytes_per_frame()) * frame_index);
m_current_frame_index = frame_index;
auto file_index = first_larger(m_last_frame_in_file, frame_index);
if (file_index != m_current_file_index)
open_file(file_index);
auto frame_offset = (file_index)
? frame_index - m_last_frame_in_file[file_index - 1]
: frame_index;
auto byte_offset = frame_offset * (m_bytes_per_frame + sizeof(DetectorHeader));
m_file.seekg(byte_offset);
}
size_t RawSubFile::tell() {
return m_file.tellg() / (sizeof(DetectorHeader) + bytes_per_frame());
LOG(logDEBUG) << "RawSubFile::tell():" << m_current_frame_index;
return m_current_frame_index;
}
void RawSubFile::read_into(std::byte *image_buf, DetectorHeader *header) {
LOG(logDEBUG) << "RawSubFile::read_into()";
if (header) {
m_file.read(reinterpret_cast<char *>(header), sizeof(DetectorHeader));
} else {
@ -90,6 +94,13 @@ void RawSubFile::read_into(std::byte *image_buf, DetectorHeader *header) {
if (m_file.fail()){
throw std::runtime_error(LOCATION + ifstream_error_msg(m_file));
}
++ m_current_frame_index;
if (m_current_frame_index >= m_last_frame_in_file[m_current_file_index] &&
(m_current_frame_index < m_total_frames)) {
++m_current_file_index;
open_file(m_current_file_index);
}
}
void RawSubFile::read_into(std::byte *image_buf, size_t n_frames, DetectorHeader *header) {
@ -130,4 +141,69 @@ size_t RawSubFile::frame_number(size_t frame_index) {
return h.frameNumber;
}
void RawSubFile::parse_fname(const std::filesystem::path &fname) {
LOG(logDEBUG) << "RawSubFile::parse_fname()";
// data has the format: /path/too/data/jungfrau_single_d0_f1_0.raw
// d0 is the module index, will not change for this file
// f1 is the file index - thi is the one we need
// 0 is the measurement index, will not change
m_path = fname.parent_path();
m_base_name = fname.filename();
// Regex to extract numbers after 'd' and 'f'
std::regex pattern(R"(^(.*_d)(\d+)(_f)(\d+)(_\d+\.raw)$)");
std::smatch match;
if (std::regex_match(m_base_name, match, pattern)) {
m_offset = std::stoi(match[4].str()); // find the first file index in case of a truncated series
m_base_name = match[1].str() + match[2].str() + match[3].str() + "{}" + match[5].str();
LOG(logDEBUG) << "Base name: " << m_base_name;
LOG(logDEBUG) << "Offset: " << m_offset;
LOG(logDEBUG) << "Path: " << m_path.string();
} else {
throw std::runtime_error(
LOCATION + fmt::format("Could not parse file name {}", fname.string()));
}
}
std::filesystem::path RawSubFile::fpath(size_t file_index) const {
auto fname = fmt::format(m_base_name, file_index);
return m_path / fname;
}
void RawSubFile::open_file(size_t file_index) {
m_file.close();
auto fname = fpath(file_index+m_offset);
LOG(logDEBUG) << "RawSubFile::open_file(): " << fname.string();
m_file.open(fname, std::ios::binary);
if (!m_file.is_open()) {
throw std::runtime_error(
LOCATION + fmt::format("Could not open file {}", fpath(file_index).string()));
}
m_current_file_index = file_index;
}
void RawSubFile::scan_files() {
LOG(logDEBUG) << "RawSubFile::scan_files()";
// find how many files we have and the number of frames in each file
m_last_frame_in_file.clear();
size_t file_index = m_offset;
while (std::filesystem::exists(fpath(file_index))) {
auto n_frames = std::filesystem::file_size(fpath(file_index)) /
(m_bytes_per_frame + sizeof(DetectorHeader));
m_last_frame_in_file.push_back(n_frames);
LOG(logDEBUG) << "Found: " << n_frames << " frames in file: " << fpath(file_index).string();
++file_index;
}
// find where we need to open the next file and total number of frames
m_last_frame_in_file = cumsum(m_last_frame_in_file);
if(m_last_frame_in_file.empty()){
m_total_frames = 0;
}else{
m_total_frames = m_last_frame_in_file.back();
}
}
} // namespace aare

76
src/RawSubFile.test.cpp Normal file
View File

@ -0,0 +1,76 @@
#include "aare/RawSubFile.hpp"
#include "aare/File.hpp"
#include "aare/NDArray.hpp"
#include <catch2/catch_test_macros.hpp>
#include "test_config.hpp"
using namespace aare;
TEST_CASE("Read frames directly from a RawSubFile", "[.files]"){
auto fpath_raw = test_data_path() / "raw/jungfrau" / "jungfrau_single_d0_f0_0.raw";
REQUIRE(std::filesystem::exists(fpath_raw));
RawSubFile f(fpath_raw, DetectorType::Jungfrau, 512, 1024, 16);
REQUIRE(f.rows() == 512);
REQUIRE(f.cols() == 1024);
REQUIRE(f.pixels_per_frame() == 512 * 1024);
REQUIRE(f.bytes_per_frame() == 512 * 1024 * 2);
REQUIRE(f.bytes_per_pixel() == 2);
auto fpath_npy = test_data_path() / "raw/jungfrau" / "jungfrau_single_0.npy";
REQUIRE(std::filesystem::exists(fpath_npy));
//Numpy file with the same data to use as reference
File npy(fpath_npy, "r");
CHECK(f.frames_in_file() == 10);
CHECK(npy.total_frames() == 10);
DetectorHeader header{};
NDArray<uint16_t, 2> image({static_cast<ssize_t>(f.rows()), static_cast<ssize_t>(f.cols())});
for (size_t i = 0; i < 10; ++i) {
CHECK(f.tell() == i);
f.read_into(image.buffer(), &header);
auto npy_frame = npy.read_frame();
CHECK((image.view() == npy_frame.view<uint16_t>()));
}
}
TEST_CASE("Read frames directly from a RawSubFile starting at the second file", "[.files]"){
// we know this file has 10 frames with frame numbers 1 to 10
// f0 1,2,3
// f1 4,5,6 <-- starting here
// f2 7,8,9
// f3 10
auto fpath_raw = test_data_path() / "raw/jungfrau" / "jungfrau_single_d0_f1_0.raw";
REQUIRE(std::filesystem::exists(fpath_raw));
RawSubFile f(fpath_raw, DetectorType::Jungfrau, 512, 1024, 16);
auto fpath_npy = test_data_path() / "raw/jungfrau" / "jungfrau_single_0.npy";
REQUIRE(std::filesystem::exists(fpath_npy));
//Numpy file with the same data to use as reference
File npy(fpath_npy, "r");
npy.seek(3);
CHECK(f.frames_in_file() == 7);
CHECK(npy.total_frames() == 10);
DetectorHeader header{};
NDArray<uint16_t, 2> image({static_cast<ssize_t>(f.rows()), static_cast<ssize_t>(f.cols())});
for (size_t i = 0; i < 7; ++i) {
CHECK(f.tell() == i);
f.read_into(image.buffer(), &header);
// frame numbers start at 1 frame index at 0
// adding 3 + 1 to verify the frame number
CHECK(header.frameNumber == i + 4);
auto npy_frame = npy.read_frame();
CHECK((image.view() == npy_frame.view<uint16_t>()));
}
}

View File

@ -160,3 +160,36 @@ TEST_CASE("cumsum works with negative numbers", "[algorithm]") {
REQUIRE(result[3] == -6);
REQUIRE(result[4] == -10);
}
TEST_CASE("cumsum on an empty vector", "[algorithm]") {
std::vector<double> vec = {};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == 0);
}
TEST_CASE("All equal on an empty vector is false", "[algorithm]") {
std::vector<int> vec = {};
REQUIRE(aare::all_equal(vec) == false);
}
TEST_CASE("All equal on a vector with 1 element is true", "[algorithm]") {
std::vector<int> vec = {1};
REQUIRE(aare::all_equal(vec) == true);
}
TEST_CASE("All equal on a vector with 2 elements is true", "[algorithm]") {
std::vector<int> vec = {1, 1};
REQUIRE(aare::all_equal(vec) == true);
}
TEST_CASE("All equal on a vector with two different elements is false", "[algorithm]") {
std::vector<int> vec = {1, 2};
REQUIRE(aare::all_equal(vec) == false);
}
TEST_CASE("Last element is different", "[algorithm]") {
std::vector<int> vec = {1, 1, 1, 1, 2};
REQUIRE(aare::all_equal(vec) == false);
}