Adding support for Jungfrau .dat files (#152)
All checks were successful
Build on RHEL9 / buildh (push) Successful in 1m48s

closes #150 

**Not addressed in this PR:** 

- pixels_per_frame, bytes_per_frame and tell should be made const in
FileInterface
This commit is contained in:
Erik Fröjdh
2025-04-08 15:31:04 +02:00
committed by GitHub
parent 7db1ae4d94
commit f16273a566
21 changed files with 1025 additions and 17 deletions

View File

@ -11,9 +11,9 @@
using aare::ClusterFile;
TEST_CASE("Read one frame from a a cluster file", "[.integration]") {
TEST_CASE("Read one frame from a a cluster file", "[.files]") {
//We know that the frame has 97 clusters
auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust";
auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust";
REQUIRE(std::filesystem::exists(fpath));
ClusterFile f(fpath);
@ -22,9 +22,9 @@ TEST_CASE("Read one frame from a a cluster file", "[.integration]") {
REQUIRE(clusters.frame_number() == 135);
}
TEST_CASE("Read one frame using ROI", "[.integration]") {
TEST_CASE("Read one frame using ROI", "[.files]") {
//We know that the frame has 97 clusters
auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust";
auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust";
REQUIRE(std::filesystem::exists(fpath));
ClusterFile f(fpath);
@ -50,9 +50,9 @@ TEST_CASE("Read one frame using ROI", "[.integration]") {
}
TEST_CASE("Read clusters from single frame file", "[.integration]") {
TEST_CASE("Read clusters from single frame file", "[.files]") {
auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust";
auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust";
REQUIRE(std::filesystem::exists(fpath));
SECTION("Read fewer clusters than available") {

View File

@ -1,4 +1,5 @@
#include "aare/File.hpp"
#include "aare/JungfrauDataFile.hpp"
#include "aare/NumpyFile.hpp"
#include "aare/RawFile.hpp"
@ -27,6 +28,8 @@ File::File(const std::filesystem::path &fname, const std::string &mode,
else if (fname.extension() == ".npy") {
// file_impl = new NumpyFile(fname, mode, cfg);
file_impl = std::make_unique<NumpyFile>(fname, mode, cfg);
}else if(fname.extension() == ".dat"){
file_impl = std::make_unique<JungfrauDataFile>(fname);
} else {
throw std::runtime_error("Unsupported file type");
}

44
src/FilePtr.cpp Normal file
View File

@ -0,0 +1,44 @@
#include "aare/FilePtr.hpp"

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <stdexcept>
#include <string>
#include <utility>

#include <fmt/format.h>
namespace aare {
/**
 * @brief Open a file with fopen and keep the resulting handle in fp_.
 * @param fname path of the file to open
 * @param mode mode string forwarded to fopen
 * @throws std::runtime_error if fopen returns a null pointer
 */
FilePtr::FilePtr(const std::filesystem::path& fname, const std::string& mode = "rb") {
    const auto *native_name = fname.c_str();
    fp_ = fopen(native_name, mode.c_str());
    if (fp_ == nullptr) {
        throw std::runtime_error(fmt::format("Could not open: {}", native_name));
    }
}
/**
 * @brief Move constructor: exchanges the file handle with other.
 *
 * After the call this object holds the handle that other had, and other
 * holds whatever value fp_ had on entry.
 */
FilePtr::FilePtr(FilePtr &&other) {
    FILE *previous = fp_;
    fp_ = other.fp_;
    other.fp_ = previous;
}
/**
 * @brief Move assignment: exchanges the file handle with other.
 *
 * The handle previously held by this object ends up in other, so it is
 * closed when other is destroyed rather than immediately.
 * @return reference to this object
 */
FilePtr &FilePtr::operator=(FilePtr &&other) {
    FILE *previous = fp_;
    fp_ = other.fp_;
    other.fp_ = previous;
    return *this;
}
/// @brief Access the underlying C stream handle held in fp_.
/// @return the stored FILE pointer; ownership is not transferred
FILE *FilePtr::get() {
    return fp_;
}
/**
 * @brief Report the current position of the stream as given by ftell.
 * @return position reported by ftell
 * @throws std::runtime_error if ftell returns -1
 */
int64_t FilePtr::tell() {
    const auto position = ftell(fp_);
    if (position != -1) {
        return position;
    }
    throw std::runtime_error(fmt::format("Error getting file position: {}", error_msg()));
}
/// @brief Close the held stream, if there is one.
FilePtr::~FilePtr() {
    if (fp_ == nullptr) {
        return;
    }
    // NOTE: the return value of fclose is not checked here.
    fclose(fp_);
}
/**
 * @brief Describe the state of the stream after a failed I/O call.
 *
 * Checks, in order: whether a stream is held at all, the end-of-file
 * indicator, and the error indicator (in which case the text for the
 * current errno is appended).
 *
 * @return a human readable message, or an empty string when neither
 *         indicator is set
 */
std::string FilePtr::error_msg() {
    // Passing a null stream to feof/ferror is undefined behaviour, and fp_
    // is null for an object that does not hold an open file.
    if (fp_ == nullptr) {
        return "File is not open";
    }
    if (feof(fp_)) {
        return "End of file reached";
    }
    if (ferror(fp_)) {
        return fmt::format("Error reading file: {}", std::strerror(errno));
    }
    return "";
}
} // namespace aare

242
src/JungfrauDataFile.cpp Normal file
View File

@ -0,0 +1,242 @@
#include "aare/JungfrauDataFile.hpp"
#include "aare/algorithm.hpp"
#include "aare/defs.hpp"
#include <cerrno>
#include <fmt/format.h>
namespace aare {
/**
 * @brief Open a series of Jungfrau .dat files starting at fname.
 * @param fname path to the first file to read
 * @throws std::runtime_error if fname does not exist
 */
JungfrauDataFile::JungfrauDataFile(const std::filesystem::path &fname) {
    const bool file_found = std::filesystem::exists(fname);
    if (!file_found) {
        throw std::runtime_error(LOCATION +
                                 "File does not exist: " + fname.string());
    }

    // Order matters: scan_files() relies on the frame size determined by
    // find_frame_size() and on the name parts extracted by parse_fname().
    find_frame_size(fname);
    parse_fname(fname);
    scan_files();

    open_file(m_current_file_index);
}
// FileInterface
/**
 * @brief Read the frame at the current position, discarding its header.
 * @return a rows() x cols() Frame of UINT16 pixels
 */
Frame JungfrauDataFile::read_frame() {
    Frame frame(rows(), cols(), Dtype::UINT16);
    auto *pixel_buffer = reinterpret_cast<std::byte *>(frame.data());
    read_into(pixel_buffer, nullptr);
    return frame;
}
/**
 * @brief Seek to the given frame and read it, discarding its header.
 * @param frame_number index passed to seek()
 * @return a rows() x cols() Frame of UINT16 pixels
 */
Frame JungfrauDataFile::read_frame(size_t frame_number) {
    // Position the file first; seek() throws for an out-of-range index.
    seek(frame_number);

    Frame frame(rows(), cols(), Dtype::UINT16);
    auto *pixel_buffer = reinterpret_cast<std::byte *>(frame.data());
    read_into(pixel_buffer, nullptr);
    return frame;
}
std::vector<Frame> JungfrauDataFile::read_n(size_t n_frames) {
std::vector<Frame> frames;
throw std::runtime_error(LOCATION +
"Not implemented yet");
return frames;
}
void JungfrauDataFile::read_into(std::byte *image_buf) {
read_into(image_buf, nullptr);
}
void JungfrauDataFile::read_into(std::byte *image_buf, size_t n_frames) {
read_into(image_buf, n_frames, nullptr);
}
size_t JungfrauDataFile::frame_number(size_t frame_index) {
seek(frame_index);
return read_header().framenum;
}
/// @brief Detector type of this file format; always Jungfrau.
DetectorType JungfrauDataFile::detector_type() const {
    return DetectorType::Jungfrau;
}

/// @brief File name stem with the trailing file index removed.
std::string JungfrauDataFile::base_name() const {
    return m_base_name;
}

/// @brief Size of the pixel data of one frame in bytes (header excluded).
size_t JungfrauDataFile::bytes_per_frame() {
    return m_bytes_per_frame;
}

/// @brief Number of pixels in one frame.
size_t JungfrauDataFile::pixels_per_frame() {
    return m_cols * m_rows;
}

/// @brief Size of a single pixel in bytes.
size_t JungfrauDataFile::bytes_per_pixel() const {
    return sizeof(pixel_type);
}

/// @brief Size of a single pixel in bits.
size_t JungfrauDataFile::bitdepth() const {
    return bits_per_byte * bytes_per_pixel();
}
/**
 * @brief Move the read position to the start of the given frame.
 *
 * Opens the file that contains the frame if it is not the one currently
 * open, then seeks to the frame's header within that file.
 *
 * @param frame_index index of the frame counted from the first file that
 *        was scanned
 * @throws std::runtime_error if frame_index is not smaller than the total
 *         number of frames
 */
void JungfrauDataFile::seek(size_t frame_index) {
    if (frame_index >= m_total_frames) {
        throw std::runtime_error(LOCATION + "Frame index out of range: " +
                                 std::to_string(frame_index));
    }

    // m_last_frame_in_file holds cumulative frame counts, so the first
    // entry larger than frame_index identifies the file holding the frame.
    const auto file_index = first_larger(m_last_frame_in_file, frame_index);
    if (file_index != m_current_file_index) {
        open_file(file_index);
    }

    // Index of the frame relative to the start of its file.
    const auto frame_offset =
        (file_index) ? frame_index - m_last_frame_in_file[file_index - 1]
                     : frame_index;
    const auto byte_offset = frame_offset * (m_bytes_per_frame + header_size);
    m_fp.seek(byte_offset);

    // Only record the new position once opening and seeking have succeeded,
    // so tell() does not report a position that was never reached.
    m_current_frame_index = frame_index;
}
/// @brief Index of the frame that the next read will return.
size_t JungfrauDataFile::tell() {
    return m_current_frame_index;
}

/// @brief Total number of frames over all scanned files.
size_t JungfrauDataFile::total_frames() const {
    return m_total_frames;
}

/// @brief Number of rows in a frame.
size_t JungfrauDataFile::rows() const {
    return m_rows;
}

/// @brief Number of columns in a frame.
size_t JungfrauDataFile::cols() const {
    return m_cols;
}

/// @brief Number of files found by scan_files().
size_t JungfrauDataFile::n_files() const {
    return m_last_frame_in_file.size();
}
/**
 * @brief Guess the frame geometry from the size of the file.
 *
 * The file size is tested against each known record size (header plus
 * image) in turn; the first geometry that divides the file size evenly is
 * used to set m_rows, m_cols and m_bytes_per_frame.
 *
 * @param fname file whose size is inspected
 * @throws std::runtime_error if the file is empty or its size is not a
 *         multiple of any known record size
 */
void JungfrauDataFile::find_frame_size(const std::filesystem::path &fname) {
    struct Geometry {
        size_t rows;
        size_t cols;
    };
    // Tried in this order; the first match wins.
    static constexpr Geometry candidates[] = {
        {512, 1024}, // module
        {256, 1024}, // half module
        {256, 256},  // chip
    };

    const auto n_bytes = std::filesystem::file_size(fname);
    if (n_bytes == 0) {
        throw std::runtime_error(LOCATION +
                                 "Cannot guess frame size: file is empty");
    }

    for (const auto &geometry : candidates) {
        const size_t image_bytes =
            sizeof(pixel_type) * geometry.rows * geometry.cols;
        if (n_bytes % (header_size + image_bytes) != 0) {
            continue;
        }
        m_rows = geometry.rows;
        m_cols = geometry.cols;
        m_bytes_per_frame = image_bytes;
        return;
    }

    throw std::runtime_error(LOCATION +
                             "Cannot find frame size: file size is not a "
                             "multiple of any known frame size");
}
/**
 * @brief Split fname into directory, base name and starting file index.
 *
 * Everything after the last underscore of the stem is parsed as the file
 * index (stored in m_offset) and removed, together with the underscore,
 * from m_base_name. A stem without an underscore is kept unchanged.
 *
 * @param fname path of the first file to read
 */
void JungfrauDataFile::parse_fname(const std::filesystem::path &fname) {
    m_path = fname.parent_path();
    m_base_name = fname.stem();

    const auto underscore = m_base_name.find_last_of('_');
    if (underscore == std::string::npos) {
        return;
    }

    // find the file index, then remove it from the base name
    m_offset = std::stoul(m_base_name.substr(underscore + 1));
    m_base_name.erase(underscore);
}
void JungfrauDataFile::scan_files() {
// find how many files we have and the number of frames in each file
m_last_frame_in_file.clear();
size_t file_index = m_offset;
while (std::filesystem::exists(fpath(file_index))) {
auto n_frames = std::filesystem::file_size(fpath(file_index)) /
(m_bytes_per_frame + header_size);
m_last_frame_in_file.push_back(n_frames);
++file_index;
}
// find where we need to open the next file and total number of frames
m_last_frame_in_file = cumsum(m_last_frame_in_file);
m_total_frames = m_last_frame_in_file.back();
}
/**
 * @brief Read one frame at the current position and advance to the next.
 *
 * @param image_buf destination for m_bytes_per_frame bytes of pixel data
 * @param header destination for the frame header, or nullptr to skip it
 * @throws std::runtime_error if the header or the image cannot be read
 */
void JungfrauDataFile::read_into(std::byte *image_buf,
                                 JungfrauDataHeader *header) {
    // read header if not passed nullptr, otherwise step over it
    if (header) {
        if (auto rc = fread(header, sizeof(JungfrauDataHeader), 1, m_fp.get());
            rc != 1) {
            throw std::runtime_error(
                LOCATION +
                "Could not read header from file: " + m_fp.error_msg());
        }
    } else {
        m_fp.seek(header_size, SEEK_CUR);
    }

    // read data
    if (auto rc = fread(image_buf, 1, m_bytes_per_frame, m_fp.get());
        rc != m_bytes_per_frame) {
        throw std::runtime_error(LOCATION +
                                 "Could not read image from file: " +
                                 m_fp.error_msg());
    }

    // prepare for next read
    // if we are at the end of the file, open the next file (unless this was
    // the very last frame of the series)
    ++m_current_frame_index;
    if (m_current_frame_index >= m_last_frame_in_file[m_current_file_index] &&
        (m_current_frame_index < m_total_frames)) {
        ++m_current_file_index;
        open_file(m_current_file_index);
    }
}
void JungfrauDataFile::read_into(std::byte *image_buf, size_t n_frames,
JungfrauDataHeader *header) {
if (header) {
for (size_t i = 0; i < n_frames; ++i)
read_into(image_buf + i * m_bytes_per_frame, header + i);
}else{
for (size_t i = 0; i < n_frames; ++i)
read_into(image_buf + i * m_bytes_per_frame, nullptr);
}
}
/**
 * @brief Read one frame into an existing NDArray.
 *
 * @param image destination array; its shape must be rows() x cols()
 * @param header destination for the frame header, or nullptr to skip it
 * @throws std::runtime_error if the shape of image does not match
 */
void JungfrauDataFile::read_into(NDArray<uint16_t> *image,
                                 JungfrauDataHeader *header) {
    if (rows() != image->shape(0) || cols() != image->shape(1)) {
        throw std::runtime_error(LOCATION +
                                 "Image shape does not match file size: " +
                                 std::to_string(rows()) + "x" +
                                 std::to_string(cols()));
    }
    auto *pixel_buffer = reinterpret_cast<std::byte *>(image->data());
    read_into(pixel_buffer, header);
}
/**
 * @brief Read the frame at the current position into a new NDArray.
 *
 * @param header destination for the frame header, or nullptr to skip it
 * @return a rows() x cols() array with the pixel data
 */
NDArray<uint16_t> JungfrauDataFile::read_frame(JungfrauDataHeader *header) {
    Shape<2> dims{rows(), cols()};
    NDArray<uint16_t> pixels(dims);
    auto *pixel_buffer = reinterpret_cast<std::byte *>(pixels.data());
    read_into(pixel_buffer, header);
    return pixels;
}
/**
 * @brief Read the header of the frame at the current position.
 *
 * The file position is moved back by header_size afterwards, so the frame
 * can still be read with read_frame()/read_into().
 *
 * @return the header of the current frame
 * @throws std::runtime_error if the header cannot be read
 */
JungfrauDataHeader JungfrauDataFile::read_header() {
    JungfrauDataHeader header;
    if (auto rc = fread(&header, 1, sizeof(header), m_fp.get());
        rc != sizeof(header)) {
        throw std::runtime_error(LOCATION +
                                 "Could not read header from file: " +
                                 m_fp.error_msg());
    }
    // Convert to a signed type before negating: unary minus on an unsigned
    // value wraps around and only yields the intended offset by accident of
    // the later conversion.
    m_fp.seek(-static_cast<int64_t>(header_size), SEEK_CUR);
    return header;
}
void JungfrauDataFile::open_file(size_t file_index) {
// fmt::print(stderr, "Opening file: {}\n",
// fpath(file_index+m_offset).string());
m_fp = FilePtr(fpath(file_index + m_offset), "rb");
m_current_file_index = file_index;
}
/**
 * @brief Build the path of the file with the given index.
 *
 * The name has the form <base_name>_<index>.dat with the index zero-padded
 * to n_digits_in_file_index digits.
 *
 * @param file_index index as it appears in the file name
 * @return m_path joined with the generated file name
 */
std::filesystem::path JungfrauDataFile::fpath(size_t file_index) const {
    return m_path / fmt::format("{}_{:0{}}.dat", m_base_name, file_index,
                                n_digits_in_file_index);
}
} // namespace aare

View File

@ -0,0 +1,94 @@
#include "aare/JungfrauDataFile.hpp"
#include <catch2/catch_test_macros.hpp>
#include "test_config.hpp"
using aare::JungfrauDataFile;
using aare::JungfrauDataHeader;
// Opens the first file of a multi-file series and checks the geometry and
// bookkeeping reported by JungfrauDataFile, then reads every frame in order
// and validates the header fields.
TEST_CASE("Open a Jungfrau data file", "[.files]") {
// We know we have 4 files with 7, 7, 7, and 3 frames.
// The first frame number is 1 and the bunch id is frame_number**2,
// so we can check the header.
auto fpath = test_data_path() / "dat" / "AldoJF500k_000000.dat";
REQUIRE(std::filesystem::exists(fpath));
JungfrauDataFile f(fpath);
// 512 x 1024 pixels of 2 bytes each: 524288 pixels, 1048576 bytes
REQUIRE(f.rows() == 512);
REQUIRE(f.cols() == 1024);
REQUIRE(f.bytes_per_frame() == 1048576);
REQUIRE(f.pixels_per_frame() == 524288);
REQUIRE(f.bytes_per_pixel() == 2);
REQUIRE(f.bitdepth() == 16);
// The trailing "_000000" file index is not part of the base name
REQUIRE(f.base_name() == "AldoJF500k");
REQUIRE(f.n_files() == 4);
REQUIRE(f.tell() == 0);
REQUIRE(f.total_frames() == 24);
REQUIRE(f.current_file() == fpath);
// Check that the frame number and bunch id are read correctly.
// The loop covers all 24 frames and therefore crosses file boundaries.
for (size_t i = 0; i < 24; ++i) {
JungfrauDataHeader header;
auto image = f.read_frame(&header);
REQUIRE(header.framenum == i + 1);
REQUIRE(header.bunchid == (i + 1) * (i + 1));
REQUIRE(image.shape(0) == 512);
REQUIRE(image.shape(1) == 1024);
}
}
// Checks that seek() positions the reader on the requested frame, that
// read_header() leaves the position unchanged, and that reading a frame
// advances to the next one.
TEST_CASE("Seek in a JungfrauDataFile", "[.files]"){
auto fpath = test_data_path() / "dat" / "AldoJF65k_000000.dat";
REQUIRE(std::filesystem::exists(fpath));
JungfrauDataFile f(fpath);
//The file should have 113 frames
f.seek(19);
REQUIRE(f.tell() == 19);
// Frame numbers in the headers start at 1, hence index + 1
auto h = f.read_header();
REQUIRE(h.framenum == 19+1);
//Reading again does not change the file pointer
auto h2 = f.read_header();
REQUIRE(h2.framenum == 19+1);
f.seek(59);
REQUIRE(f.tell() == 59);
auto h3 = f.read_header();
REQUIRE(h3.framenum == 59+1);
// Reading the full frame returns the same header and moves on
JungfrauDataHeader h4;
auto image = f.read_frame(&h4);
REQUIRE(h4.framenum == 59+1);
//now we should be on the next frame
REQUIRE(f.tell() == 60);
REQUIRE(f.read_header().framenum == 60+1);
REQUIRE_THROWS(f.seek(86356)); //out of range
}
// Opening a file in the middle of a series: frame indices and the file count
// are relative to the opened file, earlier files are ignored.
TEST_CASE("Open a Jungfrau data file with non zero file index", "[.files]"){
auto fpath = test_data_path() / "dat" / "AldoJF65k_000003.dat";
REQUIRE(std::filesystem::exists(fpath));
JungfrauDataFile f(fpath);
//18 frames per data file, opening file index 3 we ignore the first 3 files
REQUIRE(f.total_frames() == 113-18*3);
REQUIRE(f.tell() == 0);
//Frame numbers start at 1 in the first file
REQUIRE(f.read_header().framenum == 18*3+1);
// moving relative to the opened file
f.seek(5);
REQUIRE(f.read_header().framenum == 18*3+1+5);
// ignoring the first 3 files
REQUIRE(f.n_files() == 4);
REQUIRE(f.current_file().stem() == "AldoJF65k_000003");
}

View File

@ -49,6 +49,16 @@ TEST_CASE("nearest index works with std::array", "[algorithm]"){
REQUIRE(aare::nearest_index(arr, -10.0) == 0);
}
// Every element equals the searched value, so all distances tie; the
// expected result is the first index.
TEST_CASE("nearest index when there is no different uses the first element", "[algorithm]"){
std::vector<int> vec = {5, 5, 5, 5, 5};
REQUIRE(aare::nearest_index(vec, 5) == 0);
}
// Same tie, but with a searched value larger than every element; the first
// index is still expected.
TEST_CASE("nearest index when there is no different uses the first element also when all smaller", "[algorithm]"){
std::vector<int> vec = {5, 5, 5, 5, 5};
REQUIRE(aare::nearest_index(vec, 10) == 0);
}
TEST_CASE("last smaller", "[algorithm]"){
aare::NDArray<double, 1> arr({5});
@ -68,6 +78,82 @@ TEST_CASE("returns last bin strictly smaller", "[algorithm]"){
arr[i] = i;
}
// arr 0, 1, 2, 3, 4
REQUIRE(aare::last_smaller(arr, 2.0) == 2);
REQUIRE(aare::last_smaller(arr, 2.0) == 1);
}
// Searched value above the whole range: the last index is expected.
TEST_CASE("last_smaller with all elements smaller returns last element", "[algorithm]"){
aare::NDArray<double, 1> arr({5});
for (size_t i = 0; i < arr.size(); i++) {
arr[i] = i;
}
// arr 0, 1, 2, 3, 4
REQUIRE(aare::last_smaller(arr, 50.) == 4);
}
// Searched value below the whole range: no element is smaller, and the
// expected result is index 0.
TEST_CASE("last_smaller with all elements bigger returns first element", "[algorithm]"){
aare::NDArray<double, 1> arr({5});
for (size_t i = 0; i < arr.size(); i++) {
arr[i] = i;
}
// arr 0, 1, 2, 3, 4
REQUIRE(aare::last_smaller(arr, -50.) == 0);
}
// No element is strictly smaller than the searched value; index 0 is
// expected here as well.
TEST_CASE("last smaller with all elements equal returns the first element", "[algorithm]"){
std::vector<int> vec = {5,5,5,5,5,5,5};
REQUIRE(aare::last_smaller(vec, 5) == 0);
}
// 2.5 lies between elements 2 and 3, so index 3 is the first larger one.
TEST_CASE("first_lager with vector", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
REQUIRE(aare::first_larger(vec, 2.5) == 3);
}
// No element is larger than the searched value: the last index is expected.
TEST_CASE("first_lager with all elements smaller returns last element", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
REQUIRE(aare::first_larger(vec, 50.) == 4);
}
// Every element is larger than the searched value: index 0 is expected.
TEST_CASE("first_lager with all elements bigger returns first element", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
REQUIRE(aare::first_larger(vec, -50.) == 0);
}
// Equal elements do not count as larger, so the last index is expected.
TEST_CASE("first_lager with all elements the same as the check returns last", "[algorithm]"){
std::vector<int> vec = {14, 14, 14, 14, 14};
REQUIRE(aare::first_larger(vec, 14) == 4);
}
// The element equal to the searched value (index 2) is skipped; the result
// is the next index.
TEST_CASE("first larger with the same element", "[algorithm]"){
std::vector<int> vec = {7,8,9,10,11};
REQUIRE(aare::first_larger(vec, 9) == 3);
}
// Each output element is the sum of all input elements up to and including
// the same index; the output has the same length as the input.
TEST_CASE("cumsum works", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == vec.size());
REQUIRE(result[0] == 0);
REQUIRE(result[1] == 1);
REQUIRE(result[2] == 3);
REQUIRE(result[3] == 6);
REQUIRE(result[4] == 10);
}
// An empty input yields an empty output.
TEST_CASE("cumsum works with empty vector", "[algorithm]"){
std::vector<double> vec = {};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == 0);
}
// Negative values accumulate the same way.
TEST_CASE("cumsum works with negative numbers", "[algorithm]"){
std::vector<double> vec = {0, -1, -2, -3, -4};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == vec.size());
REQUIRE(result[0] == 0);
REQUIRE(result[1] == -1);
REQUIRE(result[2] == -3);
REQUIRE(result[3] == -6);
REQUIRE(result[4] == -10);
}
}