Adding support for Jungfrau .dat files (#152)
All checks were successful
Build on RHEL9 / buildh (push) Successful in 1m48s

closes #150 

**Not addressed in this PR:** 

- pixels_per_frame, bytes_per_frame and tell should be made cost in
FileInterface
This commit is contained in:
Erik Fröjdh 2025-04-08 15:31:04 +02:00 committed by GitHub
parent 7db1ae4d94
commit f16273a566
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 1025 additions and 17 deletions

View File

@ -342,8 +342,10 @@ set(PUBLICHEADERS
include/aare/File.hpp
include/aare/Fit.hpp
include/aare/FileInterface.hpp
include/aare/FilePtr.hpp
include/aare/Frame.hpp
include/aare/geo_helpers.hpp
include/aare/JungfrauDataFile.hpp
include/aare/NDArray.hpp
include/aare/NDView.hpp
include/aare/NumpyFile.hpp
@ -367,8 +369,10 @@ set(SourceFiles
${CMAKE_CURRENT_SOURCE_DIR}/src/decode.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Frame.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/File.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/FilePtr.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Fit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/geo_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/JungfrauDataFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Interpolator.cpp
@ -423,6 +427,7 @@ if(AARE_TESTS)
${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterVector.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Pedestal.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/JungfrauDataFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/RawFile.test.cpp

View File

@ -0,0 +1,25 @@
JungfrauDataFile
==================
JungfrauDataFile is a class to read the .dat files that are produced by Aldo's receiver.
It is mostly used for calibration.
The structure of the file is:
* JungfrauDataHeader
* Binary data (256x256, 256x1024 or 512x1024)
* JungfrauDataHeader
* ...
There is no metadata indicating number of frames or the size of the image, but this
will be infered by this reader.
.. doxygenstruct:: aare::JungfrauDataHeader
:members:
:undoc-members:
:private-members:
.. doxygenclass:: aare::JungfrauDataFile
:members:
:undoc-members:
:private-members:

47
docs/src/Tests.rst Normal file
View File

@ -0,0 +1,47 @@
****************
Tests
****************
We test the code both from the C++ and Python API. By default only tests that does not require image data is run.
C++
~~~~~~~~~~~~~~~~~~
.. code-block:: bash
mkdir build
cd build
cmake .. -DAARE_TESTS=ON
make -j 4
export AARE_TEST_DATA=/path/to/test/data
./run_test [.files] #or using ctest, [.files] is the option to include tests needing data
Python
~~~~~~~~~~~~~~~~~~
.. code-block:: bash
#From the root dir of the library
python -m pytest python/tests --files # passing --files will run the tests needing data
Getting the test data
~~~~~~~~~~~~~~~~~~~~~~~~
.. attention ::
The tests needing the test data are not run by default. To make the data available, you need to set the environment variable
AARE_TEST_DATA to the path of the test data directory. Then pass either [.files] for the C++ tests or --files for Python
The image files needed for the test are large and are not included in the repository. They are stored
using GIT LFS in a separate repository. To get the test data, you need to clone the repository.
To do this, you need to have GIT LFS installed. You can find instructions on how to install it here: https://git-lfs.github.com/
Once you have GIT LFS installed, you can clone the repository like any normal repo using:
.. code-block:: bash
git clone https://gitea.psi.ch/detectors/aare-test-data.git

5
docs/src/algorithm.rst Normal file
View File

@ -0,0 +1,5 @@
algorithm
=============
.. doxygenfile:: algorithm.hpp

View File

@ -20,9 +20,6 @@ AARE
Requirements
Consume
.. toctree::
:caption: Python API
:maxdepth: 1
@ -31,6 +28,7 @@ AARE
pyCtbRawFile
pyClusterFile
pyClusterVector
pyJungfrauDataFile
pyRawFile
pyRawMasterFile
pyVarClusterFinder
@ -42,6 +40,7 @@ AARE
:caption: C++ API
:maxdepth: 1
algorithm
NDArray
NDView
Frame
@ -51,6 +50,7 @@ AARE
ClusterFinderMT
ClusterFile
ClusterVector
JungfrauDataFile
Pedestal
RawFile
RawSubFile
@ -59,4 +59,8 @@ AARE
.. toctree::
:caption: Developer
:maxdepth: 3
Tests

View File

@ -0,0 +1,10 @@
JungfrauDataFile
===================
.. py:currentmodule:: aare
.. autoclass:: JungfrauDataFile
:members:
:undoc-members:
:show-inheritance:
:inherited-members:

30
include/aare/FilePtr.hpp Normal file
View File

@ -0,0 +1,30 @@
#pragma once
#include <cstdio>
#include <filesystem>
namespace aare {
/**
* \brief RAII wrapper for FILE pointer
*/
class FilePtr {
FILE *fp_{nullptr};
public:
FilePtr() = default;
FilePtr(const std::filesystem::path& fname, const std::string& mode);
FilePtr(const FilePtr &) = delete; // we don't want a copy
FilePtr &operator=(const FilePtr &) = delete; // since we handle a resource
FilePtr(FilePtr &&other);
FilePtr &operator=(FilePtr &&other);
FILE *get();
int64_t tell();
void seek(int64_t offset, int whence = SEEK_SET) {
if (fseek(fp_, offset, whence) != 0)
throw std::runtime_error("Error seeking in file");
}
std::string error_msg();
~FilePtr();
};
} // namespace aare

View File

@ -0,0 +1,112 @@
#pragma once
#include <cstdint>
#include <filesystem>
#include <vector>
#include "aare/FilePtr.hpp"
#include "aare/defs.hpp"
#include "aare/NDArray.hpp"
#include "aare/FileInterface.hpp"
namespace aare {
struct JungfrauDataHeader{
uint64_t framenum;
uint64_t bunchid;
};
class JungfrauDataFile : public FileInterface {
size_t m_rows{}; //!< number of rows in the image, from find_frame_size();
size_t m_cols{}; //!< number of columns in the image, from find_frame_size();
size_t m_bytes_per_frame{}; //!< number of bytes per frame excluding header
size_t m_total_frames{}; //!< total number of frames in the series of files
size_t m_offset{}; //!< file index of the first file, allow starting at non zero file
size_t m_current_file_index{}; //!< The index of the open file
size_t m_current_frame_index{}; //!< The index of the current frame (with reference to all files)
std::vector<size_t> m_last_frame_in_file{}; //!< Used for seeking to the correct file
std::filesystem::path m_path; //!< path to the files
std::string m_base_name; //!< base name used for formatting file names
FilePtr m_fp; //!< RAII wrapper for a FILE*
using pixel_type = uint16_t;
static constexpr size_t header_size = sizeof(JungfrauDataHeader);
static constexpr size_t n_digits_in_file_index = 6; //!< to format file names
public:
JungfrauDataFile(const std::filesystem::path &fname);
std::string base_name() const; //!< get the base name of the file (without path and extension)
size_t bytes_per_frame() override;
size_t pixels_per_frame() override;
size_t bytes_per_pixel() const;
size_t bitdepth() const override;
void seek(size_t frame_index) override; //!< seek to the given frame index (note not byte offset)
size_t tell() override; //!< get the frame index of the file pointer
size_t total_frames() const override;
size_t rows() const override;
size_t cols() const override;
size_t n_files() const; //!< get the number of files in the series.
// Extra functions needed for FileInterface
Frame read_frame() override;
Frame read_frame(size_t frame_number) override;
std::vector<Frame> read_n(size_t n_frames=0) override;
void read_into(std::byte *image_buf) override;
void read_into(std::byte *image_buf, size_t n_frames) override;
size_t frame_number(size_t frame_index) override;
DetectorType detector_type() const override;
/**
* @brief Read a single frame from the file into the given buffer.
* @param image_buf buffer to read the frame into. (Note the caller is responsible for allocating the buffer)
* @param header pointer to a JungfrauDataHeader or nullptr to skip header)
*/
void read_into(std::byte *image_buf, JungfrauDataHeader *header = nullptr);
/**
* @brief Read a multiple frames from the file into the given buffer.
* @param image_buf buffer to read the frame into. (Note the caller is responsible for allocating the buffer)
* @param n_frames number of frames to read
* @param header pointer to a JungfrauDataHeader or nullptr to skip header)
*/
void read_into(std::byte *image_buf, size_t n_frames, JungfrauDataHeader *header = nullptr);
/**
* @brief Read a single frame from the file into the given NDArray
* @param image NDArray to read the frame into.
*/
void read_into(NDArray<uint16_t>* image, JungfrauDataHeader* header = nullptr);
/**
* @brief Read a single frame from the file. Allocated a new NDArray for the output data
* @param header pointer to a JungfrauDataHeader or nullptr to skip header)
* @return NDArray with the image data
*/
NDArray<uint16_t> read_frame(JungfrauDataHeader* header = nullptr);
JungfrauDataHeader read_header();
std::filesystem::path current_file() const { return fpath(m_current_file_index+m_offset); }
private:
/**
* @brief Find the size of the frame in the file. (256x256, 256x1024, 512x1024)
* @param fname path to the file
* @throws std::runtime_error if the file is empty or the size cannot be determined
*/
void find_frame_size(const std::filesystem::path &fname);
void parse_fname(const std::filesystem::path &fname);
void scan_files();
void open_file(size_t file_index);
std::filesystem::path fpath(size_t frame_index) const;
};
} // namespace aare

View File

@ -7,13 +7,20 @@
namespace aare {
/**
* @brief Find the index of the last element smaller than val
* assume a sorted array
* @brief Index of the last element that is smaller than val.
* Requires a sorted array. Uses >= for ordering. If all elements
* are smaller it returns the last element and if all elements are
* larger it returns the first element.
* @param first iterator to the first element
* @param last iterator to the last element
* @param val value to compare
* @return index of the last element that is smaller than val
*
*/
template <typename T>
size_t last_smaller(const T* first, const T* last, T val) {
for (auto iter = first+1; iter != last; ++iter) {
if (*iter > val) {
if (*iter >= val) {
return std::distance(first, iter-1);
}
}
@ -25,7 +32,49 @@ size_t last_smaller(const NDArray<T, 1>& arr, T val) {
return last_smaller(arr.begin(), arr.end(), val);
}
template <typename T>
size_t last_smaller(const std::vector<T>& vec, T val) {
return last_smaller(vec.data(), vec.data()+vec.size(), val);
}
/**
* @brief Index of the first element that is larger than val.
* Requires a sorted array. Uses > for ordering. If all elements
* are larger it returns the first element and if all elements are
* smaller it returns the last element.
* @param first iterator to the first element
* @param last iterator to the last element
* @param val value to compare
* @return index of the first element that is larger than val
*/
template <typename T>
size_t first_larger(const T* first, const T* last, T val) {
for (auto iter = first; iter != last; ++iter) {
if (*iter > val) {
return std::distance(first, iter);
}
}
return std::distance(first, last-1);
}
template <typename T>
size_t first_larger(const NDArray<T, 1>& arr, T val) {
return first_larger(arr.begin(), arr.end(), val);
}
template <typename T>
size_t first_larger(const std::vector<T>& vec, T val) {
return first_larger(vec.data(), vec.data()+vec.size(), val);
}
/**
* @brief Index of the nearest element to val.
* Requires a sorted array. If there is no difference it takes the first element.
* @param first iterator to the first element
* @param last iterator to the last element
* @param val value to compare
* @return index of the nearest element
*/
template <typename T>
size_t nearest_index(const T* first, const T* last, T val) {
auto iter = std::min_element(first, last,
@ -50,6 +99,13 @@ size_t nearest_index(const std::array<T,N>& arr, T val) {
return nearest_index(arr.data(), arr.data()+arr.size(), val);
}
template <typename T>
std::vector<T> cumsum(const std::vector<T>& vec) {
std::vector<T> result(vec.size());
std::partial_sum(vec.begin(), vec.end(), result.begin());
return result;
}
} // namespace aare

View File

@ -15,4 +15,9 @@ cmake.verbose = true
[tool.scikit-build.cmake.define]
AARE_PYTHON_BINDINGS = "ON"
AARE_SYSTEM_LIBRARIES = "ON"
AARE_INSTALL_PYTHONEXT = "ON"
AARE_INSTALL_PYTHONEXT = "ON"
[tool.pytest.ini_options]
markers = [
"files: marks tests that need additional data (deselect with '-m \"not files\"')",
]

View File

@ -2,7 +2,7 @@
from . import _aare
from ._aare import File, RawMasterFile, RawSubFile
from ._aare import File, RawMasterFile, RawSubFile, JungfrauDataFile
from ._aare import Pedestal_d, Pedestal_f, ClusterFinder, VarClusterFinder
from ._aare import DetectorType
from ._aare import ClusterFile

View File

@ -0,0 +1,116 @@
#include "aare/JungfrauDataFile.hpp"
#include "aare/defs.hpp"
#include <cstdint>
#include <filesystem>
#include <pybind11/iostream.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl/filesystem.h>
#include <string>
namespace py = pybind11;
using namespace ::aare;
// Disable warnings for unused parameters, as we ignore some
// in the __exit__ method
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
auto read_dat_frame(JungfrauDataFile &self) {
py::array_t<JungfrauDataHeader> header(1);
py::array_t<uint16_t> image({
self.rows(),
self.cols()
});
self.read_into(reinterpret_cast<std::byte *>(image.mutable_data()),
header.mutable_data());
return py::make_tuple(header, image);
}
auto read_n_dat_frames(JungfrauDataFile &self, size_t n_frames) {
// adjust for actual frames left in the file
n_frames = std::min(n_frames, self.total_frames() - self.tell());
if (n_frames == 0) {
throw std::runtime_error("No frames left in file");
}
py::array_t<JungfrauDataHeader> header(n_frames);
py::array_t<uint16_t> image({
n_frames, self.rows(),
self.cols()});
self.read_into(reinterpret_cast<std::byte *>(image.mutable_data()),
n_frames, header.mutable_data());
return py::make_tuple(header, image);
}
void define_jungfrau_data_file_io_bindings(py::module &m) {
// Make the JungfrauDataHeader usable from numpy
PYBIND11_NUMPY_DTYPE(JungfrauDataHeader, framenum, bunchid);
py::class_<JungfrauDataFile>(m, "JungfrauDataFile")
.def(py::init<const std::filesystem::path &>())
.def("seek", &JungfrauDataFile::seek,
R"(
Seek to the given frame index.
)")
.def("tell", &JungfrauDataFile::tell,
R"(
Get the current frame index.
)")
.def_property_readonly("rows", &JungfrauDataFile::rows)
.def_property_readonly("cols", &JungfrauDataFile::cols)
.def_property_readonly("base_name", &JungfrauDataFile::base_name)
.def_property_readonly("bytes_per_frame",
&JungfrauDataFile::bytes_per_frame)
.def_property_readonly("pixels_per_frame",
&JungfrauDataFile::pixels_per_frame)
.def_property_readonly("bytes_per_pixel",
&JungfrauDataFile::bytes_per_pixel)
.def_property_readonly("bitdepth", &JungfrauDataFile::bitdepth)
.def_property_readonly("current_file", &JungfrauDataFile::current_file)
.def_property_readonly("total_frames", &JungfrauDataFile::total_frames)
.def_property_readonly("n_files", &JungfrauDataFile::n_files)
.def("read_frame", &read_dat_frame,
R"(
Read a single frame from the file.
)")
.def("read_n", &read_n_dat_frames,
R"(
Read maximum n_frames frames from the file.
)")
.def(
"read",
[](JungfrauDataFile &self) {
self.seek(0);
auto n_frames = self.total_frames();
return read_n_dat_frames(self, n_frames);
},
R"(
Read all frames from the file. Seeks to the beginning before reading.
)")
.def("__enter__", [](JungfrauDataFile &self) { return &self; })
.def("__exit__",
[](JungfrauDataFile &self,
const std::optional<pybind11::type> &exc_type,
const std::optional<pybind11::object> &exc_value,
const std::optional<pybind11::object> &traceback) {
// self.close();
})
.def("__iter__", [](JungfrauDataFile &self) { return &self; })
.def("__next__", [](JungfrauDataFile &self) {
try {
return read_dat_frame(self);
} catch (std::runtime_error &e) {
throw py::stop_iteration();
}
});
}
#pragma GCC diagnostic pop

View File

@ -11,6 +11,8 @@
#include "fit.hpp"
#include "interpolation.hpp"
#include "jungfrau_data_file.hpp"
//Pybind stuff
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
@ -33,5 +35,6 @@ PYBIND11_MODULE(_aare, m) {
define_cluster_file_sink_bindings(m);
define_fit_bindings(m);
define_interpolation_bindings(m);
define_jungfrau_data_file_io_bindings(m);
}

29
python/tests/conftest.py Normal file
View File

@ -0,0 +1,29 @@
import os
from pathlib import Path
import pytest
def pytest_addoption(parser):
parser.addoption(
"--files", action="store_true", default=False, help="run slow tests"
)
def pytest_configure(config):
config.addinivalue_line("markers", "files: mark test as needing image files to run")
def pytest_collection_modifyitems(config, items):
if config.getoption("--files"):
return
skip = pytest.mark.skip(reason="need --files option to run")
for item in items:
if "files" in item.keywords:
item.add_marker(skip)
@pytest.fixture
def test_data_path():
return Path(os.environ["AARE_TEST_DATA"])

View File

@ -0,0 +1,92 @@
import pytest
import numpy as np
from aare import JungfrauDataFile
@pytest.mark.files
def test_jfungfrau_dat_read_number_of_frames(test_data_path):
with JungfrauDataFile(test_data_path / "dat/AldoJF500k_000000.dat") as dat_file:
assert dat_file.total_frames == 24
with JungfrauDataFile(test_data_path / "dat/AldoJF250k_000000.dat") as dat_file:
assert dat_file.total_frames == 53
with JungfrauDataFile(test_data_path / "dat/AldoJF65k_000000.dat") as dat_file:
assert dat_file.total_frames == 113
@pytest.mark.files
def test_jfungfrau_dat_read_number_of_file(test_data_path):
with JungfrauDataFile(test_data_path / "dat/AldoJF500k_000000.dat") as dat_file:
assert dat_file.n_files == 4
with JungfrauDataFile(test_data_path / "dat/AldoJF250k_000000.dat") as dat_file:
assert dat_file.n_files == 7
with JungfrauDataFile(test_data_path / "dat/AldoJF65k_000000.dat") as dat_file:
assert dat_file.n_files == 7
@pytest.mark.files
def test_read_module(test_data_path):
"""
Read all frames from the series of .dat files. Compare to canned data in npz format.
"""
# Read all frames from the .dat file
with JungfrauDataFile(test_data_path / "dat/AldoJF500k_000000.dat") as f:
header, data = f.read()
#Sanity check
n_frames = 24
assert header.size == n_frames
assert data.shape == (n_frames, 512, 1024)
# Read reference data using numpy
with np.load(test_data_path / "dat/AldoJF500k.npz") as f:
ref_header = f["headers"]
ref_data = f["frames"]
# Check that the data is the same
assert np.all(ref_header == header)
assert np.all(ref_data == data)
@pytest.mark.files
def test_read_half_module(test_data_path):
# Read all frames from the .dat file
with JungfrauDataFile(test_data_path / "dat/AldoJF250k_000000.dat") as f:
header, data = f.read()
n_frames = 53
assert header.size == n_frames
assert data.shape == (n_frames, 256, 1024)
# Read reference data using numpy
with np.load(test_data_path / "dat/AldoJF250k.npz") as f:
ref_header = f["headers"]
ref_data = f["frames"]
# Check that the data is the same
assert np.all(ref_header == header)
assert np.all(ref_data == data)
@pytest.mark.files
def test_read_single_chip(test_data_path):
# Read all frames from the .dat file
with JungfrauDataFile(test_data_path / "dat/AldoJF65k_000000.dat") as f:
header, data = f.read()
n_frames = 113
assert header.size == n_frames
assert data.shape == (n_frames, 256, 256)
# Read reference data using numpy
with np.load(test_data_path / "dat/AldoJF65k.npz") as f:
ref_header = f["headers"]
ref_data = f["frames"]
# Check that the data is the same
assert np.all(ref_header == header)
assert np.all(ref_data == data)

View File

@ -11,9 +11,9 @@
using aare::ClusterFile;
TEST_CASE("Read one frame from a a cluster file", "[.integration]") {
TEST_CASE("Read one frame from a a cluster file", "[.files]") {
//We know that the frame has 97 clusters
auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust";
auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust";
REQUIRE(std::filesystem::exists(fpath));
ClusterFile f(fpath);
@ -22,9 +22,9 @@ TEST_CASE("Read one frame from a a cluster file", "[.integration]") {
REQUIRE(clusters.frame_number() == 135);
}
TEST_CASE("Read one frame using ROI", "[.integration]") {
TEST_CASE("Read one frame using ROI", "[.files]") {
//We know that the frame has 97 clusters
auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust";
auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust";
REQUIRE(std::filesystem::exists(fpath));
ClusterFile f(fpath);
@ -50,9 +50,9 @@ TEST_CASE("Read one frame using ROI", "[.integration]") {
}
TEST_CASE("Read clusters from single frame file", "[.integration]") {
TEST_CASE("Read clusters from single frame file", "[.files]") {
auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust";
auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust";
REQUIRE(std::filesystem::exists(fpath));
SECTION("Read fewer clusters than available") {

View File

@ -1,4 +1,5 @@
#include "aare/File.hpp"
#include "aare/JungfrauDataFile.hpp"
#include "aare/NumpyFile.hpp"
#include "aare/RawFile.hpp"
@ -27,6 +28,8 @@ File::File(const std::filesystem::path &fname, const std::string &mode,
else if (fname.extension() == ".npy") {
// file_impl = new NumpyFile(fname, mode, cfg);
file_impl = std::make_unique<NumpyFile>(fname, mode, cfg);
}else if(fname.extension() == ".dat"){
file_impl = std::make_unique<JungfrauDataFile>(fname);
} else {
throw std::runtime_error("Unsupported file type");
}

44
src/FilePtr.cpp Normal file
View File

@ -0,0 +1,44 @@
#include "aare/FilePtr.hpp"
#include <fmt/format.h>
#include <stdexcept>
#include <utility>
namespace aare {
FilePtr::FilePtr(const std::filesystem::path& fname, const std::string& mode = "rb") {
fp_ = fopen(fname.c_str(), mode.c_str());
if (!fp_)
throw std::runtime_error(fmt::format("Could not open: {}", fname.c_str()));
}
FilePtr::FilePtr(FilePtr &&other) { std::swap(fp_, other.fp_); }
FilePtr &FilePtr::operator=(FilePtr &&other) {
std::swap(fp_, other.fp_);
return *this;
}
FILE *FilePtr::get() { return fp_; }
int64_t FilePtr::tell() {
auto pos = ftell(fp_);
if (pos == -1)
throw std::runtime_error(fmt::format("Error getting file position: {}", error_msg()));
return pos;
}
FilePtr::~FilePtr() {
if (fp_)
fclose(fp_); // check?
}
std::string FilePtr::error_msg(){
if (feof(fp_)) {
return "End of file reached";
}
if (ferror(fp_)) {
return fmt::format("Error reading file: {}", std::strerror(errno));
}
return "";
}
} // namespace aare

242
src/JungfrauDataFile.cpp Normal file
View File

@ -0,0 +1,242 @@
#include "aare/JungfrauDataFile.hpp"
#include "aare/algorithm.hpp"
#include "aare/defs.hpp"
#include <cerrno>
#include <fmt/format.h>
namespace aare {
JungfrauDataFile::JungfrauDataFile(const std::filesystem::path &fname) {
if (!std::filesystem::exists(fname)) {
throw std::runtime_error(LOCATION +
"File does not exist: " + fname.string());
}
find_frame_size(fname);
parse_fname(fname);
scan_files();
open_file(m_current_file_index);
}
// FileInterface
Frame JungfrauDataFile::read_frame(){
Frame f(rows(), cols(), Dtype::UINT16);
read_into(reinterpret_cast<std::byte *>(f.data()), nullptr);
return f;
}
Frame JungfrauDataFile::read_frame(size_t frame_number){
seek(frame_number);
Frame f(rows(), cols(), Dtype::UINT16);
read_into(reinterpret_cast<std::byte *>(f.data()), nullptr);
return f;
}
std::vector<Frame> JungfrauDataFile::read_n(size_t n_frames) {
std::vector<Frame> frames;
throw std::runtime_error(LOCATION +
"Not implemented yet");
return frames;
}
void JungfrauDataFile::read_into(std::byte *image_buf) {
read_into(image_buf, nullptr);
}
void JungfrauDataFile::read_into(std::byte *image_buf, size_t n_frames) {
read_into(image_buf, n_frames, nullptr);
}
size_t JungfrauDataFile::frame_number(size_t frame_index) {
seek(frame_index);
return read_header().framenum;
}
DetectorType JungfrauDataFile::detector_type() const { return DetectorType::Jungfrau; }
std::string JungfrauDataFile::base_name() const { return m_base_name; }
size_t JungfrauDataFile::bytes_per_frame() { return m_bytes_per_frame; }
size_t JungfrauDataFile::pixels_per_frame() { return m_rows * m_cols; }
size_t JungfrauDataFile::bytes_per_pixel() const { return sizeof(pixel_type); }
size_t JungfrauDataFile::bitdepth() const {
return bytes_per_pixel() * bits_per_byte;
}
void JungfrauDataFile::seek(size_t frame_index) {
if (frame_index >= m_total_frames) {
throw std::runtime_error(LOCATION + "Frame index out of range: " +
std::to_string(frame_index));
}
m_current_frame_index = frame_index;
auto file_index = first_larger(m_last_frame_in_file, frame_index);
if (file_index != m_current_file_index)
open_file(file_index);
auto frame_offset = (file_index)
? frame_index - m_last_frame_in_file[file_index - 1]
: frame_index;
auto byte_offset = frame_offset * (m_bytes_per_frame + header_size);
m_fp.seek(byte_offset);
};
size_t JungfrauDataFile::tell() { return m_current_frame_index; }
size_t JungfrauDataFile::total_frames() const { return m_total_frames; }
size_t JungfrauDataFile::rows() const { return m_rows; }
size_t JungfrauDataFile::cols() const { return m_cols; }
size_t JungfrauDataFile::n_files() const { return m_last_frame_in_file.size(); }
void JungfrauDataFile::find_frame_size(const std::filesystem::path &fname) {
static constexpr size_t module_data_size =
header_size + sizeof(pixel_type) * 512 * 1024;
static constexpr size_t half_data_size =
header_size + sizeof(pixel_type) * 256 * 1024;
static constexpr size_t chip_data_size =
header_size + sizeof(pixel_type) * 256 * 256;
auto file_size = std::filesystem::file_size(fname);
if (file_size == 0) {
throw std::runtime_error(LOCATION +
"Cannot guess frame size: file is empty");
}
if (file_size % module_data_size == 0) {
m_rows = 512;
m_cols = 1024;
m_bytes_per_frame = module_data_size - header_size;
} else if (file_size % half_data_size == 0) {
m_rows = 256;
m_cols = 1024;
m_bytes_per_frame = half_data_size - header_size;
} else if (file_size % chip_data_size == 0) {
m_rows = 256;
m_cols = 256;
m_bytes_per_frame = chip_data_size - header_size;
} else {
throw std::runtime_error(LOCATION +
"Cannot find frame size: file size is not a "
"multiple of any known frame size");
}
}
void JungfrauDataFile::parse_fname(const std::filesystem::path &fname) {
m_path = fname.parent_path();
m_base_name = fname.stem();
// find file index, then remove if from the base name
if (auto pos = m_base_name.find_last_of('_'); pos != std::string::npos) {
m_offset = std::stoul(m_base_name.substr(pos + 1));
m_base_name.erase(pos);
}
}
void JungfrauDataFile::scan_files() {
// find how many files we have and the number of frames in each file
m_last_frame_in_file.clear();
size_t file_index = m_offset;
while (std::filesystem::exists(fpath(file_index))) {
auto n_frames = std::filesystem::file_size(fpath(file_index)) /
(m_bytes_per_frame + header_size);
m_last_frame_in_file.push_back(n_frames);
++file_index;
}
// find where we need to open the next file and total number of frames
m_last_frame_in_file = cumsum(m_last_frame_in_file);
m_total_frames = m_last_frame_in_file.back();
}
void JungfrauDataFile::read_into(std::byte *image_buf,
JungfrauDataHeader *header) {
// read header if not passed nullptr
if (header) {
if (auto rc = fread(header, sizeof(JungfrauDataHeader), 1, m_fp.get());
rc != 1) {
throw std::runtime_error(
LOCATION +
"Could not read header from file:" + m_fp.error_msg());
}
} else {
m_fp.seek(header_size, SEEK_CUR);
}
// read data
if (auto rc = fread(image_buf, 1, m_bytes_per_frame, m_fp.get());
rc != m_bytes_per_frame) {
throw std::runtime_error(LOCATION + "Could not read image from file" +
m_fp.error_msg());
}
// prepare for next read
// if we are at the end of the file, open the next file
++m_current_frame_index;
if (m_current_frame_index >= m_last_frame_in_file[m_current_file_index] &&
(m_current_frame_index < m_total_frames)) {
++m_current_file_index;
open_file(m_current_file_index);
}
}
void JungfrauDataFile::read_into(std::byte *image_buf, size_t n_frames,
JungfrauDataHeader *header) {
if (header) {
for (size_t i = 0; i < n_frames; ++i)
read_into(image_buf + i * m_bytes_per_frame, header + i);
}else{
for (size_t i = 0; i < n_frames; ++i)
read_into(image_buf + i * m_bytes_per_frame, nullptr);
}
}
void JungfrauDataFile::read_into(NDArray<uint16_t>* image, JungfrauDataHeader* header) {
if(!(rows() == image->shape(0) && cols() == image->shape(1))){
throw std::runtime_error(LOCATION +
"Image shape does not match file size: " + std::to_string(rows()) + "x" + std::to_string(cols()));
}
read_into(reinterpret_cast<std::byte *>(image->data()), header);
}
NDArray<uint16_t> JungfrauDataFile::read_frame(JungfrauDataHeader* header) {
Shape<2> shape{rows(), cols()};
NDArray<uint16_t> image(shape);
read_into(reinterpret_cast<std::byte *>(image.data()),
header);
return image;
}
JungfrauDataHeader JungfrauDataFile::read_header() {
JungfrauDataHeader header;
if (auto rc = fread(&header, 1, sizeof(header), m_fp.get());
rc != sizeof(header)) {
throw std::runtime_error(LOCATION + "Could not read header from file" +
m_fp.error_msg());
}
m_fp.seek(-header_size, SEEK_CUR);
return header;
}
void JungfrauDataFile::open_file(size_t file_index) {
// fmt::print(stderr, "Opening file: {}\n",
// fpath(file_index+m_offset).string());
m_fp = FilePtr(fpath(file_index + m_offset), "rb");
m_current_file_index = file_index;
}
std::filesystem::path JungfrauDataFile::fpath(size_t file_index) const {
auto fname = fmt::format("{}_{:0{}}.dat", m_base_name, file_index,
n_digits_in_file_index);
return m_path / fname;
}
} // namespace aare

View File

@ -0,0 +1,94 @@
#include "aare/JungfrauDataFile.hpp"
#include <catch2/catch_test_macros.hpp>
#include "test_config.hpp"
using aare::JungfrauDataFile;
using aare::JungfrauDataHeader;
TEST_CASE("Open a Jungfrau data file", "[.files]") {
//we know we have 4 files with 7, 7, 7, and 3 frames
//firs frame number if 1 and the bunch id is frame_number**2
//so we can check the header
auto fpath = test_data_path() / "dat" / "AldoJF500k_000000.dat";
REQUIRE(std::filesystem::exists(fpath));
JungfrauDataFile f(fpath);
REQUIRE(f.rows() == 512);
REQUIRE(f.cols() == 1024);
REQUIRE(f.bytes_per_frame() == 1048576);
REQUIRE(f.pixels_per_frame() == 524288);
REQUIRE(f.bytes_per_pixel() == 2);
REQUIRE(f.bitdepth() == 16);
REQUIRE(f.base_name() == "AldoJF500k");
REQUIRE(f.n_files() == 4);
REQUIRE(f.tell() == 0);
REQUIRE(f.total_frames() == 24);
REQUIRE(f.current_file() == fpath);
//Check that the frame number and buch id is read correctly
for (size_t i = 0; i < 24; ++i) {
JungfrauDataHeader header;
auto image = f.read_frame(&header);
REQUIRE(header.framenum == i + 1);
REQUIRE(header.bunchid == (i + 1) * (i + 1));
REQUIRE(image.shape(0) == 512);
REQUIRE(image.shape(1) == 1024);
}
}
TEST_CASE("Seek in a JungfrauDataFile", "[.files]"){
auto fpath = test_data_path() / "dat" / "AldoJF65k_000000.dat";
REQUIRE(std::filesystem::exists(fpath));
JungfrauDataFile f(fpath);
//The file should have 113 frames
f.seek(19);
REQUIRE(f.tell() == 19);
auto h = f.read_header();
REQUIRE(h.framenum == 19+1);
//Reading again does not change the file pointer
auto h2 = f.read_header();
REQUIRE(h2.framenum == 19+1);
f.seek(59);
REQUIRE(f.tell() == 59);
auto h3 = f.read_header();
REQUIRE(h3.framenum == 59+1);
JungfrauDataHeader h4;
auto image = f.read_frame(&h4);
REQUIRE(h4.framenum == 59+1);
//now we should be on the next frame
REQUIRE(f.tell() == 60);
REQUIRE(f.read_header().framenum == 60+1);
REQUIRE_THROWS(f.seek(86356)); //out of range
}
TEST_CASE("Open a Jungfrau data file with non zero file index", "[.files]"){
auto fpath = test_data_path() / "dat" / "AldoJF65k_000003.dat";
REQUIRE(std::filesystem::exists(fpath));
JungfrauDataFile f(fpath);
//18 files per data file, opening the 3rd file we ignore the first 3
REQUIRE(f.total_frames() == 113-18*3);
REQUIRE(f.tell() == 0);
//Frame numbers start at 1 in the first file
REQUIRE(f.read_header().framenum == 18*3+1);
// moving relative to the third file
f.seek(5);
REQUIRE(f.read_header().framenum == 18*3+1+5);
// ignoring the first 3 files
REQUIRE(f.n_files() == 4);
REQUIRE(f.current_file().stem() == "AldoJF65k_000003");
}

View File

@ -49,6 +49,16 @@ TEST_CASE("nearest index works with std::array", "[algorithm]"){
REQUIRE(aare::nearest_index(arr, -10.0) == 0);
}
TEST_CASE("nearest index when there is no different uses the first element", "[algorithm]"){
std::vector<int> vec = {5, 5, 5, 5, 5};
REQUIRE(aare::nearest_index(vec, 5) == 0);
}
TEST_CASE("nearest index when there is no different uses the first element also when all smaller", "[algorithm]"){
std::vector<int> vec = {5, 5, 5, 5, 5};
REQUIRE(aare::nearest_index(vec, 10) == 0);
}
TEST_CASE("last smaller", "[algorithm]"){
aare::NDArray<double, 1> arr({5});
@ -68,6 +78,82 @@ TEST_CASE("returns last bin strictly smaller", "[algorithm]"){
arr[i] = i;
}
// arr 0, 1, 2, 3, 4
REQUIRE(aare::last_smaller(arr, 2.0) == 2);
REQUIRE(aare::last_smaller(arr, 2.0) == 1);
}
TEST_CASE("last_smaller with all elements smaller returns last element", "[algorithm]"){
aare::NDArray<double, 1> arr({5});
for (size_t i = 0; i < arr.size(); i++) {
arr[i] = i;
}
// arr 0, 1, 2, 3, 4
REQUIRE(aare::last_smaller(arr, 50.) == 4);
}
TEST_CASE("last_smaller with all elements bigger returns first element", "[algorithm]"){
aare::NDArray<double, 1> arr({5});
for (size_t i = 0; i < arr.size(); i++) {
arr[i] = i;
}
// arr 0, 1, 2, 3, 4
REQUIRE(aare::last_smaller(arr, -50.) == 0);
}
TEST_CASE("last smaller with all elements equal returns the first element", "[algorithm]"){
std::vector<int> vec = {5,5,5,5,5,5,5};
REQUIRE(aare::last_smaller(vec, 5) == 0);
}
TEST_CASE("first_lager with vector", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
REQUIRE(aare::first_larger(vec, 2.5) == 3);
}
TEST_CASE("first_lager with all elements smaller returns last element", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
REQUIRE(aare::first_larger(vec, 50.) == 4);
}
TEST_CASE("first_lager with all elements bigger returns first element", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
REQUIRE(aare::first_larger(vec, -50.) == 0);
}
TEST_CASE("first_lager with all elements the same as the check returns last", "[algorithm]"){
std::vector<int> vec = {14, 14, 14, 14, 14};
REQUIRE(aare::first_larger(vec, 14) == 4);
}
TEST_CASE("first larger with the same element", "[algorithm]"){
std::vector<int> vec = {7,8,9,10,11};
REQUIRE(aare::first_larger(vec, 9) == 3);
}
TEST_CASE("cumsum works", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == vec.size());
REQUIRE(result[0] == 0);
REQUIRE(result[1] == 1);
REQUIRE(result[2] == 3);
REQUIRE(result[3] == 6);
REQUIRE(result[4] == 10);
}
TEST_CASE("cumsum works with empty vector", "[algorithm]"){
std::vector<double> vec = {};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == 0);
}
TEST_CASE("cumsum works with negative numbers", "[algorithm]"){
std::vector<double> vec = {0, -1, -2, -3, -4};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == vec.size());
REQUIRE(result[0] == 0);
REQUIRE(result[1] == -1);
REQUIRE(result[2] == -3);
REQUIRE(result[3] == -6);
REQUIRE(result[4] == -10);
}
}