From f16273a566a6cfa0a0ce906f0a0a8462a615ff9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Fr=C3=B6jdh?= Date: Tue, 8 Apr 2025 15:31:04 +0200 Subject: [PATCH] Adding support for Jungfrau .dat files (#152) closes #150 **Not addressed in this PR:** - pixels_per_frame, bytes_per_frame and tell should be made cost in FileInterface --- CMakeLists.txt | 5 + docs/src/JungfrauDataFile.rst | 25 +++ docs/src/Tests.rst | 47 +++++ docs/src/algorithm.rst | 5 + docs/src/index.rst | 12 +- docs/src/pyJungfrauDataFile.rst | 10 + include/aare/FilePtr.hpp | 30 +++ include/aare/JungfrauDataFile.hpp | 112 +++++++++++ include/aare/algorithm.hpp | 62 +++++- pyproject.toml | 7 +- python/aare/__init__.py | 2 +- python/src/jungfrau_data_file.hpp | 116 ++++++++++++ python/src/module.cpp | 3 + python/tests/conftest.py | 29 +++ python/tests/test_jungfrau_dat_files.py | 92 +++++++++ src/ClusterFile.test.cpp | 12 +- src/File.cpp | 3 + src/FilePtr.cpp | 44 +++++ src/JungfrauDataFile.cpp | 242 ++++++++++++++++++++++++ src/JungfrauDataFile.test.cpp | 94 +++++++++ src/algorithm.test.cpp | 90 ++++++++- 21 files changed, 1025 insertions(+), 17 deletions(-) create mode 100644 docs/src/JungfrauDataFile.rst create mode 100644 docs/src/Tests.rst create mode 100644 docs/src/algorithm.rst create mode 100644 docs/src/pyJungfrauDataFile.rst create mode 100644 include/aare/FilePtr.hpp create mode 100644 include/aare/JungfrauDataFile.hpp create mode 100644 python/src/jungfrau_data_file.hpp create mode 100644 python/tests/conftest.py create mode 100644 python/tests/test_jungfrau_dat_files.py create mode 100644 src/FilePtr.cpp create mode 100644 src/JungfrauDataFile.cpp create mode 100644 src/JungfrauDataFile.test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 804b2f6..6db9314 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -342,8 +342,10 @@ set(PUBLICHEADERS include/aare/File.hpp include/aare/Fit.hpp include/aare/FileInterface.hpp + include/aare/FilePtr.hpp include/aare/Frame.hpp include/aare/geo_helpers.hpp + include/aare/JungfrauDataFile.hpp include/aare/NDArray.hpp include/aare/NDView.hpp include/aare/NumpyFile.hpp @@ -367,8 +369,10 @@ set(SourceFiles ${CMAKE_CURRENT_SOURCE_DIR}/src/decode.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Frame.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/File.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/FilePtr.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Fit.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/geo_helpers.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/JungfrauDataFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Interpolator.cpp @@ -423,6 +427,7 @@ if(AARE_TESTS) ${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterVector.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFile.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Pedestal.test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/JungfrauDataFile.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/RawFile.test.cpp diff --git a/docs/src/JungfrauDataFile.rst b/docs/src/JungfrauDataFile.rst new file mode 100644 index 0000000..78d473f --- /dev/null +++ b/docs/src/JungfrauDataFile.rst @@ -0,0 +1,25 @@ +JungfrauDataFile +================== + +JungfrauDataFile is a class to read the .dat files that are produced by Aldo's receiver. +It is mostly used for calibration. + +The structure of the file is: + +* JungfrauDataHeader +* Binary data (256x256, 256x1024 or 512x1024) +* JungfrauDataHeader +* ... + +There is no metadata indicating number of frames or the size of the image, but this +will be infered by this reader. + +.. doxygenstruct:: aare::JungfrauDataHeader + :members: + :undoc-members: + :private-members: + +.. doxygenclass:: aare::JungfrauDataFile + :members: + :undoc-members: + :private-members: \ No newline at end of file diff --git a/docs/src/Tests.rst b/docs/src/Tests.rst new file mode 100644 index 0000000..da98001 --- /dev/null +++ b/docs/src/Tests.rst @@ -0,0 +1,47 @@ +**************** +Tests +**************** + +We test the code both from the C++ and Python API. By default only tests that does not require image data is run. + +C++ +~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + mkdir build + cd build + cmake .. -DAARE_TESTS=ON + make -j 4 + + export AARE_TEST_DATA=/path/to/test/data + ./run_test [.files] #or using ctest, [.files] is the option to include tests needing data + + + +Python +~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + #From the root dir of the library + python -m pytest python/tests --files # passing --files will run the tests needing data + + + +Getting the test data +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. attention :: + + The tests needing the test data are not run by default. To make the data available, you need to set the environment variable + AARE_TEST_DATA to the path of the test data directory. Then pass either [.files] for the C++ tests or --files for Python + +The image files needed for the test are large and are not included in the repository. They are stored +using GIT LFS in a separate repository. To get the test data, you need to clone the repository. +To do this, you need to have GIT LFS installed. You can find instructions on how to install it here: https://git-lfs.github.com/ +Once you have GIT LFS installed, you can clone the repository like any normal repo using: + +.. code-block:: bash + + git clone https://gitea.psi.ch/detectors/aare-test-data.git diff --git a/docs/src/algorithm.rst b/docs/src/algorithm.rst new file mode 100644 index 0000000..9b11857 --- /dev/null +++ b/docs/src/algorithm.rst @@ -0,0 +1,5 @@ +algorithm +============= + +.. doxygenfile:: algorithm.hpp + diff --git a/docs/src/index.rst b/docs/src/index.rst index 905caea..af5e99a 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -20,9 +20,6 @@ AARE Requirements Consume - - - .. toctree:: :caption: Python API :maxdepth: 1 @@ -31,6 +28,7 @@ AARE pyCtbRawFile pyClusterFile pyClusterVector + pyJungfrauDataFile pyRawFile pyRawMasterFile pyVarClusterFinder @@ -42,6 +40,7 @@ AARE :caption: C++ API :maxdepth: 1 + algorithm NDArray NDView Frame @@ -51,6 +50,7 @@ AARE ClusterFinderMT ClusterFile ClusterVector + JungfrauDataFile Pedestal RawFile RawSubFile @@ -59,4 +59,8 @@ AARE - +.. toctree:: + :caption: Developer + :maxdepth: 3 + + Tests \ No newline at end of file diff --git a/docs/src/pyJungfrauDataFile.rst b/docs/src/pyJungfrauDataFile.rst new file mode 100644 index 0000000..2173adf --- /dev/null +++ b/docs/src/pyJungfrauDataFile.rst @@ -0,0 +1,10 @@ +JungfrauDataFile +=================== + +.. py:currentmodule:: aare + +.. autoclass:: JungfrauDataFile + :members: + :undoc-members: + :show-inheritance: + :inherited-members: \ No newline at end of file diff --git a/include/aare/FilePtr.hpp b/include/aare/FilePtr.hpp new file mode 100644 index 0000000..4c88ecb --- /dev/null +++ b/include/aare/FilePtr.hpp @@ -0,0 +1,30 @@ +#pragma once +#include +#include + +namespace aare { + +/** + * \brief RAII wrapper for FILE pointer + */ +class FilePtr { + FILE *fp_{nullptr}; + + public: + FilePtr() = default; + FilePtr(const std::filesystem::path& fname, const std::string& mode); + FilePtr(const FilePtr &) = delete; // we don't want a copy + FilePtr &operator=(const FilePtr &) = delete; // since we handle a resource + FilePtr(FilePtr &&other); + FilePtr &operator=(FilePtr &&other); + FILE *get(); + int64_t tell(); + void seek(int64_t offset, int whence = SEEK_SET) { + if (fseek(fp_, offset, whence) != 0) + throw std::runtime_error("Error seeking in file"); + } + std::string error_msg(); + ~FilePtr(); +}; + +} // namespace aare \ No newline at end of file diff --git a/include/aare/JungfrauDataFile.hpp b/include/aare/JungfrauDataFile.hpp new file mode 100644 index 0000000..bba5403 --- /dev/null +++ b/include/aare/JungfrauDataFile.hpp @@ -0,0 +1,112 @@ +#pragma once +#include +#include +#include + +#include "aare/FilePtr.hpp" +#include "aare/defs.hpp" +#include "aare/NDArray.hpp" +#include "aare/FileInterface.hpp" +namespace aare { + + +struct JungfrauDataHeader{ + uint64_t framenum; + uint64_t bunchid; +}; + +class JungfrauDataFile : public FileInterface { + + size_t m_rows{}; //!< number of rows in the image, from find_frame_size(); + size_t m_cols{}; //!< number of columns in the image, from find_frame_size(); + size_t m_bytes_per_frame{}; //!< number of bytes per frame excluding header + size_t m_total_frames{}; //!< total number of frames in the series of files + size_t m_offset{}; //!< file index of the first file, allow starting at non zero file + size_t m_current_file_index{}; //!< The index of the open file + size_t m_current_frame_index{}; //!< The index of the current frame (with reference to all files) + + std::vector m_last_frame_in_file{}; //!< Used for seeking to the correct file + std::filesystem::path m_path; //!< path to the files + std::string m_base_name; //!< base name used for formatting file names + + FilePtr m_fp; //!< RAII wrapper for a FILE* + + + using pixel_type = uint16_t; + static constexpr size_t header_size = sizeof(JungfrauDataHeader); + static constexpr size_t n_digits_in_file_index = 6; //!< to format file names + + public: + JungfrauDataFile(const std::filesystem::path &fname); + + std::string base_name() const; //!< get the base name of the file (without path and extension) + size_t bytes_per_frame() override; + size_t pixels_per_frame() override; + size_t bytes_per_pixel() const; + size_t bitdepth() const override; + void seek(size_t frame_index) override; //!< seek to the given frame index (note not byte offset) + size_t tell() override; //!< get the frame index of the file pointer + size_t total_frames() const override; + size_t rows() const override; + size_t cols() const override; + size_t n_files() const; //!< get the number of files in the series. + + // Extra functions needed for FileInterface + Frame read_frame() override; + Frame read_frame(size_t frame_number) override; + std::vector read_n(size_t n_frames=0) override; + void read_into(std::byte *image_buf) override; + void read_into(std::byte *image_buf, size_t n_frames) override; + size_t frame_number(size_t frame_index) override; + DetectorType detector_type() const override; + + /** + * @brief Read a single frame from the file into the given buffer. + * @param image_buf buffer to read the frame into. (Note the caller is responsible for allocating the buffer) + * @param header pointer to a JungfrauDataHeader or nullptr to skip header) + */ + void read_into(std::byte *image_buf, JungfrauDataHeader *header = nullptr); + + /** + * @brief Read a multiple frames from the file into the given buffer. + * @param image_buf buffer to read the frame into. (Note the caller is responsible for allocating the buffer) + * @param n_frames number of frames to read + * @param header pointer to a JungfrauDataHeader or nullptr to skip header) + */ + void read_into(std::byte *image_buf, size_t n_frames, JungfrauDataHeader *header = nullptr); + + /** + * @brief Read a single frame from the file into the given NDArray + * @param image NDArray to read the frame into. + */ + void read_into(NDArray* image, JungfrauDataHeader* header = nullptr); + + /** + * @brief Read a single frame from the file. Allocated a new NDArray for the output data + * @param header pointer to a JungfrauDataHeader or nullptr to skip header) + * @return NDArray with the image data + */ + NDArray read_frame(JungfrauDataHeader* header = nullptr); + + JungfrauDataHeader read_header(); + std::filesystem::path current_file() const { return fpath(m_current_file_index+m_offset); } + + + private: + /** + * @brief Find the size of the frame in the file. (256x256, 256x1024, 512x1024) + * @param fname path to the file + * @throws std::runtime_error if the file is empty or the size cannot be determined + */ + void find_frame_size(const std::filesystem::path &fname); + + + void parse_fname(const std::filesystem::path &fname); + void scan_files(); + void open_file(size_t file_index); + std::filesystem::path fpath(size_t frame_index) const; + + + }; + +} // namespace aare \ No newline at end of file diff --git a/include/aare/algorithm.hpp b/include/aare/algorithm.hpp index 5d6dc57..fc7d51f 100644 --- a/include/aare/algorithm.hpp +++ b/include/aare/algorithm.hpp @@ -7,13 +7,20 @@ namespace aare { /** - * @brief Find the index of the last element smaller than val - * assume a sorted array + * @brief Index of the last element that is smaller than val. + * Requires a sorted array. Uses >= for ordering. If all elements + * are smaller it returns the last element and if all elements are + * larger it returns the first element. + * @param first iterator to the first element + * @param last iterator to the last element + * @param val value to compare + * @return index of the last element that is smaller than val + * */ template size_t last_smaller(const T* first, const T* last, T val) { for (auto iter = first+1; iter != last; ++iter) { - if (*iter > val) { + if (*iter >= val) { return std::distance(first, iter-1); } } @@ -25,7 +32,49 @@ size_t last_smaller(const NDArray& arr, T val) { return last_smaller(arr.begin(), arr.end(), val); } +template +size_t last_smaller(const std::vector& vec, T val) { + return last_smaller(vec.data(), vec.data()+vec.size(), val); +} +/** + * @brief Index of the first element that is larger than val. + * Requires a sorted array. Uses > for ordering. If all elements + * are larger it returns the first element and if all elements are + * smaller it returns the last element. + * @param first iterator to the first element + * @param last iterator to the last element + * @param val value to compare + * @return index of the first element that is larger than val + */ +template +size_t first_larger(const T* first, const T* last, T val) { + for (auto iter = first; iter != last; ++iter) { + if (*iter > val) { + return std::distance(first, iter); + } + } + return std::distance(first, last-1); +} + +template +size_t first_larger(const NDArray& arr, T val) { + return first_larger(arr.begin(), arr.end(), val); +} + +template +size_t first_larger(const std::vector& vec, T val) { + return first_larger(vec.data(), vec.data()+vec.size(), val); +} + +/** + * @brief Index of the nearest element to val. + * Requires a sorted array. If there is no difference it takes the first element. + * @param first iterator to the first element + * @param last iterator to the last element + * @param val value to compare + * @return index of the nearest element + */ template size_t nearest_index(const T* first, const T* last, T val) { auto iter = std::min_element(first, last, @@ -50,6 +99,13 @@ size_t nearest_index(const std::array& arr, T val) { return nearest_index(arr.data(), arr.data()+arr.size(), val); } +template +std::vector cumsum(const std::vector& vec) { + std::vector result(vec.size()); + std::partial_sum(vec.begin(), vec.end(), result.begin()); + return result; +} + } // namespace aare \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 60128c9..470d158 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,4 +15,9 @@ cmake.verbose = true [tool.scikit-build.cmake.define] AARE_PYTHON_BINDINGS = "ON" AARE_SYSTEM_LIBRARIES = "ON" -AARE_INSTALL_PYTHONEXT = "ON" \ No newline at end of file +AARE_INSTALL_PYTHONEXT = "ON" + +[tool.pytest.ini_options] +markers = [ + "files: marks tests that need additional data (deselect with '-m \"not files\"')", +] \ No newline at end of file diff --git a/python/aare/__init__.py b/python/aare/__init__.py index 058d7cf..606f958 100644 --- a/python/aare/__init__.py +++ b/python/aare/__init__.py @@ -2,7 +2,7 @@ from . import _aare -from ._aare import File, RawMasterFile, RawSubFile +from ._aare import File, RawMasterFile, RawSubFile, JungfrauDataFile from ._aare import Pedestal_d, Pedestal_f, ClusterFinder, VarClusterFinder from ._aare import DetectorType from ._aare import ClusterFile diff --git a/python/src/jungfrau_data_file.hpp b/python/src/jungfrau_data_file.hpp new file mode 100644 index 0000000..942f6a6 --- /dev/null +++ b/python/src/jungfrau_data_file.hpp @@ -0,0 +1,116 @@ + +#include "aare/JungfrauDataFile.hpp" +#include "aare/defs.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace py = pybind11; +using namespace ::aare; + +// Disable warnings for unused parameters, as we ignore some +// in the __exit__ method +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + +auto read_dat_frame(JungfrauDataFile &self) { + py::array_t header(1); + py::array_t image({ + self.rows(), + self.cols() + }); + + self.read_into(reinterpret_cast(image.mutable_data()), + header.mutable_data()); + + return py::make_tuple(header, image); +} + +auto read_n_dat_frames(JungfrauDataFile &self, size_t n_frames) { + // adjust for actual frames left in the file + n_frames = std::min(n_frames, self.total_frames() - self.tell()); + if (n_frames == 0) { + throw std::runtime_error("No frames left in file"); + } + + py::array_t header(n_frames); + py::array_t image({ + n_frames, self.rows(), + self.cols()}); + + self.read_into(reinterpret_cast(image.mutable_data()), + n_frames, header.mutable_data()); + + return py::make_tuple(header, image); +} + +void define_jungfrau_data_file_io_bindings(py::module &m) { + // Make the JungfrauDataHeader usable from numpy + PYBIND11_NUMPY_DTYPE(JungfrauDataHeader, framenum, bunchid); + + py::class_(m, "JungfrauDataFile") + .def(py::init()) + .def("seek", &JungfrauDataFile::seek, + R"( + Seek to the given frame index. + )") + .def("tell", &JungfrauDataFile::tell, + R"( + Get the current frame index. + )") + .def_property_readonly("rows", &JungfrauDataFile::rows) + .def_property_readonly("cols", &JungfrauDataFile::cols) + .def_property_readonly("base_name", &JungfrauDataFile::base_name) + .def_property_readonly("bytes_per_frame", + &JungfrauDataFile::bytes_per_frame) + .def_property_readonly("pixels_per_frame", + &JungfrauDataFile::pixels_per_frame) + .def_property_readonly("bytes_per_pixel", + &JungfrauDataFile::bytes_per_pixel) + .def_property_readonly("bitdepth", &JungfrauDataFile::bitdepth) + .def_property_readonly("current_file", &JungfrauDataFile::current_file) + .def_property_readonly("total_frames", &JungfrauDataFile::total_frames) + .def_property_readonly("n_files", &JungfrauDataFile::n_files) + .def("read_frame", &read_dat_frame, + R"( + Read a single frame from the file. + )") + .def("read_n", &read_n_dat_frames, + R"( + Read maximum n_frames frames from the file. + )") + .def( + "read", + [](JungfrauDataFile &self) { + self.seek(0); + auto n_frames = self.total_frames(); + return read_n_dat_frames(self, n_frames); + }, + R"( + Read all frames from the file. Seeks to the beginning before reading. + )") + .def("__enter__", [](JungfrauDataFile &self) { return &self; }) + .def("__exit__", + [](JungfrauDataFile &self, + const std::optional &exc_type, + const std::optional &exc_value, + const std::optional &traceback) { + // self.close(); + }) + .def("__iter__", [](JungfrauDataFile &self) { return &self; }) + .def("__next__", [](JungfrauDataFile &self) { + try { + return read_dat_frame(self); + } catch (std::runtime_error &e) { + throw py::stop_iteration(); + } + }); +} + +#pragma GCC diagnostic pop \ No newline at end of file diff --git a/python/src/module.cpp b/python/src/module.cpp index 43f48ba..7a17e78 100644 --- a/python/src/module.cpp +++ b/python/src/module.cpp @@ -11,6 +11,8 @@ #include "fit.hpp" #include "interpolation.hpp" +#include "jungfrau_data_file.hpp" + //Pybind stuff #include #include @@ -33,5 +35,6 @@ PYBIND11_MODULE(_aare, m) { define_cluster_file_sink_bindings(m); define_fit_bindings(m); define_interpolation_bindings(m); + define_jungfrau_data_file_io_bindings(m); } \ No newline at end of file diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 0000000..5badf13 --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,29 @@ +import os +from pathlib import Path +import pytest + + + +def pytest_addoption(parser): + parser.addoption( + "--files", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_configure(config): + config.addinivalue_line("markers", "files: mark test as needing image files to run") + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--files"): + return + skip = pytest.mark.skip(reason="need --files option to run") + for item in items: + if "files" in item.keywords: + item.add_marker(skip) + + +@pytest.fixture +def test_data_path(): + return Path(os.environ["AARE_TEST_DATA"]) + diff --git a/python/tests/test_jungfrau_dat_files.py b/python/tests/test_jungfrau_dat_files.py new file mode 100644 index 0000000..5d3fdf8 --- /dev/null +++ b/python/tests/test_jungfrau_dat_files.py @@ -0,0 +1,92 @@ +import pytest +import numpy as np +from aare import JungfrauDataFile + +@pytest.mark.files +def test_jfungfrau_dat_read_number_of_frames(test_data_path): + with JungfrauDataFile(test_data_path / "dat/AldoJF500k_000000.dat") as dat_file: + assert dat_file.total_frames == 24 + + with JungfrauDataFile(test_data_path / "dat/AldoJF250k_000000.dat") as dat_file: + assert dat_file.total_frames == 53 + + with JungfrauDataFile(test_data_path / "dat/AldoJF65k_000000.dat") as dat_file: + assert dat_file.total_frames == 113 + + +@pytest.mark.files +def test_jfungfrau_dat_read_number_of_file(test_data_path): + with JungfrauDataFile(test_data_path / "dat/AldoJF500k_000000.dat") as dat_file: + assert dat_file.n_files == 4 + + with JungfrauDataFile(test_data_path / "dat/AldoJF250k_000000.dat") as dat_file: + assert dat_file.n_files == 7 + + with JungfrauDataFile(test_data_path / "dat/AldoJF65k_000000.dat") as dat_file: + assert dat_file.n_files == 7 + + +@pytest.mark.files +def test_read_module(test_data_path): + """ + Read all frames from the series of .dat files. Compare to canned data in npz format. + """ + + # Read all frames from the .dat file + with JungfrauDataFile(test_data_path / "dat/AldoJF500k_000000.dat") as f: + header, data = f.read() + + #Sanity check + n_frames = 24 + assert header.size == n_frames + assert data.shape == (n_frames, 512, 1024) + + # Read reference data using numpy + with np.load(test_data_path / "dat/AldoJF500k.npz") as f: + ref_header = f["headers"] + ref_data = f["frames"] + + # Check that the data is the same + assert np.all(ref_header == header) + assert np.all(ref_data == data) + +@pytest.mark.files +def test_read_half_module(test_data_path): + + # Read all frames from the .dat file + with JungfrauDataFile(test_data_path / "dat/AldoJF250k_000000.dat") as f: + header, data = f.read() + + n_frames = 53 + assert header.size == n_frames + assert data.shape == (n_frames, 256, 1024) + + # Read reference data using numpy + with np.load(test_data_path / "dat/AldoJF250k.npz") as f: + ref_header = f["headers"] + ref_data = f["frames"] + + # Check that the data is the same + assert np.all(ref_header == header) + assert np.all(ref_data == data) + + +@pytest.mark.files +def test_read_single_chip(test_data_path): + + # Read all frames from the .dat file + with JungfrauDataFile(test_data_path / "dat/AldoJF65k_000000.dat") as f: + header, data = f.read() + + n_frames = 113 + assert header.size == n_frames + assert data.shape == (n_frames, 256, 256) + + # Read reference data using numpy + with np.load(test_data_path / "dat/AldoJF65k.npz") as f: + ref_header = f["headers"] + ref_data = f["frames"] + + # Check that the data is the same + assert np.all(ref_header == header) + assert np.all(ref_data == data) \ No newline at end of file diff --git a/src/ClusterFile.test.cpp b/src/ClusterFile.test.cpp index a0eed04..a7fc044 100644 --- a/src/ClusterFile.test.cpp +++ b/src/ClusterFile.test.cpp @@ -11,9 +11,9 @@ using aare::ClusterFile; -TEST_CASE("Read one frame from a a cluster file", "[.integration]") { +TEST_CASE("Read one frame from a a cluster file", "[.files]") { //We know that the frame has 97 clusters - auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust"; + auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust"; REQUIRE(std::filesystem::exists(fpath)); ClusterFile f(fpath); @@ -22,9 +22,9 @@ TEST_CASE("Read one frame from a a cluster file", "[.integration]") { REQUIRE(clusters.frame_number() == 135); } -TEST_CASE("Read one frame using ROI", "[.integration]") { +TEST_CASE("Read one frame using ROI", "[.files]") { //We know that the frame has 97 clusters - auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust"; + auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust"; REQUIRE(std::filesystem::exists(fpath)); ClusterFile f(fpath); @@ -50,9 +50,9 @@ TEST_CASE("Read one frame using ROI", "[.integration]") { } -TEST_CASE("Read clusters from single frame file", "[.integration]") { +TEST_CASE("Read clusters from single frame file", "[.files]") { - auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust"; + auto fpath = test_data_path() / "clust" / "single_frame_97_clustrers.clust"; REQUIRE(std::filesystem::exists(fpath)); SECTION("Read fewer clusters than available") { diff --git a/src/File.cpp b/src/File.cpp index 3c68eff..eb04893 100644 --- a/src/File.cpp +++ b/src/File.cpp @@ -1,4 +1,5 @@ #include "aare/File.hpp" +#include "aare/JungfrauDataFile.hpp" #include "aare/NumpyFile.hpp" #include "aare/RawFile.hpp" @@ -27,6 +28,8 @@ File::File(const std::filesystem::path &fname, const std::string &mode, else if (fname.extension() == ".npy") { // file_impl = new NumpyFile(fname, mode, cfg); file_impl = std::make_unique(fname, mode, cfg); + }else if(fname.extension() == ".dat"){ + file_impl = std::make_unique(fname); } else { throw std::runtime_error("Unsupported file type"); } diff --git a/src/FilePtr.cpp b/src/FilePtr.cpp new file mode 100644 index 0000000..4fed3d7 --- /dev/null +++ b/src/FilePtr.cpp @@ -0,0 +1,44 @@ + +#include "aare/FilePtr.hpp" +#include +#include +#include + +namespace aare { + +FilePtr::FilePtr(const std::filesystem::path& fname, const std::string& mode = "rb") { + fp_ = fopen(fname.c_str(), mode.c_str()); + if (!fp_) + throw std::runtime_error(fmt::format("Could not open: {}", fname.c_str())); +} + +FilePtr::FilePtr(FilePtr &&other) { std::swap(fp_, other.fp_); } + +FilePtr &FilePtr::operator=(FilePtr &&other) { + std::swap(fp_, other.fp_); + return *this; +} + +FILE *FilePtr::get() { return fp_; } + +int64_t FilePtr::tell() { + auto pos = ftell(fp_); + if (pos == -1) + throw std::runtime_error(fmt::format("Error getting file position: {}", error_msg())); + return pos; +} +FilePtr::~FilePtr() { + if (fp_) + fclose(fp_); // check? +} + +std::string FilePtr::error_msg(){ + if (feof(fp_)) { + return "End of file reached"; + } + if (ferror(fp_)) { + return fmt::format("Error reading file: {}", std::strerror(errno)); + } + return ""; +} +} // namespace aare diff --git a/src/JungfrauDataFile.cpp b/src/JungfrauDataFile.cpp new file mode 100644 index 0000000..6e1ccd6 --- /dev/null +++ b/src/JungfrauDataFile.cpp @@ -0,0 +1,242 @@ +#include "aare/JungfrauDataFile.hpp" +#include "aare/algorithm.hpp" +#include "aare/defs.hpp" + +#include +#include + +namespace aare { + +JungfrauDataFile::JungfrauDataFile(const std::filesystem::path &fname) { + + if (!std::filesystem::exists(fname)) { + throw std::runtime_error(LOCATION + + "File does not exist: " + fname.string()); + } + find_frame_size(fname); + parse_fname(fname); + scan_files(); + open_file(m_current_file_index); +} + + +// FileInterface + +Frame JungfrauDataFile::read_frame(){ + Frame f(rows(), cols(), Dtype::UINT16); + read_into(reinterpret_cast(f.data()), nullptr); + return f; +} + +Frame JungfrauDataFile::read_frame(size_t frame_number){ + seek(frame_number); + Frame f(rows(), cols(), Dtype::UINT16); + read_into(reinterpret_cast(f.data()), nullptr); + return f; +} + +std::vector JungfrauDataFile::read_n(size_t n_frames) { + std::vector frames; + throw std::runtime_error(LOCATION + + "Not implemented yet"); + return frames; +} + +void JungfrauDataFile::read_into(std::byte *image_buf) { + read_into(image_buf, nullptr); +} +void JungfrauDataFile::read_into(std::byte *image_buf, size_t n_frames) { + read_into(image_buf, n_frames, nullptr); +} + +size_t JungfrauDataFile::frame_number(size_t frame_index) { + seek(frame_index); + return read_header().framenum; +} + +DetectorType JungfrauDataFile::detector_type() const { return DetectorType::Jungfrau; } + +std::string JungfrauDataFile::base_name() const { return m_base_name; } + +size_t JungfrauDataFile::bytes_per_frame() { return m_bytes_per_frame; } + +size_t JungfrauDataFile::pixels_per_frame() { return m_rows * m_cols; } + +size_t JungfrauDataFile::bytes_per_pixel() const { return sizeof(pixel_type); } + +size_t JungfrauDataFile::bitdepth() const { + return bytes_per_pixel() * bits_per_byte; +} + +void JungfrauDataFile::seek(size_t frame_index) { + if (frame_index >= m_total_frames) { + throw std::runtime_error(LOCATION + "Frame index out of range: " + + std::to_string(frame_index)); + } + m_current_frame_index = frame_index; + auto file_index = first_larger(m_last_frame_in_file, frame_index); + + if (file_index != m_current_file_index) + open_file(file_index); + + auto frame_offset = (file_index) + ? frame_index - m_last_frame_in_file[file_index - 1] + : frame_index; + auto byte_offset = frame_offset * (m_bytes_per_frame + header_size); + m_fp.seek(byte_offset); +}; + +size_t JungfrauDataFile::tell() { return m_current_frame_index; } +size_t JungfrauDataFile::total_frames() const { return m_total_frames; } +size_t JungfrauDataFile::rows() const { return m_rows; } +size_t JungfrauDataFile::cols() const { return m_cols; } + +size_t JungfrauDataFile::n_files() const { return m_last_frame_in_file.size(); } + +void JungfrauDataFile::find_frame_size(const std::filesystem::path &fname) { + + static constexpr size_t module_data_size = + header_size + sizeof(pixel_type) * 512 * 1024; + static constexpr size_t half_data_size = + header_size + sizeof(pixel_type) * 256 * 1024; + static constexpr size_t chip_data_size = + header_size + sizeof(pixel_type) * 256 * 256; + + auto file_size = std::filesystem::file_size(fname); + if (file_size == 0) { + throw std::runtime_error(LOCATION + + "Cannot guess frame size: file is empty"); + } + + if (file_size % module_data_size == 0) { + m_rows = 512; + m_cols = 1024; + m_bytes_per_frame = module_data_size - header_size; + } else if (file_size % half_data_size == 0) { + m_rows = 256; + m_cols = 1024; + m_bytes_per_frame = half_data_size - header_size; + } else if (file_size % chip_data_size == 0) { + m_rows = 256; + m_cols = 256; + m_bytes_per_frame = chip_data_size - header_size; + } else { + throw std::runtime_error(LOCATION + + "Cannot find frame size: file size is not a " + "multiple of any known frame size"); + } +} + +void JungfrauDataFile::parse_fname(const std::filesystem::path &fname) { + m_path = fname.parent_path(); + m_base_name = fname.stem(); + + // find file index, then remove if from the base name + if (auto pos = m_base_name.find_last_of('_'); pos != std::string::npos) { + m_offset = std::stoul(m_base_name.substr(pos + 1)); + m_base_name.erase(pos); + } +} + +void JungfrauDataFile::scan_files() { + // find how many files we have and the number of frames in each file + m_last_frame_in_file.clear(); + size_t file_index = m_offset; + while (std::filesystem::exists(fpath(file_index))) { + auto n_frames = std::filesystem::file_size(fpath(file_index)) / + (m_bytes_per_frame + header_size); + m_last_frame_in_file.push_back(n_frames); + ++file_index; + } + + // find where we need to open the next file and total number of frames + m_last_frame_in_file = cumsum(m_last_frame_in_file); + m_total_frames = m_last_frame_in_file.back(); +} + +void JungfrauDataFile::read_into(std::byte *image_buf, + JungfrauDataHeader *header) { + + // read header if not passed nullptr + if (header) { + if (auto rc = fread(header, sizeof(JungfrauDataHeader), 1, m_fp.get()); + rc != 1) { + throw std::runtime_error( + LOCATION + + "Could not read header from file:" + m_fp.error_msg()); + } + } else { + m_fp.seek(header_size, SEEK_CUR); + } + + // read data + if (auto rc = fread(image_buf, 1, m_bytes_per_frame, m_fp.get()); + rc != m_bytes_per_frame) { + throw std::runtime_error(LOCATION + "Could not read image from file" + + m_fp.error_msg()); + } + + // prepare for next read + // if we are at the end of the file, open the next file + ++m_current_frame_index; + if (m_current_frame_index >= m_last_frame_in_file[m_current_file_index] && + (m_current_frame_index < m_total_frames)) { + ++m_current_file_index; + open_file(m_current_file_index); + } +} + +void JungfrauDataFile::read_into(std::byte *image_buf, size_t n_frames, + JungfrauDataHeader *header) { + if (header) { + for (size_t i = 0; i < n_frames; ++i) + read_into(image_buf + i * m_bytes_per_frame, header + i); + }else{ + for (size_t i = 0; i < n_frames; ++i) + read_into(image_buf + i * m_bytes_per_frame, nullptr); + } +} + +void JungfrauDataFile::read_into(NDArray* image, JungfrauDataHeader* header) { + if(!(rows() == image->shape(0) && cols() == image->shape(1))){ + throw std::runtime_error(LOCATION + + "Image shape does not match file size: " + std::to_string(rows()) + "x" + std::to_string(cols())); + } + read_into(reinterpret_cast(image->data()), header); +} + +NDArray JungfrauDataFile::read_frame(JungfrauDataHeader* header) { + Shape<2> shape{rows(), cols()}; + NDArray image(shape); + + read_into(reinterpret_cast(image.data()), + header); + + return image; +} + +JungfrauDataHeader JungfrauDataFile::read_header() { + JungfrauDataHeader header; + if (auto rc = fread(&header, 1, sizeof(header), m_fp.get()); + rc != sizeof(header)) { + throw std::runtime_error(LOCATION + "Could not read header from file" + + m_fp.error_msg()); + } + m_fp.seek(-header_size, SEEK_CUR); + return header; +} + +void JungfrauDataFile::open_file(size_t file_index) { + // fmt::print(stderr, "Opening file: {}\n", + // fpath(file_index+m_offset).string()); + m_fp = FilePtr(fpath(file_index + m_offset), "rb"); + m_current_file_index = file_index; +} + +std::filesystem::path JungfrauDataFile::fpath(size_t file_index) const { + auto fname = fmt::format("{}_{:0{}}.dat", m_base_name, file_index, + n_digits_in_file_index); + return m_path / fname; +} + +} // namespace aare \ No newline at end of file diff --git a/src/JungfrauDataFile.test.cpp b/src/JungfrauDataFile.test.cpp new file mode 100644 index 0000000..626a318 --- /dev/null +++ b/src/JungfrauDataFile.test.cpp @@ -0,0 +1,94 @@ +#include "aare/JungfrauDataFile.hpp" + +#include +#include "test_config.hpp" + +using aare::JungfrauDataFile; +using aare::JungfrauDataHeader; +TEST_CASE("Open a Jungfrau data file", "[.files]") { + //we know we have 4 files with 7, 7, 7, and 3 frames + //firs frame number if 1 and the bunch id is frame_number**2 + //so we can check the header + auto fpath = test_data_path() / "dat" / "AldoJF500k_000000.dat"; + REQUIRE(std::filesystem::exists(fpath)); + + JungfrauDataFile f(fpath); + REQUIRE(f.rows() == 512); + REQUIRE(f.cols() == 1024); + REQUIRE(f.bytes_per_frame() == 1048576); + REQUIRE(f.pixels_per_frame() == 524288); + REQUIRE(f.bytes_per_pixel() == 2); + REQUIRE(f.bitdepth() == 16); + REQUIRE(f.base_name() == "AldoJF500k"); + REQUIRE(f.n_files() == 4); + REQUIRE(f.tell() == 0); + REQUIRE(f.total_frames() == 24); + REQUIRE(f.current_file() == fpath); + + //Check that the frame number and buch id is read correctly + for (size_t i = 0; i < 24; ++i) { + JungfrauDataHeader header; + auto image = f.read_frame(&header); + REQUIRE(header.framenum == i + 1); + REQUIRE(header.bunchid == (i + 1) * (i + 1)); + REQUIRE(image.shape(0) == 512); + REQUIRE(image.shape(1) == 1024); + } +} + +TEST_CASE("Seek in a JungfrauDataFile", "[.files]"){ + auto fpath = test_data_path() / "dat" / "AldoJF65k_000000.dat"; + REQUIRE(std::filesystem::exists(fpath)); + + JungfrauDataFile f(fpath); + + //The file should have 113 frames + f.seek(19); + REQUIRE(f.tell() == 19); + auto h = f.read_header(); + REQUIRE(h.framenum == 19+1); + + //Reading again does not change the file pointer + auto h2 = f.read_header(); + REQUIRE(h2.framenum == 19+1); + + f.seek(59); + REQUIRE(f.tell() == 59); + auto h3 = f.read_header(); + REQUIRE(h3.framenum == 59+1); + + JungfrauDataHeader h4; + auto image = f.read_frame(&h4); + REQUIRE(h4.framenum == 59+1); + + //now we should be on the next frame + REQUIRE(f.tell() == 60); + REQUIRE(f.read_header().framenum == 60+1); + + REQUIRE_THROWS(f.seek(86356)); //out of range +} + +TEST_CASE("Open a Jungfrau data file with non zero file index", "[.files]"){ + + auto fpath = test_data_path() / "dat" / "AldoJF65k_000003.dat"; + REQUIRE(std::filesystem::exists(fpath)); + + JungfrauDataFile f(fpath); + + //18 files per data file, opening the 3rd file we ignore the first 3 + REQUIRE(f.total_frames() == 113-18*3); + REQUIRE(f.tell() == 0); + + //Frame numbers start at 1 in the first file + REQUIRE(f.read_header().framenum == 18*3+1); + + // moving relative to the third file + f.seek(5); + REQUIRE(f.read_header().framenum == 18*3+1+5); + + // ignoring the first 3 files + REQUIRE(f.n_files() == 4); + + REQUIRE(f.current_file().stem() == "AldoJF65k_000003"); + +} \ No newline at end of file diff --git a/src/algorithm.test.cpp b/src/algorithm.test.cpp index fcfa8d2..e2ae8fa 100644 --- a/src/algorithm.test.cpp +++ b/src/algorithm.test.cpp @@ -49,6 +49,16 @@ TEST_CASE("nearest index works with std::array", "[algorithm]"){ REQUIRE(aare::nearest_index(arr, -10.0) == 0); } +TEST_CASE("nearest index when there is no different uses the first element", "[algorithm]"){ + std::vector vec = {5, 5, 5, 5, 5}; + REQUIRE(aare::nearest_index(vec, 5) == 0); +} + +TEST_CASE("nearest index when there is no different uses the first element also when all smaller", "[algorithm]"){ + std::vector vec = {5, 5, 5, 5, 5}; + REQUIRE(aare::nearest_index(vec, 10) == 0); +} + TEST_CASE("last smaller", "[algorithm]"){ aare::NDArray arr({5}); @@ -68,6 +78,82 @@ TEST_CASE("returns last bin strictly smaller", "[algorithm]"){ arr[i] = i; } // arr 0, 1, 2, 3, 4 - REQUIRE(aare::last_smaller(arr, 2.0) == 2); + REQUIRE(aare::last_smaller(arr, 2.0) == 1); + +} + +TEST_CASE("last_smaller with all elements smaller returns last element", "[algorithm]"){ + aare::NDArray arr({5}); + for (size_t i = 0; i < arr.size(); i++) { + arr[i] = i; + } + // arr 0, 1, 2, 3, 4 + REQUIRE(aare::last_smaller(arr, 50.) == 4); +} + +TEST_CASE("last_smaller with all elements bigger returns first element", "[algorithm]"){ + aare::NDArray arr({5}); + for (size_t i = 0; i < arr.size(); i++) { + arr[i] = i; + } + // arr 0, 1, 2, 3, 4 + REQUIRE(aare::last_smaller(arr, -50.) == 0); +} + +TEST_CASE("last smaller with all elements equal returns the first element", "[algorithm]"){ + std::vector vec = {5,5,5,5,5,5,5}; + REQUIRE(aare::last_smaller(vec, 5) == 0); +} + + +TEST_CASE("first_lager with vector", "[algorithm]"){ + std::vector vec = {0, 1, 2, 3, 4}; + REQUIRE(aare::first_larger(vec, 2.5) == 3); +} + +TEST_CASE("first_lager with all elements smaller returns last element", "[algorithm]"){ + std::vector vec = {0, 1, 2, 3, 4}; + REQUIRE(aare::first_larger(vec, 50.) == 4); +} + +TEST_CASE("first_lager with all elements bigger returns first element", "[algorithm]"){ + std::vector vec = {0, 1, 2, 3, 4}; + REQUIRE(aare::first_larger(vec, -50.) == 0); +} + +TEST_CASE("first_lager with all elements the same as the check returns last", "[algorithm]"){ + std::vector vec = {14, 14, 14, 14, 14}; + REQUIRE(aare::first_larger(vec, 14) == 4); +} + +TEST_CASE("first larger with the same element", "[algorithm]"){ + std::vector vec = {7,8,9,10,11}; + REQUIRE(aare::first_larger(vec, 9) == 3); +} + +TEST_CASE("cumsum works", "[algorithm]"){ + std::vector vec = {0, 1, 2, 3, 4}; + auto result = aare::cumsum(vec); + REQUIRE(result.size() == vec.size()); + REQUIRE(result[0] == 0); + REQUIRE(result[1] == 1); + REQUIRE(result[2] == 3); + REQUIRE(result[3] == 6); + REQUIRE(result[4] == 10); +} +TEST_CASE("cumsum works with empty vector", "[algorithm]"){ + std::vector vec = {}; + auto result = aare::cumsum(vec); + REQUIRE(result.size() == 0); +} +TEST_CASE("cumsum works with negative numbers", "[algorithm]"){ + std::vector vec = {0, -1, -2, -3, -4}; + auto result = aare::cumsum(vec); + REQUIRE(result.size() == vec.size()); + REQUIRE(result[0] == 0); + REQUIRE(result[1] == -1); + REQUIRE(result[2] == -3); + REQUIRE(result[3] == -6); + REQUIRE(result[4] == -10); +} -} \ No newline at end of file