Hdf5Reader supporting reading into frame, ndarray and generic bytestream

2025-06-11 06:47:14 +02:00 · 2025-05-26 09:06:47 +02:00
parent 6328369ce9
commit b94be4cbe8
5 changed files with 456 additions and 26 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -62,6 +62,7 @@ option(AARE_FETCH_CATCH "Use FetchContent to download catch2" ON)
 option(AARE_FETCH_JSON "Use FetchContent to download nlohmann::json" ON)
 option(AARE_FETCH_ZMQ "Use FetchContent to download libzmq" ON)
 option(AARE_FETCH_LMFIT "Use FetchContent to download lmfit" ON)
+option(AARE_FETCH_HDF5 "Use FetchContent to download hdf5-devel" OFF)


 #Convenience option to use system libraries only (no FetchContent)
@ -73,6 +74,7 @@ if(AARE_SYSTEM_LIBRARIES)
    set(AARE_FETCH_CATCH OFF CACHE BOOL "Disabled FetchContent for catch2" FORCE)
    set(AARE_FETCH_JSON OFF CACHE BOOL "Disabled FetchContent for nlohmann::json" FORCE)
    set(AARE_FETCH_ZMQ OFF CACHE BOOL "Disabled FetchContent for libzmq" FORCE)
+    set(AARE_FETCH_HDF5 OFF CACHE BOOL "Disabled FetchContent for hdf5" FORCE)
    # Still fetch lmfit when setting AARE_SYSTEM_LIBRARIES since this is not available
    # on conda-forge
 endif()
@ -219,6 +221,23 @@ else()
    find_package(nlohmann_json 3.11.3 REQUIRED)
 endif()

+if(AARE_FETCH_HDF5)
+    message(FATAL_ERROR "Fetching HDF5 via FetchContent is not supported here. Please install it via your system. 
+    For Ubuntu: sudo apt install libhdf5-dev
+    For Red Hat: sudo dnf install hdf5-devel
+    For MacOS: brew install hdf5")
+else()
+    find_package(HDF5 QUIET COMPONENTS CXX)
+    if (HDF5_FOUND)
+        message(STATUS "Found HDF5: ${HDF5_INCLUDE_DIRS}")
+    else()
+        message(FATAL_ERROR "HDF5 was NOT found! Please install it via your system  
+            For Ubuntu: sudo apt install libhdf5-dev
+            For Red Hat: sudo dnf install hdf5-devel
+            For MacOS: brew install hdf5")
+    endif()
+endif()
+
 include(GNUInstallDirs)

 # If conda build, always set lib dir to 'lib'
@ -331,10 +350,6 @@ if(AARE_ASAN)
    )
 endif()

-
-
-
-
 if(AARE_TESTS)
    enable_testing()
    add_subdirectory(tests)
@ -362,6 +377,7 @@ set(PUBLICHEADERS
    include/aare/Frame.hpp
    include/aare/GainMap.hpp
    include/aare/geo_helpers.hpp
+    include/aare/Hdf5FileReader.hpp
    include/aare/JungfrauDataFile.hpp
    include/aare/NDArray.hpp
    include/aare/NDView.hpp
@ -414,6 +430,7 @@ target_link_libraries(
    PUBLIC 
    fmt::fmt 
    nlohmann_json::nlohmann_json 
+    HDF5::HDF5
    ${STD_FS_LIB} # from helpers.cmake
    PRIVATE 
    aare_compiler_flags 
@ -441,6 +458,7 @@ if(AARE_TESTS)
            ${CMAKE_CURRENT_SOURCE_DIR}/src/Frame.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/geo_helpers.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/RawMasterFile.test.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/Hdf5FileReader.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/NDArray.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/NDView.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFinder.test.cpp
--- a/include/aare/AngleCalibration.hpp
+++ b/include/aare/AngleCalibration.hpp
@ -12,23 +12,23 @@

 // function read flatfield store in ff_corr probably correlation field

-// class variables:
-
 namespace aare {

 using parameters =
    std::tuple<std::vector<double>, std::vector<double>, std::vector<double>>;

-// TODO: can i have a static struct, constexpr?
 struct MythenSpecifications {

    static constexpr int32_t max_modules = 48;
-    static constexpr int32_t channels_per_module = 1280;
+    static constexpr int32_t strips_per_module = 1280;
    static constexpr double pitch = 0.05; // strip width [mm] ?? TODO: not sure
-    static constexpr double ttmin = -180.0; // what is this the angle
+    static constexpr double ttmin = -180.0; // what is this?
    static constexpr float ttmax = 180.0;
    static constexpr float ttstep =
-        0.0036; // probably here to calculate bin size
+        0.0036; // probably here to calculate bin size, what is this?
+
+    static constexpr double bloffset =
+        1.532; // what is this? detector offset relative to what?
 };

 // number_of_activated_modules
@ -50,19 +50,53 @@ class AngleCalibration {
    /** converts DG parameters to easy EE parameters e.g.geometric parameters */
    parameters convert_to_EE_parameters();

-    /** converts DG parameters to easy BC parameters e.g. best computing
+    std::tuple<double, double, double>
+    convert_to_EE_parameters(const size_t module_index);
+
+    /** converts DG parameters to BC parameters e.g. best computing
     * parameters */
    parameters convert_to_BC_parameters();

+    /** calculates diffraction angle from EE module parameters (used in Beer's
+     * Law)
+     * @param strip_index local strip index of module
+     */
+    double diffraction_angle_from_EE_parameters(
+        const double normal_distance, const double module_center_distance,
+        const double angle, const size_t strip_index);
+
+    /** calculated the strip width expressed as angle [degrees]
+     * @param strip_index gloabl strip index of detector
+     */
+    double angular_strip_width(const size_t strip_index);
+
+    /** converts global strip index to local strip index of that module */
+    size_t
+    global_to_local_strip_index_conversion(const size_t global_strip_index) {
+        const size_t module_index =
+            global_strip_index / MythenSpecifications::strips_per_module;
+        // local strip index in module
+        size_t local_strip_index =
+            global_strip_index -
+            module_index * MythenSpecifications::strips_per_module;
+        // switch if indexing is in clock-wise direction
+        local_strip_index =
+            signbit(conversions[module_index])
+                ? MythenSpecifications::strips_per_module - local_strip_index
+                : local_strip_index;
+
+        return local_strip_index;
+    }
+
  protected:
-    // TODO: Design maybe have a struct with three vectors, store all three sets
-    // of parameters
+    // TODO: Design maybe have a struct with three vectors, store all three
+    // sets of parameters as member variables

    // TODO: check if interpretation and units are correct
    // historical DG parameters
-    std::vector<double>
-        centers; // orthogonal projection of sample onto detector (given in
-                 // strip number) [mm] D/pitch
+    std::vector<double> centers; // orthogonal projection of sample onto
+                                 // detector (given in strip number) [mm]
+                                 // D/pitch
    std::vector<double>
        conversions; // pitch/(normal distance from sample to detector (R)) [mm]
                     // //used for easy conversion
@ -113,8 +147,7 @@ void AngleCalibration::read_initial_calibration_from_file(

        file.close();
    } catch (const std::exception &e) {
-        std::cerr << "Error: " << e.what()
-                  << std::endl; // TODO: replace with log
+        std::cerr << "Error: " << e.what() << std::endl;
    }
 }

@ -130,19 +163,87 @@ parameters AngleCalibration::convert_to_EE_parameters() {
    std::vector<double> angles(centers.size());

    for (size_t i = 0; i < centers.size(); ++i) {
-        normal_distances[i] = centers[i] * MythenSpecifications::pitch;
-        module_center_distances[i] =
-            MythenSpecifications::pitch / std::abs(conversions[i]);
-        angles[i] =
-            offsets[i] + 180.0 / M_PI * centers[i] * std::abs(conversions[i]);
+        auto [normal_distance, module_center_distance, angle] =
+            convert_to_EE_parameters(i);
+        normal_distances[i] = normal_distance;
+        module_center_distances[i] = module_center_distance;
+        angles[i] = angle;
    }
-    // TODO: maybe add function rad_to_deg

    return std::make_tuple(normal_distances, module_center_distances, angles);
 }
+
+std::tuple<double, double, double>
+AngleCalibration::convert_to_EE_parameters(const size_t module_index) {
+    const double normal_distance =
+        centers[module_index] * MythenSpecifications::pitch;
+    const double module_center_distance =
+        MythenSpecifications::pitch / std::abs(conversions[module_index]);
+    const double angle =
+        offsets[module_index] +
+        180.0 / M_PI * centers[module_index] *
+            std::abs(conversions[module_index]); // TODO: maybe add function
+                                                 // rad_to_deg
+
+    return std::make_tuple(normal_distance, module_center_distance, angle);
+}
+
 /*
 parameters
 AngleCalibration::convert_to_BC_parameters() {}
 */

+double AngleCalibration::diffraction_angle_from_EE_parameters(
+    const double normal_distance, const double module_center_distance,
+    const double angle, const size_t strip_index) {
+
+    return angle - 180.0 / M_PI *
+                       atan((module_center_distance -
+                             MythenSpecifications::pitch * strip_index) /
+                            normal_distance); // TODO: why is it minus
+                                              // is it defined counter
+                                              // clockwise? thought
+                                              // should have a flipped
+                                              // sign
+}
+
+double AngleCalibration::angular_strip_width(const size_t strip_index) {
+
+    const size_t module_index =
+        strip_index / MythenSpecifications::strips_per_module;
+
+    const auto [normal_distance, module_center_distance, angle] =
+        convert_to_EE_parameters(module_index);
+
+    const size_t local_strip_index =
+        global_to_local_strip_index_conversion(strip_index);
+
+    return 180.0 / M_PI *
+           std::abs(diffraction_angle_from_EE_parameters(
+                        normal_distance, module_center_distance, angle,
+                        local_strip_index - 0.5) -
+                    diffraction_angle_from_EE_parameters(
+                        normal_distance, module_center_distance, angle,
+                        local_strip_index + 0.5));
+    // TODO: again not sure about division order - taking abs anyway
+}
+
+/*
+void AngleCalibration::Calibrate() {
+
+    // iterates over each strip
+    // skips the one which are not enabled or have a bad channel
+    // get module index
+    // get index of strip in module - make sure to use function correctly based
+    // on sign
+
+    // then I read some images - probably the histogram - h5 file - need a
+    // reader need to read the filesystem!! -
+
+    // we modify it with ff_corr computed by flatfield why?
+
+
+}
+*/
+
 } // namespace aare
--- a/include/aare/Hdf5FileReader.hpp
+++ b/include/aare/Hdf5FileReader.hpp
@ -0,0 +1,203 @@
+/************************************************
+ * @file HD5FFileReader.hpp
+ * @short HDF5FileReader based on H5File object
+ ***********************************************/
+
+#include "Frame.hpp"
+#include "NDArray.hpp"
+#include <H5Cpp.h>
+#include <array>
+#include <cxxabi.h>
+#include <optional>
+
+namespace aare {
+
+// return std::type_info
+const std::type_info &deduce_cpp_type(const H5::DataType datatype) {
+    if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_UINT8.getId())) {
+        return typeid(uint8_t);
+    } else if (H5Tequal(datatype.getId(),
+                        H5::PredType::NATIVE_UINT16.getId())) {
+        return typeid(uint16_t);
+    } else if (H5Tequal(datatype.getId(),
+                        H5::PredType::NATIVE_UINT32.getId())) {
+        return typeid(uint32_t);
+    } else if (H5Tequal(datatype.getId(),
+                        H5::PredType::NATIVE_UINT64.getId())) {
+        return typeid(uint64_t);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT8.getId())) {
+        return typeid(int8_t);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT16.getId())) {
+        return typeid(int16_t);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT32.getId())) {
+        return typeid(int32_t);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT64.getId())) {
+        return typeid(int64_t);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT.getId())) {
+        return typeid(int);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::IEEE_F64LE.getId())) {
+        return typeid(double);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::IEEE_F32LE.getId())) {
+        return typeid(float);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_FLOAT.getId())) {
+        return typeid(float);
+    } else if (H5Tequal(datatype.getId(),
+                        H5::PredType::NATIVE_DOUBLE.getId())) {
+        return typeid(float);
+    } else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_CHAR.getId()) &&
+               datatype.getId() == H5::PredType::NATIVE_CHAR.getId()) {
+        return typeid(char);
+    } else {
+        throw std::runtime_error("c++ type cannot be deduced");
+    }
+}
+
+struct Subset {
+    std::vector<hsize_t> shape;
+    std::vector<hsize_t> offset; // index where data subset should start
+};
+
+class HDF5Dataset {
+
+  public:
+    HDF5Dataset(const std::string &datasetname_, const H5::DataSet dataset_)
+        : datasetname(datasetname_), dataset(dataset_) {
+        datatype = dataset.getDataType();
+
+        cpp_type = &deduce_cpp_type(datatype);
+
+        dataspace = dataset.getSpace();
+        rank = dataspace.getSimpleExtentNdims(); // number of dimensions
+
+        shape.resize(rank);
+        dataspace.getSimpleExtentDims(shape.data(), nullptr);
+    }
+
+    hsize_t get_shape(ssize_t index) { return shape[index]; }
+
+    std::vector<hsize_t> get_shape() { return shape; }
+
+    H5::DataType get_datatype() { return datatype; }
+
+    const std::type_info *get_cpp_type() { return cpp_type; }
+
+    /**
+     * Reads subset of dataset into the buffer
+     * e.g. to read one 2d frame pass Subset({shape[1], shape[2]}, {frame_index,
+     * 0,0})
+     */
+    void read_into_buffer(std::byte *buffer,
+                          std::optional<const Subset> subset = std::nullopt) {
+
+        if (subset) {
+            // TODO treat scalar cases
+            if (static_cast<ssize_t>(subset->offset.size()) != rank) {
+                throw std::runtime_error("provide an offset for" +
+                                         std::to_string(rank) + "dimensions");
+            }
+            for (ssize_t i = 0; i < rank; ++i) {
+                hsize_t size =
+                    i < rank - static_cast<ssize_t>(subset->shape.size())
+                        ? 0
+                        : subset->shape[i - (rank - subset->shape.size())];
+                if ((size + subset->offset[i]) > shape[i]) {
+                    throw std::runtime_error(
+                        "subset is too large or offset is out of bounds");
+                }
+            }
+
+            H5::DataSpace memspace(static_cast<int>(subset->shape.size()),
+                                   subset->shape.data());
+
+            dataspace.selectHyperslab(H5S_SELECT_SET, subset->shape.data(),
+                                      subset->offset.data());
+            dataset.read(buffer, datatype, memspace, dataspace);
+        } else {
+            dataset.read(buffer, datatype);
+        }
+    }
+
+    Frame store_as_frame() {
+        uint32_t rows{}, cols{};
+        if (rank == 1) {
+            rows = 1;
+            // TODO overflow
+            cols = static_cast<uint32_t>(shape[0]);
+        } else if (rank == 2) {
+            rows = static_cast<uint32_t>(shape[0]);
+            cols = static_cast<uint32_t>(shape[1]);
+        } else {
+            throw std::runtime_error("Frame only supports 2d images");
+        }
+
+        Frame frame(rows, cols, Dtype(*cpp_type));
+
+        read_into_buffer(frame.data());
+
+        return frame;
+    }
+
+    template <typename T, ssize_t NDim> NDArray<T, NDim> store_as_ndarray() {
+        if (NDim != rank) {
+            std::cout
+                << "Warning: dataset dimension and array dimension mismatch"
+                << std::endl; // TODO: replace with log - still valid if we
+                              // want subset
+        }
+        if (typeid(T) != *cpp_type) {
+            std::cout << "Warning: dataset and array type mismatch"
+                      << std::endl;
+        }
+        std::array<ssize_t, NDim> array_shape{};
+        std::transform(
+            shape.begin(), shape.end(), array_shape.begin(),
+            [](const auto dim) { return static_cast<ssize_t>(dim); });
+
+        aare::NDArray<T, NDim> dataset_array(array_shape);
+
+        read_into_buffer(reinterpret_cast<std::byte *>(dataset_array.data()));
+
+        return dataset_array;
+    }
+
+    // getMemDataSize()
+
+  private:
+    std::string datasetname{};
+    H5::DataSet dataset;
+    H5::DataSpace dataspace;
+    H5::DataType datatype;
+    const std::type_info *cpp_type;
+    ssize_t rank{};
+    std::vector<hsize_t> shape{};
+};
+
+class HDF5FileReader {
+
+  public:
+    HDF5FileReader(const std::string &filename_) : filename(filename_) {
+        try {
+            file = H5::H5File(filename, H5F_ACC_RDONLY);
+        } catch (H5::Exception &e) {
+            std::cerr << "Error: " << e.getDetailMsg() << std::endl;
+        }
+    }
+
+    HDF5Dataset get_dataset(const std::string &dataset_name) {
+        H5::DataSet dataset;
+        try {
+            dataset = file.openDataSet(dataset_name);
+        } catch (H5::Exception &e) {
+            std::cerr << "Error: " << e.getDetailMsg() << std::endl;
+        }
+
+        // TODO use optional to handle error
+        return HDF5Dataset(dataset_name, dataset);
+    }
+
+  private:
+    std::string filename{};
+    H5::H5File file;
+};
+
+} // namespace aare
--- a/src/AngleCalibration.test.cpp
+++ b/src/AngleCalibration.test.cpp
@ -1,6 +1,6 @@
 /************************************************
 * @file AngleCalibration.test.cpp
- * @short test case for reading ascii file
+ * @short test case for angle calibration class
 ***********************************************/

 #include "aare/AngleCalibration.hpp"
@ -11,6 +11,8 @@
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>

+using namespace aare;
+
 class AngleCalibrationTestClass : public aare::AngleCalibration {

  public:
@ -50,4 +52,4 @@ TEST_CASE("read initial angle calibration file",
    CHECK(centers[9] == Catch::Approx(660.342326));
    CHECK(offsets[47] == Catch::Approx(5.8053312));
    CHECK(conversions[27] == Catch::Approx(-0.6581179125e-4));
-}
+}
--- a/src/Hdf5FileReader.test.cpp
+++ b/src/Hdf5FileReader.test.cpp
@ -0,0 +1,106 @@
+/************************************************
+ * @file Hdf5FileReader.test.cpp
+ * @short test case for reading hdf5 files
+ ***********************************************/
+
+#include <filesystem>
+
+#include <H5Cpp.h>
+
+#include "aare/Hdf5FileReader.hpp"
+#include "aare/NDArray.hpp"
+
+#include <catch2/catch_all.hpp>
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+
+using namespace aare;
+
+TEST_CASE("read hdf5 file", "[.hdf5file]") {
+
+    // TODO generalize datasetpath
+    std::string filename = "/home/mazzol_a/Documents/mythen3tools/beamline/"
+                           "TDATA/ang1up_22keV_LaB60p3mm_48M_a_0320.h5";
+
+    REQUIRE(std::filesystem::exists(filename));
+
+    HDF5FileReader file(filename);
+
+    auto dataset = file.get_dataset("/entry/data/data");
+
+    auto shape = dataset.get_shape();
+
+    CHECK(shape[0] == 61440);
+
+    auto type = dataset.get_datatype();
+
+    const std::type_info *type_info = dataset.get_cpp_type();
+
+    CHECK(*type_info == typeid(uint32_t));
+
+    SECTION("read dataset into NDArray") {
+
+        NDArray<uint32_t, 1> dataset_array =
+            dataset.store_as_ndarray<uint32_t, 1>();
+
+        CHECK(dataset_array(0) == 866);
+        CHECK(dataset_array(61439) == 1436);
+    }
+
+    SECTION("read dataset into Frame") {
+        Frame frame = dataset.store_as_frame();
+        CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 0))) == 866);
+        CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 61439))) ==
+              1436);
+    }
+    SECTION("read subset of dataset") {
+        Frame frame(1, 10, Dtype(typeid(uint32_t)));
+
+        Subset subset{std::vector<hsize_t>{10}, std::vector<hsize_t>{10}};
+
+        dataset.read_into_buffer(frame.data(), subset);
+
+        CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 0))) == 664);
+        CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 9))) == 654);
+    }
+    /*
+    SECTION("read scalar") {
+    }
+    */
+}
+
+TEST_CASE("test datatypes", "[.hdf5file]") {
+
+    auto [dtype, expected_type_info] = GENERATE(
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT), &typeid(int)),
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT8),
+                        &typeid(int8_t)),
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_UINT16),
+                        &typeid(uint16_t)),
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT16),
+                        &typeid(int16_t)),
+        std::make_tuple(H5::DataType(H5::PredType::STD_U32LE),
+                        &typeid(uint32_t)),
+        std::make_tuple(H5::DataType(H5::PredType::STD_I32LE),
+                        &typeid(int32_t)),
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT32),
+                        &typeid(int32_t)),
+        std::make_tuple(H5::DataType(H5::PredType::IEEE_F64LE),
+                        &typeid(double)),
+        std::make_tuple(H5::DataType(H5::PredType::IEEE_F32LE), &typeid(float)),
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_FLOAT),
+                        &typeid(float)),
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_DOUBLE),
+                        &typeid(double)),
+        std::make_tuple(H5::DataType(H5::PredType::NATIVE_CHAR),
+                        &typeid(int8_t)));
+
+    const std::type_info &type_info = deduce_cpp_type(dtype);
+
+    CHECK(type_info == *expected_type_info);
+
+    // TODO: handle bit swapping
+    REQUIRE_THROWS(deduce_cpp_type(
+        H5::DataType(H5::PredType::IEEE_F32BE))); // does not convert from big
+                                                  // to little endian
+}