Hdf5Reader supporting reading into frame, ndarray and generic bytestream

This commit is contained in:
mazzol_a
2025-05-26 09:06:47 +02:00
parent 6328369ce9
commit b94be4cbe8
5 changed files with 456 additions and 26 deletions

View File

@ -62,6 +62,7 @@ option(AARE_FETCH_CATCH "Use FetchContent to download catch2" ON)
option(AARE_FETCH_JSON "Use FetchContent to download nlohmann::json" ON)
option(AARE_FETCH_ZMQ "Use FetchContent to download libzmq" ON)
option(AARE_FETCH_LMFIT "Use FetchContent to download lmfit" ON)
option(AARE_FETCH_HDF5 "Use FetchContent to download hdf5-devel" OFF)
#Convenience option to use system libraries only (no FetchContent)
@ -73,6 +74,7 @@ if(AARE_SYSTEM_LIBRARIES)
set(AARE_FETCH_CATCH OFF CACHE BOOL "Disabled FetchContent for catch2" FORCE)
set(AARE_FETCH_JSON OFF CACHE BOOL "Disabled FetchContent for nlohmann::json" FORCE)
set(AARE_FETCH_ZMQ OFF CACHE BOOL "Disabled FetchContent for libzmq" FORCE)
set(AARE_FETCH_HDF5 OFF CACHE BOOL "Disabled FetchContent for hdf5" FORCE)
# Still fetch lmfit when setting AARE_SYSTEM_LIBRARIES since this is not available
# on conda-forge
endif()
@ -219,6 +221,23 @@ else()
find_package(nlohmann_json 3.11.3 REQUIRED)
endif()
if(AARE_FETCH_HDF5)
message(FATAL_ERROR "Fetching HDF5 via FetchContent is not supported here. Please install it via your system.
For Ubuntu: sudo apt install libhdf5-dev
For Red Hat: sudo dnf install hdf5-devel
For MacOS: brew install hdf5")
else()
find_package(HDF5 QUIET COMPONENTS CXX)
if (HDF5_FOUND)
message(STATUS "Found HDF5: ${HDF5_INCLUDE_DIRS}")
else()
message(FATAL_ERROR "HDF5 was NOT found! Please install it via your system
For Ubuntu: sudo apt install libhdf5-dev
For Red Hat: sudo dnf install hdf5-devel
For MacOS: brew install hdf5")
endif()
endif()
include(GNUInstallDirs)
# If conda build, always set lib dir to 'lib'
@ -331,10 +350,6 @@ if(AARE_ASAN)
)
endif()
if(AARE_TESTS)
enable_testing()
add_subdirectory(tests)
@ -362,6 +377,7 @@ set(PUBLICHEADERS
include/aare/Frame.hpp
include/aare/GainMap.hpp
include/aare/geo_helpers.hpp
include/aare/Hdf5FileReader.hpp
include/aare/JungfrauDataFile.hpp
include/aare/NDArray.hpp
include/aare/NDView.hpp
@ -414,6 +430,7 @@ target_link_libraries(
PUBLIC
fmt::fmt
nlohmann_json::nlohmann_json
HDF5::HDF5
${STD_FS_LIB} # from helpers.cmake
PRIVATE
aare_compiler_flags
@ -441,6 +458,7 @@ if(AARE_TESTS)
${CMAKE_CURRENT_SOURCE_DIR}/src/Frame.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/geo_helpers.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/RawMasterFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Hdf5FileReader.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NDArray.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NDView.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFinder.test.cpp

View File

@ -12,23 +12,23 @@
// function read flatfield store in ff_corr probably correlation field
// class variables:
namespace aare {
using parameters =
std::tuple<std::vector<double>, std::vector<double>, std::vector<double>>;
// TODO: can i have a static struct, constexpr?
struct MythenSpecifications {
static constexpr int32_t max_modules = 48;
static constexpr int32_t channels_per_module = 1280;
static constexpr int32_t strips_per_module = 1280;
static constexpr double pitch = 0.05; // strip width [mm] ?? TODO: not sure
static constexpr double ttmin = -180.0; // what is this the angle
static constexpr double ttmin = -180.0; // what is this?
static constexpr float ttmax = 180.0;
static constexpr float ttstep =
0.0036; // probably here to calculate bin size
0.0036; // probably here to calculate bin size, what is this?
static constexpr double bloffset =
1.532; // what is this? detector offset relative to what?
};
// number_of_activated_modules
@ -50,19 +50,53 @@ class AngleCalibration {
/** converts DG parameters to easy EE parameters e.g.geometric parameters */
parameters convert_to_EE_parameters();
/** converts DG parameters to easy BC parameters e.g. best computing
std::tuple<double, double, double>
convert_to_EE_parameters(const size_t module_index);
/** converts DG parameters to BC parameters e.g. best computing
* parameters */
parameters convert_to_BC_parameters();
/** calculates diffraction angle from EE module parameters (used in Beer's
* Law)
* @param strip_index local strip index of module
*/
double diffraction_angle_from_EE_parameters(
const double normal_distance, const double module_center_distance,
const double angle, const size_t strip_index);
/** calculated the strip width expressed as angle [degrees]
* @param strip_index gloabl strip index of detector
*/
double angular_strip_width(const size_t strip_index);
/** converts global strip index to local strip index of that module */
size_t
global_to_local_strip_index_conversion(const size_t global_strip_index) {
const size_t module_index =
global_strip_index / MythenSpecifications::strips_per_module;
// local strip index in module
size_t local_strip_index =
global_strip_index -
module_index * MythenSpecifications::strips_per_module;
// switch if indexing is in clock-wise direction
local_strip_index =
signbit(conversions[module_index])
? MythenSpecifications::strips_per_module - local_strip_index
: local_strip_index;
return local_strip_index;
}
protected:
// TODO: Design maybe have a struct with three vectors, store all three sets
// of parameters
// TODO: Design maybe have a struct with three vectors, store all three
// sets of parameters as member variables
// TODO: check if interpretation and units are correct
// historical DG parameters
std::vector<double>
centers; // orthogonal projection of sample onto detector (given in
// strip number) [mm] D/pitch
std::vector<double> centers; // orthogonal projection of sample onto
// detector (given in strip number) [mm]
// D/pitch
std::vector<double>
conversions; // pitch/(normal distance from sample to detector (R)) [mm]
// //used for easy conversion
@ -113,8 +147,7 @@ void AngleCalibration::read_initial_calibration_from_file(
file.close();
} catch (const std::exception &e) {
std::cerr << "Error: " << e.what()
<< std::endl; // TODO: replace with log
std::cerr << "Error: " << e.what() << std::endl;
}
}
@ -130,19 +163,87 @@ parameters AngleCalibration::convert_to_EE_parameters() {
std::vector<double> angles(centers.size());
for (size_t i = 0; i < centers.size(); ++i) {
normal_distances[i] = centers[i] * MythenSpecifications::pitch;
module_center_distances[i] =
MythenSpecifications::pitch / std::abs(conversions[i]);
angles[i] =
offsets[i] + 180.0 / M_PI * centers[i] * std::abs(conversions[i]);
auto [normal_distance, module_center_distance, angle] =
convert_to_EE_parameters(i);
normal_distances[i] = normal_distance;
module_center_distances[i] = module_center_distance;
angles[i] = angle;
}
// TODO: maybe add function rad_to_deg
return std::make_tuple(normal_distances, module_center_distances, angles);
}
std::tuple<double, double, double>
AngleCalibration::convert_to_EE_parameters(const size_t module_index) {
const double normal_distance =
centers[module_index] * MythenSpecifications::pitch;
const double module_center_distance =
MythenSpecifications::pitch / std::abs(conversions[module_index]);
const double angle =
offsets[module_index] +
180.0 / M_PI * centers[module_index] *
std::abs(conversions[module_index]); // TODO: maybe add function
// rad_to_deg
return std::make_tuple(normal_distance, module_center_distance, angle);
}
/*
parameters
AngleCalibration::convert_to_BC_parameters() {}
*/
double AngleCalibration::diffraction_angle_from_EE_parameters(
const double normal_distance, const double module_center_distance,
const double angle, const size_t strip_index) {
return angle - 180.0 / M_PI *
atan((module_center_distance -
MythenSpecifications::pitch * strip_index) /
normal_distance); // TODO: why is it minus
// is it defined counter
// clockwise? thought
// should have a flipped
// sign
}
double AngleCalibration::angular_strip_width(const size_t strip_index) {
const size_t module_index =
strip_index / MythenSpecifications::strips_per_module;
const auto [normal_distance, module_center_distance, angle] =
convert_to_EE_parameters(module_index);
const size_t local_strip_index =
global_to_local_strip_index_conversion(strip_index);
return 180.0 / M_PI *
std::abs(diffraction_angle_from_EE_parameters(
normal_distance, module_center_distance, angle,
local_strip_index - 0.5) -
diffraction_angle_from_EE_parameters(
normal_distance, module_center_distance, angle,
local_strip_index + 0.5));
// TODO: again not sure about division order - taking abs anyway
}
/*
void AngleCalibration::Calibrate() {
// iterates over each strip
// skips the one which are not enabled or have a bad channel
// get module index
// get index of strip in module - make sure to use function correctly based
// on sign
// then I read some images - probably the histogram - h5 file - need a
// reader need to read the filesystem!! -
// we modify it with ff_corr computed by flatfield why?
}
*/
} // namespace aare

View File

@ -0,0 +1,203 @@
/************************************************
* @file HD5FFileReader.hpp
* @short HDF5FileReader based on H5File object
***********************************************/
#include "Frame.hpp"
#include "NDArray.hpp"
#include <H5Cpp.h>
#include <array>
#include <cxxabi.h>
#include <optional>
namespace aare {
// return std::type_info
const std::type_info &deduce_cpp_type(const H5::DataType datatype) {
if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_UINT8.getId())) {
return typeid(uint8_t);
} else if (H5Tequal(datatype.getId(),
H5::PredType::NATIVE_UINT16.getId())) {
return typeid(uint16_t);
} else if (H5Tequal(datatype.getId(),
H5::PredType::NATIVE_UINT32.getId())) {
return typeid(uint32_t);
} else if (H5Tequal(datatype.getId(),
H5::PredType::NATIVE_UINT64.getId())) {
return typeid(uint64_t);
} else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT8.getId())) {
return typeid(int8_t);
} else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT16.getId())) {
return typeid(int16_t);
} else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT32.getId())) {
return typeid(int32_t);
} else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT64.getId())) {
return typeid(int64_t);
} else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_INT.getId())) {
return typeid(int);
} else if (H5Tequal(datatype.getId(), H5::PredType::IEEE_F64LE.getId())) {
return typeid(double);
} else if (H5Tequal(datatype.getId(), H5::PredType::IEEE_F32LE.getId())) {
return typeid(float);
} else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_FLOAT.getId())) {
return typeid(float);
} else if (H5Tequal(datatype.getId(),
H5::PredType::NATIVE_DOUBLE.getId())) {
return typeid(float);
} else if (H5Tequal(datatype.getId(), H5::PredType::NATIVE_CHAR.getId()) &&
datatype.getId() == H5::PredType::NATIVE_CHAR.getId()) {
return typeid(char);
} else {
throw std::runtime_error("c++ type cannot be deduced");
}
}
struct Subset {
std::vector<hsize_t> shape;
std::vector<hsize_t> offset; // index where data subset should start
};
class HDF5Dataset {
public:
HDF5Dataset(const std::string &datasetname_, const H5::DataSet dataset_)
: datasetname(datasetname_), dataset(dataset_) {
datatype = dataset.getDataType();
cpp_type = &deduce_cpp_type(datatype);
dataspace = dataset.getSpace();
rank = dataspace.getSimpleExtentNdims(); // number of dimensions
shape.resize(rank);
dataspace.getSimpleExtentDims(shape.data(), nullptr);
}
hsize_t get_shape(ssize_t index) { return shape[index]; }
std::vector<hsize_t> get_shape() { return shape; }
H5::DataType get_datatype() { return datatype; }
const std::type_info *get_cpp_type() { return cpp_type; }
/**
* Reads subset of dataset into the buffer
* e.g. to read one 2d frame pass Subset({shape[1], shape[2]}, {frame_index,
* 0,0})
*/
void read_into_buffer(std::byte *buffer,
std::optional<const Subset> subset = std::nullopt) {
if (subset) {
// TODO treat scalar cases
if (static_cast<ssize_t>(subset->offset.size()) != rank) {
throw std::runtime_error("provide an offset for" +
std::to_string(rank) + "dimensions");
}
for (ssize_t i = 0; i < rank; ++i) {
hsize_t size =
i < rank - static_cast<ssize_t>(subset->shape.size())
? 0
: subset->shape[i - (rank - subset->shape.size())];
if ((size + subset->offset[i]) > shape[i]) {
throw std::runtime_error(
"subset is too large or offset is out of bounds");
}
}
H5::DataSpace memspace(static_cast<int>(subset->shape.size()),
subset->shape.data());
dataspace.selectHyperslab(H5S_SELECT_SET, subset->shape.data(),
subset->offset.data());
dataset.read(buffer, datatype, memspace, dataspace);
} else {
dataset.read(buffer, datatype);
}
}
Frame store_as_frame() {
uint32_t rows{}, cols{};
if (rank == 1) {
rows = 1;
// TODO overflow
cols = static_cast<uint32_t>(shape[0]);
} else if (rank == 2) {
rows = static_cast<uint32_t>(shape[0]);
cols = static_cast<uint32_t>(shape[1]);
} else {
throw std::runtime_error("Frame only supports 2d images");
}
Frame frame(rows, cols, Dtype(*cpp_type));
read_into_buffer(frame.data());
return frame;
}
template <typename T, ssize_t NDim> NDArray<T, NDim> store_as_ndarray() {
if (NDim != rank) {
std::cout
<< "Warning: dataset dimension and array dimension mismatch"
<< std::endl; // TODO: replace with log - still valid if we
// want subset
}
if (typeid(T) != *cpp_type) {
std::cout << "Warning: dataset and array type mismatch"
<< std::endl;
}
std::array<ssize_t, NDim> array_shape{};
std::transform(
shape.begin(), shape.end(), array_shape.begin(),
[](const auto dim) { return static_cast<ssize_t>(dim); });
aare::NDArray<T, NDim> dataset_array(array_shape);
read_into_buffer(reinterpret_cast<std::byte *>(dataset_array.data()));
return dataset_array;
}
// getMemDataSize()
private:
std::string datasetname{};
H5::DataSet dataset;
H5::DataSpace dataspace;
H5::DataType datatype;
const std::type_info *cpp_type;
ssize_t rank{};
std::vector<hsize_t> shape{};
};
class HDF5FileReader {
public:
HDF5FileReader(const std::string &filename_) : filename(filename_) {
try {
file = H5::H5File(filename, H5F_ACC_RDONLY);
} catch (H5::Exception &e) {
std::cerr << "Error: " << e.getDetailMsg() << std::endl;
}
}
HDF5Dataset get_dataset(const std::string &dataset_name) {
H5::DataSet dataset;
try {
dataset = file.openDataSet(dataset_name);
} catch (H5::Exception &e) {
std::cerr << "Error: " << e.getDetailMsg() << std::endl;
}
// TODO use optional to handle error
return HDF5Dataset(dataset_name, dataset);
}
private:
std::string filename{};
H5::H5File file;
};
} // namespace aare

View File

@ -1,6 +1,6 @@
/************************************************
* @file AngleCalibration.test.cpp
* @short test case for reading ascii file
* @short test case for angle calibration class
***********************************************/
#include "aare/AngleCalibration.hpp"
@ -11,6 +11,8 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>
using namespace aare;
class AngleCalibrationTestClass : public aare::AngleCalibration {
public:
@ -50,4 +52,4 @@ TEST_CASE("read initial angle calibration file",
CHECK(centers[9] == Catch::Approx(660.342326));
CHECK(offsets[47] == Catch::Approx(5.8053312));
CHECK(conversions[27] == Catch::Approx(-0.6581179125e-4));
}
}

106
src/Hdf5FileReader.test.cpp Normal file
View File

@ -0,0 +1,106 @@
/************************************************
* @file Hdf5FileReader.test.cpp
* @short test case for reading hdf5 files
***********************************************/
#include <filesystem>
#include <H5Cpp.h>
#include "aare/Hdf5FileReader.hpp"
#include "aare/NDArray.hpp"
#include <catch2/catch_all.hpp>
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>
using namespace aare;
TEST_CASE("read hdf5 file", "[.hdf5file]") {
// TODO generalize datasetpath
std::string filename = "/home/mazzol_a/Documents/mythen3tools/beamline/"
"TDATA/ang1up_22keV_LaB60p3mm_48M_a_0320.h5";
REQUIRE(std::filesystem::exists(filename));
HDF5FileReader file(filename);
auto dataset = file.get_dataset("/entry/data/data");
auto shape = dataset.get_shape();
CHECK(shape[0] == 61440);
auto type = dataset.get_datatype();
const std::type_info *type_info = dataset.get_cpp_type();
CHECK(*type_info == typeid(uint32_t));
SECTION("read dataset into NDArray") {
NDArray<uint32_t, 1> dataset_array =
dataset.store_as_ndarray<uint32_t, 1>();
CHECK(dataset_array(0) == 866);
CHECK(dataset_array(61439) == 1436);
}
SECTION("read dataset into Frame") {
Frame frame = dataset.store_as_frame();
CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 0))) == 866);
CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 61439))) ==
1436);
}
SECTION("read subset of dataset") {
Frame frame(1, 10, Dtype(typeid(uint32_t)));
Subset subset{std::vector<hsize_t>{10}, std::vector<hsize_t>{10}};
dataset.read_into_buffer(frame.data(), subset);
CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 0))) == 664);
CHECK(*(reinterpret_cast<uint32_t *>(frame.pixel_ptr(0, 9))) == 654);
}
/*
SECTION("read scalar") {
}
*/
}
TEST_CASE("test datatypes", "[.hdf5file]") {
auto [dtype, expected_type_info] = GENERATE(
std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT), &typeid(int)),
std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT8),
&typeid(int8_t)),
std::make_tuple(H5::DataType(H5::PredType::NATIVE_UINT16),
&typeid(uint16_t)),
std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT16),
&typeid(int16_t)),
std::make_tuple(H5::DataType(H5::PredType::STD_U32LE),
&typeid(uint32_t)),
std::make_tuple(H5::DataType(H5::PredType::STD_I32LE),
&typeid(int32_t)),
std::make_tuple(H5::DataType(H5::PredType::NATIVE_INT32),
&typeid(int32_t)),
std::make_tuple(H5::DataType(H5::PredType::IEEE_F64LE),
&typeid(double)),
std::make_tuple(H5::DataType(H5::PredType::IEEE_F32LE), &typeid(float)),
std::make_tuple(H5::DataType(H5::PredType::NATIVE_FLOAT),
&typeid(float)),
std::make_tuple(H5::DataType(H5::PredType::NATIVE_DOUBLE),
&typeid(double)),
std::make_tuple(H5::DataType(H5::PredType::NATIVE_CHAR),
&typeid(int8_t)));
const std::type_info &type_info = deduce_cpp_type(dtype);
CHECK(type_info == *expected_type_info);
// TODO: handle bit swapping
REQUIRE_THROWS(deduce_cpp_type(
H5::DataType(H5::PredType::IEEE_F32BE))); // does not convert from big
// to little endian
}