From d905cc230c04662de4b81af94a7ce21023482c93 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Tue, 5 May 2026 11:58:08 +0200 Subject: [PATCH] HDF5: Use explicit mapping for VDS --- reader/JFJochHDF5Reader.cpp | 72 ++++++++++-- reader/JFJochHDF5Reader.h | 3 + tests/HDF5WritingTest.cpp | 97 ++++++++++++++++ writer/HDF5Objects.cpp | 224 +++++++++++++++++++++++++++++++++++- writer/HDF5Objects.h | 24 +++- 5 files changed, 407 insertions(+), 13 deletions(-) diff --git a/reader/JFJochHDF5Reader.cpp b/reader/JFJochHDF5Reader.cpp index 2668b2e8..46bace0b 100644 --- a/reader/JFJochHDF5Reader.cpp +++ b/reader/JFJochHDF5Reader.cpp @@ -62,6 +62,37 @@ std::vector GetDimension(HDF5Object& object, const std::string& path) { return dim; } +std::vector ReadVDSImageMappings(HDF5Object &file, + const std::string &dataset_name) { + HDF5DataSet dataset(file, dataset_name); + HDF5Dcpl dcpl(dataset); + auto mappings = dcpl.GetVirtualMappings(); + + if (mappings.empty()) + throw JFJochException(JFJochExceptionCategory::HDF5, + dataset_name + " is not a virtual dataset"); + + for (const auto &mapping: mappings) { + if (mapping.dataset.empty()) + throw JFJochException(JFJochExceptionCategory::HDF5, + "VDS mapping has empty source dataset name"); + if (mapping.virtual_start.size() != 3) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Only 3D image VDS mappings are supported"); + } + + return mappings; +} + +std::string ResolveRelativeToMaster(const std::string &directory, + const std::string &filename) { + std::filesystem::path path(filename); + if (path.is_absolute() || directory.empty()) + return filename; + + return (std::filesystem::path(directory) / path).string(); +} + template void JFJochHDF5Reader::ReadVector(std::vector &v, HDF5Object &file, @@ -104,6 +135,10 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { master_file = std::make_shared(filename); dataset->experiment = default_experiment; + std::filesystem::path master_path(filename); + master_file_directory = master_path.parent_path().string(); + vds_data_mappings.clear(); + dataset->arm_date = master_file->GetString("/entry/start_time"); dataset->experiment.FilePrefix(dataset_name(filename)); @@ -115,19 +150,19 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { size_t image_size_x = 0; size_t image_size_y = 0; - if (master_file->Exists("/entry/data/data")) - format = FileWriterFormat::NXmxVDS; - else if (master_file->Exists("/entry/data/data_000001")) { - format = FileWriterFormat::NXmxLegacy; - } + if (master_file->Exists("/entry/data/data")) { + HDF5DataSet data_dataset(*master_file, "/entry/data/data"); + HDF5Dcpl dcpl(data_dataset); + data_layout = dcpl.GetLayout(); - if (format == FileWriterFormat::NXmxVDS ) { auto dim = GetDimension(*master_file, "/entry/data/data"); number_of_images = dim[0]; image_size_y = dim[1]; image_size_x = dim[2]; images_per_file = number_of_images; + if (data_layout == HDF5DataSetLayout::VIRTUAL) + vds_data_mappings = ReadVDSImageMappings(*master_file, "/entry/data/data"); if (master_file->Exists("/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image")) dataset->efficiency = master_file->ReadVector( @@ -166,7 +201,12 @@ void JFJochHDF5Reader::ReadFile(const std::string& filename) { } if (master_file->Exists("/entry/image")) dataset->max_value = master_file->ReadOptVector("/entry/image/max_value"); - } else if (format == FileWriterFormat::NXmxLegacy) { + + format = FileWriterFormat::NXmxVDS; + } else if (master_file->Exists("/entry/data/data_000001")) { + format = FileWriterFormat::NXmxLegacy; + data_layout = HDF5DataSetLayout::CONTIGUOUS; + legacy_format_files.clear(); image_size_x = master_file->GetInt("/entry/instrument/detector/detectorSpecific/x_pixels_in_detector"); @@ -509,6 +549,22 @@ std::pair, uint32_t> JFJochHDF5Reader::GetImag uint32_t file_id = image_number / images_per_file; image_id = image_number % images_per_file; data_file = std::make_shared(legacy_format_files.at(file_id)); + } else if (format == FileWriterFormat::NXmxVDS && data_layout == HDF5DataSetLayout::VIRTUAL) { + const auto image = static_cast(image_number); + + for (const auto &mapping: vds_data_mappings) { + if (!mapping.ContainsVirtualImage(image)) + continue; + + image_id = static_cast(mapping.SourceImage(image)); + data_file = std::make_shared( + ResolveRelativeToMaster(master_file_directory, mapping.filename) + ); + return {std::move(data_file), image_id}; + } + + throw JFJochException(JFJochExceptionCategory::HDF5, + "Image not covered by /entry/data/data VDS mappings"); } else { image_id = image_number; data_file = master_file; @@ -774,6 +830,8 @@ void JFJochHDF5Reader::Close() { master_file = {}; number_of_images = 0; legacy_format_files.clear(); + vds_data_mappings.clear(); + master_file_directory.clear(); SetStartMessage({}); } diff --git a/reader/JFJochHDF5Reader.h b/reader/JFJochHDF5Reader.h index 3068e243..49e7e137 100644 --- a/reader/JFJochHDF5Reader.h +++ b/reader/JFJochHDF5Reader.h @@ -9,10 +9,13 @@ class JFJochHDF5Reader : public JFJochReader { FileWriterFormat format = FileWriterFormat::NoFile; + HDF5DataSetLayout data_layout = HDF5DataSetLayout::CONTIGUOUS; std::shared_ptr master_file; std::vector legacy_format_files; + std::vector vds_data_mappings; + std::string master_file_directory; size_t images_per_file = 1; size_t number_of_images = 0; diff --git a/tests/HDF5WritingTest.cpp b/tests/HDF5WritingTest.cpp index 32e95ae8..eb4206bd 100644 --- a/tests/HDF5WritingTest.cpp +++ b/tests/HDF5WritingTest.cpp @@ -1344,3 +1344,100 @@ TEST_CASE("FileWriter_TIFF", "[HDF5][Full]") { REQUIRE(!file_set.GetZMQAddr()); } } + +TEST_CASE("HDF5Objects_VDS_reverse_contiguous", "[HDF5][Unit]") { + { + RegisterHDF5Filter(); + + HDF5File file("scratch_vds_reverse_contiguous.h5", true); + HDF5Dcpl dcpl; + HDF5DataType data_type((int16_t) 0); + HDF5DataSpace full_space({5, 4, 3}); + + { + HDF5DataSpace source_space({2, 4, 3}); + HDF5DataSpace virtual_space({5, 4, 3}); + virtual_space.SelectHyperslab({0, 0, 0}, {2, 4, 3}); + dcpl.SetVirtual("file_000001.h5", "/entry/data/data", source_space, virtual_space); + } + + { + HDF5DataSpace source_space({3, 4, 3}); + HDF5DataSpace virtual_space({5, 4, 3}); + virtual_space.SelectHyperslab({2, 0, 0}, {3, 4, 3}); + dcpl.SetVirtual("file_000002.h5", "/entry/data/data", source_space, virtual_space); + } + + HDF5DataSet dataset(file, "/data", data_type, full_space, dcpl); + + HDF5Dcpl read_dcpl(dataset); + auto mappings = read_dcpl.GetVirtualMappings(); + + REQUIRE(mappings.size() == 2); + REQUIRE(mappings[0].ContainsVirtualImage(0)); + REQUIRE(mappings[0].ContainsVirtualImage(1)); + REQUIRE(!mappings[0].ContainsVirtualImage(2)); + CHECK(mappings[0].SourceImage(0) == 0); + CHECK(mappings[0].SourceImage(1) == 1); + + REQUIRE(mappings[1].ContainsVirtualImage(2)); + REQUIRE(mappings[1].ContainsVirtualImage(4)); + CHECK(mappings[1].SourceImage(2) == 0); + CHECK(mappings[1].SourceImage(3) == 1); + CHECK(mappings[1].SourceImage(4) == 2); + } + + remove("scratch_vds_reverse_contiguous.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); +} + +TEST_CASE("HDF5Objects_VDS_reverse_strided", "[HDF5][Unit]") { + { + RegisterHDF5Filter(); + + HDF5File file("scratch_vds_reverse_strided.h5", true); + HDF5Dcpl dcpl; + HDF5DataType data_type((int16_t) 0); + HDF5DataSpace full_space({6, 4, 3}); + + { + HDF5DataSpace source_space({3, 4, 3}); + HDF5DataSpace virtual_space({6, 4, 3}); + virtual_space.SelectHyperslabWithStride({0, 0, 0}, {3, 4, 3}, {2, 1, 1}); + dcpl.SetVirtual("file_even.h5", "/entry/data/data", source_space, virtual_space); + } + + { + HDF5DataSpace source_space({3, 4, 3}); + HDF5DataSpace virtual_space({6, 4, 3}); + virtual_space.SelectHyperslabWithStride({1, 0, 0}, {3, 4, 3}, {2, 1, 1}); + dcpl.SetVirtual("file_odd.h5", "/entry/data/data", source_space, virtual_space); + } + + HDF5DataSet dataset(file, "/data", data_type, full_space, dcpl); + + HDF5Dcpl read_dcpl(dataset); + auto mappings = read_dcpl.GetVirtualMappings(); + + REQUIRE(mappings.size() == 2); + + REQUIRE(mappings[0].ContainsVirtualImage(0)); + REQUIRE(mappings[0].ContainsVirtualImage(2)); + REQUIRE(mappings[0].ContainsVirtualImage(4)); + REQUIRE(!mappings[0].ContainsVirtualImage(1)); + CHECK(mappings[0].SourceImage(0) == 0); + CHECK(mappings[0].SourceImage(2) == 1); + CHECK(mappings[0].SourceImage(4) == 2); + + REQUIRE(mappings[1].ContainsVirtualImage(1)); + REQUIRE(mappings[1].ContainsVirtualImage(3)); + REQUIRE(mappings[1].ContainsVirtualImage(5)); + REQUIRE(!mappings[1].ContainsVirtualImage(0)); + CHECK(mappings[1].SourceImage(1) == 0); + CHECK(mappings[1].SourceImage(3) == 1); + CHECK(mappings[1].SourceImage(5) == 2); + } + + remove("scratch_vds_reverse_strided.h5"); + REQUIRE(H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL) == 0); +} \ No newline at end of file diff --git a/writer/HDF5Objects.cpp b/writer/HDF5Objects.cpp index b0c540e5..06cd4166 100644 --- a/writer/HDF5Objects.cpp +++ b/writer/HDF5Objects.cpp @@ -231,23 +231,26 @@ bool HDF5DataType::IsFloat() const { HDF5Dcpl::HDF5Dcpl() : HDF5Id() { id = H5Pcreate(H5P_DATASET_CREATE); ndim = 0; + layout = HDF5DataSetLayout::CONTIGUOUS; } HDF5Dcpl::HDF5Dcpl(const HDF5DataSet &data_set) : HDF5Id() { id = H5Dget_create_plist(data_set.GetID()); // Check if chunking is enabled - H5D_layout_t layout = H5Pget_layout(id); - if (layout != H5D_CHUNKED) { - ndim = 0; - } else { + H5D_layout_t h5_layout = H5Pget_layout(id); + if (h5_layout == H5D_VIRTUAL) + layout = HDF5DataSetLayout::VIRTUAL; + else if (h5_layout == H5D_CHUNKED) { + layout = HDF5DataSetLayout::CHUNKED; ndim = H5Pget_chunk(id, 0, nullptr); if (ndim <= 0) { H5Pclose(id); throw JFJochException(JFJochExceptionCategory::HDF5, "Error getting number of chunk dimensions"); } - } + } else + layout = HDF5DataSetLayout::CONTIGUOUS; } HDF5Dcpl::~HDF5Dcpl() { @@ -255,6 +258,7 @@ HDF5Dcpl::~HDF5Dcpl() { } void HDF5Dcpl::SetChunking(const std::vector &dims) { + layout = HDF5DataSetLayout::CHUNKED; if ((dims.empty()) || dims[0] == 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Value dimension cannot be 0"); ndim = dims.size(); @@ -298,11 +302,16 @@ void HDF5Dcpl::SetFillValueU16(uint16_t val) { } void HDF5Dcpl::SetVirtual(const std::string &path, const std::string &dataset, const HDF5DataSpace &src_dataspace, const HDF5DataSpace &dest_dataspace) { + layout = HDF5DataSetLayout::VIRTUAL; std::string filename = ExtractFilename(path); if (H5Pset_virtual(id, dest_dataspace.GetID(), filename.c_str(), dataset.c_str(), src_dataspace.GetID()) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot set virtual mapping"); } +HDF5DataSetLayout HDF5Dcpl::GetLayout() const { + return layout; +} + HDF5Fapl::HDF5Fapl() : HDF5Id() { id = H5Pcreate(H5P_FILE_ACCESS); } @@ -995,3 +1004,208 @@ std::string HDF5Object::GetLinkedFileName(const std::string& name) const { return s; } + + +namespace { + std::vector GetDataspaceDimensions(hid_t dataspace_id) { + const int ndims = H5Sget_simple_extent_ndims(dataspace_id); + if (ndims < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot read dataspace dimensions"); + + std::vector dims(ndims); + if (ndims > 0 && H5Sget_simple_extent_dims(dataspace_id, dims.data(), nullptr) < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot read dataspace dimensions"); + + return dims; + } + + void GetRegularSelection(hid_t dataspace_id, + std::vector &start, + std::vector &stride, + std::vector &count, + std::vector &block) { + const auto dims = GetDataspaceDimensions(dataspace_id); + + const H5S_sel_type selection_type = H5Sget_select_type(dataspace_id); + if (selection_type == H5S_SEL_ALL) { + start.clear(); + stride.clear(); + count.clear(); + block.clear(); + return; + } + + start.assign(dims.size(), 0); + stride.assign(dims.size(), 1); + count.assign(dims.size(), 1); + block = dims; + + if (selection_type != H5S_SEL_HYPERSLABS) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Only regular hyperslab VDS selections are supported"); + + if (H5Sis_regular_hyperslab(dataspace_id) <= 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Only regular hyperslab VDS selections are supported"); + + if (H5Sget_regular_hyperslab(dataspace_id, + start.data(), + stride.data(), + count.data(), + block.data()) < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot decode VDS hyperslab selection"); + } + + bool ContainsInRegularHyperslab(hsize_t value, + hsize_t start, + hsize_t stride, + hsize_t count, + hsize_t block) { + for (hsize_t i = 0; i < count; i++) { + const hsize_t block_start = start + i * stride; + if ((value >= block_start) && (value < block_start + block)) + return true; + } + return false; + } + + hsize_t IndexInRegularHyperslab(hsize_t value, + hsize_t start, + hsize_t stride, + hsize_t count, + hsize_t block) { + for (hsize_t i = 0; i < count; i++) { + const hsize_t block_start = start + i * stride; + if ((value >= block_start) && (value < block_start + block)) + return i * block + (value - block_start); + } + + throw JFJochException(JFJochExceptionCategory::HDF5, + "Image is not part of VDS hyperslab"); + } + + hsize_t ValueFromRegularHyperslabIndex(hsize_t index, + hsize_t start, + hsize_t stride, + hsize_t count, + hsize_t block) { + if (index >= count * block) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Source image is outside of VDS source hyperslab"); + + const hsize_t block_number = index / block; + const hsize_t in_block = index % block; + return start + block_number * stride + in_block; + } +} + +bool HDF5VirtualDatasetMapping::ContainsVirtualImage(hsize_t image_number) const { + if (virtual_start.empty() || virtual_stride.empty() || virtual_count.empty() || virtual_block.empty()) + return false; + + return ContainsInRegularHyperslab(image_number, + virtual_start[0], + virtual_stride[0], + virtual_count[0], + virtual_block[0]); +} + +hsize_t HDF5VirtualDatasetMapping::SourceImage(hsize_t image_number) const { + if (!ContainsVirtualImage(image_number)) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Image is outside of VDS mapping"); + + const hsize_t source_index = IndexInRegularHyperslab(image_number, + virtual_start[0], + virtual_stride[0], + virtual_count[0], + virtual_block[0]); + + if (source_start.empty() || source_stride.empty() || source_count.empty() || source_block.empty()) + return source_index; + + return ValueFromRegularHyperslabIndex(source_index, + source_start[0], + source_stride[0], + source_count[0], + source_block[0]); +} + + +std::vector HDF5Dcpl::GetVirtualMappings() const { + size_t mapping_count = 0; + + if (H5Pget_virtual_count(id, &mapping_count) < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get number of VDS mappings"); + if (mapping_count < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get number of VDS mappings"); + + std::vector ret; + ret.reserve(static_cast(mapping_count)); + + for (size_t i = 0; i < static_cast(mapping_count); i++) { + HDF5VirtualDatasetMapping mapping; + + const ssize_t filename_size = H5Pget_virtual_filename(id, i, nullptr, 0); + if (filename_size < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get VDS source filename size"); + + std::vector filename(filename_size + 1, '\0'); + if (H5Pget_virtual_filename(id, i, filename.data(), filename.size()) < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get VDS source filename"); + mapping.filename = filename.data(); + + const ssize_t dataset_size = H5Pget_virtual_dsetname(id, i, nullptr, 0); + if (dataset_size < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get VDS source dataset size"); + + std::vector dataset(dataset_size + 1, '\0'); + if (H5Pget_virtual_dsetname(id, i, dataset.data(), dataset.size()) < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get VDS source dataset"); + mapping.dataset = dataset.data(); + + const hid_t virtual_space = H5Pget_virtual_vspace(id, i); + if (virtual_space < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get VDS virtual dataspace"); + + const hid_t source_space = H5Pget_virtual_srcspace(id, i); + if (source_space < 0) { + H5Sclose(virtual_space); + throw JFJochException(JFJochExceptionCategory::HDF5, + "Cannot get VDS source dataspace"); + } + + try { + GetRegularSelection(virtual_space, + mapping.virtual_start, + mapping.virtual_stride, + mapping.virtual_count, + mapping.virtual_block); + + GetRegularSelection(source_space, + mapping.source_start, + mapping.source_stride, + mapping.source_count, + mapping.source_block); + } catch (...) { + H5Sclose(source_space); + H5Sclose(virtual_space); + throw; + } + + H5Sclose(source_space); + H5Sclose(virtual_space); + + ret.emplace_back(std::move(mapping)); + } + + return ret; +} \ No newline at end of file diff --git a/writer/HDF5Objects.h b/writer/HDF5Objects.h index 09a0c48e..3fda329c 100644 --- a/writer/HDF5Objects.h +++ b/writer/HDF5Objects.h @@ -19,6 +19,24 @@ extern std::mutex hdf5_mutex; class HDF5DataSet; +struct HDF5VirtualDatasetMapping { + std::string filename; + std::string dataset; + + std::vector virtual_start; + std::vector virtual_stride; + std::vector virtual_count; + std::vector virtual_block; + + std::vector source_start; + std::vector source_stride; + std::vector source_count; + std::vector source_block; + + bool ContainsVirtualImage(hsize_t image_number) const; + hsize_t SourceImage(hsize_t image_number) const; +}; + class HDF5Id { protected: hid_t id; @@ -70,8 +88,11 @@ public: ~HDF5DataType(); }; +enum class HDF5DataSetLayout {CONTIGUOUS, CHUNKED, VIRTUAL}; + class HDF5Dcpl : public HDF5Id { uint8_t ndim = 0; + HDF5DataSetLayout layout = HDF5DataSetLayout::CONTIGUOUS; public: HDF5Dcpl(); HDF5Dcpl(const HDF5DataSet& data_set); @@ -87,7 +108,8 @@ public: void SetCompression(CompressionAlgorithm algorithm, size_t block_size); void SetVirtual(const std::string& filename, const std::string& dataset, const HDF5DataSpace& src_dataspace, const HDF5DataSpace& virtual_dataspace); - + HDF5DataSetLayout GetLayout() const; + std::vector GetVirtualMappings() const; uint8_t GetNumOfDimensions() const; };