diff --git a/reader/CMakeLists.txt b/reader/CMakeLists.txt index 75ee565e..8e8793ea 100644 --- a/reader/CMakeLists.txt +++ b/reader/CMakeLists.txt @@ -2,6 +2,10 @@ ADD_LIBRARY(JFJochReader STATIC JFJochReader.cpp JFJochReader.h JFJochHDF5Reader.cpp JFJochHDF5Reader.h + HDF5ImageLocator.cpp + HDF5ImageLocator.h + HDF5ImageSource.cpp + HDF5ImageSource.h JFJochReaderImage.cpp JFJochReaderImage.h JFJochReaderDataset.cpp diff --git a/reader/HDF5ImageLocator.cpp b/reader/HDF5ImageLocator.cpp new file mode 100644 index 00000000..5132a904 --- /dev/null +++ b/reader/HDF5ImageLocator.cpp @@ -0,0 +1,160 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "HDF5ImageLocator.h" +#include "../common/JFJochException.h" + +namespace { + // Coalesce consecutive single-image mappings into one contiguous range when the source and + // virtual images stay contiguous in the same file/dataset. + void AppendOrExtendSourceMapping(std::vector &ret, + const std::string &filename, + const std::string &dataset, + uint64_t source_first_image, + uint64_t virtual_first_image, + uint64_t image_count) { + if (image_count == 0) + return; + + if (!ret.empty()) { + auto &last = ret.back(); + if (last.filename == filename + && last.dataset == dataset + && last.source_first_image + last.image_count == source_first_image + && last.virtual_first_image + last.image_count == virtual_first_image) { + last.image_count += image_count; + return; + } + } + + ret.push_back(HDF5DataSourceMessage{ + .filename = filename, + .dataset = dataset, + .source_first_image = source_first_image, + .virtual_first_image = virtual_first_image, + .image_count = image_count + }); + } +} + +void HDF5ImageLocator::Configure(Layout layout) { + file_cache_.clear(); + layout_ = std::move(layout); +} + +void HDF5ImageLocator::Clear() { + file_cache_.clear(); + layout_ = Layout{}; +} + +std::shared_ptr HDF5ImageLocator::OpenCached(const std::string &path) 
const { + auto it = file_cache_.find(path); + if (it != file_cache_.end()) + return it->second; + auto file = std::make_shared(path); + file_cache_[path] = file; + return file; +} + +HDF5ImageLocator::Location HDF5ImageLocator::Resolve(int64_t global_image) const { + if (global_image < 0) + throw JFJochException(JFJochExceptionCategory::HDF5, "Image out of bounds"); + + if (layout_.format == FileWriterFormat::NXmxLegacy) { + const uint32_t file_id = global_image / layout_.images_per_file; + const uint32_t local_index = global_image % layout_.images_per_file; + return {OpenCached(layout_.legacy_files.at(file_id)), local_index}; + } + + if (layout_.format == FileWriterFormat::NXmxVDS + && layout_.data_layout == HDF5DataSetLayout::VIRTUAL) { + const auto image = static_cast(global_image); + for (const auto &mapping: layout_.vds_mappings) { + if (!mapping.ContainsVirtualImage(image)) + continue; + return {OpenCached(mapping.filename), static_cast(mapping.SourceImage(image))}; + } + throw JFJochException(JFJochExceptionCategory::HDF5, + "Image not covered by /entry/data/data VDS mappings"); + } + + // Contiguous / integrated: pixels live in the master file at the global index. 
+ if (!layout_.master_file) + throw JFJochException(JFJochExceptionCategory::HDF5, "Master file not loaded"); + return {layout_.master_file, static_cast(global_image)}; +} + +std::vector HDF5ImageLocator::GetSourceMapping(uint64_t first_image, + std::optional image_count, + uint64_t total_images) const { + if (first_image > total_images) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "First image outside dataset range"); + + const uint64_t requested_count = image_count.value_or(total_images - first_image); + if (requested_count > total_images - first_image) // overflow-safe: first_image <= total_images was checked above, whereas summing the two could wrap + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Requested image range outside dataset range"); + + std::vector ret; + if (requested_count == 0) + return ret; + + // Integrated / contiguous source: link directly to the original master file. + if (layout_.format == FileWriterFormat::NXmxVDS && layout_.data_layout != HDF5DataSetLayout::VIRTUAL) { + AppendOrExtendSourceMapping(ret, layout_.master_filename, "/entry/data/data", + first_image, 0, requested_count); + return ret; + } + + // VDS source: expand VDS mappings to original source files, not to the VDS master. + if (layout_.format == FileWriterFormat::NXmxVDS && layout_.data_layout == HDF5DataSetLayout::VIRTUAL) { + for (uint64_t local_image = 0; local_image < requested_count; ++local_image) { + const hsize_t virtual_image = first_image + local_image; + + bool found = false; + for (const auto &mapping: layout_.vds_mappings) { + if (!mapping.ContainsVirtualImage(virtual_image)) + continue; + + const uint64_t source_image = mapping.SourceImage(virtual_image); + const std::string dataset = mapping.dataset.empty() ? 
"/entry/data/data" : mapping.dataset; + + AppendOrExtendSourceMapping(ret, mapping.filename, dataset, source_image, local_image, 1); + found = true; + break; + } + + if (!found) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Image not covered by /entry/data/data VDS mappings"); + } + + return ret; + } + + // Legacy source: link directly to the linked data files. + if (layout_.format == FileWriterFormat::NXmxLegacy) { + if (layout_.images_per_file == 0) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Cannot generate HDF5 source mapping: images_per_file is zero"); + + for (uint64_t local_image = 0; local_image < requested_count; ++local_image) { + const uint64_t source_global_image = first_image + local_image; + const uint64_t file_id = source_global_image / layout_.images_per_file; + const uint64_t source_image = source_global_image % layout_.images_per_file; + + if (file_id >= layout_.legacy_files.size()) + throw JFJochException(JFJochExceptionCategory::HDF5, + "Legacy image source file missing"); + + AppendOrExtendSourceMapping(ret, layout_.legacy_files.at(file_id), "/entry/data/data", + source_image, local_image, 1); + } + + return ret; + } + + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Unsupported HDF5 file layout for source mapping"); +} diff --git a/reader/HDF5ImageLocator.h b/reader/HDF5ImageLocator.h new file mode 100644 index 00000000..de69ae2b --- /dev/null +++ b/reader/HDF5ImageLocator.h @@ -0,0 +1,60 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include +#include +#include +#include +#include + +#include "../writer/HDF5Objects.h" // HDF5ReadOnlyFile, HDF5VirtualDatasetMapping, HDF5DataSetLayout +#include "../common/JFJochMessages.h" // FileWriterFormat, HDF5DataSourceMessage + +// Turns a global image number into the HDF5 file + local index that physically holds its pixels, +// for all three 
on-disk layouts (legacy linked data files, VDS, contiguous/integrated). This is +// the part of the reader whose links to files stay constant: it knows where the raw images +// live, independent of which master file the per-image metadata is read from. +// +// Open data-file handles are cached, so scanning many images (e.g. reprocessing) does not reopen +// the same file on every read. HDF5 is not thread-safe, so every call must be made with the +// global hdf5_mutex held by the caller; the locator does no locking of its own. +class HDF5ImageLocator { +public: + struct Location { + std::shared_ptr file; + uint32_t local_index = 0; + }; + + // Layout description, filled by the reader once the master file has been parsed. All paths + // are absolute: legacy data files and VDS mapping filenames are resolved relative to the + // master before being handed over, so the locator never deals with relative paths. + struct Layout { + FileWriterFormat format = FileWriterFormat::NoFile; + HDF5DataSetLayout data_layout = HDF5DataSetLayout::CONTIGUOUS; + std::shared_ptr master_file; + std::string master_filename; + std::vector legacy_files; + size_t images_per_file = 1; + std::vector vds_mappings; + }; + + void Configure(Layout layout); + void Clear(); + + // Resolve a global image number to {file, local index}. Throws if the image is not covered + // by the layout. Does not bounds-check against the total image count - the caller does that. + Location Resolve(int64_t global_image) const; + + // Source mapping for re-writing a derived file (e.g. _process.h5) so it links back to the + // original pixel sources rather than to a master. total_images is supplied by the caller. 
+ std::vector GetSourceMapping(uint64_t first_image, + std::optional image_count, + uint64_t total_images) const; + +private: + Layout layout_; + mutable std::map > file_cache_; + std::shared_ptr OpenCached(const std::string &path) const; +}; diff --git a/reader/HDF5ImageSource.cpp b/reader/HDF5ImageSource.cpp new file mode 100644 index 00000000..42464817 --- /dev/null +++ b/reader/HDF5ImageSource.cpp @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "HDF5ImageSource.h" +#include "../common/JFJochException.h" + +void HDF5ImageSource::Configure(HDF5ImageLocator::Layout layout) { + locator_.Configure(std::move(layout)); +} + +void HDF5ImageSource::Clear() { + locator_.Clear(); +} + +HDF5ImageLocator::Location HDF5ImageSource::Resolve(int64_t global) const { + return locator_.Resolve(global); +} + +std::vector HDF5ImageSource::GetSourceMapping(uint64_t first_image, + std::optional image_count, + uint64_t total_images) const { + return locator_.GetSourceMapping(first_image, image_count, total_images); +} + +CompressedImage HDF5ImageSource::ReadImageAt(std::vector &buffer, + const HDF5ImageLocator::Location &loc) const { + return LoadImageDataset(buffer, *loc.file, loc.local_index); +} + +CompressedImage HDF5ImageSource::LoadImageDataset(std::vector &tmp, HDF5Object &file, hsize_t number) { + std::vector start = {static_cast(number), 0, 0}; + + const std::string dataset_name = "/entry/data/data"; + + HDF5DataSet dataset(file, dataset_name); + HDF5DataSpace dataspace(dataset); + HDF5DataType datatype(dataset); + HDF5Dcpl dcpl(dataset); + + if (dataspace.GetNumOfDimensions() != 3) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "/entry/data/data dataset must be 3D"); + + auto dim = dataspace.GetDimensions(); + + CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION; + auto chunk_size = dcpl.GetChunking(); + + if ((chunk_size.size() == 3) 
&& (chunk_size[0] == 1) && (chunk_size[1] == dim[1]) && (chunk_size[2] == dim[2])) { + dataset.ReadDirectChunk(tmp, start); + algorithm = dcpl.GetCompression(); + } else { + dataset.ReadVectorToU8(tmp, start, {1, dim[1], dim[2]}); + algorithm = CompressionAlgorithm::NO_COMPRESSION; + } + + if (datatype.IsFloat()) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, + "Float datasets not supported at this time"); + + return { + tmp, dim[2], dim[1], + CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()), + algorithm + }; +} diff --git a/reader/HDF5ImageSource.h b/reader/HDF5ImageSource.h new file mode 100644 index 00000000..ac45020c --- /dev/null +++ b/reader/HDF5ImageSource.h @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include +#include +#include + +#include "HDF5ImageLocator.h" +#include "../common/CompressedImage.h" + +// Raw-pixel side of the reader. Turns a global image number into a CompressedImage, using +// HDF5ImageLocator to find the file (with its open-file cache). This is the part whose links +// to files stay constant: switching which master the per-image metadata is read from never +// touches it. Caller must hold the global hdf5_mutex (HDF5 is not thread-safe). +class HDF5ImageSource { +public: + void Configure(HDF5ImageLocator::Layout layout); + void Clear(); + + // Where image `global` physically lives. Also used by the metadata source to find the data + // file that holds a legacy/VDS image's per-image metadata. + HDF5ImageLocator::Location Resolve(int64_t global) const; + + // Read the pixels at a resolved location into a CompressedImage backed by `buffer`. 
+ CompressedImage ReadImageAt(std::vector &buffer, const HDF5ImageLocator::Location &loc) const; + + std::vector GetSourceMapping(uint64_t first_image, + std::optional image_count, + uint64_t total_images) const; + +private: + HDF5ImageLocator locator_; + static CompressedImage LoadImageDataset(std::vector &tmp, HDF5Object &file, hsize_t number); +}; diff --git a/reader/JFJochHDF5Reader.cpp b/reader/JFJochHDF5Reader.cpp index 590d2100..0bbbe8dc 100644 --- a/reader/JFJochHDF5Reader.cpp +++ b/reader/JFJochHDF5Reader.cpp @@ -288,15 +288,23 @@ void JFJochHDF5Reader::ReadROIMetadata(HDF5ReadOnlyFile &file, JFJochReaderDatas void JFJochHDF5Reader::ReadFile(const std::string &filename) { std::unique_lock ul(hdf5_mutex); + image_source_.Clear(); try { auto dataset = std::make_shared(); master_file = std::make_shared(filename); master_filename = filename; dataset->experiment = default_experiment; + // Image-layout state is accumulated locally while parsing, then handed to image_source_ + // at the end. format stays NoFile if the master carries no image data. + FileWriterFormat format = FileWriterFormat::NoFile; + HDF5DataSetLayout data_layout = HDF5DataSetLayout::CONTIGUOUS; + std::vector legacy_format_files; + std::vector vds_data_mappings; + size_t images_per_file = 1; + std::filesystem::path master_path(filename); - master_file_directory = master_path.parent_path().string(); - vds_data_mappings.clear(); + std::string master_file_directory = master_path.parent_path().string(); dataset->arm_date = master_file->GetString("/entry/start_time"); @@ -679,12 +687,27 @@ void JFJochHDF5Reader::ReadFile(const std::string &filename) { ReadROIMetadata(*master_file, *dataset); + // Hand the parsed image layout to the locator. VDS mapping filenames are resolved to + // absolute paths here so the locator only ever deals with real paths. 
+ for (auto &m : vds_data_mappings) + m.filename = ResolveRelativeToMaster(master_file_directory, m.filename); + image_source_.Configure(HDF5ImageLocator::Layout{ + .format = format, + .data_layout = data_layout, + .master_file = master_file, + .master_filename = master_filename, + .legacy_files = std::move(legacy_format_files), + .images_per_file = images_per_file, + .vds_mappings = std::move(vds_data_mappings) + }); + dataset->experiment.ImagesPerTrigger(number_of_images); cached_geom = dataset->experiment.GetDiffractionGeometry(); SetStartMessage(dataset); } catch (const std::exception &e) { master_file = {}; number_of_images = 0; + image_source_.Clear(); SetStartMessage({}); throw; } @@ -695,77 +718,10 @@ uint64_t JFJochHDF5Reader::GetNumberOfImages() const { return number_of_images; } -CompressedImage JFJochHDF5Reader::LoadImageDataset(std::vector &tmp, HDF5Object &file, hsize_t number) { - std::vector start = {static_cast(number), 0, 0}; - - const std::string dataset_name = "/entry/data/data"; - - HDF5DataSet dataset(file, dataset_name); - HDF5DataSpace dataspace(dataset); - HDF5DataType datatype(dataset); - HDF5Dcpl dcpl(dataset); - - if (dataspace.GetNumOfDimensions() != 3) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, - "/entry/data/data dataset must be 3D"); - - auto dim = dataspace.GetDimensions(); - - CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION; - auto chunk_size = dcpl.GetChunking(); - - if ((chunk_size.size() == 3) && (chunk_size[0] == 1) && (chunk_size[1] == dim[1]) && (chunk_size[2] == dim[2])) { - dataset.ReadDirectChunk(tmp, start); - algorithm = dcpl.GetCompression(); - } else { - dataset.ReadVectorToU8(tmp, start, {1, dim[1], dim[2]}); - algorithm = CompressionAlgorithm::NO_COMPRESSION; - } - - if (datatype.IsFloat()) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, - "Float datasets not supported at this time"); - - return { - tmp, dim[2], dim[1], - 
CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()), - algorithm - }; -} - -std::pair, uint32_t> JFJochHDF5Reader::GetImageLocation(int64_t image_number) { - if (image_number >= number_of_images || image_number < 0) +HDF5ImageLocator::Location JFJochHDF5Reader::GetImageLocation(int64_t image_number) const { + if (image_number >= static_cast(number_of_images) || image_number < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Image out of bounds"); - - uint32_t image_id; - std::shared_ptr data_file; - - if (format == FileWriterFormat::NXmxLegacy) { - uint32_t file_id = image_number / images_per_file; - image_id = image_number % images_per_file; - data_file = std::make_shared(legacy_format_files.at(file_id)); - } else if (format == FileWriterFormat::NXmxVDS && data_layout == HDF5DataSetLayout::VIRTUAL) { - const auto image = static_cast(image_number); - - for (const auto &mapping: vds_data_mappings) { - if (!mapping.ContainsVirtualImage(image)) - continue; - - image_id = static_cast(mapping.SourceImage(image)); - data_file = std::make_shared( - ResolveRelativeToMaster(master_file_directory, mapping.filename) - ); - return {std::move(data_file), image_id}; - } - - throw JFJochException(JFJochExceptionCategory::HDF5, - "Image not covered by /entry/data/data VDS mappings"); - } else { - image_id = image_number; - data_file = master_file; - } - - return {std::move(data_file), image_id}; + return image_source_.Resolve(image_number); } std::shared_ptr JFJochHDF5Reader::GetRawImage(int64_t image_number) { @@ -775,9 +731,9 @@ std::shared_ptr JFJochHDF5Reader::GetRawImage(int64_t imag throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Cannot load image if file not loaded"); - auto [source_file, image_id] = GetImageLocation(image_number); + auto loc = GetImageLocation(image_number); auto ret = std::make_shared(); - ret->image = LoadImageDataset(ret->image_buffer, *source_file, image_id); + ret->image = 
image_source_.ReadImageAt(ret->image_buffer, loc); return ret; } @@ -921,9 +877,11 @@ bool JFJochHDF5Reader::LoadImage_i(std::shared_ptr &dataset throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Cannot load image if file not loaded"); - auto [source_file, image_id] = GetImageLocation(image_number); + auto loc = GetImageLocation(image_number); + auto &source_file = loc.file; + const uint32_t image_id = loc.local_index; - message.image = LoadImageDataset(buffer, *source_file, image_id); + message.image = image_source_.ReadImageAt(buffer, loc); message.number = image_number; const auto master_image = static_cast(image_number); @@ -1037,11 +995,9 @@ void JFJochHDF5Reader::Close() { std::unique_lock ul(hdf5_mutex); master_file = {}; number_of_images = 0; - legacy_format_files.clear(); - vds_data_mappings.clear(); - master_file_directory.clear(); master_filename.clear(); cached_geom = DiffractionGeometry{}; + image_source_.Clear(); SetStartMessage({}); } @@ -1109,35 +1065,6 @@ CompressedImage JFJochHDF5Reader::ReadCalibration(std::vector &tmp, con }; } -void AppendOrExtendSourceMapping(std::vector &ret, - const std::string &filename, - const std::string &dataset, - uint64_t source_first_image, - uint64_t virtual_first_image, - uint64_t image_count) { - if (image_count == 0) - return; - - if (!ret.empty()) { - auto &last = ret.back(); - if (last.filename == filename - && last.dataset == dataset - && last.source_first_image + last.image_count == source_first_image - && last.virtual_first_image + last.image_count == virtual_first_image) { - last.image_count += image_count; - return; - } - } - - ret.push_back(HDF5DataSourceMessage{ - .filename = filename, - .dataset = dataset, - .source_first_image = source_first_image, - .virtual_first_image = virtual_first_image, - .image_count = image_count - }); -}; - std::vector JFJochHDF5Reader::GetHDF5DataSource(uint64_t first_image, std::optional image_count) const { std::unique_lock ul(hdf5_mutex); @@ -1146,90 
+1073,7 @@ std::vector JFJochHDF5Reader::GetHDF5DataSource(uint64_t throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Cannot generate HDF5 source mapping if file not loaded"); - if (first_image > number_of_images) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, - "First image outside dataset range"); - - const uint64_t requested_count = image_count.value_or(number_of_images - first_image); - if (first_image + requested_count > number_of_images) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, - "Requested image range outside dataset range"); - - std::vector ret; - if (requested_count == 0) - return ret; - - // Integrated / contiguous source: link directly to original master file. - if (format == FileWriterFormat::NXmxVDS && data_layout != HDF5DataSetLayout::VIRTUAL) { - AppendOrExtendSourceMapping(ret, - master_filename, - "/entry/data/data", - first_image, - 0, - requested_count); - return ret; - } - - // VDS source: expand VDS mappings to original source files, not to the VDS master. - if (format == FileWriterFormat::NXmxVDS && data_layout == HDF5DataSetLayout::VIRTUAL) { - for (uint64_t local_image = 0; local_image < requested_count; ++local_image) { - const hsize_t virtual_image = first_image + local_image; - - bool found = false; - for (const auto &mapping: vds_data_mappings) { - if (!mapping.ContainsVirtualImage(virtual_image)) - continue; - - const uint64_t source_image = mapping.SourceImage(virtual_image); - const std::string filename = ResolveRelativeToMaster(master_file_directory, mapping.filename); - const std::string dataset = mapping.dataset.empty() ? 
"/entry/data/data" : mapping.dataset; - - AppendOrExtendSourceMapping(ret, - filename, - dataset, - source_image, - local_image, - 1); - found = true; - break; - } - - if (!found) - throw JFJochException(JFJochExceptionCategory::HDF5, - "Image not covered by /entry/data/data VDS mappings"); - } - - return ret; - } - - // Legacy source: link directly to linked data files. - if (format == FileWriterFormat::NXmxLegacy) { - if (images_per_file == 0) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, - "Cannot generate HDF5 source mapping: images_per_file is zero"); - - for (uint64_t local_image = 0; local_image < requested_count; ++local_image) { - const uint64_t source_global_image = first_image + local_image; - const uint64_t file_id = source_global_image / images_per_file; - const uint64_t source_image = source_global_image % images_per_file; - - if (file_id >= legacy_format_files.size()) - throw JFJochException(JFJochExceptionCategory::HDF5, - "Legacy image source file missing"); - - AppendOrExtendSourceMapping(ret, - legacy_format_files.at(file_id), - "/entry/data/data", - source_image, - local_image, - 1); - } - - return ret; - } - - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, - "Unsupported HDF5 file layout for source mapping"); + return image_source_.GetSourceMapping(first_image, image_count, number_of_images); } @@ -1255,19 +1099,6 @@ std::vector JFJochHDF5Reader::ReadReflections(size_t start_i throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Cannot read reflections if file not loaded"); - // Cache of data files opened during this call (keyed by path) to avoid - // re-opening the same file for every image in a multi-image batch. 
- std::map > open_files; - - auto get_data_file = [&](const std::string &path) -> std::shared_ptr { - auto it = open_files.find(path); - if (it != open_files.end()) - return it->second; - auto f = std::make_shared(path); - open_files[path] = f; - return f; - }; - std::vector ret; ret.reserve(end_image_val - start_image + 1); @@ -1277,23 +1108,11 @@ std::vector JFJochHDF5Reader::ReadReflections(size_t start_i // Generic (non-image-specific) detector geometry from experiment setup. outcome.geom = cached_geom; - // Determine which HDF5 file holds the reflections and MX metadata for - // this image, and what local image index to use within that file. - HDF5Object *meta_file = master_file.get(); - size_t meta_image_id = img; - - std::shared_ptr owned_source; // keeps the file alive - - if (format == FileWriterFormat::NXmxLegacy) { - // Reflections and per-image MX data are in the individual data files, - // addressed by the source-local image index. - const size_t file_id = img / images_per_file; - meta_image_id = img % images_per_file; - owned_source = get_data_file(legacy_format_files.at(file_id)); - meta_file = owned_source.get(); - } - // NXmxVDS with contiguous layout (integrated): everything is in master_file - // with global indices — the defaults set above are correct. + // Per-image reflections and MX metadata live in the same file as the image pixels, + // at the source-local index (the locator keeps the data-file handle cached). 
+ const auto loc = GetImageLocation(static_cast(img)); + HDF5Object *meta_file = loc.file.get(); + const size_t meta_image_id = loc.local_index; // ── reflections ────────────────────────────────────────────────────── const std::string refl_group = fmt::format("/entry/reflections/image_{:06d}", meta_image_id); @@ -1339,46 +1158,12 @@ std::vector JFJochHDF5Reader::ReadSpots(int64_t image) const { throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Cannot read spots if file not loaded"); - // Cache of data files opened during this call (keyed by path) to avoid - // re-opening the same file for every image in a multi-image batch. - std::map > open_files; + // Spot/MX data lives in the same file as the image pixels, at the source-local index + // (the locator keeps the data-file handle cached across calls). + const auto loc = GetImageLocation(image); + HDF5Object *meta_file = loc.file.get(); + const size_t meta_image_id = loc.local_index; - auto get_data_file = [&](const std::string &path) -> std::shared_ptr { - auto it = open_files.find(path); - if (it != open_files.end()) - return it->second; - auto f = std::make_shared(path); - open_files[path] = f; - return f; - }; - - std::vector ret; - - // Resolve which HDF5 file and local image index hold the MX spot data. - // Uses the same logic as LoadImage_i / ReadReflections so all three - // code paths stay in sync. 
- HDF5Object *meta_file = master_file.get(); - size_t meta_image_id = image; - - std::shared_ptr owned_source; - - if (format == FileWriterFormat::NXmxLegacy) { - const size_t file_id = image / images_per_file; - meta_image_id = image % images_per_file; - owned_source = get_data_file(legacy_format_files.at(file_id)); - meta_file = owned_source.get(); - } else if (format == FileWriterFormat::NXmxVDS && data_layout == HDF5DataSetLayout::VIRTUAL) { - for (const auto &mapping: vds_data_mappings) { - if (!mapping.ContainsVirtualImage(image)) - continue; - meta_image_id = mapping.SourceImage(image); - owned_source = get_data_file(ResolveRelativeToMaster(master_file_directory, mapping.filename)); - meta_file = owned_source.get(); - break; - } - } - // NXmxVDS contiguous / NXmxIntegrated: master_file with global index — - // the defaults set above are already correct. DataMessage tmp_message; tmp_message.number = static_cast(image); diff --git a/reader/JFJochHDF5Reader.h b/reader/JFJochHDF5Reader.h index 137f2ad6..4271ec6d 100644 --- a/reader/JFJochHDF5Reader.h +++ b/reader/JFJochHDF5Reader.h @@ -5,23 +5,20 @@ #include "JFJochReader.h" #include "JFJochReaderSpots.h" +#include "HDF5ImageSource.h" #include "../writer/HDF5Objects.h" #include "../image_analysis/IntegrationOutcome.h" class JFJochHDF5Reader : public JFJochReader { - FileWriterFormat format = FileWriterFormat::NoFile; - HDF5DataSetLayout data_layout = HDF5DataSetLayout::CONTIGUOUS; + // Raw-pixel side: where each image physically lives + the open-file cache. Stays fixed while + // the metadata read from master_file may change. + HDF5ImageSource image_source_; + // Metadata side: the master file the per-image and dataset-level metadata is read from. 
std::shared_ptr master_file; - - DiffractionGeometry cached_geom; - - std::vector legacy_format_files; - std::vector vds_data_mappings; - std::string master_file_directory; std::string master_filename; - size_t images_per_file = 1; + DiffractionGeometry cached_geom; size_t number_of_images = 0; bool LoadImage_i(std::shared_ptr &dataset, @@ -30,8 +27,6 @@ class JFJochHDF5Reader : public JFJochReader { int64_t image_number, bool update_dataset) override; - CompressedImage LoadImageDataset(std::vector &tmp, HDF5Object &file, hsize_t number); - template void ReadVector(std::vector &v, HDF5Object &file, @@ -39,7 +34,7 @@ class JFJochHDF5Reader : public JFJochReader { size_t image0, size_t nimages); - std::pair, uint32_t> GetImageLocation(int64_t image_number); + HDF5ImageLocator::Location GetImageLocation(int64_t image_number) const; std::optional ReadAxis(HDF5Object *file, const std::string &name); // Read the master-file ROI definitions: logical definitions into experiment.ROI()