Files
Jungfraujoch/reader/HDF5MetadataSource.cpp
T
leonarski_f b9f8c2b675 reader/viewer: snapshot image->original map; subset runs register and overlay
Foundation for a dataset (snapshot or, later, the main file) being a subset of the
truly collected images:
- JFJochReaderDataset gains source_image_number (image index -> original image
  number; empty = identity).
- HDF5MetadataSource reads /entry/detector/number into that map and an inverse
  (original -> local) map; FillPerImage / ReadSpots translate the requested global
  image to this source's local index via ToLocalIndex (return nothing if the image
  is not covered), so partial snapshots are correct and never read out of bounds.
- RegisterSnapshot now accepts a snapshot with fewer images than the dataset
  (only rejects more), so sub-range / strided reprocessing runs register.
- The dataset-info plot draws each run at its original image numbers (x map from
  source_image_number), so a subset run lands at the right place on the shared
  axis. The live run's map is filled from msg.original_number.

This makes the foundation ready for strided / filtered selections (e.g. reprocess
only images with >N spots) without restricting to a min-max sub-range.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-22 14:59:23 +02:00

1163 lines
53 KiB
C++

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include <cmath>
#include <set>
#include "HDF5MetadataSource.h"
#include "spdlog/fmt/fmt.h"
#include "../image_analysis/bragg_integration/CalcISigma.h"
#include "../image_analysis/spot_finding/SpotUtils.h"
#include "../common/GridScanSettings.h"
#include "../common/JFJochMath.h"
#include "../common/ROIDefinition.h"
inline std::pair<gemmi::CrystalSystem, char> parse_bravais_lattice(const std::string &val) {
if (val.empty())
return {gemmi::CrystalSystem::Triclinic, 'P'};
if (val.size() != 2)
throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong Bravais lattice encoding");
gemmi::CrystalSystem cs;
char centering = val[1];
std::set<char> allowed_centering;
switch (val[0]) {
case 'a':
cs = gemmi::CrystalSystem::Triclinic;
allowed_centering = {'P'};
break;
case 'm':
cs = gemmi::CrystalSystem::Monoclinic;
allowed_centering = {'P', 'A', 'B', 'C'};
break;
case 'o':
cs = gemmi::CrystalSystem::Orthorhombic;
allowed_centering = {'P', 'A', 'B', 'C', 'I', 'F'};
break;
case 't':
cs = gemmi::CrystalSystem::Tetragonal;
allowed_centering = {'P', 'I'};
break;
case 'h':
if (centering == 'P')
cs = gemmi::CrystalSystem::Hexagonal;
else if (centering == 'R')
cs = gemmi::CrystalSystem::Trigonal;
allowed_centering = {'P', 'R'};
break;
case 'c':
cs = gemmi::CrystalSystem::Cubic;
allowed_centering = {'P', 'F', 'I'};
break;
default:
// allowed_centering is empty and exception will be always thrown
break;
}
if (!allowed_centering.contains(centering))
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Invalid lattice encoding " + val);
return {cs, centering};
}
std::vector<hsize_t> GetDimension(HDF5Object &object, const std::string &path) {
const auto dim = object.GetDimension(path);
if (dim.size() != 3)
throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong dimension of /entry/data/data");
return dim;
}
std::vector<HDF5VirtualDatasetMapping> ReadVDSImageMappings(HDF5Object &file,
const std::string &dataset_name) {
HDF5DataSet dataset(file, dataset_name);
HDF5Dcpl dcpl(dataset);
auto mappings = dcpl.GetVirtualMappings();
if (mappings.empty())
throw JFJochException(JFJochExceptionCategory::HDF5,
dataset_name + " is not a virtual dataset");
for (const auto &mapping: mappings) {
if (mapping.dataset.empty())
throw JFJochException(JFJochExceptionCategory::HDF5,
"VDS mapping has empty source dataset name");
if (mapping.virtual_start.size() != 3)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Only 3D image VDS mappings are supported");
}
return mappings;
}
std::string ResolveRelativeToMaster(const std::string &directory,
const std::string &filename) {
std::filesystem::path path(filename);
if (path.is_absolute() || directory.empty())
return filename;
return (std::filesystem::path(directory) / path).string();
}
template<class T>
void ReadVector(std::vector<T> &v,
HDF5Object &file,
const std::string &dataset_name,
size_t image0,
size_t nimages) {
try {
auto tmp = file.ReadOptVector<T>(dataset_name);
if (tmp.size() <= nimages) {
v.resize(image0 + nimages);
for (int i = 0; i < tmp.size(); i++)
v[image0 + i] = tmp[i];
}
} catch (JFJochException &e) {
}
}
std::string removeSuffix(const std::string &s, const std::string &suffix) {
if (s.ends_with(suffix))
return s.substr(0, s.size() - suffix.size());
return s;
}
std::string dataset_name(const std::string &path) {
std::string file = std::filesystem::path(path).filename().string();
file = removeSuffix(file, "_master.h5");
// If previous suffix was not found, try removing this one
file = removeSuffix(file, ".h5");
return file;
}
bool ReadReflectionsFromGroup(HDF5Object &file,
const std::string &image_group_name,
std::vector<Reflection> &reflections) {
if (!file.Exists("/entry/reflections") || !file.Exists(image_group_name))
return false;
auto h = file.ReadOptVector<int32_t>(image_group_name + "/h");
auto k = file.ReadOptVector<int32_t>(image_group_name + "/k");
auto l = file.ReadOptVector<int32_t>(image_group_name + "/l");
auto predicted_x = file.ReadOptVector<float>(image_group_name + "/predicted_x");
auto predicted_y = file.ReadOptVector<float>(image_group_name + "/predicted_y");
auto obs_x = file.ReadOptVector<float>(image_group_name + "/observed_x");
auto obs_y = file.ReadOptVector<float>(image_group_name + "/observed_y");
auto d = file.ReadOptVector<float>(image_group_name + "/d");
auto int_sum = file.ReadOptVector<float>(image_group_name + "/int_sum");
auto int_err = file.ReadOptVector<float>(image_group_name + "/int_err");
auto bkg = file.ReadOptVector<float>(image_group_name + "/background_mean");
auto lp = file.ReadOptVector<float>(image_group_name + "/lp");
auto partiality = file.ReadOptVector<float>(image_group_name + "/partiality");
auto phi = file.ReadOptVector<float>(image_group_name + "/delta_phi");
auto zeta = file.ReadOptVector<float>(image_group_name + "/zeta");
auto image_scale_corr = file.ReadOptVector<float>(image_group_name + "/image_scale_corr");
if (h.size() != l.size() || h.size() != k.size() || h.size() != d.size()
|| h.size() != predicted_x.size() || h.size() != predicted_y.size()
|| h.size() != int_sum.size() || h.size() != int_err.size() || h.size() != bkg.size())
throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong size of reflections dataset");
for (size_t i = 0; i < h.size(); i++) {
float lp_val = 0.0;
if (lp.size() > i && lp[i] != 0.0f)
lp_val = 1.0f / lp[i];
float partiality_val = -1.0f;
if (partiality.size() > i && partiality[i] >= 0.0f)
partiality_val = partiality[i];
float delta_phi_val = NAN;
if (phi.size() > i)
delta_phi_val = phi[i];
float zeta_val = NAN;
if (zeta.size() > i)
zeta_val = zeta[i];
float image_scale_corr_val = 1.0f; // Default is 1.0, if we don't know any better
if (image_scale_corr.size() > i)
image_scale_corr_val = image_scale_corr[i];
float obs_x_val = NAN;
float obs_y_val = NAN;
if (obs_x.size() > i && obs_y.size() > i) {
obs_x_val = obs_x[i];
obs_y_val = obs_y[i];
}
Reflection r{
.h = h.at(i),
.k = k.at(i),
.l = l.at(i),
.delta_phi_deg = delta_phi_val,
.predicted_x = predicted_x.at(i),
.predicted_y = predicted_y.at(i),
.observed_x = obs_x_val,
.observed_y = obs_y_val,
.d = d.at(i),
.I = int_sum.at(i),
.bkg = bkg.at(i),
.sigma = int_err.at(i),
.rlp = lp_val,
.partiality = partiality_val,
.zeta = zeta_val,
.image_scale_corr = image_scale_corr_val
};
reflections.emplace_back(r);
}
return true;
}
template<class T>
std::optional<T> ReadElementMasterFirst(HDF5Object &master_file,
HDF5Object &source_file,
const std::string &path,
hsize_t master_image,
hsize_t source_image) {
if (master_file.Exists(path))
return master_file.ReadElement<T>(path, master_image);
if (source_file.Exists(path))
return source_file.ReadElement<T>(path, source_image);
return {};
}
template<class T>
std::vector<T> ReadVectorMasterFirst(HDF5Object &master_file,
HDF5Object &source_file,
const std::string &path,
const std::vector<hsize_t> &master_start,
const std::vector<hsize_t> &source_start,
const std::vector<hsize_t> &size) {
if (master_file.Exists(path))
return master_file.ReadOptVector<T>(path, master_start, size);
if (source_file.Exists(path))
return source_file.ReadOptVector<T>(path, source_start, size);
return {};
}
void HDF5MetadataSource::ReadROIMetadata(HDF5ReadOnlyFile &file, JFJochReaderDataset &dataset) const {
// ROI definitions live in /entry/roi_defs (kept separate from the per-image ROI
// results in /entry/roi so that older readers, which iterate /entry/roi, are not
// disturbed by the bitmap and definition subgroups).
if (!file.Exists("/entry/roi_defs"))
return;
if (file.Exists("/entry/roi_defs/roi_map")) {
auto dim = file.GetDimension("/entry/roi_defs/roi_map"); // [y, x]
if (dim.size() == 2)
dataset.roi_map = file.ReadOptVector<uint16_t>("/entry/roi_defs/roi_map",
{0, 0}, {dim[0], dim[1]});
}
ROIDefinition defs;
for (const auto &name: file.FindLeafs("/entry/roi_defs")) {
const std::string base = "/entry/roi_defs/" + name;
// Skip the roi_map bitmask; only named ROI subgroups carry a definition.
if (name == "roi_map" || !file.Exists(base + "/type"))
continue;
dataset.roi_bit_index[name] = static_cast<uint16_t>(file.GetInt(base + "/bit_index"));
const std::string type = file.GetString(base + "/type");
if (type == "box")
defs.boxes.emplace_back(name, file.GetInt(base + "/min_x_pxl"), file.GetInt(base + "/max_x_pxl"),
file.GetInt(base + "/min_y_pxl"), file.GetInt(base + "/max_y_pxl"));
else if (type == "circle")
defs.circles.emplace_back(name, file.GetFloat(base + "/center_x_pxl"), file.GetFloat(base + "/center_y_pxl"),
file.GetFloat(base + "/radius_pxl"));
else if (type == "azim") {
const float qmin = file.GetFloat(base + "/q_min_recipA");
const float qmax = file.GetFloat(base + "/q_max_recipA");
float phi_min = 0, phi_max = 0;
if (file.Exists(base + "/phi_min_deg") && file.Exists(base + "/phi_max_deg")) {
phi_min = file.GetFloat(base + "/phi_min_deg");
phi_max = file.GetFloat(base + "/phi_max_deg");
}
const float d_min = (qmax == 0.0f) ? 0.0f : 2.0f * static_cast<float>(PI) / qmax;
const float d_max = (qmin == 0.0f) ? 0.0f : 2.0f * static_cast<float>(PI) / qmin;
defs.azimuthal.emplace_back(name, d_min, d_max, phi_min, phi_max);
}
}
if (!defs.boxes.empty() || !defs.circles.empty() || !defs.azimuthal.empty())
dataset.experiment.ROI().SetROI(defs);
}
HDF5MetadataSource::OpenResult HDF5MetadataSource::Open(const std::string &filename,
const DiffractionExperiment &default_experiment) {
try {
auto dataset = std::make_shared<JFJochReaderDataset>();
master_file = std::make_shared<HDF5ReadOnlyFile>(filename);
master_filename = filename;
dataset->experiment = default_experiment;
// Image-layout state is accumulated locally while parsing, then handed to image_locator_
// at the end. format stays NoFile if the master carries no image data.
FileWriterFormat format = FileWriterFormat::NoFile;
HDF5DataSetLayout data_layout = HDF5DataSetLayout::CONTIGUOUS;
std::vector<std::string> legacy_format_files;
std::vector<HDF5VirtualDatasetMapping> vds_data_mappings;
size_t images_per_file = 1;
std::filesystem::path master_path(filename);
std::string master_file_directory = master_path.parent_path().string();
dataset->arm_date = master_file->GetString("/entry/start_time");
dataset->experiment.FilePrefix(dataset_name(filename));
// JFJochReader is always using int32_t
dataset->experiment.BitDepthImage(32);
dataset->experiment.PixelSigned(true);
size_t image_size_x = 0;
size_t image_size_y = 0;
if (master_file->Exists("/entry/data/data")) {
HDF5DataSet data_dataset(*master_file, "/entry/data/data");
HDF5Dcpl dcpl(data_dataset);
data_layout = dcpl.GetLayout();
auto dim = GetDimension(*master_file, "/entry/data/data");
number_of_images = dim[0];
image_size_y = dim[1];
image_size_x = dim[2];
images_per_file = number_of_images;
if (data_layout == HDF5DataSetLayout::VIRTUAL)
vds_data_mappings = ReadVDSImageMappings(*master_file, "/entry/data/data");
if (master_file->Exists("/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image"))
dataset->efficiency = master_file->ReadVector<float>(
"/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image");
else
dataset->efficiency = std::vector<float>(number_of_images, 1.0);
if (master_file->Exists("/entry/roi"))
dataset->roi = master_file->FindLeafs("/entry/roi");
for (const auto &s: dataset->roi) {
dataset->roi_max.emplace_back(master_file->ReadVector<int64_t>("/entry/roi/" + s + "/max"));
dataset->roi_sum.emplace_back(master_file->ReadVector<int64_t>("/entry/roi/" + s + "/sum"));
dataset->roi_sum_sq.emplace_back(master_file->ReadVector<int64_t>("/entry/roi/" + s + "/sum_sq"));
dataset->roi_npixel.emplace_back(master_file->ReadVector<int64_t>("/entry/roi/" + s + "/npixel"));
dataset->roi_x.emplace_back(master_file->ReadVector<float>("/entry/roi/" + s + "/x"));
dataset->roi_y.emplace_back(master_file->ReadVector<float>("/entry/roi/" + s + "/y"));
}
if (master_file->Exists("/entry/MX")) {
if (master_file->Exists("/entry/MX/peakCountUnfiltered"))
dataset->spot_count = master_file->ReadOptVector<float>("/entry/MX/peakCountUnfiltered");
else
dataset->spot_count = master_file->ReadOptVector<float>("/entry/MX/nPeaks");
dataset->spot_count_low_res = master_file->ReadOptVector<float>("/entry/MX/peakCountLowRes");
dataset->spot_count_indexed = master_file->ReadOptVector<float>("/entry/MX/peakCountIndexed");
dataset->spot_count_ice_rings = master_file->ReadOptVector<float>("/entry/MX/peakCountIceRingRes");
dataset->indexing_result = master_file->ReadOptVector<float>("/entry/MX/imageIndexed");
dataset->bkg_estimate = master_file->ReadOptVector<float>("/entry/MX/bkgEstimate");
dataset->resolution_estimate = master_file->ReadOptVector<float>("/entry/MX/resolutionEstimate");
dataset->profile_radius = master_file->ReadOptVector<float>("/entry/MX/profileRadius");
// Master files write indexedLatticeCount; data files / the per-file MX
// plugin use indexingLatticeCount. Accept either for backward compatibility.
dataset->indexing_lattice_count = master_file->ReadOptVector<float>("/entry/MX/indexedLatticeCount");
if (dataset->indexing_lattice_count.empty())
dataset->indexing_lattice_count = master_file->ReadOptVector<float>("/entry/MX/indexingLatticeCount");
dataset->mosaicity_deg = master_file->ReadOptVector<float>("/entry/MX/mosaicity");
dataset->b_factor = master_file->ReadOptVector<float>("/entry/MX/bFactor");
dataset->image_scale_factor = master_file->ReadOptVector<float>("/entry/MX/imageScaleFactor");
dataset->image_scale_cc = master_file->ReadOptVector<float>("/entry/MX/imageScaleCC");
dataset->image_scale_b = master_file->ReadOptVector<float>("/entry/MX/imageScaleBFactor");
dataset->integrated_reflections = master_file->ReadOptVector<float>("/entry/MX/integratedReflections");
}
if (master_file->Exists("/entry/image"))
dataset->max_value = master_file->ReadOptVector<int64_t>("/entry/image/max_value");
format = FileWriterFormat::NXmxVDS;
} else if (master_file->Exists("/entry/data/data_000001")) {
format = FileWriterFormat::NXmxLegacy;
data_layout = HDF5DataSetLayout::CONTIGUOUS;
legacy_format_files.clear();
image_size_x = master_file->GetInt("/entry/instrument/detector/detectorSpecific/x_pixels_in_detector");
image_size_y = master_file->GetInt("/entry/instrument/detector/detectorSpecific/y_pixels_in_detector");
//size_t expected_images = master_file->GetInt("/entry/instrument/detector/detectorSpecific/nimages");
images_per_file = 0;
number_of_images = 0;
uint32_t nfiles = 0;
std::filesystem::path file_path(filename);
std::filesystem::path directory = file_path.parent_path();
while (true) {
std::string dname = fmt::format("/entry/data/data_{:06d}", nfiles + 1);
if (!master_file->Exists(dname))
break;
size_t fimages = 0;
try {
auto fname = ResolveRelativeToMaster(directory.string(),
master_file->GetLinkedFileName(dname));
HDF5ReadOnlyFile data_file(fname);
fimages = GetDimension(data_file, "/entry/data/data")[0];
legacy_format_files.push_back(fname);
if (nfiles == 0 && data_file.Exists("/entry/roi"))
dataset->roi = data_file.FindLeafs("/entry/roi");
dataset->roi_max.resize(dataset->roi.size());
dataset->roi_npixel.resize(dataset->roi.size());
dataset->roi_sum.resize(dataset->roi.size());
dataset->roi_sum_sq.resize(dataset->roi.size());
dataset->roi_x.resize(dataset->roi.size());
dataset->roi_y.resize(dataset->roi.size());
for (int i = 0; i < dataset->roi.size(); i++) {
auto roi_name = dataset->roi[i];
ReadVector(dataset->roi_max.at(i),
data_file, "/entry/roi/" + roi_name + "/max",
number_of_images, fimages);
ReadVector(dataset->roi_npixel.at(i),
data_file, "/entry/roi/" + roi_name + "/npixel",
number_of_images, fimages);
ReadVector(dataset->roi_sum.at(i),
data_file, "/entry/roi/" + roi_name + "/sum",
number_of_images, fimages);
ReadVector(dataset->roi_sum_sq.at(i),
data_file, "/entry/roi/" + roi_name + "/sum_sq",
number_of_images, fimages);
ReadVector(dataset->roi_x.at(i),
data_file, "/entry/roi/" + roi_name + "/x",
number_of_images, fimages);
ReadVector(dataset->roi_y.at(i),
data_file, "/entry/roi/" + roi_name + "/y",
number_of_images, fimages);
}
if (data_file.Exists("/entry/detector")) {
ReadVector(dataset->efficiency,
data_file, "/entry/detector/data_collection_efficiency_image",
number_of_images, fimages);
}
if (data_file.Exists("/entry/MX")) {
if (data_file.Exists("/entry/MX/peakCountUnfiltered"))
ReadVector(dataset->spot_count,
data_file, "/entry/MX/peakCountUnfiltered",
number_of_images, fimages);
else
ReadVector(dataset->spot_count,
data_file, "/entry/MX/nPeaks",
number_of_images, fimages);
ReadVector(dataset->spot_count_ice_rings,
data_file, "/entry/MX/peakCountIceRingRes",
number_of_images, fimages);
ReadVector(dataset->spot_count_low_res,
data_file, "/entry/MX/peakCountLowRes",
number_of_images, fimages);
ReadVector(dataset->spot_count_indexed,
data_file, "/entry/MX/peakCountIndexed",
number_of_images, fimages);
ReadVector(dataset->indexing_result,
data_file, "/entry/MX/imageIndexed",
number_of_images, fimages);
ReadVector(dataset->bkg_estimate,
data_file, "/entry/MX/bkgEstimate",
number_of_images, fimages);
ReadVector(dataset->profile_radius,
data_file, "/entry/MX/profileRadius",
number_of_images, fimages);
ReadVector(dataset->indexing_lattice_count,
data_file, "/entry/MX/indexingLatticeCount",
number_of_images, fimages);
ReadVector(dataset->mosaicity_deg,
data_file, "/entry/MX/mosaicity",
number_of_images, fimages);
ReadVector(dataset->b_factor,
data_file, "/entry/MX/bFactor",
number_of_images, fimages);
ReadVector(dataset->resolution_estimate,
data_file, "/entry/MX/resolutionEstimate",
number_of_images, fimages);
}
if (data_file.Exists("/entry/image")) {
ReadVector(dataset->max_value,
data_file, "/entry/image/max_value",
number_of_images, fimages);
}
} catch (JFJochException &e) {
}
if (nfiles == 0)
images_per_file = fimages;
number_of_images += fimages;
nfiles++;
}
} else {
image_size_x = master_file->GetInt("/entry/instrument/detector/detectorSpecific/x_pixels_in_detector");
image_size_y = master_file->GetInt("/entry/instrument/detector/detectorSpecific/y_pixels_in_detector");
number_of_images = 0;
}
if (master_file->Exists("/entry/MX")) {
auto indexing = master_file->GetString("/entry/MX/indexing_algorithm", "none");
if (indexing == "fft" || indexing == "FFT (CUDA)" || indexing == "FFT (FFTW)")
dataset->experiment.IndexingAlgorithm(IndexingAlgorithmEnum::FFT);
else if (indexing == "ffbidx" || indexing == "FFBIDX")
dataset->experiment.IndexingAlgorithm(IndexingAlgorithmEnum::FFBIDX);
}
auto ring_current_A = master_file->GetOptFloat("/entry/source/current");
if (ring_current_A) dataset->experiment.RingCurrent_mA(ring_current_A.value() * 1000.0);
dataset->experiment.DetectIceRings(
master_file->GetOptBool("/entry/instrument/detector/detectorSpecific/detect_ice_rings").value_or(false));
dataset->experiment.PoniRot1_rad(
master_file->GetOptFloat("/entry/instrument/detector/transformations/rot1").value_or(0.0));
dataset->experiment.PoniRot2_rad(
master_file->GetOptFloat("/entry/instrument/detector/transformations/rot2").value_or(0.0));
dataset->experiment.PoniRot3_rad(
master_file->GetOptFloat("/entry/instrument/detector/transformations/rot3").value_or(0.0));
dataset->experiment.SampleTemperature_K(master_file->GetOptFloat("/entry/sample/temperature"));
dataset->experiment.BeamX_pxl(master_file->GetFloat("/entry/instrument/detector/beam_center_x"));
dataset->experiment.BeamY_pxl(master_file->GetFloat("/entry/instrument/detector/beam_center_y"));
float det_distance = master_file->GetFloat("/entry/instrument/detector/distance");
if (det_distance < 0.001)
det_distance = 0.1; // Set to 100 mm, if det distance is less than 1 mm
dataset->experiment.DetectorDistance_mm(det_distance * 1000.0);
const float incident_wavelength_A = master_file->GetFloat("/entry/instrument/beam/incident_wavelength");
dataset->experiment.IncidentEnergy_keV(WVL_1A_IN_KEV / incident_wavelength_A);
// NXmx incident_wavelength_spread is the absolute FWHM (Angstrom); store it
// as the relative bandwidth FWHM (dlambda/lambda) used internally.
if (const auto spread = master_file->GetOptFloat("/entry/instrument/beam/incident_wavelength_spread"))
if (incident_wavelength_A > 0.0f)
dataset->experiment.BandwidthFWHM(spread.value() / incident_wavelength_A);
dataset->error_value = master_file->GetOptInt("/entry/instrument/detector/error_value");
dataset->jfjoch_release = master_file->GetString("/entry/instrument/detector/detectorSpecific/jfjoch_release");
InstrumentMetadata metadata;
metadata.InstrumentName(master_file->GetString("/entry/instrument/name"));
metadata.SourceName(master_file->GetString("/entry/source/name"));
dataset->experiment.ImportInstrumentMetadata(metadata);
if (master_file->Exists("/entry/sample/transformations")) {
if (master_file->Exists("/entry/sample/transformations/omega")) {
auto omega = ReadAxis(master_file.get(), "omega");
dataset->experiment.Goniometer(omega);
} else if (master_file->Exists("/entry/sample/grid_scan")) {
GridScanSettings grid(
master_file->GetInt("/entry/sample/grid_scan/n_fast"),
master_file->GetFloat("/entry/sample/grid_scan/step_x") * 1e6f,
master_file->GetFloat("/entry/sample/grid_scan/step_y") * 1e6f,
master_file->GetOptBool("/entry/sample/grid_scan/snake_scan").value_or(false),
master_file->GetOptBool("/entry/sample/grid_scan/vertical_scan").value_or(false)
);
grid.ImageNum(number_of_images);
dataset->experiment.GridScan(grid);
}
}
auto tmp = master_file->ReadOptVector<float>("/entry/sample/unit_cell");
if (tmp.size() == 6)
dataset->experiment.SetUnitCell(UnitCell{
.a = tmp[0],
.b = tmp[1],
.c = tmp[2],
.alpha = tmp[3],
.beta = tmp[4],
.gamma = tmp[5]
});
dataset->experiment.SpaceGroupNumber(master_file->GetOptInt("/entry/sample/space_group_number"));
dataset->experiment.SampleName(master_file->GetString("/entry/sample/name"));
if (master_file->Exists("/entry/instrument/attenuator"))
dataset->experiment.AttenuatorTransmission(
master_file->GetOptFloat("/entry/instrument/attenuator/attenuator_transmission"));
dataset->experiment.TotalFlux(master_file->GetOptFloat("/entry/instrument/beam/total_flux"));
if (master_file->Exists("/entry/azint") && master_file->Exists("/entry/azint/bin_to_q")) {
HDF5DataSet bin_to_q_dataset(*master_file, "/entry/azint/bin_to_q");
HDF5DataSpace bin_to_q_dataspace(bin_to_q_dataset);
auto dim = bin_to_q_dataspace.GetDimensions();
if (dim.size() == 1) {
dataset->azimuthal_bins = 0;
dataset->q_bins = dim[0];
bin_to_q_dataset.ReadVector(dataset->az_int_bin_to_q);
} else if (dim.size() == 2) {
dataset->azimuthal_bins = dim[0];
dataset->q_bins = dim[1];
dataset->az_int_bin_to_q.resize(dim[0] * dim[1]);
bin_to_q_dataset.ReadVector(dataset->az_int_bin_to_q, {0, 0}, dim);
} else
throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong dimension of /entry/azint/image dataset");
if (master_file->Exists("/entry/azint/bin_to_phi")) {
HDF5DataSet bin_to_phi_dataset(*master_file, "/entry/azint/bin_to_phi");
if (dataset->q_bins > 0) {
dataset->az_int_bin_to_phi.resize(dim[0] * dim[1]);
bin_to_phi_dataset.ReadVector(dataset->az_int_bin_to_phi, {0, 0}, dim);
} else {
bin_to_phi_dataset.ReadVector(dataset->az_int_bin_to_phi);
}
}
}
// Read fluorescence spectrum if present
if (master_file->Exists("/entry/instrument/fluorescence")) {
auto energy = master_file->ReadOptVector<float>("/entry/instrument/fluorescence/energy");
auto data = master_file->ReadOptVector<float>("/entry/instrument/fluorescence/data");
if (!energy.empty() && energy.size() == data.size())
dataset->experiment.FluorescenceSpectrum(XrayFluorescenceSpectrum(energy, data));
}
auto detector_name = master_file->GetString("/entry/instrument/detector/description");
DetectorSetup detector = DetDECTRIS(image_size_x, image_size_y, detector_name, {});
detector.PixelSize_um(master_file->GetFloat("/entry/instrument/detector/x_pixel_size") * 1e6);
detector.SaturationLimit(master_file->GetInt("/entry/instrument/detector/saturation_value"));
detector.MinFrameTime(std::chrono::microseconds(0));
detector.MinCountTime(std::chrono::microseconds(0));
detector.ReadOutTime(std::chrono::nanoseconds(0));
dataset->experiment.Detector(detector);
dataset->experiment.FrameTime(
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<float>(
master_file->GetFloat("/entry/instrument/detector/frame_time"))),
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<float>(
master_file->GetFloat("/entry/instrument/detector/count_time")))
);
if (master_file->Exists("/entry/instrument/detector/calibration")) {
dataset->calibration_data = master_file->FindLeafs("/entry/instrument/detector/calibration");
std::sort(dataset->calibration_data.begin(), dataset->calibration_data.end());
}
if (image_size_x * image_size_y > 0) {
auto mask_tmp = master_file->ReadOptVector<uint32_t>(
"/entry/instrument/detector/pixel_mask",
{0, 0},
{image_size_y, image_size_x}
);
if (mask_tmp.empty())
mask_tmp = master_file->ReadOptVector<uint32_t>(
"/entry/instrument/detector/detectorSpecific/pixel_mask",
{0, 0},
{image_size_y, image_size_x}
);
if (mask_tmp.empty())
mask_tmp = std::vector<uint32_t>(image_size_x * image_size_y);
dataset->pixel_mask = PixelMask(mask_tmp);
}
ReadROIMetadata(*master_file, *dataset);
// Resolve VDS mapping filenames to absolute paths so the image source's locator only ever
// deals with real paths, then report the layout to the caller.
for (auto &m : vds_data_mappings)
m.filename = ResolveRelativeToMaster(master_file_directory, m.filename);
dataset->experiment.ImagesPerTrigger(number_of_images);
cached_geom = dataset->experiment.GetDiffractionGeometry();
// Image-index -> original-image-number map (written as /entry/detector/number). When it is a
// genuine subset/strided selection, keep it so plots and per-image lookups use the original
// numbering; a plain 0..N-1 sequence is identity and left empty.
image_to_local_.clear();
auto numbers = master_file->ReadOptVector<uint64_t>("/entry/detector/number");
if (numbers.size() == number_of_images) {
bool identity = true;
for (size_t i = 0; i < numbers.size(); i++)
if (numbers[i] != i) { identity = false; break; }
if (!identity) {
dataset->source_image_number.assign(numbers.begin(), numbers.end());
for (size_t i = 0; i < numbers.size(); i++)
image_to_local_[static_cast<int64_t>(numbers[i])] = static_cast<int64_t>(i);
}
}
dataset_ = dataset;
return OpenResult{
.image_layout = HDF5ImageLocator::Layout{
.format = format,
.data_layout = data_layout,
.master_file = master_file,
.master_filename = master_filename,
.legacy_files = std::move(legacy_format_files),
.images_per_file = images_per_file,
.vds_mappings = std::move(vds_data_mappings)
},
.number_of_images = number_of_images
};
} catch (const std::exception &e) {
master_file = {};
master_filename.clear();
number_of_images = 0;
dataset_.reset();
cached_geom = DiffractionGeometry{};
throw;
}
}
HDF5ImageLocator::Location HDF5MetadataSource::ResolveMeta(int64_t global) const {
// Per-image metadata is co-located with the pixels for the original file (resolve via the
// shared image source); for an integrated _process.h5 snapshot it lives in this master at the
// global index.
if (image_source_)
return image_source_->Resolve(global);
return {master_file, static_cast<uint32_t>(global)};
}
std::optional<int64_t> HDF5MetadataSource::ToLocalIndex(int64_t image_number) const {
if (image_to_local_.empty())
return image_number; // 1:1 source (identity)
const auto it = image_to_local_.find(image_number);
if (it == image_to_local_.end())
return std::nullopt; // this source does not cover that image
return it->second;
}
// Reads spot data for a single image from the appropriate HDF5 source.
// master_image / source_image are the logical indices within master_file and
// source_file respectively (identical for NXmxVDS contiguous / integrated;
// differ for NXmxLegacy and NXmxVDS virtual layouts).
// Appends assembled SpotToSave entries to message.spots and fills the
// spot_count* fields; does NOT touch the image pixel data.
static void ReadSpotsFromFiles(HDF5Object &master_file,
HDF5Object &source_file,
hsize_t master_image,
hsize_t source_image,
int64_t image_number,
const DiffractionGeometry &geom,
DataMessage &message) {
auto spot_count_opt = ReadElementMasterFirst<uint32_t>(master_file,
source_file,
"/entry/MX/nPeaks",
master_image,
source_image);
if (!spot_count_opt.has_value() || spot_count_opt.value() == 0)
return;
const size_t spot_count = spot_count_opt.value();
auto spot_x = ReadVectorMasterFirst<float>(
master_file, source_file,
"/entry/MX/peakXPosRaw",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_y = ReadVectorMasterFirst<float>(
master_file, source_file,
"/entry/MX/peakYPosRaw",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_intensity = ReadVectorMasterFirst<float>(
master_file, source_file,
"/entry/MX/peakTotalIntensity",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
if (spot_x.size() < spot_count || spot_y.size() < spot_count || spot_intensity.size() < spot_count)
throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong size of spot dataset");
auto spot_indexed = ReadVectorMasterFirst<uint8_t>(
master_file, source_file,
"/entry/MX/peakIndexed",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_ice = ReadVectorMasterFirst<uint8_t>(
master_file, source_file,
"/entry/MX/peakIceRingRes",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_h = ReadVectorMasterFirst<int32_t>(
master_file, source_file,
"/entry/MX/peakH",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_k = ReadVectorMasterFirst<int32_t>(
master_file, source_file,
"/entry/MX/peakK",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_l = ReadVectorMasterFirst<int32_t>(
master_file, source_file,
"/entry/MX/peakL",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_lattice = ReadVectorMasterFirst<int8_t>(
master_file, source_file,
"/entry/MX/peakLattice",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
auto spot_dist_ewald_sphere = ReadVectorMasterFirst<float>(
master_file, source_file,
"/entry/MX/peakDistEwaldSphere",
{master_image, 0}, {source_image, 0}, {1, spot_count}
);
message.spots.reserve(message.spots.size() + spot_count);
for (size_t i = 0; i < spot_count; i++) {
const auto x = spot_x.at(i);
const auto y = spot_y.at(i);
SpotToSave s{
.x = x,
.y = y,
.intensity = spot_intensity.at(i),
.image = image_number,
.d_A = geom.PxlToRes(x, y)
};
if (spot_indexed.size() > i)
s.indexed = (spot_indexed.at(i) != 0);
if (spot_h.size() > i)
s.h = spot_h.at(i);
if (spot_k.size() > i)
s.k = spot_k.at(i);
if (spot_l.size() > i)
s.l = spot_l.at(i);
if (spot_dist_ewald_sphere.size() > i)
s.dist_ewald_sphere = spot_dist_ewald_sphere.at(i);
if (spot_ice.size() > i)
s.ice_ring = (spot_ice.at(i) != 0);
if (spot_lattice.size() > i)
s.lattice = spot_lattice.at(i);
message.spots.emplace_back(s);
}
if (auto v = ReadElementMasterFirst<uint32_t>(master_file, source_file,
"/entry/MX/peakCountUnfiltered",
master_image, source_image); v)
message.spot_count = v;
else
message.spot_count = spot_count_opt;
message.spot_count_ice_rings = ReadElementMasterFirst<uint32_t>(
master_file, source_file, "/entry/MX/peakCountIceRingRes", master_image, source_image);
message.spot_count_low_res = ReadElementMasterFirst<uint32_t>(
master_file, source_file, "/entry/MX/peakCountLowRes", master_image, source_image);
message.spot_count_indexed = ReadElementMasterFirst<uint32_t>(
master_file, source_file, "/entry/MX/peakCountIndexed", master_image, source_image);
GenerateSpotPlot(message, 1.5);
}
void HDF5MetadataSource::FillPerImage(DataMessage &message, int64_t requested_image,
const std::shared_ptr<const JFJochReaderDataset> &dataset) const {
const auto local_opt = ToLocalIndex(requested_image);
if (!local_opt)
return; // this metadata source does not cover the requested image
const int64_t image_number = *local_opt; // local index into this source (identity for 1:1)
auto loc = ResolveMeta(image_number);
auto &source_file = loc.file;
const uint32_t image_id = loc.local_index;
const auto master_image = static_cast<hsize_t>(image_number);
const auto source_image = static_cast<hsize_t>(image_id);
ReadSpotsFromFiles(*master_file, *source_file, master_image, source_image,
requested_image, dataset->experiment.GetDiffractionGeometry(), message);
if (!dataset->az_int_bin_to_q.empty()) {
if (dataset->azimuthal_bins == 0) {
message.az_int_profile = ReadVectorMasterFirst<float>(
*master_file,
*source_file,
"/entry/azint/image",
{master_image, 0},
{source_image, 0},
{1, dataset->az_int_bin_to_q.size()}
);
} else {
message.az_int_profile = ReadVectorMasterFirst<float>(
*master_file,
*source_file,
"/entry/azint/image",
{master_image, 0, 0},
{source_image, 0, 0},
{1, dataset->azimuthal_bins, dataset->q_bins}
);
}
}
if (dataset->integrated_reflections.size() > image_number)
message.integrated_reflections = static_cast<int64_t>(std::lround(
dataset->integrated_reflections.at(image_number)));
if (dataset->resolution_estimate.size() > image_number)
message.resolution_estimate = dataset->resolution_estimate[image_number];
if (dataset->indexing_result.size() > image_number)
message.indexing_result = dataset->indexing_result[image_number];
if (dataset->indexing_lattice_count.size() > image_number)
message.indexing_lattice_count = dataset->indexing_lattice_count[image_number];
if (dataset->bkg_estimate.size() > image_number)
message.bkg_estimate = dataset->bkg_estimate[image_number];
if (dataset->efficiency.size() > image_number)
message.image_collection_efficiency = dataset->efficiency[image_number];
if (dataset->profile_radius.size() > image_number)
message.profile_radius = dataset->profile_radius[image_number];
if (dataset->mosaicity_deg.size() > image_number)
message.mosaicity_deg = dataset->mosaicity_deg[image_number];
if (dataset->b_factor.size() > image_number)
message.b_factor = dataset->b_factor[image_number];
if (dataset->image_scale_b.size() > image_number)
message.image_scale_b_factor = dataset->image_scale_b[image_number];
if (dataset->image_scale_factor.size() > image_number)
message.image_scale_factor = dataset->image_scale_factor[image_number];
if (dataset->image_scale_cc.size() > image_number)
message.image_scale_cc = dataset->image_scale_cc[image_number];
if (dataset->indexing_result.size() > image_number
&& dataset->indexing_result[image_number] != 0
&& (master_file->Exists("/entry/MX/latticeIndexed") ||
source_file->Exists("/entry/MX/latticeIndexed"))) {
std::vector<float> tmp = ReadVectorMasterFirst<float>(
*master_file,
*source_file,
"/entry/MX/latticeIndexed",
{master_image, 0},
{source_image, 0},
{1, 9}
);
if (tmp.size() == 9)
message.indexing_lattice = CrystalLattice(tmp);
std::optional<std::string> lattice;
if (master_file->Exists("/entry/MX/bravaisLattice"))
lattice = master_file->ReadElement<std::string>("/entry/MX/bravaisLattice", image_number);
else
lattice = source_file->ReadElement<std::string>("/entry/MX/bravaisLattice", image_id);
std::optional<uint32_t> niggli_opt;
if (master_file->Exists("/entry/MX/niggli_class"))
niggli_opt = master_file->ReadElement<uint32_t>("/entry/MX/niggli_class", image_number);
else if (master_file->Exists("/entry/MX/niggliClass"))
niggli_opt = master_file->ReadElement<uint32_t>("/entry/MX/niggliClass", image_number);
else if (source_file->Exists("/entry/MX/niggli_class"))
niggli_opt = source_file->ReadElement<uint32_t>("/entry/MX/niggli_class", image_id);
else if (source_file->Exists("/entry/MX/niggliClass"))
niggli_opt = source_file->ReadElement<uint32_t>("/entry/MX/niggliClass", image_id);
if (lattice && !lattice->empty()) {
auto symm_info = parse_bravais_lattice(lattice.value());
message.lattice_type = LatticeMessage{
.centering = symm_info.second,
.niggli_class = static_cast<int64_t>(niggli_opt.value_or(0)),
.crystal_system = symm_info.first,
};
}
}
const std::string master_reflection_group_name = fmt::format("/entry/reflections/image_{:06d}", image_number);
const std::string source_reflection_group_name = fmt::format("/entry/reflections/image_{:06d}", image_id);
if (!ReadReflectionsFromGroup(*master_file, master_reflection_group_name, message.reflections))
ReadReflectionsFromGroup(*source_file, source_reflection_group_name, message.reflections);
if (!message.reflections.empty()) {
CalcISigma(message);
CalcWilsonBFactor(message, !message.b_factor.has_value());
}
}
std::optional<GoniometerAxis> HDF5MetadataSource::ReadAxis(HDF5Object *file, const std::string &name) {
std::string dname = "/entry/sample/transformations/" + name;
if (!file->Exists(dname))
return {};
HDF5DataSet dataset(*file, dname);
std::vector<double> angle;
dataset.ReadVector(angle);
if (angle.size() < 2)
return {};
std::vector<double> end = file->ReadOptVector<double>(dname + "_end");
double start = angle[0];
double incr = angle[1] - angle[0];
if (dataset.ReadAttrStr("transformation_type") != "rotation")
return {};
std::vector<double> axis_vec = dataset.ReadAttrVec("vector");
if (axis_vec.size() != 3)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
dname + " Vector must have 3 elements");
Coord axis(axis_vec[0], axis_vec[1], axis_vec[2]);
GoniometerAxis g_axis(name, start, incr, axis, {});
if (!end.empty())
g_axis.ScreeningWedge(end[0] - angle[0]);
return g_axis;
}
CompressedImage HDF5MetadataSource::ReadCalibration(std::vector<uint8_t> &tmp, const std::string &name) const {
std::vector<hsize_t> start = {0, 0};
if (!master_file)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Master file not loaded");
if (!master_file->Exists("/entry/instrument/detector/calibration/" + name))
throw JFJochException(JFJochExceptionCategory::HDF5, "Calibration dataset not found");
HDF5DataSet dataset(*master_file, "/entry/instrument/detector/calibration/" + name);
HDF5DataSpace dataspace(dataset);
HDF5DataType datatype(dataset);
HDF5Dcpl dcpl(dataset);
if (dataspace.GetNumOfDimensions() != 2)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"Calibration dataset must be 2D");
auto dim = dataspace.GetDimensions();
CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION;
dataset.ReadVectorToU8(tmp, start, {dim[0], dim[1]});
algorithm = CompressionAlgorithm::NO_COMPRESSION;
return {
tmp, dim[1], dim[0],
CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()),
algorithm
};
}
std::vector<IntegrationOutcome> HDF5MetadataSource::ReadReflections(size_t start_image,
std::optional<size_t> end_image) const {
if (start_image >= number_of_images)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"start_image must be less than number_of_images");
const size_t end_image_val = end_image.value_or(number_of_images - 1);
if (end_image_val < start_image)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"end_image must be greater or equal to start_image if provided");
if (end_image_val >= number_of_images)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"end_image must be less than number_of_images");
if (!master_file)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"Cannot read reflections if file not loaded");
std::vector<IntegrationOutcome> ret;
ret.reserve(end_image_val - start_image + 1);
for (size_t img = start_image; img <= end_image_val; img++) {
IntegrationOutcome outcome;
// Generic (non-image-specific) detector geometry from experiment setup.
outcome.geom = cached_geom;
// Per-image reflections and MX metadata live in the same file as the image pixels,
// at the source-local index (the locator keeps the data-file handle cached).
const auto loc = ResolveMeta(static_cast<int64_t>(img));
HDF5Object *meta_file = loc.file.get();
const size_t meta_image_id = loc.local_index;
// ── reflections ──────────────────────────────────────────────────────
const std::string refl_group = fmt::format("/entry/reflections/image_{:06d}", meta_image_id);
ReadReflectionsFromGroup(*meta_file, refl_group, outcome.reflections);
// ── per-image mosaicity ───────────────────────────────────────────────
if (meta_file->Exists("/entry/MX/mosaicity")) {
try {
outcome.mosaicity_deg =
meta_file->ReadElement<float>("/entry/MX/mosaicity", meta_image_id);
} catch (...) {
}
}
// ── indexed lattice (stored as 9-element row-major matrix) ────────────
if (meta_file->Exists("/entry/MX/latticeIndexed")) {
try {
auto lattice_vec = meta_file->ReadOptVector<float>(
"/entry/MX/latticeIndexed", {meta_image_id, 0}, {1, 9});
if (lattice_vec.size() == 9)
outcome.latt = CrystalLattice(lattice_vec);
} catch (...) {
}
}
ret.push_back(std::move(outcome));
}
return ret;
}
std::vector<SpotToSave> HDF5MetadataSource::ReadSpots(int64_t requested_image) const {
if (requested_image < 0)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"image number must be non-negative");
const auto local_opt = ToLocalIndex(requested_image);
if (!local_opt)
return {}; // this (subset) source does not cover the requested image
const int64_t image = *local_opt;
if (image >= number_of_images)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"image must be less than number_of_images");
if (!master_file)
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
"Cannot read spots if file not loaded");
// Per-image spot/MX data, resolved the same way as the pixels (or in our own master at the
// local index for an integrated _process.h5 snapshot).
const auto loc = ResolveMeta(image);
HDF5Object *meta_file = loc.file.get();
const size_t meta_image_id = loc.local_index;
DataMessage tmp_message;
tmp_message.number = requested_image;
ReadSpotsFromFiles(*master_file, *meta_file,
image, meta_image_id,
requested_image,
cached_geom,
tmp_message);
return tmp_message.spots;
}