// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "JFJochHDF5Reader.h" #include "spdlog/fmt/fmt.h" #include "../image_analysis/bragg_integration/CalcISigma.h" #include "../image_analysis/spot_finding/SpotUtils.h" std::vector GetDimension(HDF5Object& object, const std::string& path) { const auto dim = object.GetDimension(path); if (dim.size() != 3) throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong dimension of /entry/data/data"); return dim; } template void JFJochHDF5Reader::ReadVector(std::vector &v, HDF5Object &file, const std::string &dataset_name, size_t image0, size_t nimages) { try { auto tmp = file.ReadOptVector(dataset_name); if (tmp.size() == nimages) { v.resize(image0 + nimages); for (int i = 0; i < nimages; i++) v[image0 + i] = tmp[i]; } } catch (JFJochException &e) {} } std::string removeSuffix(const std::string& s, const std::string& suffix) { if (s.rfind(suffix) == s.size() - suffix.size()) { return s.substr(0, s.size() - suffix.size()); } return s; } std::string dataset_name(const std::string& path) { std::string file = path; int pos = file.rfind('/'); if (pos != std::string::npos) file = file.substr(pos+1); file = removeSuffix(file, "_master.h5"); // If previous suffix was not found, try removing this one file = removeSuffix(file, ".h5"); return file; } void JFJochHDF5Reader::ReadFile(const std::string& filename) { std::unique_lock ul(file_mutex); try { auto dataset = std::make_shared(); master_file = std::make_unique(filename); dataset->experiment = default_experiment; std::filesystem::path fsPath(filename); dataset->experiment.FilePrefix(dataset_name(filename)); // JFJochReader is always using int32_t dataset->experiment.BitDepthImage(32); dataset->experiment.PixelSigned(true); size_t image_size_x = 0; size_t image_size_y = 0; if (master_file->Exists("/entry/data/data")) { legacy_format = false; auto dim = GetDimension(*master_file, "/entry/data/data"); number_of_images = dim[0]; image_size_y = dim[1]; image_size_x = dim[2]; images_per_file = number_of_images; dataset->efficiency = master_file->ReadVector( "/entry/instrument/detector/detectorSpecific/data_collection_efficiency_image"); if (master_file->Exists("/entry/MX")) { if (master_file->Exists("/entry/MX/peakCountUnfiltered")) dataset->spot_count = master_file->ReadOptVector("/entry/MX/peakCountUnfiltered"); else dataset->spot_count = master_file->ReadOptVector("/entry/MX/nPeaks"); dataset->spot_count_low_res = master_file->ReadOptVector("/entry/MX/peakCountLowRes"); dataset->spot_count_indexed = master_file->ReadOptVector("/entry/MX/peakCountIndexed"); dataset->spot_count_ice_rings = master_file->ReadOptVector("/entry/MX/peakCountIceRing"); dataset->indexing_result = master_file->ReadOptVector("/entry/MX/imageIndexed"); dataset->bkg_estimate = master_file->ReadOptVector("/entry/MX/bkgEstimate"); dataset->resolution_estimate = master_file->ReadOptVector("/entry/MX/resolutionEstimate"); dataset->profile_radius = master_file->ReadOptVector("/entry/MX/profileRadius"); dataset->b_factor = master_file->ReadOptVector("/entry/MX/bFactor"); } if (master_file->Exists("/entry/image")) dataset->max_value = master_file->ReadOptVector("/entry/image/max_value"); } else if (master_file->Exists("/entry/data/data_000001")) { legacy_format = true; legacy_format_files.clear(); image_size_x = master_file->GetInt("/entry/instrument/detector/detectorSpecific/x_pixels_in_detector"); image_size_y = master_file->GetInt("/entry/instrument/detector/detectorSpecific/y_pixels_in_detector"); //size_t expected_images = master_file->GetInt("/entry/instrument/detector/detectorSpecific/nimages"); images_per_file = 0; number_of_images = 0; uint32_t nfiles = 0; std::filesystem::path file_path(filename); std::filesystem::path directory = file_path.parent_path(); while (true) { std::string dname = fmt::format("/entry/data/data_{:06d}", nfiles + 1); if (!master_file->Exists(dname)) break; size_t fimages = 0; try { auto fname = master_file->GetLinkedFileName(dname); if (!directory.empty()) fname = fmt::format("{}/{}", directory.string(),fname); HDF5ReadOnlyFile data_file(fname); fimages = GetDimension(data_file, "/entry/data/data")[0]; legacy_format_files.push_back(fname); if (data_file.Exists("/entry/detector")) { ReadVector(dataset->efficiency, data_file, "/entry/detector/data_collection_efficiency_image", number_of_images, fimages); } if (data_file.Exists("/entry/MX")) { if (data_file.Exists("/entry/MX/peakCountUnfiltered")) ReadVector(dataset->spot_count, data_file, "/entry/MX/peakCountUnfiltered", number_of_images, fimages); else ReadVector(dataset->spot_count, data_file, "/entry/MX/nPeaks", number_of_images, fimages); ReadVector(dataset->spot_count_ice_rings, data_file, "/entry/MX/peakCountIceRingRes", number_of_images, fimages); ReadVector(dataset->spot_count_low_res, data_file, "/entry/MX/peakCountLowRes", number_of_images, fimages); ReadVector(dataset->spot_count_indexed, data_file, "/entry/MX/peakCountIndexed", number_of_images, fimages); ReadVector(dataset->indexing_result, data_file, "/entry/MX/imageIndexed", number_of_images, fimages); ReadVector(dataset->bkg_estimate, data_file, "/entry/MX/bkgEstimate", number_of_images, fimages); ReadVector(dataset->profile_radius, data_file, "/entry/MX/profileRadius", number_of_images, fimages); ReadVector(dataset->b_factor, data_file, "/entry/MX/bFactor", number_of_images, fimages); } if (data_file.Exists("/entry/image")) { ReadVector(dataset->max_value, data_file, "/entry/image/max_value", number_of_images, fimages); } } catch (JFJochException &e) {} if (nfiles == 0) images_per_file = fimages; number_of_images += fimages; nfiles++; } } else { image_size_x = master_file->GetInt("/entry/instrument/detector/detectorSpecific/x_pixels_in_detector"); image_size_y = master_file->GetInt("/entry/instrument/detector/detectorSpecific/y_pixels_in_detector"); number_of_images = 0; } if (master_file->Exists("/entry/MX")) { auto indexing = master_file->GetString("/entry/MX/indexing_algorithm", "none"); if (indexing == "fft") dataset->experiment.IndexingAlgorithm(IndexingAlgorithmEnum::FFT); else if (indexing == "ffbidx") dataset->experiment.IndexingAlgorithm(IndexingAlgorithmEnum::FFBIDX); } auto ring_current_A = master_file->GetOptFloat("/entry/source/current"); if (ring_current_A) dataset->experiment.RingCurrent_mA(ring_current_A.value() * 1000.0); dataset->experiment.DetectIceRings(master_file->GetOptBool("/entry/instrument/detector/detectorSpecific/detect_ice_rings").value_or(false)); dataset->experiment.PoniRot1_rad(master_file->GetOptFloat("/entry/instrument/detector/transformations/rot1").value_or(0.0)); dataset->experiment.PoniRot2_rad(master_file->GetOptFloat("/entry/instrument/detector/transformations/rot2").value_or(0.0)); dataset->experiment.PoniRot3_rad(master_file->GetOptFloat("/entry/instrument/detector/transformations/rot3").value_or(0.0)); if (master_file->Exists("/entry/instrument/source")) dataset->experiment.RingCurrent_mA(master_file->GetOptFloat("/entry/instrument/source/current")); dataset->experiment.SampleTemperature_K(master_file->GetOptFloat("/entry/sample/temperature")); dataset->experiment.BeamX_pxl(master_file->GetFloat("/entry/instrument/detector/beam_center_x")); dataset->experiment.BeamY_pxl(master_file->GetFloat("/entry/instrument/detector/beam_center_y")); float det_distance = master_file->GetFloat("/entry/instrument/detector/distance"); if (det_distance < 0.001) det_distance = 0.1; // Set to 100 mm, if det distance is less than 1 mm dataset->experiment.DetectorDistance_mm(det_distance * 1000.0); dataset->experiment.IncidentEnergy_keV(WVL_1A_IN_KEV / master_file->GetFloat("/entry/instrument/beam/incident_wavelength")); dataset->error_value = master_file->GetOptInt("/entry/instrument/detector/error_value"); dataset->jfjoch_release = master_file->GetString("/entry/instrument/detector/jfjoch_release"); InstrumentMetadata metadata; metadata.InstrumentName(master_file->GetString("/entry/instrument/name")); metadata.SourceName(master_file->GetString("/entry/source/name")); dataset->experiment.ImportInstrumentMetadata(metadata); if (master_file->Exists("/entry/sample/transformations")) { auto omega = ReadAxis(master_file.get(), "omega"); dataset->experiment.Goniometer(omega); } auto tmp = master_file->ReadOptVector("/entry/sample/unit_cell"); if (tmp.size() == 6) dataset->experiment.SetUnitCell(UnitCell{ .a = tmp[0], .b = tmp[1], .c = tmp[2], .alpha = tmp[3], .beta = tmp[4], .gamma = tmp[5]}); dataset->experiment.SpaceGroupNumber(master_file->GetOptInt("/entry/sample/space_group_number")); dataset->experiment.SampleName(master_file->GetString("/entry/sample/name")); if (master_file->Exists("/entry/roi")) dataset->roi = master_file->FindLeafs("/entry/roi"); for (const auto &s: dataset->roi) { dataset->roi_max.emplace_back(master_file->ReadVector("/entry/roi/" + s + "/max")); dataset->roi_sum.emplace_back(master_file->ReadVector("/entry/roi/" + s + "/sum")); dataset->roi_sum_sq.emplace_back(master_file->ReadVector("/entry/roi/" + s + "/sum_sq")); dataset->roi_npixel.emplace_back(master_file->ReadVector("/entry/roi/" + s + "/npixel")); } if (master_file->Exists("/entry/instrument/attenuator")) dataset->experiment.AttenuatorTransmission(master_file->GetOptFloat("/entry/instrument/attenuator/attenuator_transmission")); dataset->experiment.TotalFlux(master_file->GetOptFloat("/entry/instrument/beam/total_flux")); if (master_file->Exists("/entry/azint") && master_file->Exists("/entry/azint/bin_to_q")) { HDF5DataSet bin_to_q_dataset(*master_file, "/entry/azint/bin_to_q"); HDF5DataSpace bin_to_q_dataspace(bin_to_q_dataset); auto dim = bin_to_q_dataspace.GetDimensions(); if (dim.size() == 1) { dataset->azimuthal_bins = 0; dataset->q_bins = dim[0]; bin_to_q_dataset.ReadVector(dataset->az_int_bin_to_q); } else if (dim.size() == 2) { dataset->azimuthal_bins = dim[0]; dataset->q_bins = dim[1]; dataset->az_int_bin_to_q.resize(dim[0] * dim[1]); bin_to_q_dataset.ReadVector(dataset->az_int_bin_to_q, {0,0}, dim); } else throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong dimension of /entry/azint/image dataset"); if (master_file->Exists("/entry/azint/bin_to_phi")) { HDF5DataSet bin_to_phi_dataset(*master_file, "/entry/azint/bin_to_phi"); if (dataset->q_bins > 0) { dataset->az_int_bin_to_phi.resize(dim[0] * dim[1]); bin_to_phi_dataset.ReadVector(dataset->az_int_bin_to_phi, {0,0}, dim); } else { bin_to_phi_dataset.ReadVector(dataset->az_int_bin_to_phi); } } } // Read fluorescence spectrum if present if (master_file->Exists("/entry/instrument/fluorescence")) { auto energy = master_file->ReadOptVector("/entry/instrument/fluorescence/energy"); auto data = master_file->ReadOptVector("/entry/instrument/fluorescence/data"); if (!energy.empty() && energy.size() == data.size()) dataset->experiment.FluorescenceSpectrum(XrayFluorescenceSpectrum(energy, data)); } auto detector_name = master_file->GetString("/entry/instrument/detector/description"); DetectorSetup detector = DetDECTRIS(image_size_x, image_size_y, detector_name, {}); detector.PixelSize_um(master_file->GetFloat("/entry/instrument/detector/x_pixel_size") * 1e6); detector.SaturationLimit(master_file->GetInt("/entry/instrument/detector/saturation_value")); detector.MinFrameTime(std::chrono::microseconds(0)); detector.MinCountTime(std::chrono::microseconds(0)); detector.ReadOutTime(std::chrono::microseconds (0)); dataset->experiment.Detector(detector); dataset->experiment.FrameTime( std::chrono::microseconds(std::lround(master_file->GetFloat("/entry/instrument/detector/frame_time") * 1e6)), std::chrono::microseconds(std::lround(master_file->GetFloat("/entry/instrument/detector/count_time") * 1e6))); if (image_size_x * image_size_y > 0) { auto mask_tmp = master_file->ReadOptVector( "/entry/instrument/detector/pixel_mask", {0, 0}, {image_size_y, image_size_x} ); if (mask_tmp.empty()) mask_tmp = std::vector(image_size_x * image_size_y); dataset->pixel_mask = PixelMask(mask_tmp); } dataset->experiment.ImagesPerTrigger(number_of_images); SetStartMessage(dataset); } catch (const std::exception& e) { master_file = {}; number_of_images = 0; SetStartMessage({}); throw; } } uint64_t JFJochHDF5Reader::GetNumberOfImages() const { std::unique_lock ul(file_mutex); return number_of_images; } CompressedImage JFJochHDF5Reader::LoadImageDataset(std::vector &tmp, HDF5Object &file, hsize_t number) { std::vector start = {static_cast(number), 0, 0}; HDF5DataSet dataset(file, "/entry/data/data"); HDF5DataSpace dataspace(dataset); HDF5DataType datatype(dataset); HDF5Dcpl dcpl(dataset); if (dataspace.GetNumOfDimensions() != 3) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "/entry/data/data dataset must be 3D"); auto dim = dataspace.GetDimensions(); CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION; auto chunk_size = dcpl.GetChunking(); if ((chunk_size.size() == 3) && (chunk_size[0] == 1) && (chunk_size[1] == dim[1]) && (chunk_size[2] == dim[2])) { dataset.ReadDirectChunk(tmp, start); algorithm = dcpl.GetCompression(); } else { dataset.ReadVectorToU8(tmp, start, {1, dim[1], dim[2]}); algorithm = CompressionAlgorithm::NO_COMPRESSION; } if (datatype.IsFloat()) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,"Float datasets not supported at this time"); return {tmp, dim[1], dim[2], CalcImageMode(datatype.GetElemSize(), datatype.IsFloat(), datatype.IsSigned()), algorithm}; } bool JFJochHDF5Reader::LoadImage_i(std::shared_ptr &dataset, DataMessage &message, std::vector &buffer, int64_t image_number, bool update_dataset) { std::unique_lock ul(file_mutex); if (!master_file) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Cannot load image if file not loaded"); if (image_number >= number_of_images) throw JFJochException(JFJochExceptionCategory::HDF5, "Image out of bounds"); std::unique_ptr tmp_data_file; uint32_t image_id; HDF5Object *source_file; if (legacy_format) { uint32_t file_id = image_number / images_per_file; image_id = image_number % images_per_file; tmp_data_file = std::make_unique(legacy_format_files.at(file_id)); source_file = tmp_data_file.get(); } else { image_id = image_number; source_file = master_file.get(); } message.image = LoadImageDataset(buffer, *source_file, image_id); message.number = image_number; auto spot_count_opt = source_file->ReadElement("/entry/MX/nPeaks", image_id); if (spot_count_opt.has_value() && spot_count_opt.value() > 0) { size_t spot_count = spot_count_opt.value(); auto spot_x = source_file->ReadVector( "/entry/MX/peakXPosRaw", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_y = source_file->ReadVector( "/entry/MX/peakYPosRaw", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_intensity = source_file->ReadVector( "/entry/MX/peakTotalIntensity", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_indexed = source_file->ReadOptVector( "/entry/MX/peakIndexed", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_ice = source_file->ReadOptVector( "/entry/MX/peakIceRingRes", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_h = source_file->ReadOptVector( "/entry/MX/peakH", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_k = source_file->ReadOptVector( "/entry/MX/peakK", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_l = source_file->ReadOptVector( "/entry/MX/peakL", {(hsize_t) image_id, 0}, {1, spot_count} ); auto spot_dist_ewald_sphere = source_file->ReadOptVector( "/entry/MX/peakDistEwaldSphere", {(hsize_t) image_id, 0}, {1, spot_count} ); auto geom = dataset->experiment.GetDiffractionGeometry(); for (int i = 0; i < spot_count; i++) { auto x = spot_x.at(i); auto y = spot_y.at(i); auto d = geom.PxlToRes(x, y); SpotToSave s{ .x = x, .y = y, .intensity = spot_intensity.at(i), .d_A = d }; if (spot_indexed.size() > i) s.indexed = (spot_indexed.at(i) != 0); if (spot_h.size() > i) s.h = spot_h.at(i); if (spot_k.size() > i) s.k = spot_k.at(i); if (spot_l.size() > i) s.l = spot_l.at(i); if (spot_dist_ewald_sphere.size() > i) s.dist_ewald_sphere = spot_dist_ewald_sphere.at(i); if (spot_ice.size() > i) s.ice_ring = (spot_ice.at(i) != 0); message.spots.emplace_back(s); } message.spot_count = source_file->ReadElement("/entry/MX/peakCountUnfiltered", image_id); message.spot_count_ice_rings = source_file->ReadElement("/entry/MX/peakCountIceRingRes", image_id); message.spot_count_low_res = source_file->ReadElement("/entry/MX/peakCountLowRes", image_id); message.spot_count_indexed = source_file->ReadElement("/entry/MX/peakCountIndexed", image_id); GenerateSpotPlot(message, 1.5); } if (!dataset->az_int_bin_to_q.empty()) { if (dataset->azimuthal_bins == 0) message.az_int_profile = source_file->ReadOptVector( "/entry/azint/image", {(hsize_t) image_id, 0}, {1, dataset->az_int_bin_to_q.size()} ); else { message.az_int_profile.resize(dataset->azimuthal_bins * dataset->q_bins, 0); message.az_int_profile = source_file->ReadOptVector( "/entry/azint/image", {(hsize_t) image_id, 0, 0}, {1, dataset->azimuthal_bins, dataset->q_bins} ); } } if (dataset->resolution_estimate.size() > image_number) message.resolution_estimate = dataset->resolution_estimate[image_number]; if (dataset->indexing_result.size() > image_number) message.indexing_result = dataset->indexing_result[image_number]; if (dataset->bkg_estimate.size() > image_number) message.bkg_estimate = dataset->bkg_estimate[image_number]; if (dataset->efficiency.size() > image_number) message.image_collection_efficiency = dataset->efficiency[image_number]; if (dataset->profile_radius.size() > image_number) message.profile_radius = dataset->profile_radius[image_number]; if (dataset->b_factor.size() > image_number) message.b_factor = dataset->b_factor[image_number]; if (dataset->indexing_result.size() > image_number && dataset->indexing_result[image_number] != 0 && source_file->Exists("/entry/MX") && source_file->Exists("/entry/MX/latticeIndexed")) { std::vector tmp = source_file->ReadVector( "/entry/MX/latticeIndexed", {(hsize_t) image_id, 0}, {1, 9} ); message.indexing_lattice = CrystalLattice(tmp); } std::string image_group_name = fmt::format("/entry/reflections/image_{:06d}", image_id); if (source_file->Exists("/entry/reflections") && source_file->Exists(image_group_name)) { auto h = source_file->ReadOptVector(image_group_name + "/h"); auto k = source_file->ReadOptVector(image_group_name + "/k"); auto l = source_file->ReadOptVector(image_group_name + "/l"); auto predicted_x = source_file->ReadOptVector(image_group_name + "/predicted_x"); auto predicted_y = source_file->ReadOptVector(image_group_name + "/predicted_y"); auto d = source_file->ReadOptVector(image_group_name + "/d"); auto int_sum = source_file->ReadOptVector(image_group_name + "/int_sum"); auto int_err = source_file->ReadOptVector(image_group_name + "/int_err"); auto bkg = source_file->ReadOptVector(image_group_name + "/background_mean"); if (h.size() != l.size() || h.size() != k.size() || h.size() != d.size() || h.size() != predicted_x.size() || h.size() != predicted_y.size() || h.size() != int_sum.size() || h.size() != int_err.size() || h.size() != bkg.size()) throw JFJochException(JFJochExceptionCategory::HDF5, "Wrong size of reflections dataset"); for (size_t i = 0; i < h.size(); i++) { Reflection r{ .h = h.at(i), .k = k.at(i), .l = l.at(i), .predicted_x = predicted_x.at(i), .predicted_y = predicted_y.at(i), .d = d.at(i), .I = int_sum.at(i), .bkg = bkg.at(i), .sigma = int_err.at(i) }; message.reflections.emplace_back(r); } CalcISigma(message); CalcWilsonBFactor(message, !message.b_factor.has_value()); } return true; } void JFJochHDF5Reader::Close() { std::unique_lock ul(file_mutex); master_file = {}; number_of_images = 0; legacy_format_files.clear(); SetStartMessage({}); } std::optional JFJochHDF5Reader::ReadAxis(HDF5Object *file, const std::string &name) { std::string dname = "/entry/sample/transformations/" + name; if (!file->Exists(dname)) return {}; HDF5DataSet dataset(*file, dname); std::vector angle; dataset.ReadVector(angle); if (angle.size() < 2) return {}; double start = angle[0]; double incr = angle[1] - angle[0]; if (dataset.ReadAttrStr("transformation_type") != "rotation") return {}; std::vector axis_vec = dataset.ReadAttrVec("vector"); if (axis_vec.size() != 3) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, dname + " Vector must have 3 elements"); Coord axis(axis_vec[0], axis_vec[1], axis_vec[2]); return GoniometerAxis(name, start, incr, axis, {}); }