v1.0.0-rc.145 (#55)
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 16m26s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 14m26s
Build Packages / build:rpm (rocky8) (push) Successful in 17m23s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 17m32s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 18m16s
Build Packages / build:rpm (rocky9) (push) Successful in 12m45s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 12m58s
Build Packages / XDS test (durin plugin) (push) Successful in 11m22s
Build Packages / DIALS test (push) Successful in 14m28s
Build Packages / Generate python client (push) Successful in 1m1s
Build Packages / Build documentation (push) Successful in 2m40s
Build Packages / Create release (push) Has been skipped
Build Packages / XDS test (neggia plugin) (push) Successful in 10m52s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 15m2s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 17m25s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 11m49s
Build Packages / XDS test (JFJoch plugin) (push) Successful in 11m34s
Build Packages / Unit tests (push) Successful in 44m51s

This is an UNSTABLE release. It contains significant modifications to the HDF5 writing logic - if you run into trouble, go back to 1.0.0-rc.144.

* **The default HDF5 writing mode now uses VDS, not soft links** - this improves DIALS compatibility and makes the format more future-proof; the legacy NXmx format might be phased out in the future.
* XDS plugin: Improve performance of VDS reading.
* jfjoch_writer: Significant improvement in how file-system I/O is handled, through a dedicated pass-through VFD.
* jfjoch_writer: Clean-up of HDF5 routines to better handle issues.

Reviewed-on: #55
This commit was merged in pull request #55.
This commit is contained in:
2026-05-06 21:50:02 +02:00
parent 7d34e8a049
commit caef26873e
152 changed files with 1995 additions and 276 deletions
+368 -28
View File
@@ -3,12 +3,23 @@
#include <cstring>
#include <filesystem>
#include <iostream>
#include <bitshuffle/bshuf_h5filter.h>
#include "HDF5Objects.h"
#include "H5FDpoison_sec2.h"
std::mutex hdf5_mutex;
// Callback registered with the poison sec2 VFD; logs the reported failure to std::cerr.
//
// filename     - file the failure refers to; may be nullptr
// operation    - name of the failed operation; may be nullptr
// error_number - error code reported by the VFD
// user_data    - opaque pointer supplied at registration; unused here
//
// Null strings are replaced with a placeholder: inserting a null `const char *`
// into an ostream is undefined behaviour and would leave std::cerr in a failed state.
static void HDF5PoisonCallback(
        const char *filename,
        const char *operation,
        int error_number,
        void *user_data
) {
    (void) user_data;
    const char *safe_filename = (filename != nullptr) ? filename : "(null)";
    const char *safe_operation = (operation != nullptr) ? operation : "(null)";
    std::cerr << "HDF5 Poison callback triggered: filename=" << safe_filename
              << ", operation=" << safe_operation
              << ", error_number=" << error_number << std::endl;
}
// Returns the raw HDF5 identifier stored in this wrapper (no ownership is transferred).
hid_t HDF5Id::GetID() const {
return id;
}
@@ -26,23 +37,27 @@ HDF5Id::HDF5Id(const HDF5Id &other) {
}
}
// Builds a scalar (rank-0) dataspace.
// Throws JFJochException (HDF5 category) when H5Screate fails.
HDF5DataSpace::HDF5DataSpace(ScalarTag) : HDF5Id() {
    id = H5Screate(H5S_SCALAR);
    const bool created = (id >= 0);
    if (!created) {
        throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot create scalar dataspace");
    }
    // A scalar dataspace has no dimensions.
    ndims = 0;
}
HDF5DataSpace::HDF5DataSpace(const std::vector<hsize_t> &dims, const std::vector<hsize_t> &max_dims) : HDF5Id() {
if (dims.empty())
throw JFJochException(JFJochExceptionCategory::HDF5, "Dimension vector empty");
if ((dims.size() == 1) && (dims[0] == 1))
id = H5Screate(H5S_SCALAR);
else {
if (max_dims.empty()) {
if (dims[0] == 0)
throw JFJochException(JFJochExceptionCategory::HDF5, "Value dimension cannot be 0");
if (max_dims.empty()) {
if (dims[0] == 0)
throw JFJochException(JFJochExceptionCategory::HDF5, "Value dimension cannot be 0");
id = H5Screate_simple(dims.size(), dims.data(), nullptr);
} else {
if (max_dims.size() != dims.size())
throw JFJochException(JFJochExceptionCategory::HDF5, "Discrepancy in size of dims/max_dims");
id = H5Screate_simple(dims.size(), dims.data(), max_dims.data());
}
id = H5Screate_simple(dims.size(), dims.data(), nullptr);
} else {
if (max_dims.size() != dims.size())
throw JFJochException(JFJochExceptionCategory::HDF5, "Discrepancy in size of dims/max_dims");
id = H5Screate_simple(dims.size(), dims.data(), max_dims.data());
}
if (id < 0)
throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot create dataspace");
@@ -90,7 +105,12 @@ void HDF5DataSpace::SelectHyperslabWithStride(const std::vector<hsize_t> &start,
}
// Releases the dataspace handle, if one is held.
// The handle is closed exactly once; the HDF5 error stack is silenced around the
// call because a destructor has no way to report a failure.
HDF5DataSpace::~HDF5DataSpace() {
    if (id >= 0) {
        H5E_BEGIN_TRY {
            H5Sclose(id);
        } H5E_END_TRY;
        id = -1;
    }
}
uint8_t HDF5DataSpace::GetNumOfDimensions() const {
@@ -204,7 +224,12 @@ HDF5DataType::HDF5DataType(const HDF5DataSet &data_set) :HDF5Id() {
}
// Releases the datatype handle, if one is held.
// The handle is closed exactly once; the HDF5 error stack is silenced around the
// call because a destructor has no way to report a failure.
HDF5DataType::~HDF5DataType() {
    if (id >= 0) {
        H5E_BEGIN_TRY {
            H5Tclose(id);
        } H5E_END_TRY;
        id = -1;
    }
}
size_t HDF5DataType::GetElemSize() const {
@@ -231,30 +256,39 @@ bool HDF5DataType::IsFloat() const {
// Creates an empty dataset-creation property list.
// The wrapper starts out describing a contiguous layout with no chunk dimensions.
HDF5Dcpl::HDF5Dcpl() : HDF5Id() {
    layout = HDF5DataSetLayout::CONTIGUOUS;
    ndim = 0;
    id = H5Pcreate(H5P_DATASET_CREATE);
}
HDF5Dcpl::HDF5Dcpl(const HDF5DataSet &data_set) : HDF5Id() {
id = H5Dget_create_plist(data_set.GetID());
// Check if chunking is enabled
H5D_layout_t layout = H5Pget_layout(id);
if (layout != H5D_CHUNKED) {
ndim = 0;
} else {
H5D_layout_t h5_layout = H5Pget_layout(id);
if (h5_layout == H5D_VIRTUAL)
layout = HDF5DataSetLayout::VIRTUAL;
else if (h5_layout == H5D_CHUNKED) {
layout = HDF5DataSetLayout::CHUNKED;
ndim = H5Pget_chunk(id, 0, nullptr);
if (ndim <= 0) {
H5Pclose(id);
throw JFJochException(JFJochExceptionCategory::HDF5,
"Error getting number of chunk dimensions");
}
}
} else
layout = HDF5DataSetLayout::CONTIGUOUS;
}
// Releases the dataset-creation property list, if one is held.
// The handle is closed exactly once; the HDF5 error stack is silenced around the
// call because a destructor has no way to report a failure.
HDF5Dcpl::~HDF5Dcpl() {
    if (id >= 0) {
        H5E_BEGIN_TRY {
            H5Pclose(id);
        } H5E_END_TRY;
        id = -1;
    }
}
void HDF5Dcpl::SetChunking(const std::vector<hsize_t> &dims) {
layout = HDF5DataSetLayout::CHUNKED;
if ((dims.empty()) || dims[0] == 0)
throw JFJochException(JFJochExceptionCategory::HDF5, "Value dimension cannot be 0");
ndim = dims.size();
@@ -298,26 +332,46 @@ void HDF5Dcpl::SetFillValueU16(uint16_t val) {
}
void HDF5Dcpl::SetVirtual(const std::string &path, const std::string &dataset, const HDF5DataSpace &src_dataspace, const HDF5DataSpace &dest_dataspace) {
layout = HDF5DataSetLayout::VIRTUAL;
std::string filename = ExtractFilename(path);
if (H5Pset_virtual(id, dest_dataspace.GetID(), filename.c_str(), dataset.c_str(), src_dataspace.GetID()) < 0)
throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot set virtual mapping");
}
// Returns the layout tracked by this wrapper (set by the constructors, SetChunking and SetVirtual).
HDF5DataSetLayout HDF5Dcpl::GetLayout() const {
return layout;
}
// Creates a new file-access property list.
// The result of H5Pcreate is stored as-is; it is not checked here.
HDF5Fapl::HDF5Fapl() : HDF5Id() {
id = H5Pcreate(H5P_FILE_ACCESS);
}
// Releases the file-access property list, if one is held.
// The handle is closed exactly once and only when valid; the HDF5 error stack is
// silenced around the call because a destructor has no way to report a failure.
HDF5Fapl::~HDF5Fapl() {
    if (id >= 0) {
        H5E_BEGIN_TRY {
            H5Pclose(id);
        } H5E_END_TRY;
        id = -1;
    }
}
void HDF5Fapl::SetVersionTo1p10orNewer() {
H5Pset_libver_bounds(id, H5F_LIBVER_V110, H5F_LIBVER_LATEST);
}
void HDF5Fapl::SetCloseStrong() {
H5Pset_fclose_degree(id, H5F_CLOSE_STRONG);
}
// Selects the poison sec2 virtual file driver for files opened with this property list.
// Throws JFJochException (HDF5 category) when the driver cannot be set.
void HDF5Fapl::SetPoisonSec2() {
    const auto status = H5Pset_fapl_poison_sec2(id);
    if (status < 0) {
        throw JFJochException(JFJochExceptionCategory::HDF5,
                              "Cannot enable poisoned sec2 HDF5 VFD");
    }
}
template <typename T>
static HDF5Object& WriteOrCreateScalarAttr(HDF5Object& object, const std::string& name, const T& val) {
HDF5DataSpace dataspace;
HDF5DataSpace dataspace(HDF5DataSpace::Scalar);
HDF5DataType datatype(val);
hid_t attr_id = -1;
@@ -364,7 +418,7 @@ static HDF5Object& WriteOrCreateScalarAttr(HDF5Object& object, const std::string
}
HDF5Object & HDF5Object::Attr(const std::string &name, const std::string &val) {
HDF5DataSpace dataspace;
HDF5DataSpace dataspace(HDF5DataSpace::Scalar);
HDF5DataType datatype(val);
hid_t attr_id = -1;
@@ -647,7 +701,7 @@ std::unique_ptr<HDF5DataSet> HDF5Object::SaveScalar(const std::string &name, con
std::unique_ptr<HDF5DataSet> HDF5Object::SaveScalar(const std::string &name, const char *val) {
HDF5DataType data_type(val);
HDF5DataSpace data_space({1});
HDF5DataSpace data_space(HDF5DataSpace::Scalar);
auto dataset = std::make_unique<HDF5DataSet>(*this, name, data_type, data_space);
dataset->Write(data_type, val);
return dataset;
@@ -685,21 +739,78 @@ HDF5Group::HDF5Group(const HDF5Object& parent, const char *name) : HDF5Object()
}
// Releases the group handle, if one is held.
// The handle is closed exactly once and only when valid; the HDF5 error stack is
// silenced around the call because a destructor has no way to report a failure.
HDF5Group::~HDF5Group() {
    if (id >= 0) {
        H5E_BEGIN_TRY {
            H5Gclose(id);
        } H5E_END_TRY;
        id = -1;
    }
}
// Creates an HDF5 file for writing (H5F_ACC_TRUNC).
//
// filename - path of the file to create
// v1_10    - when true, the file-access property list is restricted to HDF5 1.10 or newer
//
// Throws JFJochException (HDF5 category) when H5Fcreate fails.
HDF5File::HDF5File(const std::string& filename, bool v1_10) : HDF5Object() {
HDF5Fapl fapl;
// NOTE(review): presumably registers the poison sec2 VFD and installs the logging
// callback process-wide; both calls run on every construction - confirm they are idempotent.
H5FD_poison_sec2_init();
H5FD_poison_sec2_set_callback(HDF5PoisonCallback, nullptr);
if (v1_10)
fapl.SetVersionTo1p10orNewer();
fapl.SetCloseStrong();
// Route file I/O through the poison sec2 driver.
fapl.SetPoisonSec2();
id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, fapl.GetID());
if (id < 0)
throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot open/create data HDF5 file " + filename);
}
void HDF5File::Close() {
if (id < 0)
return;
// Invalidate first; if anything below fails we must NOT leave a live id
// behind for the destructor or later code to touch.
const hid_t local_id = id;
id = -1;
H5FD_poison_sec2_begin_fail_pass();
herr_t close_err = 0;
H5E_BEGIN_TRY {
close_err = H5Fclose(local_id);
} H5E_END_TRY;
H5FD_poison_sec2_end_fail_pass();
const char *filename = nullptr;
const char *operation = nullptr;
int error_number = 0;
const int poison_error = H5FD_poison_sec2_get_last_error(
&filename,
&operation,
&error_number
);
if (close_err < 0 || poison_error > 0)
throw JFJochException(
JFJochExceptionCategory::HDF5,
"Failed to close HDF5 file, operation=" +
std::string(operation != nullptr ? operation : "") +
", filename=" +
std::string(filename != nullptr ? filename : "") +
", errno=" +
std::to_string(error_number)
);
}
// Closes the file if Close() was not called (or did not run to completion).
// The handle is closed exactly once, inside the poison VFD fail-pass bracket and with
// the HDF5 error stack silenced; errors are discarded because a destructor cannot report them.
HDF5File::~HDF5File() {
    if (id >= 0) {
        // Invalidate the member first so the identifier cannot be reused.
        const hid_t tmp = id;
        id = -1;

        H5FD_poison_sec2_begin_fail_pass();
        H5E_BEGIN_TRY {
            H5Fclose(tmp);
        } H5E_END_TRY;
        H5FD_poison_sec2_end_fail_pass();
    }
}
void HDF5File::Delete(const std::string& path) {
@@ -713,7 +824,10 @@ HDF5ReadOnlyFile::HDF5ReadOnlyFile(const std::string &filename) {
}
// Closes the read-only file handle, if one is held.
// The handle is closed exactly once; the HDF5 error stack is silenced around the
// call because a destructor has no way to report a failure.
HDF5ReadOnlyFile::~HDF5ReadOnlyFile() {
    if (id >= 0) {
        H5E_BEGIN_TRY {
            H5Fclose(id);
        } H5E_END_TRY;
        id = -1;
    }
}
HDF5DataSet::HDF5DataSet(const HDF5Object &parent, const std::string &name, const HDF5DataType &data_type,
@@ -799,8 +913,29 @@ std::string HDF5DataSet::ReadString() const {
return buffer;
}
void HDF5DataSet::Close() {
if (id < 0)
return;
const hid_t local_id = id;
id = -1;
herr_t err = 0;
H5E_BEGIN_TRY {
err = H5Dclose(local_id);
} H5E_END_TRY;
if (err < 0)
throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot close HDF5 dataset");
}
// Releases the dataset handle, if one is still held.
// The handle is closed exactly once; the HDF5 error stack is silenced around the
// call because a destructor has no way to report a failure.
HDF5DataSet::~HDF5DataSet() {
    if (id >= 0) {
        H5E_BEGIN_TRY {
            H5Dclose(id);
        } H5E_END_TRY;
        id = -1;
    }
}
void HDF5DataSet::ReadDirectChunk(std::vector<uint8_t> &val, const std::vector<hsize_t> &offset) {
@@ -995,3 +1130,208 @@ std::string HDF5Object::GetLinkedFileName(const std::string& name) const {
return s;
}
namespace {
// Returns the current extent of a dataspace, one entry per dimension.
// A rank-0 dataspace yields an empty vector.
// Throws JFJochException (HDF5 category) when the rank or the extents cannot be read.
std::vector<hsize_t> GetDataspaceDimensions(hid_t dataspace_id) {
    const int rank = H5Sget_simple_extent_ndims(dataspace_id);
    if (rank < 0) {
        throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot read dataspace dimensions");
    }

    std::vector<hsize_t> extents(rank);
    if (rank == 0)
        return extents;

    const int status = H5Sget_simple_extent_dims(dataspace_id, extents.data(), nullptr);
    if (status < 0) {
        throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot read dataspace dimensions");
    }
    return extents;
}
// Decodes the selection of a dataspace into start/stride/count/block vectors.
//
// - H5S_SEL_ALL: all four output vectors are cleared.
// - Regular hyperslab: the vectors receive one entry per dimension.
// - Anything else: JFJochException (HDF5 category) is thrown; the output vectors
//   have already been overwritten with defaults by then.
void GetRegularSelection(hid_t dataspace_id,
                         std::vector<hsize_t> &start,
                         std::vector<hsize_t> &stride,
                         std::vector<hsize_t> &count,
                         std::vector<hsize_t> &block) {
    const auto extents = GetDataspaceDimensions(dataspace_id);
    const H5S_sel_type kind = H5Sget_select_type(dataspace_id);

    if (kind == H5S_SEL_ALL) {
        for (auto *out : {&start, &stride, &count, &block})
            out->clear();
        return;
    }

    // Defaults sized to the dataspace rank; HDF5 fills them in below.
    const auto rank = extents.size();
    start.assign(rank, 0);
    stride.assign(rank, 1);
    count.assign(rank, 1);
    block = extents;

    const bool regular_hyperslab = (kind == H5S_SEL_HYPERSLABS)
                                   && (H5Sis_regular_hyperslab(dataspace_id) > 0);
    if (!regular_hyperslab) {
        throw JFJochException(JFJochExceptionCategory::HDF5,
                              "Only regular hyperslab VDS selections are supported");
    }

    const herr_t status = H5Sget_regular_hyperslab(dataspace_id,
                                                   start.data(),
                                                   stride.data(),
                                                   count.data(),
                                                   block.data());
    if (status < 0) {
        throw JFJochException(JFJochExceptionCategory::HDF5,
                              "Cannot decode VDS hyperslab selection");
    }
}
// Tells whether `value` lies inside a one-dimensional regular hyperslab, i.e. inside
// one of the `count` blocks of length `block` that begin at start, start + stride, ...
//
// Computed in constant time: scanning every block is linear in `count` (which may be
// as large as H5S_UNLIMITED) and `i * stride` can wrap around for large counts.
bool ContainsInRegularHyperslab(hsize_t value,
                                hsize_t start,
                                hsize_t stride,
                                hsize_t count,
                                hsize_t block) {
    if (count == 0 || value < start)
        return false;

    const hsize_t offset = value - start;

    // Last block that starts at or before `value`. If any block contains the value,
    // this one does, since it has the smallest distance from block start to value.
    hsize_t block_number = 0;
    if (stride > 0) {
        const hsize_t candidate = offset / stride;
        block_number = (candidate < count - 1) ? candidate : count - 1;
    }

    // block_number * stride <= offset, so neither expression can wrap.
    return offset - block_number * stride < block;
}
// Returns the zero-based position of `value` among the elements selected by a
// one-dimensional regular hyperslab (blocks are numbered in order, elements within a
// block consecutively).
//
// Throws JFJochException (HDF5 category) when `value` is not part of the selection.
//
// Computed in constant time: scanning every block is linear in `count` (which may be
// as large as H5S_UNLIMITED) and `i * stride` can wrap around for large counts.
hsize_t IndexInRegularHyperslab(hsize_t value,
                                hsize_t start,
                                hsize_t stride,
                                hsize_t count,
                                hsize_t block) {
    if (count > 0 && block > 0 && value >= start) {
        const hsize_t offset = value - start;

        // First block whose end lies beyond `value`; block 0 when the value is within
        // the first `block` elements.
        hsize_t block_number = 0;
        bool found = offset < block;
        if (!found && stride > 0) {
            block_number = (offset - block) / stride + 1;
            // The block must exist and must start at or before `value`.
            found = (block_number < count) && (block_number <= offset / stride);
        }

        if (found)
            return block_number * block + (offset - block_number * stride);
    }
    throw JFJochException(JFJochExceptionCategory::HDF5,
                          "Image is not part of VDS hyperslab");
}
// Maps the zero-based position `index` within a one-dimensional regular hyperslab
// back to the coordinate it selects.
//
// Throws JFJochException (HDF5 category) when `index` is past the end of the selection
// (which includes every index when `block` or `count` is 0).
hsize_t ValueFromRegularHyperslabIndex(hsize_t index,
                                       hsize_t start,
                                       hsize_t stride,
                                       hsize_t count,
                                       hsize_t block) {
    // `index / block >= count` is the overflow-free form of `index >= count * block`;
    // the product can wrap around for very large counts (e.g. H5S_UNLIMITED).
    if (block == 0 || index / block >= count)
        throw JFJochException(JFJochExceptionCategory::HDF5,
                              "Source image is outside of VDS source hyperslab");

    const hsize_t block_number = index / block;
    const hsize_t in_block = index % block;
    return start + block_number * stride + in_block;
}
}
// Tells whether `image_number` falls inside this mapping's virtual selection along
// the first dimension. A mapping with any empty virtual selection vector contains nothing.
bool HDF5VirtualDatasetMapping::ContainsVirtualImage(hsize_t image_number) const {
    const bool has_selection = !virtual_start.empty()
                               && !virtual_stride.empty()
                               && !virtual_count.empty()
                               && !virtual_block.empty();
    if (!has_selection)
        return false;

    const hsize_t first_start = virtual_start[0];
    const hsize_t first_stride = virtual_stride[0];
    const hsize_t first_count = virtual_count[0];
    const hsize_t first_block = virtual_block[0];
    return ContainsInRegularHyperslab(image_number, first_start, first_stride, first_count, first_block);
}
// Translates an image number of the virtual dataset into the image number inside the
// source dataset of this mapping (first dimension only).
//
// When any source selection vector is empty, the position within the virtual
// selection is returned unchanged.
//
// Throws JFJochException (HDF5 category) when the image is not covered by the mapping
// or the position is outside of the source selection.
hsize_t HDF5VirtualDatasetMapping::SourceImage(hsize_t image_number) const {
    if (!ContainsVirtualImage(image_number)) {
        throw JFJochException(JFJochExceptionCategory::HDF5,
                              "Image is outside of VDS mapping");
    }

    // Position of the image among the elements selected in the virtual dataset.
    const hsize_t position = IndexInRegularHyperslab(image_number,
                                                     virtual_start[0],
                                                     virtual_stride[0],
                                                     virtual_count[0],
                                                     virtual_block[0]);

    const bool has_source_selection = !source_start.empty()
                                      && !source_stride.empty()
                                      && !source_count.empty()
                                      && !source_block.empty();
    if (!has_source_selection)
        return position;

    return ValueFromRegularHyperslabIndex(position,
                                          source_start[0],
                                          source_stride[0],
                                          source_count[0],
                                          source_block[0]);
}
std::vector<HDF5VirtualDatasetMapping> HDF5Dcpl::GetVirtualMappings() const {
size_t mapping_count = 0;
if (H5Pget_virtual_count(id, &mapping_count) < 0)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get number of VDS mappings");
if (mapping_count < 0)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get number of VDS mappings");
std::vector<HDF5VirtualDatasetMapping> ret;
ret.reserve(static_cast<size_t>(mapping_count));
for (size_t i = 0; i < static_cast<size_t>(mapping_count); i++) {
HDF5VirtualDatasetMapping mapping;
const ssize_t filename_size = H5Pget_virtual_filename(id, i, nullptr, 0);
if (filename_size < 0)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get VDS source filename size");
std::vector<char> filename(filename_size + 1, '\0');
if (H5Pget_virtual_filename(id, i, filename.data(), filename.size()) < 0)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get VDS source filename");
mapping.filename = filename.data();
const ssize_t dataset_size = H5Pget_virtual_dsetname(id, i, nullptr, 0);
if (dataset_size < 0)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get VDS source dataset size");
std::vector<char> dataset(dataset_size + 1, '\0');
if (H5Pget_virtual_dsetname(id, i, dataset.data(), dataset.size()) < 0)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get VDS source dataset");
mapping.dataset = dataset.data();
const hid_t virtual_space = H5Pget_virtual_vspace(id, i);
if (virtual_space < 0)
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get VDS virtual dataspace");
const hid_t source_space = H5Pget_virtual_srcspace(id, i);
if (source_space < 0) {
H5Sclose(virtual_space);
throw JFJochException(JFJochExceptionCategory::HDF5,
"Cannot get VDS source dataspace");
}
try {
GetRegularSelection(virtual_space,
mapping.virtual_start,
mapping.virtual_stride,
mapping.virtual_count,
mapping.virtual_block);
GetRegularSelection(source_space,
mapping.source_start,
mapping.source_stride,
mapping.source_count,
mapping.source_block);
} catch (...) {
H5Sclose(source_space);
H5Sclose(virtual_space);
throw;
}
H5Sclose(source_space);
H5Sclose(virtual_space);
ret.emplace_back(std::move(mapping));
}
return ret;
}