StreamWriter: Opus-provided improvements to error resilience
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 12m17s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 15m22s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 16m11s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 16m41s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 17m3s
Build Packages / build:rpm (rocky8) (push) Successful in 18m10s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 19m15s
Build Packages / build:rpm (rocky9) (push) Successful in 19m17s
Build Packages / Generate python client (push) Successful in 1m46s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 9m9s
Build Packages / Create release (push) Has been skipped
Build Packages / Build documentation (push) Successful in 2m0s
Build Packages / DIALS test (push) Failing after 9m1s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 10m43s
Build Packages / XDS test (JFJoch plugin) (push) Failing after 9m9s
Build Packages / XDS test (durin plugin) (push) Failing after 9m43s
Build Packages / XDS test (neggia plugin) (push) Failing after 8m41s
Build Packages / Unit tests (push) Failing after 56m55s

This commit is contained in:
2026-05-04 11:44:46 +02:00
parent 0a2c370983
commit f7a9e4eab1
7 changed files with 277 additions and 109 deletions
+85 -39
View File
@@ -60,27 +60,66 @@ std::optional<HDF5DataFileStatistics> HDF5DataFile::Close() {
if (!data_file)
return {};
HDF5Group group_exp(*data_file, "/entry/detector");
group_exp.NXClass("NXcollection");
group_exp.SaveVector("timestamp", timestamp);
group_exp.SaveVector("exptime", exptime);
group_exp.SaveVector("number", number);
for (auto &p: plugins)
p->WriteFinal(*data_file);
if (data_set) {
data_set->SetExtent({max_image_number + 1, ypixel, xpixel});
data_set
->Attr("image_nr_low", (int32_t) (image_low + 1))
.Attr("image_nr_high", (int32_t) (image_low + 1 + max_image_number));
data_set->Close();
data_set.reset();
// If a prior write already failed, do not call ANY further HDF5 routines on
// this file (per HDF Forum guidance: behavior after an I/O error is undefined,
// and a subsequent H5Fclose can segfault). Just drop the handles and unlink
// the tmp file. Do NOT rename to the final name.
if (broken) {
if (data_set) data_set.reset();
if (data_set_image_number) data_set_image_number.reset();
data_file.reset();
if (manage_file) {
std::error_code ec;
std::filesystem::remove(tmp_filename, ec);
}
closed = true;
return {};
}
if (manage_file ) {
data_file->Close();
try {
HDF5Group group_exp(*data_file, "/entry/detector");
group_exp.NXClass("NXcollection");
group_exp.SaveVector("timestamp", timestamp);
group_exp.SaveVector("exptime", exptime);
group_exp.SaveVector("number", number);
for (auto &p: plugins)
p->WriteFinal(*data_file);
if (data_set) {
data_set->SetExtent({max_image_number + 1, ypixel, xpixel});
data_set
->Attr("image_nr_low", (int32_t) (image_low + 1))
.Attr("image_nr_high", (int32_t) (image_low + 1 + max_image_number));
data_set->Close();
data_set.reset();
}
} catch (...) {
// Anything during finalize failed (most likely ENOSPC). Mark broken,
// drop handles without further HDF5 calls, remove tmp, propagate.
broken = true;
if (data_set) data_set.reset();
data_file.reset();
if (manage_file) {
std::error_code ec;
std::filesystem::remove(tmp_filename, ec);
}
closed = true;
throw;
}
if (manage_file) {
try {
data_file->Close();
} catch (...) {
broken = true;
data_file.reset();
std::error_code ec;
std::filesystem::remove(tmp_filename, ec);
closed = true;
throw;
}
data_file.reset();
if (std::filesystem::exists(filename) && !overwrite)
@@ -102,7 +141,6 @@ std::optional<HDF5DataFileStatistics> HDF5DataFile::Close() {
ret.total_images = nimages;
ret.filename = filename;
ret.file_number = file_number + 1;
return ret;
}
@@ -116,10 +154,8 @@ HDF5DataFile::~HDF5DataFile() {
std::error_code ec;
std::filesystem::remove(tmp_filename, ec);
}
} catch (const std::exception &e) {
std::cerr << "HDF5DataFile::~HDF5DataFile: " << e.what() << std::endl;
} catch (...) {
std::cerr << "HDF5DataFile::~HDF5DataFile: Unknown error " << std::endl;
// Never throw from destructor; HDF5 may already be in a bad state
}
}
}
@@ -168,6 +204,9 @@ void HDF5DataFile::Write(const DataMessage &msg, uint64_t image_number) {
if (closed)
throw JFJochException(JFJochExceptionCategory::FileWriteError,
"Trying to write to already closed file");
if (broken)
throw JFJochException(JFJochExceptionCategory::FileWriteError,
"Trying to write to file that previously failed");
if (image_number >= images_per_file)
throw JFJochException(JFJochExceptionCategory::FileWriteError,
"Image number out of bounds");
@@ -177,23 +216,30 @@ void HDF5DataFile::Write(const DataMessage &msg, uint64_t image_number) {
CreateFile(msg, std::make_shared<HDF5File>(tmp_filename));
}
if (new_file || (static_cast<int64_t>(image_number) > max_image_number)) {
max_image_number = image_number;
timestamp.resize(max_image_number + 1);
exptime.resize(max_image_number + 1);
number.resize(max_image_number + 1);
new_file = false;
try {
if (new_file || (static_cast<int64_t>(image_number) > max_image_number)) {
max_image_number = image_number;
timestamp.resize(max_image_number + 1);
exptime.resize(max_image_number + 1);
number.resize(max_image_number + 1);
new_file = false;
}
nimages++;
data_set->WriteDirectChunk(msg.image.GetCompressed(), msg.image.GetCompressedSize(),
{image_number, 0, 0});
for (auto &p: plugins)
p->Write(msg, image_number);
timestamp[image_number] = msg.timestamp;
exptime[image_number] = msg.exptime;
number[image_number] = (msg.original_number) ? msg.original_number.value() : msg.number;
} catch (...) {
// Sticky failure: do not call into HDF5 again for this file.
broken = true;
throw;
}
nimages++;
data_set->WriteDirectChunk(msg.image.GetCompressed(), msg.image.GetCompressedSize(), {image_number, 0, 0});
for (auto &p: plugins)
p->Write(msg, image_number);
timestamp[image_number] = msg.timestamp;
exptime[image_number] = msg.exptime;
number[image_number] = (msg.original_number) ? msg.original_number.value() : msg.number;
}
size_t HDF5DataFile::GetNumImages() const {