StreamWriter: Opus-provided improvements for resilience to errors
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 12m17s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 15m22s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 16m11s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 16m41s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 17m3s
Build Packages / build:rpm (rocky8) (push) Successful in 18m10s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 19m15s
Build Packages / build:rpm (rocky9) (push) Successful in 19m17s
Build Packages / Generate python client (push) Successful in 1m46s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 9m9s
Build Packages / Create release (push) Has been skipped
Build Packages / Build documentation (push) Successful in 2m0s
Build Packages / DIALS test (push) Failing after 9m1s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 10m43s
Build Packages / XDS test (JFJoch plugin) (push) Failing after 9m9s
Build Packages / XDS test (durin plugin) (push) Failing after 9m43s
Build Packages / XDS test (neggia plugin) (push) Failing after 8m41s
Build Packages / Unit tests (push) Failing after 56m55s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 12m17s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 15m22s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 16m11s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 16m41s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 17m3s
Build Packages / build:rpm (rocky8) (push) Successful in 18m10s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 19m15s
Build Packages / build:rpm (rocky9) (push) Successful in 19m17s
Build Packages / Generate python client (push) Successful in 1m46s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 9m9s
Build Packages / Create release (push) Has been skipped
Build Packages / Build documentation (push) Successful in 2m0s
Build Packages / DIALS test (push) Failing after 9m1s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 10m43s
Build Packages / XDS test (JFJoch plugin) (push) Failing after 9m9s
Build Packages / XDS test (durin plugin) (push) Failing after 9m43s
Build Packages / XDS test (neggia plugin) (push) Failing after 8m41s
Build Packages / Unit tests (push) Failing after 56m55s
This commit is contained in:
+85
-39
@@ -60,27 +60,66 @@ std::optional<HDF5DataFileStatistics> HDF5DataFile::Close() {
|
||||
if (!data_file)
|
||||
return {};
|
||||
|
||||
HDF5Group group_exp(*data_file, "/entry/detector");
|
||||
group_exp.NXClass("NXcollection");
|
||||
|
||||
group_exp.SaveVector("timestamp", timestamp);
|
||||
group_exp.SaveVector("exptime", exptime);
|
||||
group_exp.SaveVector("number", number);
|
||||
|
||||
for (auto &p: plugins)
|
||||
p->WriteFinal(*data_file);
|
||||
|
||||
if (data_set) {
|
||||
data_set->SetExtent({max_image_number + 1, ypixel, xpixel});
|
||||
data_set
|
||||
->Attr("image_nr_low", (int32_t) (image_low + 1))
|
||||
.Attr("image_nr_high", (int32_t) (image_low + 1 + max_image_number));
|
||||
data_set->Close();
|
||||
data_set.reset();
|
||||
// If a prior write already failed, do not call ANY further HDF5 routines on
|
||||
// this file (per HDF Forum guidance: behavior after an I/O error is undefined,
|
||||
// and a subsequent H5Fclose can segfault). Just drop the handles and unlink
|
||||
// the tmp file. Do NOT rename to the final name.
|
||||
if (broken) {
|
||||
if (data_set) data_set.reset();
|
||||
if (data_set_image_number) data_set_image_number.reset();
|
||||
data_file.reset();
|
||||
if (manage_file) {
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(tmp_filename, ec);
|
||||
}
|
||||
closed = true;
|
||||
return {};
|
||||
}
|
||||
|
||||
if (manage_file ) {
|
||||
data_file->Close();
|
||||
try {
|
||||
HDF5Group group_exp(*data_file, "/entry/detector");
|
||||
group_exp.NXClass("NXcollection");
|
||||
|
||||
group_exp.SaveVector("timestamp", timestamp);
|
||||
group_exp.SaveVector("exptime", exptime);
|
||||
group_exp.SaveVector("number", number);
|
||||
|
||||
for (auto &p: plugins)
|
||||
p->WriteFinal(*data_file);
|
||||
|
||||
if (data_set) {
|
||||
data_set->SetExtent({max_image_number + 1, ypixel, xpixel});
|
||||
data_set
|
||||
->Attr("image_nr_low", (int32_t) (image_low + 1))
|
||||
.Attr("image_nr_high", (int32_t) (image_low + 1 + max_image_number));
|
||||
data_set->Close();
|
||||
data_set.reset();
|
||||
}
|
||||
} catch (...) {
|
||||
// Anything during finalize failed (most likely ENOSPC). Mark broken,
|
||||
// drop handles without further HDF5 calls, remove tmp, propagate.
|
||||
broken = true;
|
||||
if (data_set) data_set.reset();
|
||||
data_file.reset();
|
||||
if (manage_file) {
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(tmp_filename, ec);
|
||||
}
|
||||
closed = true;
|
||||
throw;
|
||||
}
|
||||
|
||||
if (manage_file) {
|
||||
try {
|
||||
data_file->Close();
|
||||
} catch (...) {
|
||||
broken = true;
|
||||
data_file.reset();
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(tmp_filename, ec);
|
||||
closed = true;
|
||||
throw;
|
||||
}
|
||||
data_file.reset();
|
||||
|
||||
if (std::filesystem::exists(filename) && !overwrite)
|
||||
@@ -102,7 +141,6 @@ std::optional<HDF5DataFileStatistics> HDF5DataFile::Close() {
|
||||
ret.total_images = nimages;
|
||||
ret.filename = filename;
|
||||
ret.file_number = file_number + 1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -116,10 +154,8 @@ HDF5DataFile::~HDF5DataFile() {
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(tmp_filename, ec);
|
||||
}
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "HDF5DataFile::~HDF5DataFile: " << e.what() << std::endl;
|
||||
} catch (...) {
|
||||
std::cerr << "HDF5DataFile::~HDF5DataFile: Unknown error " << std::endl;
|
||||
// Never throw from destructor; HDF5 may already be in a bad state
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -168,6 +204,9 @@ void HDF5DataFile::Write(const DataMessage &msg, uint64_t image_number) {
|
||||
if (closed)
|
||||
throw JFJochException(JFJochExceptionCategory::FileWriteError,
|
||||
"Trying to write to already closed file");
|
||||
if (broken)
|
||||
throw JFJochException(JFJochExceptionCategory::FileWriteError,
|
||||
"Trying to write to file that previously failed");
|
||||
if (image_number >= images_per_file)
|
||||
throw JFJochException(JFJochExceptionCategory::FileWriteError,
|
||||
"Image number out of bounds");
|
||||
@@ -177,23 +216,30 @@ void HDF5DataFile::Write(const DataMessage &msg, uint64_t image_number) {
|
||||
CreateFile(msg, std::make_shared<HDF5File>(tmp_filename));
|
||||
}
|
||||
|
||||
if (new_file || (static_cast<int64_t>(image_number) > max_image_number)) {
|
||||
max_image_number = image_number;
|
||||
timestamp.resize(max_image_number + 1);
|
||||
exptime.resize(max_image_number + 1);
|
||||
number.resize(max_image_number + 1);
|
||||
new_file = false;
|
||||
try {
|
||||
if (new_file || (static_cast<int64_t>(image_number) > max_image_number)) {
|
||||
max_image_number = image_number;
|
||||
timestamp.resize(max_image_number + 1);
|
||||
exptime.resize(max_image_number + 1);
|
||||
number.resize(max_image_number + 1);
|
||||
new_file = false;
|
||||
}
|
||||
|
||||
nimages++;
|
||||
data_set->WriteDirectChunk(msg.image.GetCompressed(), msg.image.GetCompressedSize(),
|
||||
{image_number, 0, 0});
|
||||
|
||||
for (auto &p: plugins)
|
||||
p->Write(msg, image_number);
|
||||
|
||||
timestamp[image_number] = msg.timestamp;
|
||||
exptime[image_number] = msg.exptime;
|
||||
number[image_number] = (msg.original_number) ? msg.original_number.value() : msg.number;
|
||||
} catch (...) {
|
||||
// Sticky failure: do not call into HDF5 again for this file.
|
||||
broken = true;
|
||||
throw;
|
||||
}
|
||||
|
||||
nimages++;
|
||||
data_set->WriteDirectChunk(msg.image.GetCompressed(), msg.image.GetCompressedSize(), {image_number, 0, 0});
|
||||
|
||||
for (auto &p: plugins)
|
||||
p->Write(msg, image_number);
|
||||
|
||||
timestamp[image_number] = msg.timestamp;
|
||||
exptime[image_number] = msg.exptime;
|
||||
number[image_number] = (msg.original_number) ? msg.original_number.value() : msg.number;
|
||||
}
|
||||
|
||||
size_t HDF5DataFile::GetNumImages() const {
|
||||
|
||||
Reference in New Issue
Block a user