jfjoch_viewer: Handle file-reading errors much better and add retry logic for files opened over DBus (it remains to be seen whether this needs extra tweaking)

This commit is contained in:
2025-12-16 11:40:51 +01:00
parent 71b5d26d21
commit 2f299011bb
4 changed files with 259 additions and 4 deletions

View File

@@ -1,6 +1,11 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <cerrno>
#include <cstring>
#include "JFJochImageReadingWorker.h"
#include "../image_analysis/geom_refinement/AssignSpotsToRings.h"
#include "../image_analysis/spot_finding/StrongPixelSet.h"
@@ -8,9 +13,53 @@
#include "../image_analysis/spot_finding/ImageSpotFinder.h"
#include <QVector>
#include <QMutexLocker>
#include <QFileInfo>
#include "../preview/JFJochTIFF.h"
namespace {
// Classification returned by preflight_open_ro() for a read-only open probe.
// LoadFile_i() schedules a retry only for NotYetVisible; every other failure is reported immediately.
enum class PreflightResult {
// open() succeeded and fstat() did not report a directory.
Ok,
NotYetVisible, // open() failed with ENOENT, ESTALE, EIO, ETIMEDOUT or ENOTCONN
PermissionDenied, // open() failed with EACCES or EPERM
// Either open() succeeded on a directory, or it failed with EISDIR.
IsDirectory,
// open() failed with any errno not listed above.
OtherError
};
/// Probe whether `filename` can be opened read-only, without reading any data.
///
/// The file is opened with O_RDONLY | O_CLOEXEC and closed again straight away.
///
/// @param filename    Path to probe; converted with QString::toLocal8Bit().
/// @param reason_out  Cleared on entry. On failure it receives a human-readable
///                    description (strerror text plus the numeric errno, or
///                    "Path is a directory"). Left empty on success.
/// @return Ok if the path could be opened and is not a directory; otherwise a
///         classification of the failure (see PreflightResult).
PreflightResult preflight_open_ro(const QString& filename, std::string& reason_out) {
    reason_out.clear();

    const QByteArray path = filename.toLocal8Bit();

    // A signal interrupting open() (EINTR) says nothing about the file itself,
    // so repeat the call instead of reporting it as a fatal error.
    int fd = -1;
    int err = 0;
    do {
        errno = 0;
        fd = ::open(path.constData(), O_RDONLY | O_CLOEXEC);
        err = (fd < 0) ? errno : 0;
    } while (fd < 0 && err == EINTR);

    if (fd >= 0) {
        // open(O_RDONLY) succeeds on directories, so they have to be detected explicitly.
        // If fstat() itself fails, the path is optimistically treated as a regular file.
        struct stat st{};
        const bool is_directory = (::fstat(fd, &st) == 0) && S_ISDIR(st.st_mode);
        ::close(fd);

        if (is_directory) {
            reason_out = "Path is a directory";
            return PreflightResult::IsDirectory;
        }
        return PreflightResult::Ok;
    }

    // The strerror text is printed once; the numeric errno is kept for log searches.
    reason_out = fmt::format("{} (errno={})", std::strerror(err), err);

    // NFS can transiently report missing/stale entries.
    if (err == ENOENT || err == ESTALE || err == EIO || err == ETIMEDOUT || err == ENOTCONN)
        return PreflightResult::NotYetVisible;

    if (err == EACCES || err == EPERM)
        return PreflightResult::PermissionDenied;

    if (err == EISDIR)
        return PreflightResult::IsDirectory;

    return PreflightResult::OtherError;
}
}
JFJochImageReadingWorker::JFJochImageReadingWorker(const SpotFindingSettings &settings,
const DiffractionExperiment &experiment, QObject *parent)
: QObject(parent),
@@ -25,12 +74,92 @@ JFJochImageReadingWorker::JFJochImageReadingWorker(const SpotFindingSettings &se
autoload_timer = new QTimer(this);
autoload_timer->setInterval(autoload_interval);
connect(autoload_timer, &QTimer::timeout, this, &JFJochImageReadingWorker::AutoLoadTimerExpired);
file_open_retry_timer = new QTimer(this);
file_open_retry_timer->setSingleShot(true);
connect(file_open_retry_timer, &QTimer::timeout, this, &JFJochImageReadingWorker::FileOpenRetryTimerExpired);
}
void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry) {
// Put the file-open retry machinery back into its idle state and forget the
// pending load request. The caller must already hold mutex `m`.
void JFJochImageReadingWorker::ResetFileOpenRetry_i() {
    if (file_open_retry_timer != nullptr)
        file_open_retry_timer->stop();

    // Only tell the UI to dismiss its dialog when a retry cycle was actually running.
    if (file_open_retry_active)
        emit fileLoadRetryStatus(false, "");

    // Back to defaults: no pending request, no running clock, initial back-off delay.
    pending_load = {};
    file_open_retry_elapsed.invalidate();
    file_open_retry_delay_ms = 50;
    file_open_retry_attempts = 0;
    file_open_retry_warned = false;
    file_open_retry_active = false;
}
void JFJochImageReadingWorker::ScheduleFileOpenRetry_i(const QString& reason) {
// Assumes m locked!
if (!file_open_retry_active) {
file_open_retry_active = true;
file_open_retry_warned = false;
file_open_retry_attempts = 0;
file_open_retry_delay_ms = 50;
file_open_retry_elapsed.restart();
}
if (!file_open_retry_warned) {
file_open_retry_warned = true;
logger.Warning(fmt::format(
"File '{}' not available yet (GPFS/NFS). Retrying with back-off up to 10 s. Reason: {}",
pending_load.filename.toStdString(), reason.toStdString()));
// Signal UI to show the dialog
emit fileLoadRetryStatus(true, fmt::format("Waiting for file {} to appear on disk...", pending_load.filename.toStdString()).c_str());
} else {
logger.Debug(fmt::format(
"Retry pending for file '{}'. Reason: {}",
pending_load.filename.toStdString(), reason.toStdString()));
}
if (file_open_retry_elapsed.isValid() && file_open_retry_elapsed.elapsed() >= 10'000) {
std::string msg = fmt::format(
"Timed out waiting for file '{}' after 10 s ({} attempt(s))",
pending_load.filename.toStdString(), file_open_retry_attempts);
logger.Error(msg);
// Reset first (closes the progress dialog)
ResetFileOpenRetry_i();
// Then show the error dialog
emit fileLoadError("File Open Timeout", QString::fromStdString(msg));
return;
}
const int delay = file_open_retry_delay_ms;
file_open_retry_delay_ms = std::min(file_open_retry_delay_ms * 2, file_open_retry_delay_max_ms);
if (file_open_retry_timer)
file_open_retry_timer->start(delay);
}
// Slot connected to file_open_retry_timer's timeout: re-issues the pending load
// request while a retry cycle is still active.
void JFJochImageReadingWorker::FileOpenRetryTimerExpired() {
PendingLoadRequest req;
QMutexLocker ul(&m);
// NOTE(review): this `try {` has no matching `catch` or closing brace in this view —
// possibly a leftover line from the diff rendering; confirm against the real file.
try {
// The retry cycle is no longer active (ResetFileOpenRetry_i() clears the flag): nothing to do.
if (!file_open_retry_active)
return;
// Work on a copy: LoadFile_i() takes the filename by const reference and can call
// ResetFileOpenRetry_i(), which clears pending_load — presumably why the request is copied first.
req = pending_load;
// Re-trigger LoadFile, but keep retry=true so we stay in the retry loop.
LoadFile_i(req.filename, req.image_number, req.summation, true);
}
void JFJochImageReadingWorker::LoadFile_i(const QString &filename, qint64 image_number, qint64 summation, bool retry) {
try {
std::shared_ptr<const JFJochReaderDataset> dataset;
auto start = std::chrono::high_resolution_clock::now();
@@ -46,10 +175,61 @@ void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_nu
} else {
http_mode = false;
if (retry) {
pending_load.filename = filename;
pending_load.image_number = image_number;
pending_load.summation = summation;
std::string reason;
const PreflightResult pr = preflight_open_ro(filename, reason);
switch (pr) {
case PreflightResult::Ok:
break;
case PreflightResult::NotYetVisible:
ScheduleFileOpenRetry_i(QString::fromStdString(reason));
return; // IMPORTANT: do not try to open the file yet
case PreflightResult::PermissionDenied:
logger.Error(fmt::format(
"Permission denied opening '{}' (read-only preflight failed: {}). Not retrying.",
filename.toStdString(), reason));
emit fileLoadError("Permission Denied", QString::fromStdString(reason));
ResetFileOpenRetry_i();
return;
case PreflightResult::IsDirectory:
logger.Error(fmt::format(
"Error opening '{}': {}. Not retrying.",
filename.toStdString(), reason));
emit fileLoadError("Cannot open directory", QString::fromStdString(reason));
ResetFileOpenRetry_i();
return;
case PreflightResult::OtherError:
logger.Error(fmt::format(
"Other error '{}' (read-only preflight failed: {}). Not retrying.",
filename.toStdString(), reason));
emit fileLoadError("File Open Error", QString::fromStdString(reason));
ResetFileOpenRetry_i();
return;
}
// At this point we will attempt the real open.
file_open_retry_attempts++;
}
file_reader.ReadFile(filename.toStdString());
total_images = file_reader.GetNumberOfImages();
dataset = file_reader.GetDataset();
setAutoLoadMode_i(AutoloadMode::None);
if (retry && file_open_retry_active) {
logger.Info(fmt::format(
"File '{}' opened after {} attempt(s), waited {} ms",
filename.toStdString(),
file_open_retry_attempts,
file_open_retry_elapsed.isValid() ? file_open_retry_elapsed.elapsed() : 0));
}
ResetFileOpenRetry_i();
}
current_image.reset();
@@ -66,16 +246,31 @@ void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_nu
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
logger.Info("Loaded file {} in {} ms", filename.toStdString(), duration);
logger.Info(fmt::format("Loaded file {} in {} ms", filename.toStdString(), duration));
LoadImage_i(image_number, summation);
} catch (std::exception &e) {
logger.Error("Error loading file {} {}", filename.toStdString(), e.what());
logger.Error(fmt::format("Error loading file {} {}", filename.toStdString(), e.what()));
emit fileLoadError("File Load Error", QString::fromStdString(e.what()));
ResetFileOpenRetry_i();
emit datasetLoaded({});
emit imageLoaded({});
}
}
// Public slot: load `filename` and show image `image_number` with the given summation.
// Takes mutex `m`, discards any retry cycle left over from an earlier request and
// delegates the actual work to LoadFile_i(). `retry` is forwarded unchanged.
void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry) {
    QMutexLocker guard(&m);

    // An explicit request starts from a clean retry state.
    ResetFileOpenRetry_i();

    LoadFile_i(filename, image_number, summation, retry);
}
void JFJochImageReadingWorker::CloseFile() {
QMutexLocker ul(&m);
ResetFileOpenRetry_i();
if (http_mode)
http_reader.Close();
else

View File

@@ -76,6 +76,26 @@ private:
int autoload_interval_max_ms = 2000; // 0.5 Hz as bottom limit
float autoload_safety_factor = 2.0f;
// File open retry/back-off (GPFS via NFSv4 visibility lag)
// Arguments of the load request that is waiting to be retried.
struct PendingLoadRequest {
    QString filename;          // file to open
    qint64 image_number{0};    // image to display once the file is open
    qint64 summation{1};       // summation factor passed along with the image number
};
// Single-shot timer that fires FileOpenRetryTimerExpired(); created in the constructor.
QTimer *file_open_retry_timer = nullptr;
// Request to re-issue when the retry timer fires.
PendingLoadRequest pending_load;
// True while a retry cycle is in progress.
bool file_open_retry_active = false;
// True once the "not available yet" warning has been logged for the current cycle.
bool file_open_retry_warned = false;
int file_open_retry_attempts = 0;
// Delay before the next retry; doubled after every scheduled retry. The default is
// the same 50 ms that ResetFileOpenRetry_i() and ScheduleFileOpenRetry_i() assign.
int file_open_retry_delay_ms = 50;
// Upper bound for the doubling delay.
int file_open_retry_delay_max_ms = 10000;
// Time since the first failed probe of the current cycle; invalid when idle.
QElapsedTimer file_open_retry_elapsed;
void ResetFileOpenRetry_i();
void ScheduleFileOpenRetry_i(const QString& reason);
void LoadFile_i(const QString &filename, qint64 image_number, qint64 summation, bool retry);
void LoadImage_i(int64_t image_number, int64_t summation);
void ReanalyzeImage_i();
void UpdateDataset_i(const std::optional<DiffractionExperiment>& experiment);
@@ -91,6 +111,8 @@ signals:
void setRings(const QVector<float> &v);
void simpleImageLoaded(std::shared_ptr<const SimpleImage> image);
void autoloadChanged(AutoloadMode mode);
void fileLoadError(QString title, QString message);
void fileLoadRetryStatus(bool active, QString message);
public:
JFJochImageReadingWorker(const SpotFindingSettings &settings, const DiffractionExperiment& experiment, QObject *parent = nullptr);
@@ -98,6 +120,7 @@ public:
private slots:
void AutoLoadTimerExpired();
void FileOpenRetryTimerExpired();
public slots:
void LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry);

View File

@@ -24,6 +24,7 @@
#include "toolbar/JFJochViewerToolbarImage.h"
#include "windows/JFJoch2DAzintImageWindow.h"
#include "windows/JFJochAzIntWindow.h"
#include <QMessageBox>
JFJochViewerWindow::JFJochViewerWindow(QWidget *parent, bool dbus, const QString &file) : QMainWindow(parent) {
menuBar = new JFJochViewerMenu(this);
@@ -315,6 +316,12 @@ JFJochViewerWindow::JFJochViewerWindow(QWidget *parent, bool dbus, const QString
connect(reading_worker, &JFJochImageReadingWorker::imageLoaded, this,
[this](std::shared_ptr<const JFJochReaderImage> im) { lastImage = std::move(im); });
connect(reading_worker, &JFJochImageReadingWorker::fileLoadError,
this, &JFJochViewerWindow::OnFileLoadError);
connect(reading_worker, &JFJochImageReadingWorker::fileLoadRetryStatus,
this, &JFJochViewerWindow::OnFileLoadRetryStatus);
connect(menuBar, &JFJochViewerMenu::openDatasetInfo, this, &JFJochViewerWindow::NewDatasetInfo);
NewDatasetInfo();
@@ -384,3 +391,28 @@ void JFJochViewerWindow::keyReleaseEvent(QKeyEvent *event) {
}
QMainWindow::keyReleaseEvent(event);
}
// Slot connected to the reading worker's fileLoadError signal: shows a critical
// message box, parented to this window, with the given title and message.
void JFJochViewerWindow::OnFileLoadError(QString title, QString message) {
QMessageBox::critical(this, title, message);
}
// Slot connected to the reading worker's fileLoadRetryStatus signal.
// active == true : show (creating it on first use) a busy-indicator dialog with `message`.
// active == false: close and dispose of that dialog, if one exists.
void JFJochViewerWindow::OnFileLoadRetryStatus(bool active, QString message) {
    if (!active) {
        // Retry cycle finished: tear the dialog down so the next cycle starts fresh.
        if (retryDialog != nullptr) {
            retryDialog->close();
            retryDialog->deleteLater();
            retryDialog = nullptr;
        }
        return;
    }

    if (retryDialog == nullptr) {
        auto *dialog = new QProgressDialog(this);
        dialog->setWindowModality(Qt::WindowModal);
        dialog->setRange(0, 0);            // Infinite/Busy indicator
        dialog->setCancelButton(nullptr);  // Disable cancel for now
        dialog->setMinimumDuration(0);     // Show immediately
        dialog->setWindowTitle("Loading File");
        retryDialog = dialog;
    }

    retryDialog->setLabelText(message);
    retryDialog->show();
}

View File

@@ -5,6 +5,7 @@
#define JFJOCHVIEWERWINDOW_H
#include <QMainWindow>
#include <QProgressDialog>
#include "JFJochViewerMenu.h"
#include "../reader/JFJochHDF5Reader.h"
@@ -29,6 +30,8 @@ private:
std::shared_ptr<const JFJochReaderDataset> lastDataset; // added
std::shared_ptr<const JFJochReaderImage> lastImage; // added
QProgressDialog *retryDialog = nullptr;
QThread *reading_thread;
void keyPressEvent(QKeyEvent *event) override;
@@ -37,6 +40,8 @@ public slots:
void LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry);
void LoadImage(qint64 image_number, qint64 summation);
void NewDatasetInfo();
void OnFileLoadError(QString title, QString message);
void OnFileLoadRetryStatus(bool active, QString message);
signals:
void LoadFileRequest(const QString &filename, qint64 image_number, qint64 summation, bool retry);