From 2f299011bbc6fcdedbb612302d5adec33f5f9ed4 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Tue, 16 Dec 2025 11:40:51 +0100 Subject: [PATCH] jfjoch_viewer: Handle errors in reading files much better + include retry logic when using over DBus (to be seen if this requires some extra tweaking) --- viewer/JFJochImageReadingWorker.cpp | 203 +++++++++++++++++++++++++++- viewer/JFJochImageReadingWorker.h | 23 ++++ viewer/JFJochViewerWindow.cpp | 32 +++++ viewer/JFJochViewerWindow.h | 5 + 4 files changed, 259 insertions(+), 4 deletions(-) diff --git a/viewer/JFJochImageReadingWorker.cpp b/viewer/JFJochImageReadingWorker.cpp index 8240c684..01341eba 100644 --- a/viewer/JFJochImageReadingWorker.cpp +++ b/viewer/JFJochImageReadingWorker.cpp @@ -1,6 +1,11 @@ // SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only +#include +#include +#include +#include + #include "JFJochImageReadingWorker.h" #include "../image_analysis/geom_refinement/AssignSpotsToRings.h" #include "../image_analysis/spot_finding/StrongPixelSet.h" @@ -8,9 +13,53 @@ #include "../image_analysis/spot_finding/ImageSpotFinder.h" #include #include +#include #include "../preview/JFJochTIFF.h" +namespace { + enum class PreflightResult { + Ok, + NotYetVisible, // ENOENT, ESTALE, etc. + PermissionDenied, // EACCES, EPERM + IsDirectory, + OtherError + }; + + PreflightResult preflight_open_ro(const QString& filename, std::string& reason_out) { + reason_out.clear(); + + const QByteArray path = filename.toLocal8Bit(); + errno = 0; + const int fd = ::open(path.constData(), O_RDONLY | O_CLOEXEC); + if (fd >= 0) { + struct stat st; + if (fstat(fd, &st) == 0 && S_ISDIR(st.st_mode)) { + ::close(fd); + reason_out = "Path is a directory"; + return PreflightResult::IsDirectory; + } + ::close(fd); + return PreflightResult::Ok; + } + + const int e = errno; + reason_out = fmt::format("{} (errno={} {})", std::strerror(e), e, std::strerror(e)); + + // NFS can transiently report missing/stale entries. + if (e == ENOENT || e == ESTALE || e == EIO || e == ETIMEDOUT || e == ENOTCONN) + return PreflightResult::NotYetVisible; + + if (e == EACCES || e == EPERM) + return PreflightResult::PermissionDenied; + + if (e == EISDIR) + return PreflightResult::IsDirectory; + + return PreflightResult::OtherError; + } +} + JFJochImageReadingWorker::JFJochImageReadingWorker(const SpotFindingSettings &settings, const DiffractionExperiment &experiment, QObject *parent) : QObject(parent), @@ -25,12 +74,92 @@ JFJochImageReadingWorker::JFJochImageReadingWorker(const SpotFindingSettings &se autoload_timer = new QTimer(this); autoload_timer->setInterval(autoload_interval); connect(autoload_timer, &QTimer::timeout, this, &JFJochImageReadingWorker::AutoLoadTimerExpired); + + file_open_retry_timer = new QTimer(this); + file_open_retry_timer->setSingleShot(true); + connect(file_open_retry_timer, &QTimer::timeout, this, &JFJochImageReadingWorker::FileOpenRetryTimerExpired); } -void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry) { + +void JFJochImageReadingWorker::ResetFileOpenRetry_i() { + // Assumes m locked! + if (file_open_retry_timer) + file_open_retry_timer->stop(); + + // Signal UI to close the dialog if we were active + if (file_open_retry_active) + emit fileLoadRetryStatus(false, ""); + + file_open_retry_active = false; + file_open_retry_warned = false; + file_open_retry_attempts = 0; + file_open_retry_delay_ms = 50; + file_open_retry_elapsed.invalidate(); + pending_load = {}; +} + +void JFJochImageReadingWorker::ScheduleFileOpenRetry_i(const QString& reason) { + // Assumes m locked! + if (!file_open_retry_active) { + file_open_retry_active = true; + file_open_retry_warned = false; + file_open_retry_attempts = 0; + file_open_retry_delay_ms = 50; + file_open_retry_elapsed.restart(); + } + + if (!file_open_retry_warned) { + file_open_retry_warned = true; + logger.Warning(fmt::format( + "File '{}' not available yet (GPFS/NFS). Retrying with back-off up to 10 s. Reason: {}", + pending_load.filename.toStdString(), reason.toStdString())); + + // Signal UI to show the dialog + emit fileLoadRetryStatus(true, fmt::format("Waiting for file {} to appear on disk...", pending_load.filename.toStdString()).c_str()); + } else { + logger.Debug(fmt::format( + "Retry pending for file '{}'. Reason: {}", + pending_load.filename.toStdString(), reason.toStdString())); + } + + if (file_open_retry_elapsed.isValid() && file_open_retry_elapsed.elapsed() >= 10'000) { + std::string msg = fmt::format( + "Timed out waiting for file '{}' after 10 s ({} attempt(s))", + pending_load.filename.toStdString(), file_open_retry_attempts); + + logger.Error(msg); + + // Reset first (closes the progress dialog) + ResetFileOpenRetry_i(); + + // Then show the error dialog + emit fileLoadError("File Open Timeout", QString::fromStdString(msg)); + return; + } + + const int delay = file_open_retry_delay_ms; + file_open_retry_delay_ms = std::min(file_open_retry_delay_ms * 2, file_open_retry_delay_max_ms); + + if (file_open_retry_timer) + file_open_retry_timer->start(delay); +} + +void JFJochImageReadingWorker::FileOpenRetryTimerExpired() { + PendingLoadRequest req; + QMutexLocker ul(&m); - try { + if (!file_open_retry_active) + return; + + req = pending_load; + + // Re-trigger LoadFile, but keep retry=true so we stay in the retry loop. + LoadFile_i(req.filename, req.image_number, req.summation, true); +} + +void JFJochImageReadingWorker::LoadFile_i(const QString &filename, qint64 image_number, qint64 summation, bool retry) { + try { std::shared_ptr dataset; auto start = std::chrono::high_resolution_clock::now(); @@ -46,10 +175,61 @@ void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_nu } else { http_mode = false; + + if (retry) { + pending_load.filename = filename; + pending_load.image_number = image_number; + pending_load.summation = summation; + + std::string reason; + const PreflightResult pr = preflight_open_ro(filename, reason); + + switch (pr) { + case PreflightResult::Ok: + break; + case PreflightResult::NotYetVisible: + ScheduleFileOpenRetry_i(QString::fromStdString(reason)); + return; // IMPORTANT: do not try to open the file yet + case PreflightResult::PermissionDenied: + logger.Error(fmt::format( + "Permission denied opening '{}' (read-only preflight failed: {}). Not retrying.", + filename.toStdString(), reason)); + emit fileLoadError("Permission Denied", QString::fromStdString(reason)); + ResetFileOpenRetry_i(); + return; + case PreflightResult::IsDirectory: + logger.Error(fmt::format( + "Error opening '{}': {}. Not retrying.", + filename.toStdString(), reason)); + emit fileLoadError("Cannot open directory", QString::fromStdString(reason)); + ResetFileOpenRetry_i(); + return; + case PreflightResult::OtherError: + logger.Error(fmt::format( + "Other error '{}' (read-only preflight failed: {}). Not retrying.", + filename.toStdString(), reason)); + emit fileLoadError("File Open Error", QString::fromStdString(reason)); + ResetFileOpenRetry_i(); + return; + } + + // At this point we will attempt the real open. + file_open_retry_attempts++; + } + file_reader.ReadFile(filename.toStdString()); total_images = file_reader.GetNumberOfImages(); dataset = file_reader.GetDataset(); setAutoLoadMode_i(AutoloadMode::None); + + if (retry && file_open_retry_active) { + logger.Info(fmt::format( + "File '{}' opened after {} attempt(s), waited {} ms", + filename.toStdString(), + file_open_retry_attempts, + file_open_retry_elapsed.isValid() ? file_open_retry_elapsed.elapsed() : 0)); + } + ResetFileOpenRetry_i(); } current_image.reset(); @@ -66,16 +246,31 @@ void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_nu auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start).count(); - logger.Info("Loaded file {} in {} ms", filename.toStdString(), duration); + logger.Info(fmt::format("Loaded file {} in {} ms", filename.toStdString(), duration)); + LoadImage_i(image_number, summation); } catch (std::exception &e) { - logger.Error("Error loading file {} {}", filename.toStdString(), e.what()); + logger.Error(fmt::format("Error loading file {} {}", filename.toStdString(), e.what())); + emit fileLoadError("File Load Error", QString::fromStdString(e.what())); + ResetFileOpenRetry_i(); + + emit datasetLoaded({}); + emit imageLoaded({}); } } +void JFJochImageReadingWorker::LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry) { + QMutexLocker ul(&m); + + ResetFileOpenRetry_i(); + LoadFile_i(filename, image_number, summation, retry); +} + void JFJochImageReadingWorker::CloseFile() { QMutexLocker ul(&m); + ResetFileOpenRetry_i(); + if (http_mode) http_reader.Close(); else diff --git a/viewer/JFJochImageReadingWorker.h b/viewer/JFJochImageReadingWorker.h index d6ad60b7..5513aca9 100644 --- a/viewer/JFJochImageReadingWorker.h +++ b/viewer/JFJochImageReadingWorker.h @@ -76,6 +76,26 @@ private: int autoload_interval_max_ms = 2000; // 0.5 Hz as bottom limit float autoload_safety_factor = 2.0f; + // File open retry/back-off (GPFS via NFSv4 visibility lag) + struct PendingLoadRequest { + QString filename; + qint64 image_number = 0; + qint64 summation = 1; + }; + + QTimer *file_open_retry_timer = nullptr; + PendingLoadRequest pending_load; + bool file_open_retry_active = false; + bool file_open_retry_warned = false; + int file_open_retry_attempts = 0; + int file_open_retry_delay_ms = 100; + int file_open_retry_delay_max_ms = 10000; + QElapsedTimer file_open_retry_elapsed; + + void ResetFileOpenRetry_i(); + void ScheduleFileOpenRetry_i(const QString& reason); + + void LoadFile_i(const QString &filename, qint64 image_number, qint64 summation, bool retry); void LoadImage_i(int64_t image_number, int64_t summation); void ReanalyzeImage_i(); void UpdateDataset_i(const std::optional& experiment); @@ -91,6 +111,8 @@ signals: void setRings(const QVector &v); void simpleImageLoaded(std::shared_ptr image); void autoloadChanged(AutoloadMode mode); + void fileLoadError(QString title, QString message); + void fileLoadRetryStatus(bool active, QString message); public: JFJochImageReadingWorker(const SpotFindingSettings &settings, const DiffractionExperiment& experiment, QObject *parent = nullptr); @@ -98,6 +120,7 @@ public: private slots: void AutoLoadTimerExpired(); + void FileOpenRetryTimerExpired(); public slots: void LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry); diff --git a/viewer/JFJochViewerWindow.cpp b/viewer/JFJochViewerWindow.cpp index 01479635..4a97d19a 100644 --- a/viewer/JFJochViewerWindow.cpp +++ b/viewer/JFJochViewerWindow.cpp @@ -24,6 +24,7 @@ #include "toolbar/JFJochViewerToolbarImage.h" #include "windows/JFJoch2DAzintImageWindow.h" #include "windows/JFJochAzIntWindow.h" +#include JFJochViewerWindow::JFJochViewerWindow(QWidget *parent, bool dbus, const QString &file) : QMainWindow(parent) { menuBar = new JFJochViewerMenu(this); @@ -315,6 +316,12 @@ JFJochViewerWindow::JFJochViewerWindow(QWidget *parent, bool dbus, const QString connect(reading_worker, &JFJochImageReadingWorker::imageLoaded, this, [this](std::shared_ptr im) { lastImage = std::move(im); }); + connect(reading_worker, &JFJochImageReadingWorker::fileLoadError, + this, &JFJochViewerWindow::OnFileLoadError); + + connect(reading_worker, &JFJochImageReadingWorker::fileLoadRetryStatus, + this, &JFJochViewerWindow::OnFileLoadRetryStatus); + connect(menuBar, &JFJochViewerMenu::openDatasetInfo, this, &JFJochViewerWindow::NewDatasetInfo); NewDatasetInfo(); @@ -384,3 +391,28 @@ void JFJochViewerWindow::keyReleaseEvent(QKeyEvent *event) { } QMainWindow::keyReleaseEvent(event); } + +void JFJochViewerWindow::OnFileLoadError(QString title, QString message) { + QMessageBox::critical(this, title, message); +} + +void JFJochViewerWindow::OnFileLoadRetryStatus(bool active, QString message) { + if (active) { + if (!retryDialog) { + retryDialog = new QProgressDialog(this); + retryDialog->setWindowModality(Qt::WindowModal); + retryDialog->setRange(0, 0); // Infinite/Busy indicator + retryDialog->setCancelButton(nullptr); // Disable cancel for now + retryDialog->setMinimumDuration(0); // Show immediately + retryDialog->setWindowTitle("Loading File"); + } + retryDialog->setLabelText(message); + retryDialog->show(); + } else { + if (retryDialog) { + retryDialog->close(); + retryDialog->deleteLater(); + retryDialog = nullptr; + } + } +} \ No newline at end of file diff --git a/viewer/JFJochViewerWindow.h b/viewer/JFJochViewerWindow.h index 33472932..ae53a2c0 100644 --- a/viewer/JFJochViewerWindow.h +++ b/viewer/JFJochViewerWindow.h @@ -5,6 +5,7 @@ #define JFJOCHVIEWERWINDOW_H #include +#include #include "JFJochViewerMenu.h" #include "../reader/JFJochHDF5Reader.h" @@ -29,6 +30,8 @@ private: std::shared_ptr lastDataset; // added std::shared_ptr lastImage; // added + QProgressDialog *retryDialog = nullptr; + QThread *reading_thread; void keyPressEvent(QKeyEvent *event) override; @@ -37,6 +40,8 @@ public slots: void LoadFile(const QString &filename, qint64 image_number, qint64 summation, bool retry); void LoadImage(qint64 image_number, qint64 summation); void NewDatasetInfo(); + void OnFileLoadError(QString title, QString message); + void OnFileLoadRetryStatus(bool active, QString message); signals: void LoadFileRequest(const QString &filename, qint64 image_number, qint64 summation, bool retry);