Files
Jungfraujoch/process/JFJochProcess.h
T
leonarski_f and Claude Opus 4.8 77f1ed2566 jfjoch_process: default scale-fulls for rot3d; skip _process.h5 when merging
Two offline-processing ergonomics changes.

scale-fulls is now ON by default for -P rot3d (it refits the per-frame scale on
the combined fulls and lifts ISa substantially, e.g. HEWL rot3d 7.0 -> 16.4).
--scale-fulls stays as the explicit opt-in for non-rot3d order; new
--no-scale-fulls disables it for rot3d. (scale_fulls is now an optional<bool>
defaulting to combine_3d.) Note: on low-completeness data the Unity-reference
refit can cost a little CC1/2 (endothiapepsin ~70% complete: -5% in a mid shell);
pair with --reject-outliers for the full low-symmetry benefit.

When merging (-M), the merged reflections (.mtz/.cif) are the wanted output, so
the large per-image _process.h5 is no longer written by default - it routinely
ran to hundreds of MB. Pass --write-process-h5 to also emit it. Without merging
the _process.h5 is the only output and is always written. Implemented with a
ProcessConfig.write_process_h5 flag gating the FileWriter; reflection and
image.dat writing are unaffected.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 14:50:18 +02:00

124 lines
5.1 KiB
C++

// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#pragma once
#include <atomic>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>
#include "../common/DiffractionExperiment.h"
#include "../common/PixelMask.h"
#include "../common/CrystalLattice.h"
#include "../common/Reflection.h" // MergedReflection
#include "../common/JFJochMessages.h" // DataMessage
#include "../common/JFJochReceiverPlots.h" // MeanProcessingTime
#include "../image_analysis/spot_finding/SpotFindingSettings.h"
#include "../image_analysis/scale_merge/Merge.h" // MergeStatistics
#include "../image_analysis/scale_merge/TwinningAnalysis.h" // TwinningAnalysisResult
class JFJochHDF5Reader;
// Offline reprocessing of a stored Jungfraujoch HDF5 dataset, shared by jfjoch_process,
// jfjoch_azint and (later) the viewer. The full processing workflow lives here, not in the CLIs:
// setup, an optional two-pass rotation-indexing pre-pass, a parallel per-image loop (std::thread),
// an optional scaling/merging post-pass, and the _process.h5 output. The detector geometry and all
// algorithm settings are configured on the DiffractionExperiment by the caller; ProcessConfig only
// carries run control. Cancellable from any thread (e.g. SIGINT or a GUI button) via Cancel().
// Workflow selector stored in ProcessConfig::mode.
enum class ProcessMode {
    // Preprocess + azimuthal integration only (jfjoch_azint).
    AzimuthalIntegration = 0,
    // Spot finding + indexing + refinement + integration (jfjoch_process).
    FullAnalysis = 1
};
// Run-control settings handed to JFJochProcess. Every member carries a default, so a
// default-constructed ProcessConfig is usable as a starting point.
struct ProcessConfig {
    ProcessMode mode{ProcessMode::FullAnalysis};

    int start_image{0};
    int end_image{-1};  // -1 means: run to the end of the dataset
    int stride{1};
    int nthreads{1};

    // Prefix for the _process.h5 output (and scaled reflections). Leave empty to process
    // without writing anything.
    std::string output_prefix;

    // Controls whether the per-image _process.h5 is written. True by default; jfjoch_process
    // clears it when merging (the .mtz/.cif is the wanted output and the h5 is large) unless
    // --write-process-h5 is given.
    bool write_process_h5{true};

    // Spot finding settings (FullAnalysis).
    SpotFindingSettings spot_finding;

    // --- Rotation indexing (FullAnalysis) ---
    bool rotation_indexing{false};
    bool two_pass_rotation{true};
    bool reuse_rotation_spots{true};
    int rotation_indexing_image_count{100};
    std::optional<CrystalLattice> forced_rotation_lattice;

    // --- Scaling / merging (FullAnalysis) ---
    // When reference_data (taken from a reference MTZ) is present, it additionally drives the
    // per-image live scaling path.
    bool run_scaling{false};
    int64_t scaling_iter{3};
    std::vector<MergedReflection> reference_data;

    // Diagnostic output: when non-empty, the -P rot3d combine writes the unmerged fulls to
    // this path (for comparison vs XDS).
    std::string observation_dump_path;
};
// Summary returned by JFJochProcess::Run().
struct ProcessResult {
    bool cancelled{false};
    uint64_t images_processed{0};
    double processing_time_s{0.0};
    double frame_rate_hz{0.0};
    double throughput_MBs{0.0};

    std::optional<float> indexing_rate;
    std::optional<UnitCell> consensus_cell;
    bool rotation_lattice_found{false};
    MeanProcessingTime mean_processing_time{};
    std::optional<std::string> written_master_path;

    // Filled in when scaling/merging ran.
    std::string merge_statistics_text;

    // Structured merge statistics (per-shell + overall); filled in when scaling/merging ran.
    // error_model_isa is the error-model asymptotic I/sigma (1/b). has_reference is true when
    // a reference MTZ drove CCref.
    bool has_merge_statistics{false};
    MergeStatistics merge_statistics;
    double error_model_isa{0.0};
    bool has_reference{false};

    // Twinning analysis of the final merged intensities; l_test_pairs == 0 means it was not
    // computed.
    TwinningAnalysisResult twinning;

    // Space group found by the search (only when the user did not fix one). It is used for the
    // final re-scale/merge and written to the master file.
    std::optional<int64_t> space_group_number;
};
// Observer interface delivering progress and live results. The methods may be invoked from
// worker threads, so any override has to be thread-safe. Every hook defaults to a no-op, which
// is sufficient for the CLIs.
class JFJochProcessObserver {
public:
    virtual ~JFJochProcessObserver() = default;

    // The base-class hooks below deliberately ignore their arguments; override the ones needed.
    virtual void OnPhase([[maybe_unused]] const std::string &phase) {}

    virtual void OnProgress([[maybe_unused]] uint64_t done,
                            [[maybe_unused]] uint64_t total) {}

    virtual void OnImageProcessed([[maybe_unused]] const DataMessage &msg) {}
};
// Runs one offline processing workflow over a stored dataset, with cooperative cancellation.
class JFJochProcess {
public:
    // The reader is taken by reference; experiment, pixel mask and config are taken by value.
    // NOTE(review): reader_ is a reference member, so the reader presumably has to outlive
    // this object - confirm against the constructor definition.
    JFJochProcess(JFJochHDF5Reader &reader, DiffractionExperiment experiment,
                  PixelMask pixel_mask, ProcessConfig config);

    // Executes the configured workflow until it completes or Cancel() is requested.
    // Throws on setup failure. The observer is optional (nullptr by default).
    ProcessResult Run(JFJochProcessObserver *observer = nullptr);

    // Asks the run to stop. Safe to call from any thread; the worker loop checks the flag
    // between images.
    void Cancel() { cancelled_.store(true); }

    // Reports whether cancellation has been requested.
    bool IsCancelled() const { return cancelled_.load(); }

private:
    JFJochHDF5Reader &reader_;
    DiffractionExperiment experiment_;
    PixelMask pixel_mask_;
    ProcessConfig config_;
    std::atomic<bool> cancelled_{false};  // set by Cancel(), read by IsCancelled()
};