From e4b30642542f21c189e3d9e32fbb8b05acd7e5a6 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Wed, 24 Jun 2026 18:51:30 +0200 Subject: [PATCH 01/17] Scaling: reference dataset, 3D combine (-P rot3d), R-meas, profile-fit integrator Reference dataset (a): LoadReferenceMtz adds column selection + cell/SG/resolution + a data-vs-reference consistency check; jfjoch_process/jfjoch_scale gain --reference-column; the viewer gets a Reference section in the MX settings dock (worker-owned, independent of the loaded dataset) that flows into reprocessing jobs. 3D combine (-P rot3d): Combine3D weight-sums a reflection's per-frame partials into one counting-limited full before merging (orthogonal ScalingSettings::combine_3d flag, not a partiality model), with a de-biased Poisson variance. Crystal 2: ISa 1.7->8.4, R-meas ~67%->18.9%, intensities unchanged (CCref held). Quality metrics (b): R-meas (Diederichs-Karplus) + redundancy columns in MergeStats; ISa logged. jfjoch fulls 18.9% vs XDS 4.5% (same ASU/run). Profile-fit integrator (experimental): ProfileIntegrate2D (--integrator gaussian|empirical) is a reference-free, rot3d-compatible profile-fit extraction (the decomposed PixelRefine intensity step). Gaussian: R-meas 18.9->14.6%, ISa ->9.5. Anisotropy/per-region add nothing (the discriminating info is in the discarded rocking direction). See NEXTGEN_INTEGRATOR.md. --dump-observations exports the unmerged fulls for XDS comparison. Co-Authored-By: Claude Opus 4.8 --- common/BraggIntegrationSettings.cpp | 9 + common/BraggIntegrationSettings.h | 9 + common/ScalingSettings.cpp | 9 + common/ScalingSettings.h | 7 + image_analysis/IndexAndRefine.cpp | 6 +- image_analysis/LoadFCalcFromMtz.cpp | 155 ++++++++--- image_analysis/LoadFCalcFromMtz.h | 51 +++- .../bragg_integration/CMakeLists.txt | 2 + .../bragg_integration/NEXTGEN_INTEGRATOR.md | 105 ++++++++ .../bragg_integration/ProfileIntegrate2D.cpp | 241 ++++++++++++++++++ .../bragg_integration/ProfileIntegrate2D.h | 52 ++++ image_analysis/scale_merge/CMakeLists.txt | 2 + image_analysis/scale_merge/Combine3D.cpp | 188 ++++++++++++++ image_analysis/scale_merge/Combine3D.h | 37 +++ image_analysis/scale_merge/Merge.cpp | 57 ++++- image_analysis/scale_merge/Merge.h | 4 + process/JFJochProcess.cpp | 21 +- process/JFJochProcess.h | 3 + tools/jfjoch_process.cpp | 74 +++++- tools/jfjoch_scale.cpp | 62 ++++- viewer/JFJochImageReadingWorker.cpp | 64 +++++ viewer/JFJochImageReadingWorker.h | 12 + viewer/JFJochViewerWindow.cpp | 4 + viewer/ReferenceMtzInfo.h | 21 ++ viewer/widgets/JFJochViewerSettingsDock.cpp | 67 +++++ viewer/widgets/JFJochViewerSettingsDock.h | 13 + viewer/windows/JFJochProcessingJobsWindow.cpp | 1 + 27 files changed, 1222 insertions(+), 54 deletions(-) create mode 100644 image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md create mode 100644 image_analysis/bragg_integration/ProfileIntegrate2D.cpp create mode 100644 image_analysis/bragg_integration/ProfileIntegrate2D.h create mode 100644 image_analysis/scale_merge/Combine3D.cpp create mode 100644 image_analysis/scale_merge/Combine3D.h create mode 100644 viewer/ReferenceMtzInfo.h diff --git a/common/BraggIntegrationSettings.cpp b/common/BraggIntegrationSettings.cpp index 6ea3381e..d9353b86 100644 --- a/common/BraggIntegrationSettings.cpp +++ b/common/BraggIntegrationSettings.cpp @@ -68,6 +68,15 @@ std::optional BraggIntegrationSettings::GetFixedProfileRadius_recipA() co return fixed_profile_radius; } +BraggIntegrationSettings &BraggIntegrationSettings::Integrator(IntegratorMode input) { + integrator_mode = input; + return *this; +} + +IntegratorMode BraggIntegrationSettings::GetIntegrator() const { + return integrator_mode; +} + float BraggIntegrationSettings::GetR1() const { return r_1; } diff --git a/common/BraggIntegrationSettings.h b/common/BraggIntegrationSettings.h index 02f30782..7eab221e 100644 --- a/common/BraggIntegrationSettings.h +++ b/common/BraggIntegrationSettings.h @@ -5,7 +5,14 @@ #include +// Spot-intensity extraction method. BoxSum = the classical uniform sum over a fixed disk +// (BraggIntegrate2D). ProfileGaussian / ProfileEmpirical = the draft ProfileIntegrate2D, which +// profile-fits with a measured-width Gaussian (v1) or an empirical profile learned per resolution +// shell from strong spots (v2) - see bragg_integration/NEXTGEN_INTEGRATOR.md. +enum class IntegratorMode { BoxSum, ProfileGaussian, ProfileEmpirical }; + class BraggIntegrationSettings { + IntegratorMode integrator_mode = IntegratorMode::BoxSum; float r_1 = 4; float r_2 = 6; float r_3 = 8; @@ -24,8 +31,10 @@ public: BraggIntegrationSettings& DMinLimit_A(float input); BraggIntegrationSettings& FixedProfileRadius_recipA(std::optional input); BraggIntegrationSettings& ProfileMultiplier(float input); + BraggIntegrationSettings& Integrator(IntegratorMode input); + [[nodiscard]] IntegratorMode GetIntegrator() const; [[nodiscard]] float GetR1() const; [[nodiscard]] float GetR2() const; [[nodiscard]] float GetR3() const; diff --git a/common/ScalingSettings.cpp b/common/ScalingSettings.cpp index 866d48a4..f0cd7ac5 100644 --- a/common/ScalingSettings.cpp +++ b/common/ScalingSettings.cpp @@ -124,6 +124,15 @@ ScalingSettings &ScalingSettings::OutlierRejectNsigma(double input) { return *this; } +ScalingSettings &ScalingSettings::Combine3D(bool input) { + combine_3d = input; + return *this; +} + +bool ScalingSettings::GetCombine3D() const { + return combine_3d; +} + double ScalingSettings::GetMinPartiality() const { return min_partiality; } diff --git a/common/ScalingSettings.h b/common/ScalingSettings.h index 09d591c1..cef40bbb 100644 --- a/common/ScalingSettings.h +++ b/common/ScalingSettings.h @@ -25,6 +25,11 @@ class ScalingSettings { double min_cc_for_image = 0.0; double outlier_reject_nsigma = 0.0; // per-observation merge outlier rejection (XDS/DIALS-style); 0 = off, e.g. 6 enables + // The "-P rot3d" 3D combine: scale per-frame as Rotation, then weight-sum each reflection's + // per-frame partials into one counting-limited full before merging (orthogonal to the partiality + // model, which stays Rotation - see Combine3D). + bool combine_3d = false; + double rfree_fraction = 0.05; IntensityFormat intensity_format = IntensityFormat::MTZ; @@ -39,6 +44,7 @@ public: ScalingSettings& MinPartiality(double min_partiality); ScalingSettings& MinCCForImage(double min_cc_for_image); ScalingSettings& OutlierRejectNsigma(double input); + ScalingSettings& Combine3D(bool input); ScalingSettings& RfreeFraction(double input); ScalingSettings& FileFormat(IntensityFormat input); @@ -66,6 +72,7 @@ public: [[nodiscard]] double GetMinPartiality() const; [[nodiscard]] double GetMinCCForImage() const; [[nodiscard]] double GetOutlierRejectNsigma() const; + [[nodiscard]] bool GetCombine3D() const; [[nodiscard]] double GetRfreeFraction() const; [[nodiscard]] IntensityFormat GetFileFormat() const; diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index 10ba4991..f47fb4f9 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -5,6 +5,7 @@ #include "IndexAndRefine.h" #include "bragg_integration/BraggIntegrate2D.h" +#include "bragg_integration/ProfileIntegrate2D.h" #include "bragg_integration/CalcISigma.h" #include "geom_refinement/XtalOptimizer.h" #include "indexing/AnalyzeIndexing.h" @@ -342,7 +343,10 @@ void IndexAndRefine::QuickPredictAndIntegrate(DataMessage &msg, msg.bragg_prediction_time_s = std::chrono::duration(pred_end_time - pred_start_time).count(); auto integration_start_time = std::chrono::steady_clock::now(); - i_outcome.reflections = BraggIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number); + i_outcome.reflections = + outcome.experiment.GetBraggIntegrationSettings().GetIntegrator() == IntegratorMode::BoxSum + ? BraggIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number) + : ProfileIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number); msg.integrated_reflections = i_outcome.reflections.size(); auto integration_end_time = std::chrono::steady_clock::now(); msg.integration_time_s = std::chrono::duration(integration_end_time - integration_start_time).count(); diff --git a/image_analysis/LoadFCalcFromMtz.cpp b/image_analysis/LoadFCalcFromMtz.cpp index 503ed9fd..2061b9d0 100644 --- a/image_analysis/LoadFCalcFromMtz.cpp +++ b/image_analysis/LoadFCalcFromMtz.cpp @@ -5,61 +5,156 @@ #include #include +#include +#include +#include #include #include #include +#include -#include "../common/Reflection.h" +namespace { -std::vector LoadFCalcFromMtz(const std::string& path) { +// Reference intensities can come from a calculated structure factor F-model (squared to an +// intensity), or from a merged/observed mean-intensity column (used directly - this is what lets +// PixelRefine be self-seeded from the data's own previous merge). column_with_one_of_labels would +// hide which label matched, so the priority list is walked explicitly to record the choice. +const gemmi::Mtz::Column* SelectDefaultColumn(const gemmi::Mtz& mtz, bool& square) { + if (const auto* col = mtz.column_with_label("F-model", nullptr, 'F')) { + square = true; + return col; + } + for (const char* label : {"IMEAN", "I", "IOBS", "Iobs", "I-obs"}) { + if (const auto* col = mtz.column_with_label(label, nullptr, 'J')) { + square = false; + return col; + } + } + for (const auto& c : mtz.columns) + if (c.type == 'J') { + square = false; + return &c; + } + return nullptr; +} + +} // namespace + +ReferenceMtzData LoadReferenceMtz(const std::string& path, + const std::optional& column) { gemmi::Mtz mtz; mtz.read_file_gz(path, true); - // Prefer a calculated structure factor F-model (e.g. a reference structure): I_ref = F^2. - const gemmi::Mtz::Column* col = mtz.column_with_label("F-model", nullptr, 'F'); - const bool square = (col != nullptr); + ReferenceMtzData out; - // Otherwise fall back to a merged/observed intensity column, used directly as I_ref. This - // lets PixelRefine be SELF-SEEDED from the data's own previous merge (a jfjoch merge writes - // IMEAN) instead of an external reference structure - the EM-style outer loop, free of the - // reference's non-isomorphism bias. - if (col == nullptr) { - for (const char* label : {"IMEAN", "I", "IOBS", "Iobs", "I-obs"}) { - col = mtz.column_with_label(label, nullptr, 'J'); - if (col != nullptr) - break; - } - } - if (col == nullptr) { - for (const auto& c : mtz.columns) - if (c.type == 'J') { col = &c; break; } // any mean-intensity column - } - if (col == nullptr) - throw std::runtime_error("MTZ has no F-model or intensity (J) column to use as reference"); + // Columns the caller may pick as reference intensities/structure factors. + for (const auto& c : mtz.columns) + if (c.type == 'J' || c.type == 'F') + out.candidate_columns.push_back({c.label, c.type}); - std::vector result; - result.reserve(static_cast(mtz.nreflections)); + const gemmi::Mtz::Column* col = nullptr; + if (column) { + col = mtz.column_with_label(*column, nullptr); + if (col == nullptr) + throw std::runtime_error("Reference MTZ has no column '" + *column + "'"); + out.squared = (col->type == 'F'); + out.default_column = false; + } else { + col = SelectDefaultColumn(mtz, out.squared); + if (col == nullptr) + throw std::runtime_error("MTZ has no F-model or intensity (J) column to use as reference"); + out.default_column = true; + } + out.used_column = col->label; + out.used_column_type = col->type; + + // Cell and space group the reference was recorded in, for display and the consistency check. + const gemmi::UnitCell& gc = mtz.get_cell(); + const bool cell_valid = gc.a > 0 && gc.b > 0 && gc.c > 0; + if (cell_valid) + out.cell = UnitCell{static_cast(gc.a), static_cast(gc.b), static_cast(gc.c), + static_cast(gc.alpha), static_cast(gc.beta), + static_cast(gc.gamma)}; + if (mtz.spacegroup != nullptr) { + out.space_group_number = mtz.spacegroup->number; + out.space_group_name = mtz.spacegroup->short_name(); + out.point_group = mtz.spacegroup->point_group_hm(); + } + + out.reflections.reserve(static_cast(mtz.nreflections)); const std::size_t stride = mtz.columns.size(); + double d_min = std::numeric_limits::max(); + double d_max = 0.0; for (int i = 0; i < mtz.nreflections; ++i) { - const std::size_t row = static_cast(i) * stride; const float v = (*col)[static_cast(i)]; - if (std::isnan(v)) continue; + const std::size_t row = static_cast(i) * stride; MergedReflection r; r.h = static_cast(mtz.data[row + 0]); r.k = static_cast(mtz.data[row + 1]); r.l = static_cast(mtz.data[row + 2]); - r.I = square ? v * v : v; + r.I = out.squared ? v * v : v; r.sigma = NAN; - r.d = 0.0f; - result.emplace_back(r); + if (cell_valid) { + r.d = static_cast(gc.calculate_d({{r.h, r.k, r.l}})); + if (std::isfinite(r.d) && r.d > 0.0f) { + d_min = std::min(d_min, r.d); + d_max = std::max(d_max, r.d); + } + } + + out.reflections.emplace_back(r); } - return result; + if (d_max > 0.0) { + out.d_min = d_min; + out.d_max = d_max; + } + + return out; +} + +std::string ReferenceConsistencyWarning(const ReferenceMtzData& reference, + const std::optional& data_cell, + std::optional data_space_group_number) { + std::vector issues; + + if (reference.cell && data_cell) { + const auto& r = *reference.cell; + const auto& d = *data_cell; + auto rel = [](float x, float y) { return y != 0.0f ? std::abs(x - y) / std::abs(y) : 0.0f; }; + const bool len_off = rel(r.a, d.a) > 0.02f || rel(r.b, d.b) > 0.02f || rel(r.c, d.c) > 0.02f; + const bool ang_off = std::abs(r.alpha - d.alpha) > 1.5f || std::abs(r.beta - d.beta) > 1.5f + || std::abs(r.gamma - d.gamma) > 1.5f; + if (len_off || ang_off) { + std::ostringstream ss; + ss.setf(std::ios::fixed); + ss.precision(2); + ss << "unit cell differs (reference " << r.a << " " << r.b << " " << r.c << " " + << r.alpha << " " << r.beta << " " << r.gamma << ", data " << d.a << " " << d.b + << " " << d.c << " " << d.alpha << " " << d.beta << " " << d.gamma << ")"; + issues.push_back(ss.str()); + } + } + + if (!reference.point_group.empty() && data_space_group_number) { + const auto* sg = gemmi::find_spacegroup_by_number(*data_space_group_number); + if (sg != nullptr && reference.point_group != sg->point_group_hm()) + issues.push_back("point group differs (reference " + reference.point_group + ", data " + + std::string(sg->point_group_hm()) + ")"); + } + + if (issues.empty()) + return {}; + + std::string out = "reference may not match the data: "; + for (std::size_t i = 0; i < issues.size(); ++i) + out += (i ? "; " : "") + issues[i]; + return out; } diff --git a/image_analysis/LoadFCalcFromMtz.h b/image_analysis/LoadFCalcFromMtz.h index ad2f29c9..3785ad79 100644 --- a/image_analysis/LoadFCalcFromMtz.h +++ b/image_analysis/LoadFCalcFromMtz.h @@ -3,6 +3,53 @@ #pragma once -#include "../common/Reflection.h" +#include +#include +#include -std::vector LoadFCalcFromMtz(const std::string& path); +#include "../common/Reflection.h" +#include "../common/UnitCell.h" + +// A column an MTZ offers as reference intensities/structure factors (type 'J' mean intensity or +// 'F' amplitude). Surfaced so the caller (CLI flag / viewer combo) can pick which one to scale +// against and report the choice, rather than the loader silently guessing. +struct ReferenceMtzColumn { + std::string label; + char type = '\0'; // 'J' = mean intensity, 'F' = structure-factor amplitude +}; + +// Reference reflections loaded from an MTZ, plus the metadata a user needs to judge whether the +// reference matches the data being scaled: which column was used, the cell / space group it was +// recorded in, its resolution range and reflection count. +struct ReferenceMtzData { + std::vector reflections; // h,k,l,I,d set (sigma stays NaN) + std::optional cell; + std::optional space_group_number; + std::string space_group_name; // Hermann-Mauguin short symbol, for display + std::string point_group; // point-group symbol, for the consistency check + std::string used_column; + char used_column_type = '\0'; + bool squared = false; // an 'F' column was squared to an intensity + bool default_column = true; // column was auto-selected, not user-specified + std::vector candidate_columns; + double d_min = 0.0; + double d_max = 0.0; +}; + +// Load reference reflections from an MTZ. With no column requested the smart default is used: +// a calculated structure factor F-model (squared to an intensity), else a merged/observed +// intensity column (IMEAN/I/IOBS/...), else any mean-intensity (J) column - this also lets +// PixelRefine be self-seeded from the data's own previous merge. A requested column overrides +// the default (an 'F'-type column is squared, a 'J'-type used directly). Throws if the requested +// column is missing or no usable column exists. +ReferenceMtzData LoadReferenceMtz(const std::string& path, + const std::optional& column = std::nullopt); + +// Compare a loaded reference against the data it will scale. Returns an empty string when they are +// consistent (or when the data cell / space group is unknown, so nothing can be said); otherwise a +// one-line, human-readable description of the mismatch to warn the user about. Reference intensities +// keyed in a different point group or cell do not correspond to the data, so this is the check that +// makes reference-based scaling and CCref trustworthy. +std::string ReferenceConsistencyWarning(const ReferenceMtzData& reference, + const std::optional& data_cell, + std::optional data_space_group_number); diff --git a/image_analysis/bragg_integration/CMakeLists.txt b/image_analysis/bragg_integration/CMakeLists.txt index 00820ba4..395f775c 100644 --- a/image_analysis/bragg_integration/CMakeLists.txt +++ b/image_analysis/bragg_integration/CMakeLists.txt @@ -1,6 +1,8 @@ ADD_LIBRARY(JFJochBraggIntegration STATIC BraggIntegrate2D.cpp BraggIntegrate2D.h + ProfileIntegrate2D.cpp + ProfileIntegrate2D.h Regression.h CalcISigma.cpp CalcISigma.h diff --git a/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md b/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md new file mode 100644 index 00000000..201dca40 --- /dev/null +++ b/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md @@ -0,0 +1,105 @@ +# Next-generation integrator — design draft (2026-06-24) + +## The question + +Can PixelRefine's **profile-fitted intensity extraction** be separated from its **reference-based +scaling**, so we get a profile-fitting integrator that needs no reference intensities — and could +PixelRefine then be rebuilt as *(profile-fit integrator) + (smarter scaling)*? + +**Yes.** They are already algebraically separate in the code. + +## Why we want it (the diagnosis) + +The residual ~4× gap to XDS on R-meas (jfjoch fulls 18.9% vs XDS 4.5%) was localized to the **2D +spot integration**, not background/scaling/partiality (see `scaling-results-roadmap` memory / the +session experiments): +- the gap is in **strong** reflections (jf/XDS ratio 2.1× weak → 6.5× strong), a ~18% *multiplicative* + per-observation floor, **flat across resolution**, **per-reflection** (only 6.6% of 20.7% is + per-image/scaling), and **immune to box radius** (tight/default/wide all ≈ 18–24%). +- => it is the **box-sum method** (uniform pixel weighting over a fixed disk), `BraggIntegrate2D`. + A fixed disk captures a *width-dependent fraction* of each spot; spot widths vary (mosaicity, + divergence, resolution, detector position, rocking phase) → ~18% multiplicative scatter. +- swapping in PixelRefine's profile-fit swung ISa 6.4→16.5 (confirming the integrator is the lever, + though that path is currently broken — see below). + +## The decomposition (PixelRefine today) + +PixelRefine does three things; only the third needs the reference: + +1. **Profile (Term 2).** Measure the tangential width `R1 = sqrt(2·)` per resolution shell + from the intensity-weighted 2nd moment of strong spots (`PixelRefine.cpp:433`). Reference-free. +2. **Extraction.** `J = Σ Pₚ(Iₚ−B)/vₚ / Σ Pₚ²/vₚ`, `var(J)=1/Σ Pₚ²/vₚ`, with `Pₚ` the + area-normalized tangential profile (Gaussian of width R1) and the **de-biased** variance + `vₚ = max(B,1)` (`PixelRefine.cpp:491-525`, METHODS §1). It also computes the **radial partiality** + `p = P_radial` and a detector-clipping **completeness**. Reference-free. +3. **Scaling (Term 1).** Fit per-image `G,B` from `J ≈ G·B_DW·p·pol·I_ref` (`PixelRefine.cpp:365`, + METHODS §4). **This is the only step that uses I_ref.** + +So **integrator = steps 1+2** (output `I=J/(p·pol)`, `σ`, `partiality`), **scaling = step 3** (or the +existing reference-free `ScaleOnTheFly`). A clean split. + +This split also **fixes the rot3d-combine incompatibility**: PixelRefine-as-a-whole emits +`image_scale_corr=1/G` with no rotation partiality, which is why `-r pixelrefine -P rot3d` produced +garbage σ (CCref 94→84, ⟨I/σ⟩→1.3). The *decomposed* integrator emits `partiality` like +`BraggIntegrate2D`, so `ScaleOnTheFly` + `Combine3D` + merge consume it unchanged. + +## The new-generation integrator + +A new module `ProfileIntegrate2D`, **swappable with `BraggIntegrate2D`**, same output +(`vector` with I, σ, partiality, d, …), **no reference**, no scaling. Algorithm: + +1. **Pass A (rough + learn the profile).** Box-sum every reflection (reuse `IntegrateReflection`) + to get a rough I and the observed centroid; select strong spots (significance ≥ 5). +2. **Learn an EMPIRICAL profile**, per *(detector region × resolution shell)*, by averaging the + background-subtracted, intensity-normalized, centroid-aligned pixel grids of the strong spots. + **This is the key difference from PixelRefine**, which uses a *Gaussian* of width R1×6 — a model + that has the *same width-mismatch* as a box (and ×6 ≈ near-box, which is why PixelRefine's stills + ISa was only 1.1). A *learned* profile matches the real spot — including the 0.4-px undersampling + smear, asymmetry, and position dependence — so the weighting is genuinely optimal. (XDS learns its + profile over 9 detector regions × resolution; this is that idea.) +3. **Pass B (profile-fit extract).** For each reflection, `I = Σ Pₚ(cₚ−B)/vₚ / Σ Pₚ²/vₚ` with the + learned `P` for its region/shell, de-biased variance `vₚ = B + max(I,0)·Pₚ` (Kabsch; iterate a + couple of times), `σ = sqrt(1/Σ Pₚ²/vₚ)` corrected for the signal term. Carry the rotation + `partiality` exactly as `BraggIntegrate2D` does (so rot3d works). + +**Why it removes the floor:** profile-weighting integrates *out to the tail* (capturing the +width-varying part a fixed disk clips) while *downweighting the noisy edge* (avoiding the wide-box +background penalty that made radius 6,9,12 worse). A *learned* profile (not a Gaussian) removes the +model-mismatch that limited PixelRefine. The floor is multiplicative and in strong reflections → this +is the right lever. + +**Caution (from PixelRefine):** a *tight* profile LOST to a generous box for stills (the 0.4-px +centroid undersampling floor). An empirical profile is immune to this *because it is measured from the +undersampled spots themselves* — the template already contains the smear. The lever is *matched +weighting + de-biased variance*, never a tight aperture. + +## Plug-in (no touch to rot3d / scaling / merge) + +- New `ProfileIntegrate2D(experiment, image, predicted, …) -> vector`, output-compatible + with `BraggIntegrate2D`. +- Select it like the geometry-refinement choice (a `BraggIntegrationSettings` enum or an + `IntegrationAlgorithm`), default box-sum. Everything downstream (ScaleOnTheFly, Combine3D, Merge) + is unchanged — they only see `Reflection{I,σ,partiality,d}`. +- Reusable by **stills and rotation** (and the FPGA path later, if the profile is precomputable). + +## Staging + +- **v1** — standalone profile-fit with the *measured-R1 Gaussian* (= PixelRefine steps 1+2 lifted out, + no reference). Cheap; reproduces PixelRefine extraction and proves the decomposition + the + rot3d-combine compatibility. Likely only a modest ISa gain (Gaussian model). +- **v2** — **empirical learned profile per resolution shell.** The expected real win. +- **v3** — empirical profile per *detector region × shell* (XDS-grade); anisotropic. + +## Validation + +A/B per-observation **R-meas / ISa** vs box-sum, both with rot3d, on crystal 2 (`fixed_master.h5`) +against `XDS_ASCII.HKL` (4.5%); repeat treating it as stills (crystal 1 jet) where box-sum's hidden +floor is ISa 1.1. Use `--dump-observations`. Target: close the 18% strong-reflection floor toward +XDS's ~3%. + +## "Could PixelRefine be rebuilt as integrator + scaling?" + +Yes: `ProfileIntegrate2D` (steps 1+2) + a scaling routine. The scaling routine can be the existing +reference-free `ScaleOnTheFly`, or a reference-driven one (Term 1) for the EM/self-seed loop — both +just consume `Reflection{I,σ,partiality}`. PixelRefine then becomes *= ProfileIntegrate2D + reference +ScaleOnTheFly*, and the experimental coupling (and its rot3d breakage) disappears. diff --git a/image_analysis/bragg_integration/ProfileIntegrate2D.cpp b/image_analysis/bragg_integration/ProfileIntegrate2D.cpp new file mode 100644 index 00000000..9d314d22 --- /dev/null +++ b/image_analysis/bragg_integration/ProfileIntegrate2D.cpp @@ -0,0 +1,241 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "ProfileIntegrate2D.h" + +#include +#include +#include +#include +#include + +#include "../../common/JFJochException.h" + +namespace { + +constexpr int N_SHELL = 6; +constexpr double STRONG_I_OVER_SIGMA = 5.0; +constexpr int MIN_STRONG_PER_SHELL = 30; + +// Rough box-sum of one reflection: total intensity over the inner disk (r1) minus the mean of the +// background ring (r2..r3), like BraggIntegrate2D. Used to seed the fit, pick strong spots for +// profile learning, and provide the per-reflection background. ok=false when the inner disk is +// clipped by a gap/edge/bad pixel (same rejection as the box-sum integrator). +struct Rough { + double I = 0.0, sigma = NAN, bkg = 0.0; + int cx = 0, cy = 0, shell = -1; + bool ok = false, strong = false; +}; + +template +Rough BoxSum(const T *image, size_t xpixel, size_t ypixel, int64_t special, int64_t saturation, + const Reflection &r, float r1_sq, float r2_sq, float r3_sq, float r3, float min_sigma_ratio) { + Rough out; + const int64_t x0 = std::max(0, std::floor(r.predicted_x - r3 - 1.0)); + const int64_t x1 = std::min(xpixel - 1, std::ceil(r.predicted_x + r3 + 1.0)); + const int64_t y0 = std::max(0, std::floor(r.predicted_y - r3 - 1.0)); + const int64_t y1 = std::min(ypixel - 1, std::ceil(r.predicted_y + r3 + 1.0)); + + int64_t I_sum = 0, n_inner = 0, n_inner_valid = 0; + double bkg_sum = 0.0; + int n_bkg = 0; + for (int64_t y = y0; y <= y1; ++y) + for (int64_t x = x0; x <= x1; ++x) { + const double d2 = (x - r.predicted_x) * (x - r.predicted_x) + (y - r.predicted_y) * (y - r.predicted_y); + const auto px = image[y * xpixel + x]; + if (d2 < r1_sq) { + ++n_inner; + if (px == special || px == saturation) continue; + I_sum += px; + ++n_inner_valid; + } else if (d2 >= r2_sq && d2 < r3_sq) { + if (px == special || px == saturation) continue; + bkg_sum += static_cast(px); + ++n_bkg; + } + } + if (n_inner_valid != n_inner || n_bkg <= 5) + return out; + out.bkg = bkg_sum / n_bkg; + out.I = static_cast(I_sum) - static_cast(n_inner) * out.bkg; + out.sigma = std::max(1.0, out.I * min_sigma_ratio); + if (I_sum > 0) + out.sigma = std::max(out.sigma, std::sqrt(static_cast(I_sum))); + out.cx = static_cast(std::lround(r.predicted_x)); + out.cy = static_cast(std::lround(r.predicted_y)); + out.ok = true; + out.strong = out.sigma > 0.0 && out.I / out.sigma >= STRONG_I_OVER_SIGMA; + return out; +} + +template +std::vector ProfileIntegrateInternal(const DiffractionExperiment &experiment, + const CompressedImage &image, + const std::vector &predicted, size_t npredicted, + int64_t special, int64_t saturation, int64_t image_number) { + const auto settings = experiment.GetBraggIntegrationSettings(); + const auto geom = experiment.GetDiffractionGeometry(); + const bool empirical = settings.GetIntegrator() == IntegratorMode::ProfileEmpirical; + + const float r1_sq = settings.GetR1() * settings.GetR1(); + const float r2 = settings.GetR2(), r2_sq = r2 * r2; + const float r3 = settings.GetR3(), r3_sq = r3 * r3; + const float min_sigma_ratio = settings.GetMinimumSigmaInRegardsToI(); + const int R = static_cast(std::ceil(r2)); // profile grid half-size + const int G = 2 * R + 1, GG = G * G; + auto idx = [G, R](int dx, int dy) { return (dy + R) * G + (dx + R); }; + + std::vector buffer; + const auto *ptr = reinterpret_cast(image.GetUncompressedPtr(buffer)); + const size_t xpixel = image.GetWidth(), ypixel = image.GetHeight(); + + // --- Pass A: box-sum every reflection (rough I, background, centre, strong flag). --- + std::vector rough(npredicted); + double inv_d2_min = std::numeric_limits::max(), inv_d2_max = 0.0; + for (size_t i = 0; i < npredicted; ++i) { + rough[i] = BoxSum(ptr, xpixel, ypixel, special, saturation, predicted[i], + r1_sq, r2_sq, r3_sq, r3, min_sigma_ratio); + if (rough[i].ok && predicted[i].d > 0.0f) { + const double inv_d2 = 1.0 / (predicted[i].d * predicted[i].d); + inv_d2_min = std::min(inv_d2_min, inv_d2); + inv_d2_max = std::max(inv_d2_max, inv_d2); + } + } + auto shell_of = [&](float d) { + if (!(d > 0.0f) || inv_d2_max <= inv_d2_min) return 0; + const double t = (1.0 / (d * d) - inv_d2_min) / (inv_d2_max - inv_d2_min); + return std::clamp(static_cast(t * N_SHELL), 0, N_SHELL - 1); + }; + for (size_t i = 0; i < npredicted; ++i) + if (rough[i].ok) + rough[i].shell = shell_of(predicted[i].d); + + // --- Learn the profile per shell from the strong spots (+ a global fallback). Each strong + // spot contributes its background-subtracted, intensity-normalised, centroid-aligned grid. --- + std::vector> shell_grid(N_SHELL, std::vector(GG, 0.0)); + std::vector shell_n(N_SHELL, 0); + std::vector global_grid(GG, 0.0); + int global_n = 0; + for (size_t i = 0; i < npredicted; ++i) { + const auto &rh = rough[i]; + if (!rh.ok || !rh.strong || rh.I <= 0.0) continue; + for (int dy = -R; dy <= R; ++dy) + for (int dx = -R; dx <= R; ++dx) { + const int64_t x = rh.cx + dx, y = rh.cy + dy; + if (x < 0 || y < 0 || x >= static_cast(xpixel) || y >= static_cast(ypixel)) continue; + const auto px = ptr[y * xpixel + x]; + if (px == special || px == saturation) continue; + const double v = (static_cast(px) - rh.bkg) / rh.I; + shell_grid[rh.shell][idx(dx, dy)] += v; + global_grid[idx(dx, dy)] += v; + } + ++shell_n[rh.shell]; + ++global_n; + } + + // Build a normalised profile (sum = 1): the empirical average, or an isotropic Gaussian of the + // same (measured) second moment - the spots are ~round in the detector plane, so radial/tangential + // anisotropy was measured and found to add nothing (the elongation is in the discarded rocking + // direction), and a single width is kept for simplicity. + auto build_profile = [&](const std::vector &grid, int n) { + std::vector P(GG, 0.0); + if (n <= 0) return P; + double sum = 0.0, m2 = 0.0, m2w = 0.0; + for (int dy = -R; dy <= R; ++dy) + for (int dx = -R; dx <= R; ++dx) { + const double g = std::max(0.0, grid[idx(dx, dy)]); + m2 += g * (dx * dx + dy * dy); + m2w += g; + sum += g; + if (empirical) P[idx(dx, dy)] = g; + } + if (sum <= 0.0) return P; + if (empirical) { + for (double &p : P) p /= sum; + } else { + const double sigma2 = std::max(0.25, (m2w > 0.0 ? m2 / m2w : 1.0) / 2.0); // = 2 sigma^2 (2D) + double gsum = 0.0; + for (int dy = -R; dy <= R; ++dy) + for (int dx = -R; dx <= R; ++dx) { + const double g = std::exp(-(dx * dx + dy * dy) / (2.0 * sigma2)); + P[idx(dx, dy)] = g; + gsum += g; + } + for (double &p : P) p /= gsum; + } + return P; + }; + const std::vector global_P = build_profile(global_grid, global_n); + std::vector> shell_P(N_SHELL); + for (int s = 0; s < N_SHELL; ++s) + shell_P[s] = shell_n[s] >= MIN_STRONG_PER_SHELL ? build_profile(shell_grid[s], shell_n[s]) : global_P; + + // --- Pass B: profile-fit each reflection (Kabsch, de-biased variance v = B + I*P; iterate). --- + std::vector out; + out.reserve(npredicted); + const auto pol = experiment.GetPolarizationFactor(); + for (size_t i = 0; i < npredicted; ++i) { + const auto &rh = rough[i]; + if (!rh.ok) continue; + const auto &P = shell_P[rh.shell < 0 ? 0 : rh.shell]; + const double B = std::max(rh.bkg, 1.0); + + double I = rh.I, den = 0.0; + for (int iter = 0; iter < 4; ++iter) { + double num = 0.0; + den = 0.0; + for (int dy = -R; dy <= R; ++dy) + for (int dx = -R; dx <= R; ++dx) { + const double Pp = P[idx(dx, dy)]; + if (Pp <= 0.0) continue; + const int64_t x = rh.cx + dx, y = rh.cy + dy; + if (x < 0 || y < 0 || x >= static_cast(xpixel) || y >= static_cast(ypixel)) continue; + const auto px = ptr[y * xpixel + x]; + if (px == special || px == saturation) continue; + const double v = B + std::max(0.0, I) * Pp; + num += Pp * (static_cast(px) - rh.bkg) / v; + den += Pp * Pp / v; + } + if (den > 0.0) I = num / den; else break; + } + if (!(den > 0.0)) continue; + + Reflection refl = predicted[i]; + refl.I = static_cast(I); + refl.sigma = static_cast(std::sqrt(1.0 / den)); + refl.bkg = static_cast(rh.bkg); + refl.observed = true; + if (pol) + refl.rlp /= geom.CalcAzIntPolarizationCorr(refl.predicted_x, refl.predicted_y, pol.value()); + refl.image_scale_corr = refl.rlp / refl.partiality; + refl.image_number = static_cast(image_number); + out.push_back(refl); + } + return out; +} + +} // namespace + +std::vector ProfileIntegrate2D(const DiffractionExperiment &experiment, + const CompressedImage &image, + const std::vector &predicted, size_t npredicted, + int64_t image_number) { + if (image.GetCompressedSize() == 0 || predicted.empty()) + return {}; + switch (image.GetMode()) { + case CompressedImageMode::Int8: + return ProfileIntegrateInternal(experiment, image, predicted, npredicted, INT8_MIN, INT8_MAX, image_number); + case CompressedImageMode::Int16: + return ProfileIntegrateInternal(experiment, image, predicted, npredicted, INT16_MIN, INT16_MAX, image_number); + case CompressedImageMode::Int32: + return ProfileIntegrateInternal(experiment, image, predicted, npredicted, INT32_MIN, INT32_MAX, image_number); + case CompressedImageMode::Uint8: + return ProfileIntegrateInternal(experiment, image, predicted, npredicted, UINT8_MAX, UINT8_MAX, image_number); + case CompressedImageMode::Uint16: + return ProfileIntegrateInternal(experiment, image, predicted, npredicted, UINT16_MAX, UINT16_MAX, image_number); + case CompressedImageMode::Uint32: + return ProfileIntegrateInternal(experiment, image, predicted, npredicted, UINT32_MAX, UINT32_MAX, image_number); + default: + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Image mode not supported"); + } +} diff --git a/image_analysis/bragg_integration/ProfileIntegrate2D.h b/image_analysis/bragg_integration/ProfileIntegrate2D.h new file mode 100644 index 00000000..af00ffc6 --- /dev/null +++ b/image_analysis/bragg_integration/ProfileIntegrate2D.h @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +// ============================================================================= +// ProfileIntegrate2D — profile-fitting 2D integrator (DRAFT / not yet wired in) +// ============================================================================= +// +// A drop-in alternative to BraggIntegrate2D that replaces uniform box summation with +// profile-fitted extraction - the part of PixelRefine that does NOT need reference +// intensities (PixelRefine = this extraction + a reference-based scaling step; here only +// the extraction is kept). See NEXTGEN_INTEGRATOR.md for the rationale: the residual ~4x +// R-meas/ISa gap to XDS is the box-sum method (a fixed disk captures a width-dependent +// fraction of each spot -> ~18% multiplicative per-observation floor on strong reflections). +// +// Output is a vector with I, sigma, partiality, d - IDENTICAL in shape to +// BraggIntegrate2D - so ScaleOnTheFly, Combine3D (-P rot3d) and the merge consume it +// unchanged, and it works for both stills and rotation. +// +// Algorithm (per frame): +// A. Box-sum every reflection (rough I + observed centroid); pick strong spots (signif>=5). +// B. Build the profile per resolution shell from the strong spots: a Gaussian of the measured +// second moment (ProfileGaussian, the keeper) or the empirical average grid (ProfileEmpirical). +// NOTE (measured 2026-06): radial/tangential ANISOTROPY and per-detector-region profiles were +// tried and add nothing - the 2D detector-plane spots are ~round, the real anisotropy is in the +// discarded rocking direction - so an isotropic per-shell width is kept. The empirical grid +// under-performs the Gaussian as built (per-frame, integer-binned -> sub-pixel-smeared + noisy). +// C. Profile-fit each reflection (Kabsch): I = sum P(c-B)/v over sum P^2/v, de-biased +// variance v = B + max(I,0)*P (iterate), sigma = sqrt(1/sum P^2/v). Carry the rotation +// partiality exactly as BraggIntegrate2D does. +// +// Staging: v1 = measured-R1 Gaussian (lifts PixelRefine's extraction out, proves the +// decomposition + rot3d-combine compatibility); v2 = empirical per-shell (the expected win); +// v3 = empirical per detector-region x shell (XDS-grade). +// +// EXPERIMENTAL: selected by BraggIntegrationSettings::Integrator (ProfileGaussian = v1, +// ProfileEmpirical = v2); jfjoch_process exposes it as `--integrator gaussian|empirical` (default +// box-sum). For A/B vs XDS_ASCII.HKL via --dump-observations. Not yet exposed in the OpenAPI / viewer. +// ============================================================================= + +#include + +#include "../../common/DiffractionExperiment.h" +#include "../../common/Reflection.h" +#include "../../common/CompressedImage.h" + +std::vector ProfileIntegrate2D(const DiffractionExperiment &experiment, + const CompressedImage &image, + const std::vector &predicted, + size_t npredicted, + int64_t image_number); diff --git a/image_analysis/scale_merge/CMakeLists.txt b/image_analysis/scale_merge/CMakeLists.txt index 59d75758..190040c7 100644 --- a/image_analysis/scale_merge/CMakeLists.txt +++ b/image_analysis/scale_merge/CMakeLists.txt @@ -5,6 +5,8 @@ ADD_LIBRARY(JFJochScaleMerge Merge.h ScaleOnTheFly.cpp ScaleOnTheFly.h + Combine3D.cpp + Combine3D.h HKLKey.cpp HKLKey.h ScalingResult.h diff --git a/image_analysis/scale_merge/Combine3D.cpp b/image_analysis/scale_merge/Combine3D.cpp new file mode 100644 index 00000000..1899e1c1 --- /dev/null +++ b/image_analysis/scale_merge/Combine3D.cpp @@ -0,0 +1,188 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "Combine3D.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + // A reflection of one event is the same raw (h,k,l) on a run of frames no more than this many + // apart - tolerating a single sub-threshold frame in the middle of the rocking curve. The same + // hkl only recurs after a large rotation (~180 deg), far beyond this, so events stay separate. + constexpr float MAX_FRAME_GAP = 2.0f; + + // Locates one partial inside the input outcomes. + struct PartialRef { + int outcome; + int reflection; + }; +} + +std::vector CombineRotationObservations( + const std::vector& partials, + const DiffractionExperiment& experiment, + Logger* logger, + const std::string& dump_path) { + + const double min_total_partiality = experiment.GetScalingSettings().GetMinPartiality(); + + std::ofstream dump; + if (!dump_path.empty()) { + dump.open(dump_path); + dump << "# h k l I sigma d n_frames captured_fraction\n"; + } + + // 1. Bucket every usable, already-scaled partial by raw (h,k,l). + std::map, std::vector> by_hkl; + size_t n_partials = 0; + for (int o = 0; o < static_cast(partials.size()); ++o) { + const auto& reflections = partials[o].reflections; + for (int j = 0; j < static_cast(reflections.size()); ++j) { + const auto& r = reflections[j]; + if (!std::isfinite(r.image_scale_corr) || r.image_scale_corr <= 0.0f) + continue; + if (!std::isfinite(r.I) || !std::isfinite(r.sigma) || r.sigma <= 0.0f) + continue; + by_hkl[{r.h, r.k, r.l}].push_back({o, j}); + ++n_partials; + } + } + + // 2. Output mirrors the input images (same geom/latt) so per-image masking still works; each + // full is later placed on its peak frame's outcome. + std::vector out(partials.size()); + for (size_t o = 0; o < partials.size(); ++o) { + out[o].geom = partials[o].geom; + out[o].latt = partials[o].latt; + out[o].mosaicity_deg = partials[o].mosaicity_deg; + } + + auto reflection_of = [&](const PartialRef& p) -> const Reflection& { + return partials[p.outcome].reflections[p.reflection]; + }; + + std::map multiplicity_histogram; // frames-per-event -> count + size_t n_events = 0; + + // 3. Within each hkl, split into contiguous-frame rocking events and weight-sum each into a full. + for (auto& [hkl, refs] : by_hkl) { + std::sort(refs.begin(), refs.end(), [&](const PartialRef& a, const PartialRef& b) { + return reflection_of(a).image_number < reflection_of(b).image_number; + }); + + size_t i = 0; + while (i < refs.size()) { + size_t k = i + 1; + float last_frame = reflection_of(refs[i]).image_number; + while (k < refs.size()) { + const float frame = reflection_of(refs[k]).image_number; + if (frame - last_frame > MAX_FRAME_GAP) + break; + last_frame = frame; + ++k; + } + + // First pass: a plain inverse-variance mean seeds F (and the event's peak frame / + // resolution / total captured rocking fraction). + double sum_w = 0.0, sum_wI = 0.0, sum_partiality = 0.0; + float d = NAN; + int peak_outcome = refs[i].outcome; + float peak_frame = reflection_of(refs[i]).image_number; + float peak_partiality = -1.0f; + for (size_t m = i; m < k; ++m) { + const auto& r = reflection_of(refs[m]); + const double sigma_corr = static_cast(r.sigma) * r.image_scale_corr; + const double w = 1.0 / (sigma_corr * sigma_corr); + sum_w += w; + sum_wI += w * static_cast(r.I) * r.image_scale_corr; + sum_partiality += r.partiality; + if (r.partiality > peak_partiality) { + peak_partiality = r.partiality; + peak_outcome = refs[m].outcome; + peak_frame = r.image_number; + } + if (!std::isfinite(d) && std::isfinite(r.d) && r.d > 0.0f) + d = r.d; + } + double F = sum_wI / sum_w; + + // De-biased Poisson reweighting (Kabsch profile fit): each frame's variance is its + // background noise corr^2*(sigma^2 - I) - the part of the counting variance not from + // signal - plus the *model* signal corr*F shared across the event. Using the model signal + // rather than the down-fluctuating observed I stops weak partials being over-weighted, + // the over-weighting that inflates the merge error model. Weights depend on F, so iterate. + // (Per-reflection rocking-curve recentring was tried here and overfits - it scatters the + // weak high-res reflections and lowers both CC1/2 and ISa - so it is deliberately omitted.) + for (int iter = 0; iter < 3; ++iter) { + sum_w = 0.0; + sum_wI = 0.0; + for (size_t m = i; m < k; ++m) { + const auto& r = reflection_of(refs[m]); + const double corr = r.image_scale_corr; + const double I_corr = static_cast(r.I) * corr; + const double sigma_corr = static_cast(r.sigma) * corr; + const double bkg_var = sigma_corr * sigma_corr - corr * I_corr; + double var = std::max(0.0, bkg_var) + corr * std::max(0.0, F); + if (!(var > 0.0)) + var = sigma_corr * sigma_corr; + const double w = 1.0 / var; + sum_w += w; + sum_wI += w * I_corr; + } + F = sum_wI / sum_w; + } + + const int n_frames = static_cast(k - i); + ++multiplicity_histogram[n_frames]; + ++n_events; + i = k; + + // The total captured rocking fraction replaces the per-partial min_partiality cut: an + // event seen at only a few percent of its curve is unreliable however many frames it spans. + if (sum_w <= 0.0 || sum_partiality < min_total_partiality) + continue; + + Reflection full{}; + full.h = std::get<0>(hkl); + full.k = std::get<1>(hkl); + full.l = std::get<2>(hkl); + full.I = static_cast(F); + full.sigma = static_cast(1.0 / std::sqrt(sum_w)); + full.d = d; + full.image_number = peak_frame; + full.partiality = 1.0f; // a combined full, not a slice + full.image_scale_corr = 1.0f; // already fully corrected + full.rlp = 1.0f; + full.observed = true; + out[peak_outcome].reflections.push_back(full); + + if (dump.is_open()) + dump << full.h << ' ' << full.k << ' ' << full.l << ' ' << full.I << ' ' + << full.sigma << ' ' << d << ' ' << n_frames << ' ' << sum_partiality << ' ' + << static_cast(peak_frame) << '\n'; + } + } + + if (logger != nullptr) { + size_t multi_frame = 0; + double frame_sum = 0.0; + for (const auto& [frames, count] : multiplicity_histogram) { + frame_sum += static_cast(frames) * count; + if (frames >= 2) + multi_frame += count; + } + const double mean_frames = n_events > 0 ? frame_sum / static_cast(n_events) : 0.0; + const double multi_pct = n_events > 0 ? 100.0 * multi_frame / static_cast(n_events) : 0.0; + logger->Info("3D combine: {} events from {} partials, mean {:.2f} frames/event, {:.1f}% multi-frame", + n_events, n_partials, mean_frames, multi_pct); + } + + return out; +} diff --git a/image_analysis/scale_merge/Combine3D.h b/image_analysis/scale_merge/Combine3D.h new file mode 100644 index 00000000..4b92c99f --- /dev/null +++ b/image_analysis/scale_merge/Combine3D.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include + +#include "../../common/DiffractionExperiment.h" +#include "../../common/Logger.h" +#include "../IntegrationOutcome.h" + +// Rotation 3D combine (the "-P rot3d" post-pass). +// +// Jungfraujoch integrates each frame as a 2D partial and recovers the full reflection by DIVIDING by +// its rocking-curve fraction (partiality). For rotation data one reflection is spread over several +// frames, so this produces several noisy partial observations of it. The merged intensity is already +// correct (the inverse-variance mean of the partials equals the profile-fit full), but the per-frame +// slicing scatter inflates the error model -> low ISa, high R-meas. +// +// This post-pass instead COMBINES the per-frame partials of one rocking event into a single +// counting-limited "full" observation by inverse-variance weight-summing (not dividing), before +// scaling/merging. Matches XDS's 3D integration on the per-observation statistics while keeping +// Jungfraujoch's per-frame-integrate-then-combine-at-the-end approach (no 3D shoebox / pixel re-read). +// +// Input: per-image outcomes whose reflections are ALREADY scaled (image_scale_corr and partiality +// filled, e.g. by ScaleOnTheFly under -P rot). Reflections of one event are identified as the same +// raw (h,k,l) on a contiguous run of frames. Output: per-image outcomes (same geom/latt) whose +// reflections are fulls (image_scale_corr = 1, partiality = 1, sigma = the combined counting sigma), +// one per event, assigned to the event's peak frame. +// dump_path (optional, diagnostic): if set, write each emitted full as a text row +// "h k l I sigma d n_frames captured_fraction" - the unmerged observations, for comparison against +// e.g. XDS_ASCII.HKL. +std::vector CombineRotationObservations( + const std::vector& partials, + const DiffractionExperiment& experiment, + Logger* logger = nullptr, + const std::string& dump_path = {}); diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index 5661d66e..4d33f60a 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -591,6 +591,18 @@ MergeStatistics MergeOnTheFly::MergeStats(const std::vector &m } } + // Per-reflection mean , and a per-reflection accumulator for R_meas - it needs |I_i - |, + // so the observations are visited again now that the means are known. + std::unordered_map merged_I; + merged_I.reserve(merged.size()); + for (const auto &m: merged) + if (std::isfinite(m.I)) + merged_I[generator(m).pack()] = m.I; + + struct RmeasObs { double sum_abs_dev = 0.0; double sum_I = 0.0; int n = 0; int shell = -1; }; + std::unordered_map rmeas_obs; + rmeas_obs.reserve(merged.size()); + for (int i = 0; i < integration_outcome.size(); ++i) { if (Mask(integration_outcome[i], true)) continue; @@ -614,11 +626,34 @@ MergeStatistics MergeOnTheFly::MergeStats(const std::vector &m if (!shell.has_value()) continue; const int s = *shell; - if (s >= 0 && s < n_shells) + if (s >= 0 && s < n_shells) { acc[s].total_obs++; + const auto key = generator(r).pack(); + const auto mit = merged_I.find(key); + if (mit != merged_I.end()) { + auto &ra = rmeas_obs[key]; + ra.sum_abs_dev += std::abs(static_cast(I_corr) - mit->second); + ra.sum_I += I_corr; + ra.n++; + ra.shell = s; + } + } } } + // R_meas per shell: sum over reflections of sqrt(n/(n-1)) * sum_i|I_i - |, over sum of I_i. + std::vector rmeas_num(n_shells, 0.0), rmeas_den(n_shells, 0.0); + double rmeas_num_all = 0.0, rmeas_den_all = 0.0; + for (const auto &[key, ra]: rmeas_obs) { + if (ra.n < 2 || ra.shell < 0 || ra.shell >= n_shells) + continue; + const double factor = std::sqrt(static_cast(ra.n) / (ra.n - 1)); + rmeas_num[ra.shell] += factor * ra.sum_abs_dev; + rmeas_den[ra.shell] += ra.sum_I; + rmeas_num_all += factor * ra.sum_abs_dev; + rmeas_den_all += ra.sum_I; + } + MergeStatistics out; out.shells.resize(n_shells); @@ -638,6 +673,7 @@ MergeStatistics MergeOnTheFly::MergeStats(const std::vector &m ss.cc_half = sa.cc_half.GetCC(); ss.cc_ref = sa.cc_ref.GetCC(); + ss.r_meas = rmeas_den[s] > 0.0 ? rmeas_num[s] / rmeas_den[s] : NAN; } auto &overall = out.overall; @@ -664,6 +700,7 @@ MergeStatistics MergeOnTheFly::MergeStats(const std::vector &m overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0; overall.cc_half = cc_half_overall.GetCC(); overall.cc_ref = cc_ref_overall.GetCC(); + overall.r_meas = rmeas_den_all > 0.0 ? rmeas_num_all / rmeas_den_all : NAN; return out; } @@ -671,13 +708,17 @@ MergeStatistics MergeOnTheFly::MergeStats(const std::vector &m std::ostream &operator<<(std::ostream &output, const MergeStatisticsShell &in) { double completeness = in.possible_unique_reflections > 0 ? static_cast(in.unique_reflections) / in.possible_unique_reflections * 100.0 : 0.0; + double multiplicity = in.unique_reflections > 0 + ? static_cast(in.total_observations) / in.unique_reflections : 0.0; - output << fmt::format("{:8d} {:8d} {:8d} {:7.1f}% {:8.1f} {:7.1f}% {:7.1f}%", + output << fmt::format("{:8d} {:8d} {:8d} {:7.1f}% {:7.1f} {:8.1f} {:7.1f}% {:7.1f}% {:7.1f}%", in.total_observations, in.unique_reflections, in.possible_unique_reflections, completeness, + multiplicity, in.mean_i_over_sigma, + in.r_meas*100.0, in.cc_half*100.0, in.cc_ref*100.0); return output; @@ -685,11 +726,11 @@ std::ostream &operator<<(std::ostream &output, const MergeStatisticsShell &in) { std::ostream &operator<<(std::ostream &output, const MergeStatistics &in) { output << std::endl; - output << fmt::format(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>8s}", - "d_min", "N_obs", "N_uniq", "N_possib", "Compl","", "CC1/2", "CCref") + output << fmt::format(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>7s} {:>8s} {:>8s} {:>8s} {:>8s}", + "d_min", "N_obs", "N_uniq", "N_possib", "Compl", "Mult", "", "R_meas", "CC1/2", "CCref") << std::endl; - output << fmt::format(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s}", - "", "", "", "", "", "", "", "") << std::endl; + output << fmt::format(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->7s} {:->8s} {:->8s} {:->8s} {:->8s}", + "", "", "", "", "", "", "", "", "", "") << std::endl; for (const auto &sh: in.shells) { if (sh.unique_reflections == 0) continue; @@ -697,8 +738,8 @@ std::ostream &operator<<(std::ostream &output, const MergeStatistics &in) { output << sh; output << std::endl; } - output << fmt::format(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s}", - "", "", "", "", "", "", "", "") << std::endl; + output << fmt::format(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->7s} {:->8s} {:->8s} {:->8s} {:->8s}", + "", "", "", "", "", "", "", "", "", "") << std::endl; output << fmt::format(" {:>8s} ", "Overall"); output << in.overall; diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 458d9dab..b45b89e4 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -28,6 +28,10 @@ struct MergeStatisticsShell { double cc_half = 0.0f; double cc_ref = NAN; + + // Redundancy-independent merging R-factor (Diederichs & Karplus 1997), computed over the + // observations that enter the merge: R_meas = sum_hkl sqrt(n/(n-1)) sum_i|I_i-| / sum I_i. + double r_meas = NAN; }; struct MergeStatistics { diff --git a/process/JFJochProcess.cpp b/process/JFJochProcess.cpp index 1cd76517..74e4004e 100644 --- a/process/JFJochProcess.cpp +++ b/process/JFJochProcess.cpp @@ -28,6 +28,7 @@ #include "../image_analysis/image_preprocessing/ImagePreprocessorBuffer.h" #include "../image_analysis/scale_merge/Merge.h" #include "../image_analysis/scale_merge/SearchSpaceGroup.h" +#include "../image_analysis/scale_merge/Combine3D.h" #include "../image_analysis/WriteReflections.h" namespace { @@ -387,15 +388,29 @@ ProcessResult JFJochProcess::Run(JFJochProcessObserver *observer) { } } + // -P rot3d: weight-sum each reflection's per-frame partials into one full before merging, so + // the error model sees counting statistics (high ISa) instead of rocking-curve slicing scatter. + const bool rot3d = experiment_.GetScalingSettings().GetCombine3D(); + std::vector combined; + if (rot3d) + combined = CombineRotationObservations(indexer->GetIntegrationOutcome(), experiment_, &logger, + config_.observation_dump_path); + const std::vector &merge_input = + rot3d ? combined : indexer->GetIntegrationOutcome(); + MergeOnTheFly merge_engine(experiment_); if (result.consensus_cell.has_value()) merge_engine.ReferenceCell(*result.consensus_cell); - merge_engine.RefineErrorModel(indexer->GetIntegrationOutcome()); - for (const auto &outcome: indexer->GetIntegrationOutcome()) + merge_engine.RefineErrorModel(merge_input); + if (merge_engine.ErrorModelActive()) + logger.Info("Error model: a={:.3f} b={:.3f} ISa={:.1f}", merge_engine.ErrorModelA(), + merge_engine.ErrorModelB(), + merge_engine.ErrorModelB() > 0 ? 1.0 / merge_engine.ErrorModelB() : 0.0); + for (const auto &outcome: merge_input) merge_engine.AddImage(outcome); auto merged_reflections = merge_engine.ExportReflections(); - auto merged_statistics = merge_engine.MergeStats(merged_reflections, indexer->GetIntegrationOutcome(), + auto merged_statistics = merge_engine.MergeStats(merged_reflections, merge_input, config_.reference_data); logger.Info("Merge complete ({} unique reflections)", merged_reflections.size()); diff --git a/process/JFJochProcess.h b/process/JFJochProcess.h index ecaf3c58..100f480f 100644 --- a/process/JFJochProcess.h +++ b/process/JFJochProcess.h @@ -54,6 +54,9 @@ struct ProcessConfig { bool run_scaling = false; int64_t scaling_iter = 3; std::vector reference_data; + + // Diagnostic: if set, the -P rot3d combine writes the unmerged fulls here (for comparison vs XDS). + std::string observation_dump_path; }; struct ProcessResult { diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 519fc328..682c76e5 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -49,7 +49,7 @@ void print_usage() { std::cout << " Scaling and merging" << std::endl; std::cout << " -M, --scale-merge Scale and merge (refine mosaicity) and write scaled.hkl + image.dat" << std::endl; - std::cout << " -P, --partiality Partiality refinement fixed|rot|unity (default: fixed)" << std::endl; + std::cout << " -P, --partiality Partiality model fixed|rot|rot3d|unity (default: fixed). rot3d = rot + 3D combine of per-frame partials" << std::endl; std::cout << " -A, --anomalous Anomalous mode (don't merge Friedel pairs)" << std::endl; std::cout << " -B, --refine-bfactor Refine per image B-factor" << std::endl; std::cout << " -w, --wedge[=num] Refine image wedge during scaling with starting wedge value" << std::endl; @@ -61,11 +61,13 @@ void print_usage() { std::cout << " --scaling-iterations Number of scaling iterations with no reference data (default: 3)" << std::endl; std::cout << " --scaling-output Output format for scaling results mtz|cif|txt (default: mtz)" << std::endl; std::cout << " -z, --reference-mtz Reference MTZ file" << std::endl; + std::cout << " --reference-column