From 38ea0ec237d0a7ffa0eb1e3855724be05586e95b Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Thu, 25 Jun 2026 15:02:52 +0200 Subject: [PATCH] Remove the experimental PixelRefine integrator On the LysozymeJet5 serial stills the default Gaussian profile-fit integrator (ProfileIntegrate2D) + reference scaling matched or beat whole-PixelRefine on every per-shell CC1/2 (overall 95.7% vs 91.9%), ISa (1.6 vs 1.2) and R-meas (98.5% vs 175%), with CCref a tie -- so PixelRefine has no remaining advantage. Reference-based per-image scaling is integrator-agnostic (IndexAndRefine::ReferenceIntensities builds a ScaleOnTheFly(experiment, reference) applied to any integrator's output), so the reference-dataset feature (CCref + reference scaling) is kept. Delete image_analysis/pixel_refinement/, GeomRefinementAlgorithmEnum:: PixelRefine and its gates, BraggIntegrationSettings::ProfileMultiplier (PixelRefine-only; R1 is shared and kept), and the -r pixelrefine / --profile-multiplier CLI. The inherited lessons (mean background, de-biased variance, tight-profile-loses / centroid floor, R-refinement futile) are folded into NEXTGEN_INTEGRATOR.md. NOTE: this transiently breaks the viewer build -- the committed viewer still references the removed enum and ProfileMultiplier. It is fixed in the next commit (the viewer feature work), held separate while the viewer UI is being tested. Co-Authored-By: Claude Opus 4.8 --- common/BraggIntegrationSettings.cpp | 11 - common/BraggIntegrationSettings.h | 6 - common/IndexingSettings.h | 2 +- image_analysis/CMakeLists.txt | 3 +- image_analysis/IndexAndRefine.cpp | 115 +-- image_analysis/IndexAndRefine.h | 13 - image_analysis/LoadFCalcFromMtz.cpp | 2 +- image_analysis/LoadFCalcFromMtz.h | 2 +- .../bragg_integration/NEXTGEN_INTEGRATOR.md | 43 +- .../bragg_integration/ProfileIntegrate2D.h | 15 +- .../bragg_prediction/BraggPrediction.cpp | 6 +- .../pixel_refinement/CMakeLists.txt | 2 - .../pixel_refinement/FACTORED_MODEL.md | 142 ---- .../pixel_refinement/FINDINGS-2026-06.md | 122 --- image_analysis/pixel_refinement/METHODS.md | 214 ------ .../pixel_refinement/PixelRefine.cpp | 708 ------------------ image_analysis/pixel_refinement/PixelRefine.h | 135 ---- process/JFJochProcess.cpp | 9 +- process/JFJochProcessCommandLine.cpp | 1 - tools/jfjoch_process.cpp | 31 +- 20 files changed, 64 insertions(+), 1518 deletions(-) delete mode 100644 image_analysis/pixel_refinement/CMakeLists.txt delete mode 100644 image_analysis/pixel_refinement/FACTORED_MODEL.md delete mode 100644 image_analysis/pixel_refinement/FINDINGS-2026-06.md delete mode 100644 image_analysis/pixel_refinement/METHODS.md delete mode 100644 image_analysis/pixel_refinement/PixelRefine.cpp delete mode 100644 image_analysis/pixel_refinement/PixelRefine.h diff --git a/common/BraggIntegrationSettings.cpp b/common/BraggIntegrationSettings.cpp index d9353b86..d69a0028 100644 --- a/common/BraggIntegrationSettings.cpp +++ b/common/BraggIntegrationSettings.cpp @@ -81,17 +81,6 @@ float BraggIntegrationSettings::GetR1() const { return r_1; } -float BraggIntegrationSettings::GetProfileMultiplier() const { - return profile_multiplier; -} - -BraggIntegrationSettings &BraggIntegrationSettings::ProfileMultiplier(float input) { - if (input <= 0.0f) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Profile multiplier must be positive"); - profile_multiplier = input; - return *this; -} - float BraggIntegrationSettings::GetR2() const { return r_2; } diff --git a/common/BraggIntegrationSettings.h b/common/BraggIntegrationSettings.h index 0b35c7ee..073a5d30 100644 --- a/common/BraggIntegrationSettings.h +++ b/common/BraggIntegrationSettings.h @@ -20,10 +20,6 @@ class BraggIntegrationSettings { float d_min_limit_A = 1.0; std::optional fixed_profile_radius; float minimum_sigma_in_regards_to_i = 0.02; - // PixelRefine only: the measured (physical) tangential profile width is multiplied by - // this for the integration template, so the aperture is generous (XDS-style ~6 sigma) - // and tolerant of the centroid-floor scatter. Ignored by the classical integrator. - float profile_multiplier = 6.0; public: BraggIntegrationSettings& R1(float input); @@ -31,7 +27,6 @@ public: BraggIntegrationSettings& R3(float input); BraggIntegrationSettings& DMinLimit_A(float input); BraggIntegrationSettings& FixedProfileRadius_recipA(std::optional input); - BraggIntegrationSettings& ProfileMultiplier(float input); BraggIntegrationSettings& Integrator(IntegratorMode input); @@ -39,7 +34,6 @@ public: [[nodiscard]] float GetR1() const; [[nodiscard]] float GetR2() const; [[nodiscard]] float GetR3() const; - [[nodiscard]] float GetProfileMultiplier() const; [[nodiscard]] std::optional GetFixedProfileRadius_recipA() const; [[nodiscard]] float GetDMinLimit_A() const; diff --git a/common/IndexingSettings.h b/common/IndexingSettings.h index c428f02e..6e900440 100644 --- a/common/IndexingSettings.h +++ b/common/IndexingSettings.h @@ -6,7 +6,7 @@ #include enum class IndexingAlgorithmEnum {FFBIDX, FFT, FFTW, Auto, None}; -enum class GeomRefinementAlgorithmEnum {None, OrientationOnly, BeamCenter, PixelRefine}; +enum class GeomRefinementAlgorithmEnum {None, OrientationOnly, BeamCenter}; class IndexingSettings { IndexingAlgorithmEnum algorithm; diff --git a/image_analysis/CMakeLists.txt b/image_analysis/CMakeLists.txt index 57d3b884..7a9f08e0 100644 --- a/image_analysis/CMakeLists.txt +++ b/image_analysis/CMakeLists.txt @@ -52,6 +52,5 @@ ADD_SUBDIRECTORY(scale_merge) ADD_SUBDIRECTORY(image_preprocessing) ADD_SUBDIRECTORY(azint) ADD_SUBDIRECTORY(roi) -ADD_SUBDIRECTORY(pixel_refinement) -TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochROIIntegration JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge JFJochPixelRefine gemmi) +TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochROIIntegration JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge gemmi) diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index f47fb4f9..64a394f0 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -206,10 +206,6 @@ void IndexAndRefine::RefineGeometryIfNeeded(DataMessage &msg, IndexAndRefine::In XtalOptimizerRotationOnly(data, msg.spots, 0.05); break; case GeomRefinementAlgorithmEnum::BeamCenter: - case GeomRefinementAlgorithmEnum::PixelRefine: - // PixelRefine still benefits from the classical beam-center + cell - // refinement as a starting point; the pixel-level refinement runs - // later, during integration. if (XtalOptimizer(data, {msg.spots})) { outcome.experiment.BeamX_pxl(data.geom.GetBeamX_pxl()) .BeamY_pxl(data.geom.GetBeamY_pxl()); @@ -323,34 +319,20 @@ void IndexAndRefine::QuickPredictAndIntegrate(DataMessage &msg, .bandwidth_sigma = experiment.GetBandwidthFWHM().value_or(0.0f) / 2.3548f }; - // Select the integration path: classical 2D integration, or the experimental - // PixelRefine joint geometry/profile/scale refinement (which also produces the - // integrated reflections that flow into the normal save/merge). - const bool use_pixel_refine = - experiment.GetIndexingSettings().GetGeomRefinementAlgorithm() == GeomRefinementAlgorithmEnum::PixelRefine - && !pixel_reference_.empty(); + // Predict, then integrate with the selected integrator (box-sum or profile-fit). + auto pred_start_time = std::chrono::steady_clock::now(); + auto nrefl = prediction.Calc(outcome.experiment, latt, settings_prediction); + auto pred_end_time = std::chrono::steady_clock::now(); + msg.bragg_prediction_time_s = std::chrono::duration(pred_end_time - pred_start_time).count(); - if (use_pixel_refine) { - auto integration_start_time = std::chrono::steady_clock::now(); - PixelRefineIntegrate(msg, image, prediction, outcome, i_outcome); - msg.integrated_reflections = i_outcome.reflections.size(); - auto integration_end_time = std::chrono::steady_clock::now(); - msg.integration_time_s = std::chrono::duration(integration_end_time - integration_start_time).count(); - } else { - auto pred_start_time = std::chrono::steady_clock::now(); - auto nrefl = prediction.Calc(outcome.experiment, latt, settings_prediction); - auto pred_end_time = std::chrono::steady_clock::now(); - msg.bragg_prediction_time_s = std::chrono::duration(pred_end_time - pred_start_time).count(); - - auto integration_start_time = std::chrono::steady_clock::now(); - i_outcome.reflections = - outcome.experiment.GetBraggIntegrationSettings().GetIntegrator() == IntegratorMode::BoxSum - ? BraggIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number) - : ProfileIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number); - msg.integrated_reflections = i_outcome.reflections.size(); - auto integration_end_time = std::chrono::steady_clock::now(); - msg.integration_time_s = std::chrono::duration(integration_end_time - integration_start_time).count(); - } + auto integration_start_time = std::chrono::steady_clock::now(); + i_outcome.reflections = + outcome.experiment.GetBraggIntegrationSettings().GetIntegrator() == IntegratorMode::BoxSum + ? BraggIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number) + : ProfileIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number); + msg.integrated_reflections = i_outcome.reflections.size(); + auto integration_end_time = std::chrono::steady_clock::now(); + msg.integration_time_s = std::chrono::duration(integration_end_time - integration_start_time).count(); constexpr size_t kMaxReflections = 10000; if (i_outcome.reflections.size() > kMaxReflections) { @@ -371,10 +353,7 @@ void IndexAndRefine::QuickPredictAndIntegrate(DataMessage &msg, CalcISigma(msg, i_outcome.reflections); CalcWilsonBFactor(msg, i_outcome.reflections); - // PixelRefine produces already-scaled reflections; only the classical path - // needs the separate ScaleOnTheFly step. - if (!use_pixel_refine) - ScaleImage(msg, i_outcome); + ScaleImage(msg, i_outcome); // Copy reflections to outgoing message msg.reflections = i_outcome.reflections; @@ -434,7 +413,6 @@ std::optional IndexAndRefine::FinalizeRotationIndexing() IndexAndRefine &IndexAndRefine::ReferenceIntensities(std::vector &reference) { scaling_engine = std::make_unique(experiment, reference); - pixel_reference_ = reference; // kept for the experimental PixelRefine path return *this; } @@ -454,71 +432,6 @@ void IndexAndRefine::ScaleImage(DataMessage &msg, IntegrationOutcome& outcome) { msg.image_scale_time_s = std::chrono::duration(scaling_end_time - scaling_start_time).count(); } -bool IndexAndRefine::PixelRefineIntegrate(DataMessage &msg, - const CompressedImage &image, - BraggPrediction &prediction, - const IndexAndRefine::IndexingOutcome &outcome, - IntegrationOutcome &i_outcome) { - if (!outcome.lattice_candidate) - return false; - - // Build the engine once (lazy). - std::call_once(pixel_refine_once_, [&] { - pixel_refine_ = std::make_unique(experiment, pixel_reference_); - }); - if (!pixel_refine_) - return false; - - PixelRefineData prd; - prd.geom = outcome.experiment.GetDiffractionGeometry(); - prd.latt = *outcome.lattice_candidate; - prd.centering = outcome.symmetry.centering; - if (const auto bw = experiment.GetBandwidthFWHM()) - prd.bandwidth = bw.value() / 2.3548; // FWHM -> sigma - - // Signal-box radius from the shared integration setting (same knob as BraggIntegrate2D). - prd.shoebox_radius = static_cast(std::lround(experiment.GetBraggIntegrationSettings().GetR1())); - prd.r1_multiplier = experiment.GetBraggIntegrationSettings().GetProfileMultiplier(); - - std::vector buffer; - const uint8_t *ptr = image.GetUncompressedPtr(buffer); - switch (image.GetMode()) { - case CompressedImageMode::Int8: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Int16: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Int32: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Uint8: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Uint16: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Uint32: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - default: - return false; - } - - // PixelRefine output flows into the normal save/merge path: the refined - // geometry/lattice and the already-scaled reflections become the outcome. - i_outcome.reflections = std::move(prd.reflections); - i_outcome.geom = prd.geom; - i_outcome.latt = prd.latt; - i_outcome.image_scale_g = static_cast(prd.scale_factor); - i_outcome.image_scale_b_factor_Ang2 = static_cast(prd.B_factor); - if (std::isfinite(prd.cc)) { - i_outcome.image_scale_cc = static_cast(prd.cc); - i_outcome.image_scale_cc_n = prd.cc_n; - } - - msg.image_scale_factor = static_cast(prd.scale_factor); - msg.image_scale_cc = i_outcome.image_scale_cc; - if (prd.B_factor != 0.0) - msg.image_scale_b_factor = static_cast(prd.B_factor); - - return true; -} - ScalingResult IndexAndRefine::ScaleAllImages(const std::vector &reference, size_t nthreads) { ScaleOnTheFly scaling(experiment, reference); scaling.Scale(integration_outcome, nthreads); diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index 5f42194d..b8d2a0ed 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -11,7 +11,6 @@ #include "../common/AzimuthalIntegrationMapping.h" #include "../common/AzimuthalIntegrationProfile.h" #include "bragg_prediction/BraggPrediction.h" -#include "pixel_refinement/PixelRefine.h" #include "indexing/IndexerThreadPool.h" #include "lattice_search/LatticeSearch.h" #include "rotation_indexer/RotationIndexer.h" @@ -70,18 +69,6 @@ class IndexAndRefine { std::unique_ptr scaling_engine; void ScaleImage(DataMessage &msg, IntegrationOutcome& outcome); - // Experimental PixelRefine integration path (selected via - // GeomRefinementAlgorithmEnum::PixelRefine). Needs reference intensities; the - // engine is built lazily on first use (when the azimuthal mapping is known) - // and is safe to share across threads (prediction is supplied per call). - std::vector pixel_reference_; - std::unique_ptr pixel_refine_; - std::once_flag pixel_refine_once_; - bool PixelRefineIntegrate(DataMessage &msg, - const CompressedImage &image, - BraggPrediction &prediction, - const IndexingOutcome &outcome, - IntegrationOutcome &i_outcome); std::optional RotationAngle(int64_t image) const; // mid-exposure angle for the indexer public: IndexAndRefine(const DiffractionExperiment &x, IndexerThreadPool *indexer); diff --git a/image_analysis/LoadFCalcFromMtz.cpp b/image_analysis/LoadFCalcFromMtz.cpp index 2061b9d0..a618538b 100644 --- a/image_analysis/LoadFCalcFromMtz.cpp +++ b/image_analysis/LoadFCalcFromMtz.cpp @@ -18,7 +18,7 @@ namespace { // Reference intensities can come from a calculated structure factor F-model (squared to an // intensity), or from a merged/observed mean-intensity column (used directly - this is what lets -// PixelRefine be self-seeded from the data's own previous merge). column_with_one_of_labels would +// reference-based scaling be self-seeded from the data's own previous merge). column_with_one_of_labels would // hide which label matched, so the priority list is walked explicitly to record the choice. const gemmi::Mtz::Column* SelectDefaultColumn(const gemmi::Mtz& mtz, bool& square) { if (const auto* col = mtz.column_with_label("F-model", nullptr, 'F')) { diff --git a/image_analysis/LoadFCalcFromMtz.h b/image_analysis/LoadFCalcFromMtz.h index 3785ad79..284867e2 100644 --- a/image_analysis/LoadFCalcFromMtz.h +++ b/image_analysis/LoadFCalcFromMtz.h @@ -39,7 +39,7 @@ struct ReferenceMtzData { // Load reference reflections from an MTZ. With no column requested the smart default is used: // a calculated structure factor F-model (squared to an intensity), else a merged/observed // intensity column (IMEAN/I/IOBS/...), else any mean-intensity (J) column - this also lets -// PixelRefine be self-seeded from the data's own previous merge. A requested column overrides +// reference-based scaling be self-seeded from the data's own previous merge. A requested column overrides // the default (an 'F'-type column is squared, a 'J'-type used directly). Throws if the requested // column is missing or no usable column exists. ReferenceMtzData LoadReferenceMtz(const std::string& path, diff --git a/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md b/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md index f0e7120f..3a772db2 100644 --- a/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md +++ b/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md @@ -264,19 +264,34 @@ worked is **2D profile-fit integration**; the keepers are **Gaussian profile-fit --- -## "Could PixelRefine be rebuilt as integrator + scaling?" +## Lineage: PixelRefine (removed 2026-06-25) -Yes, and this is the clean decomposition that `ProfileIntegrate2D` realizes. PixelRefine does three -things; only the third needs a reference: -1. **Profile** — measure the tangential width per shell from strong spots (`PixelRefine.cpp:433`). - Reference-free. -2. **Extraction** — `J = Σ P(I−B)/v ÷ Σ P²/v`, de-biased variance, plus radial partiality - (`PixelRefine.cpp:491-525`). Reference-free. -3. **Scaling** — `J ≈ G·B_DW·p·pol·I_ref` (`PixelRefine.cpp:365`). **The only step using the reference.** +`ProfileIntegrate2D` is the surviving half of an earlier experimental still-integrator, **PixelRefine**, +which did three things bundled together: a reference-free **profile + extraction** (`J = Σ P(I−B)/v ÷ +Σ P²/v`, de-biased variance, plus a radial partiality) and a reference-based **per-image scaling** +(`J ≈ G·B_DW·p·pol·I_ref`). The decomposition observation was that the first two are independent of the +reference and identical in spirit to what a profile-fitting integrator does — so they were lifted out as +`ProfileIntegrate2D`, while scaling stayed in the shared `ScaleOnTheFly` (which already does reference +scaling when a reference is supplied: `IndexAndRefine::ReferenceIntensities` builds a +`ScaleOnTheFly(experiment, reference)` and the per-image pass calls it on whatever the integrator +produced — **integrator-agnostic**). -So **integrator = steps 1+2** (= `ProfileIntegrate2D`), **scaling = step 3** (or the reference-free -`ScaleOnTheFly`). The decomposition also **fixes the rot3d incompatibility**: whole-PixelRefine emits -`image_scale_corr = 1/G` with no rotation partiality (which gave garbage σ with `-P rot3d`), whereas the -decomposed integrator emits `partiality` like `BraggIntegrate2D`, so `ScaleOnTheFly` + `Combine3D` + -merge consume it unchanged. PixelRefine then becomes `ProfileIntegrate2D + reference ScaleOnTheFly`, and -the experimental coupling disappears. +**The decomposition won, so PixelRefine was deleted.** On the LysozymeJet5 serial stills, the default +Gaussian path (`ProfileIntegrate2D` + the same reference scaling) **matched or beat** whole-PixelRefine +in every per-shell CC½ (overall 95.7% vs 91.9%), ISa (1.6 vs 1.2) and R-meas (98.5% vs 175%), with +CCref a tie — same indexing, same uniques. PixelRefine collected ~1.8× more observations (a wider Ewald +band) but they were noisier and didn't help. Whole-PixelRefine was also incompatible with the rot3d +combine (it emitted `image_scale_corr = 1/G` with no rotation partiality → garbage σ), which the +decomposed integrator avoids by emitting `partiality` like `BraggIntegrate2D`. + +**Lessons inherited from PixelRefine (kept here so they survive the deletion):** +- **Mean (not median) background** in both integrators — the median of a skewed background + under-subtracts and biases weak intensities positive (fake ⟨I/σ⟩ at CC≈0). Single biggest + "untrustworthy σ" cause. +- **De-biased variance** (weight by background, not observed counts) — fixed negative high-res ⟨I/σ⟩ + and scale collapse. +- **A tight profile loses to a generous box for stills** — the ~0.4-px centroid-undersampling floor of + ~2×2 spots is a real sampling floor, not a fixable misprediction; recentring/adaptive-width add noise. + The lever is matched weighting + de-biased variance, never a tight aperture. +- **Per-image R (profile-width) refinement is futile** — G↔R is intrinsically degenerate; refine G/B + freely but never R per image (it slides the per-image scale and wrecks the merge). diff --git a/image_analysis/bragg_integration/ProfileIntegrate2D.h b/image_analysis/bragg_integration/ProfileIntegrate2D.h index 14a501c3..6cba3e98 100644 --- a/image_analysis/bragg_integration/ProfileIntegrate2D.h +++ b/image_analysis/bragg_integration/ProfileIntegrate2D.h @@ -8,11 +8,11 @@ // ============================================================================= // // A drop-in alternative to BraggIntegrate2D that replaces uniform box summation with -// profile-fitted extraction - the part of PixelRefine that does NOT need reference -// intensities (PixelRefine = this extraction + a reference-based scaling step; here only -// the extraction is kept). See NEXTGEN_INTEGRATOR.md for the rationale: the residual ~4x -// R-meas/ISa gap to XDS is the box-sum method (a fixed disk captures a width-dependent -// fraction of each spot -> ~18% multiplicative per-observation floor on strong reflections). +// profile-fitted extraction (no reference intensities needed). See NEXTGEN_INTEGRATOR.md for +// the rationale: the residual ~4x R-meas/ISa gap to XDS is the box-sum method (a fixed disk +// captures a width-dependent fraction of each spot -> ~18% multiplicative per-observation floor +// on strong reflections), and for the lineage (this is the extraction half of the former, +// now-removed PixelRefine, which beat whole-PixelRefine on the serial test). // // Output is a vector with I, sigma, partiality, d - IDENTICAL in shape to // BraggIntegrate2D - so ScaleOnTheFly, Combine3D (-P rot3d) and the merge consume it @@ -30,9 +30,8 @@ // variance v = B + max(I,0)*P (iterate), sigma = sqrt(1/sum P^2/v). Carry the rotation // partiality exactly as BraggIntegrate2D does. // -// Staging: v1 = measured-R1 Gaussian (lifts PixelRefine's extraction out, proves the -// decomposition + rot3d-combine compatibility); v2 = empirical per-shell (the expected win); -// v3 = empirical per detector-region x shell (XDS-grade). +// Staging: v1 = measured-R1 Gaussian (the keeper); v2 = empirical per-shell; v3 = empirical per +// detector-region x shell (XDS-grade). // // Selected by BraggIntegrationSettings::Integrator: ProfileGaussian (the DEFAULT, v1) or // ProfileEmpirical (v2); BoxSum (BraggIntegrate2D) is the fallback. jfjoch_process exposes it as diff --git a/image_analysis/bragg_prediction/BraggPrediction.cpp b/image_analysis/bragg_prediction/BraggPrediction.cpp index 70ebea71..38ce1943 100644 --- a/image_analysis/bragg_prediction/BraggPrediction.cpp +++ b/image_analysis/bragg_prediction/BraggPrediction.cpp @@ -86,9 +86,9 @@ int BraggPrediction::Calc(const DiffractionExperiment &experiment, const Crystal // Energy bandwidth thickens the Ewald shell radially: at the // diffraction condition |S|-1/λ shifts by recip_z·(Δλ/λ), i.e. - // σ_bw = |recip_z|·bandwidth_sigma (= bλ/2d², identical to - // PixelRefine's R_bw). Broaden the acceptance window in quadrature so - // high-resolution shells (smeared most, ∝1/d²) are not clipped. + // σ_bw = |recip_z|·bandwidth_sigma (= bλ/2d²). Broaden the acceptance + // window in quadrature so high-resolution shells (smeared most, ∝1/d²) + // are not clipped. float radial_cutoff = settings.ewald_dist_cutoff; if (settings.bandwidth_sigma > 0.0f) { const float bw_tol = kBandwidthCutoffSigmas * settings.bandwidth_sigma * std::fabs(recip_z); diff --git a/image_analysis/pixel_refinement/CMakeLists.txt b/image_analysis/pixel_refinement/CMakeLists.txt deleted file mode 100644 index 5e94b562..00000000 --- a/image_analysis/pixel_refinement/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -ADD_LIBRARY(JFJochPixelRefine PixelRefine.cpp PixelRefine.h) -TARGET_LINK_LIBRARIES(JFJochPixelRefine JFJochBraggPrediction JFJochCommon JFJochScaleMerge JFJochGeomRefinement ceres_static) diff --git a/image_analysis/pixel_refinement/FACTORED_MODEL.md b/image_analysis/pixel_refinement/FACTORED_MODEL.md deleted file mode 100644 index cb398977..00000000 --- a/image_analysis/pixel_refinement/FACTORED_MODEL.md +++ /dev/null @@ -1,142 +0,0 @@ -# A factored likelihood for joint integration + scaling + geometry - -**Status: Terms 1+2 implemented and shipping as the PixelRefine default** (see -`PixelRefine.cpp`, `METHODS.md`, `FINDINGS-2026-06.md`); Term 3 (geometry) and the -priors/NN extensions of §4 remain future work. Goal: replace the per-pixel least-squares -of PixelRefine with a per-*reflection* likelihood that fuses profile-fit integration, -scaling against the reference, and geometry refinement into one differentiable -objective — the foundation for priors (Bayesian) and learned components (NN), and the -thing that dissolves the empty-pixel and parameter-degeneracy problems by construction -rather than by patching. - -## 0. Notation - -Per image, parameters `θ`: scale `G`, Debye-Waller `B`, orientation + cell (geometry), -profile width `R1` (tangential, possibly a 2×2 tensor), partiality width `R0` -(radial/mosaicity; `R0_eff² = R0² + R_bw²`, `R_bw² = (bλ)²/2d⁴` *known* from bandwidth). -Per reflection `h`: reference intensity `I_ref` (the hypothesis), resolution `d`, -predicted centre `c_pred`, partiality `p = exp(−ε_r²/R0_eff²)`, polarisation `pol`, -`B_term = exp(−B/4d²)`, shoebox pixels `{I_p}` with mean local background `Bg`, and the -area-normalised tangential profile template `P_p = P_tang(ε_t,p; R1)`. - -## 1. The factorisation principle - -A reflection's shoebox carries three (to first order) **orthogonal** pieces of -information — the 0th, 1st and 2nd moments of its intensity distribution: - -| moment | statistic | constrains | -|---|---|---| -| 0th — total | profile-fit amplitude `J` | scale chain `G, B` (and `p`) | -| 1st — position | centroid `c_obs` | geometry (orientation; radial→distance/cell) | -| 2nd — shape | second moment `M₂` | profile width `R1` (and anisotropy) | - -The current per-pixel residual mixes all three into one objective over shared pixels — -*that* is what couples the parameters (measured G–R0 ≈ −0.46, G–R1 ≈ +0.51) and lets the -many empty pixels dominate. Residual-ing each **moment** against its model instead gives -a block-diagonal Jacobian: the couplings vanish because each statistic carries one -parameter block's information. - -## 2. The three residual terms - -### 2.1 Intensity / scaling residual (one scalar per reflection) - -Optimal (Diamond) profile-fit amplitude and its model: -``` -J = Σ_p w_p P_p (I_p − Bg) / Σ_p w_p P_p² w_p = 1/v_p -J_model = G · B_term · p · pol · I_ref -r¹_h = (J − J_model) / σ_J -``` -`J` is ~invariant to `R1` (a well-sampled spot integrates to the same total whatever -width is assumed) → **R1 leaves this residual**. Empty pixels make no residual; they -enter only through `J` with ~zero profile weight → **the empty-pixel problem is gone by -construction.** This residual *is* the scaling residual — integration and scaling are now -one objective. - -### 2.2 Shape residual (constrains R1; decoupled from scale) - -``` -M₂_obs = Σ_p (I_p − Bg) ε_t,p² / Σ_p (I_p − Bg) (intensity-weighted variance, Å⁻²) -M₂_model = R1² / 2 (variance of exp(−ε_t²/R1²)) -r²_h = (M₂_obs − M₂_model) / σ_M2 -``` -A moment is normalised by the total → **scale-invariant → `∂r²/∂G = 0`**. The G↔R1 -degeneracy disappears. Anisotropic extension: use the 2×2 moment tensor -`Σ(I−Bg)(ε_t⊗ε_t)/Σ(I−Bg)` vs `diag(R1a²/2, R1b²/2)` → elliptical R1 (the DMM streak). -Weak spots have huge `σ_M2` → contribute ~nothing → R1 is set by strong spots -automatically (and may be made `R1(d)` per resolution). - -### 2.3 Position residual (constrains geometry; decoupled from scale and shape) - -``` -c_obs = Σ_p (I_p − Bg)(x_p, y_p) / Σ_p (I_p − Bg) -r³_h = (c_obs − c_pred(geometry)) / σ_c (2-vector; split radial / tangential) -``` -Centroid is scale- and width-invariant → `∂r³/∂G = ∂r³/∂R1 ≈ 0`. The **radial** component -constrains distance/cell, the **tangential** constrains orientation — exactly the split -the diagnostic measured (radial≈0 = no distance error; tangential∝radius = orientation). - -## 3. Fisher / expected-variance weighting (makes it a likelihood) - -Every `σ` uses the **model-expected** variance, never observed counts — this is what -makes strong *expected* reflections carry the information and makes the model "feel pain -when something that should be there is not": -``` -v_p = Bg + J_model · P_p (background + expected signal from I_ref, not I_obs) -σ_J² = 1 / Σ_p (P_p² / v_p) -σ_M2 ≈ M₂ · √(2 / N_eff), σ_c ≈ R1 / √(N_eff), N_eff = (Σ(I−Bg))² / Σ v_p -``` -Fisher information about `G` from term 1 is `∝ (B_term·p·pol·I_ref)² / σ_J²` — driven by -`I_ref`, so a noise spike (high counts, low `I_ref`) gets *no* weight while a strong -expected reflection observed absent (`J≈0`, large residual, moderate `σ_J`) gets a large -penalty. The reference enters at maximum leverage: it sets both the target and the weight. - -## 4. Joint objective and priors - -``` -L(θ) = Σ_h [ (r¹_h)² + (r²_h)² + |r³_h|² ] + priors -``` -No free λ if the σ's are correct — the relative weighting *is* the Fisher information. -Priors are the Bayesian hooks and the principled degeneracy breaks: -- **R0 (partiality/mosaicity) is GLOBAL + prior.** R0 multiplies `J_model` (`p`), so it is - still degenerate with the per-image `G` *within term 1* — the one degeneracy the - factorisation does **not** remove. Resolve it physically, not with a directional G prior - (which would bias every output intensity): `R0 ~ N(mosaicity, σ)`, `R_bw` fixed from the - known bandwidth, and `R0` fit **globally** (one per crystal, from many reflections' - partiality distribution) so per-image G can't trade against it. -- orientation `~ N(spot-centroid, σ)`; `G ~ N(1, σ_G)` or tied to the beam monitor; - distance `~ N(nominal, σ_L)` (loose, since serial/jet alignment is poorly constrained). -- Optional Bayesian intensities: treat `I_true` as a parameter with the reference as its - prior → posterior over intensities, not point estimates. - -## 5. Why the degeneracies vanish (Jacobian structure) - -`Jᵀ W J` is approximately block-diagonal in `(G,B,p | R1 | geometry)`: -``` -∂r¹/∂{G,B,p} ≠ 0 ; ∂r¹/∂R1 ≈ 0 ; ∂r¹/∂geom ≈ 0 -∂r²/∂R1 ≠ 0 ; ∂r²/∂G = 0 ; ∂r²/∂geom ≈ 0 -∂r³/∂geom ≠ 0 ; ∂r³/∂G = 0 ; ∂r³/∂R1 ≈ 0 -``` -So G↔R1 (+0.51) and all the cross-couplings drop to ~0 by construction. Only **G↔R0** -survives (R0 is a scale-multiplier, not a shape), handled by the global+physical prior of -§4. The degeneracies we measured were artifacts of projecting all information onto a -single per-pixel residual. - -## 6. Implementation notes - -- Per reflection: 1 (intensity) + 1 (shape) + 2 (position) residuals = 4, vs ~49 per-pixel - residuals → **cheaper**, and Ceres autodiffs the moment formulas through the pixels. -- The per-pixel forward model still *defines* `P_tang`, `p`, etc.; the **loss** moves to - the moments. -- Geometry (term 3) can run as the global sweep we have (it already maximises a - position/CC objective); terms 1–2 are the per-image photometry. Or solve all three - jointly per image with the global R0/mosaicity shared across images (two-level fit). -- Drop-in path: keep the current extraction, add the three residuals as a new objective - behind a flag, compare against the per-pixel loss on both test crystals. - -## 7. Why this serves the goal - -It is one differentiable likelihood, factored along the physics, that (a) maximises use of -the reference (target + Fisher weight), (b) is the substrate for priors / posteriors over -intensities (Bayesian), and (c) lets any term — profile `P`, partiality `p`, corrections — -be replaced by a learned function trained through the same likelihood. That is the -qualitative move XDS-style empirical profile fitting cannot make. diff --git a/image_analysis/pixel_refinement/FINDINGS-2026-06.md b/image_analysis/pixel_refinement/FINDINGS-2026-06.md deleted file mode 100644 index a7345de4..00000000 --- a/image_analysis/pixel_refinement/FINDINGS-2026-06.md +++ /dev/null @@ -1,122 +0,0 @@ -# PixelRefine — findings, June 2026 - -A record of the main results from the still-integration investigation, validated on two -lysozyme P4₃2₁2 datasets indexed against the same reference (`6G8A_refine_001.mtz`): - -* **Crystal 1 — serial jet stills** (`LysozymeJet5-…`, MicroMAX DMM ~1 % bandwidth). -* **Crystal 2 — rotation crystal** (`fixed_master.h5`, 1800 × 0.2°, Si mono), run *as - stills* as a stress test, and as rotation for reference. XDS output of this crystal - (`CORRECT.LP`, `XDS_ASCII.HKL`) is the external ground truth. - ---- - -## 1. The factored model is a qualitative win (crystal 2, stills, 1.7 Å) - -Replacing the per-pixel least squares with the factored Terms 1+2 (intensity residual + -measured per-resolution $R_1$) turns an erratic, high-res-collapsing result into a flat one: - -| | N_obs | ⟨I/σ⟩ | CC₁/₂ (per shell) | CCref (per shell) | -|---|---:|---:|---|---| -| baseline per-pixel loss | 799 k | 7.2 | 75→81→56→32→…→0 % | erratic, →0 | -| **factored Terms 1+2** | 1.22 M | 10.7 | **84–92 %, flat to 1.7 Å** | **77–92 %, flat** | - -This *reaches the proper rotation (`-R -P rot`) path's quality from the stills path.* The -mechanism is clean and on-thesis: **Term 1 lifts the per-image scale CC vs the reference -from 0.09 → 0.40** (median 0.01 → 0.39) — the per-pixel loss produced near-random per-image -scales; the factored objective makes each image's scale agree with the reference, so the -merge coheres. The reference enters at maximum leverage (it sets both the target and the -Fisher weight). - -**Term 3 (per-spot recentring) is a no-op** on both crystals (93.5 vs 93.6 % CC₁/₂): the -generous integration aperture already contains the spot, so shifting the spotlight by the -sub-pixel prediction offset does not change the integrated total. It was removed; the -position residual's value can only come as a *geometry* term (refine orientation/distance -from the centroid), which is XtalOptimizer's job, not a per-spot mask shift. - ---- - -## 2. The residual centroid offset is a sampling floor, not a recoverable error - -On the jet, predictions land on the spot to within a ~0.4 px tangential scatter. Three -independent cuts show this is the **centroid undersampling floor of the ~2×2 spots**, not a -geometry error or a shape effect: - -* **Flat with intensity.** Binned by significance (≈6σ vs ≈39σ, a 6× span) the tangential - centroid offset is 0.41 → 0.36 px (median). A background-limited centroid would shrink - ∝ 1/significance (≈6×); it does not, so it is a real ~0.35 px floor, not counting noise. -* **Peak is a *worse* predictor than the centroid** (sub-pixel parabolic mode: 0.52 vs 0.36 - px at high signif). If an asymmetric tail were dragging the centroid off a correct peak, - the peak would be *better* — it is not. So there is **no coherent shape asymmetry** to - model; the offset is a zero-mean per-spot scatter from sub-pixel phase aliasing of a spot - only ~2 px wide. -* **Radial centroid offset ≈ 0.01–0.02 px, flat** → no distance/parallax error (parallax is - radial and grows outward). - -Consequences: recentring cannot rescue weak reflections (their prediction is already -centred; the 0.4 px is irreducible sampling scatter), which is *why* a generous box beats a -tight profile mask. A *symmetric* non-Gaussian shape would not move the centroid at all — it -would show up only in Term 2 ($R_1$), not in a position term. - ---- - -## 3. The σ gap to XDS is fulls-vs-partials, not intensities - -XDS reports **ISa = 28.3** (asymptotic relative error 3.5 %); our stills path reports -**ISa ≈ 1.1** and the rotation path **≈ 1.6**. The decisive clue is in `XDS_ASCII.HKL`: the -`PEAK` column (fraction of each reflection captured) is **≈ 100 % for 97 % of reflections**, -and the header gives mosaicity 0.091° < 0.2° oscillation. So **these are full reflections**, -recorded over the 1–3 frames each rocking curve spans — not partials. - -* **XDS** profile-fits in 3D (the third axis is the rotation/rocking direction) and *sums* - the rocking curve with profile weights → one full per reflection, counting-limited σ. -* **jfjoch `-P rot`** integrates a 2D shoebox *per frame* and recovers each full by - *dividing* by the rocking-curve fraction $R_j$ — a cheap approximation of 3D integration. - That division injects per-observation noise (it amplifies each frame's background noise, - pays N independent backgrounds, and carries a random per-observation partiality error). - -Crucially, **ISa and merged-intensity accuracy are different axes, decoupled by -multiplicity.** Our merged intensities are correct (§4), because ~60–240× multiplicity -averages the per-observation noise down; ISa measures the per-*observation* precision, which -multiplicity cannot improve. So "right intensities, wrong σ" is not a contradiction. - -**A cheap probe confirms it.** Raising `--min-partiality` from 0.02 → 0.5 on crystal 2 -(rotation) lifts ISa **1.6 → 3.8** at *zero* completeness cost (high multiplicity) and with -CCref flat — and the high-res shells *improve*. The default 0.02 keeps deep-tail partials -(2 % of a reflection, scaled back ×50) that were over-weighted and polluting the merge. So -of the ~17× rotation gap, **~2.8× is tunable tail-weighting** and **~6× is structural** (the -2D-divide vs 3D-sum difference) — the structural part needs a real rocking-curve sum, not a -knob. - ---- - -## 4. Our intensities are XDS-grade — the limit is σ, not I - -Direct CC of merged intensities (both reduced to the 4/mmm asymmetric unit; 98.8 % of -reflections matched, no manual reindex): - -| vs XDS (CC on merged I, to 1.2 Å) | overall | at 1.2 Å | -|---|---|---| -| **PixelRefine stills** (factored) | **95.9 %** | 96.9 % | -| rotation `-P rot` (min_partiality 0.3) | 98.8 % | 98.5 % | - -PixelRefine-stills intensities track XDS at **95–98 %, flat to the 1.2 Å diffraction -limit**, only ~3 % behind full rotation integration. This is exactly what the CC₁/₂→CC_true -relation predicts (stills CC₁/₂≈0.88 ⇒ 0.967), so it is real, carried by the huge stills -multiplicity averaging per-observation noise down. **Conclusion: the intensity estimation is -right; the remaining gap to XDS is entirely the per-observation σ / partiality axis** — the -3D rocking-curve integration frontier (`FACTORED_MODEL.md` §4), deliberately parked. - ---- - -## What is fixed vs. parked - -**Landed (and now the default model):** mean (not median) background in both integrators; -de-biased (background-limited) variance; widened Ewald prediction band; the factored -Terms 1+2 as the only PixelRefine objective; the global XDS-form merge error model with ISa. - -**Diagnosed dead-ends (do not re-litigate):** per-image $R$ refinement (degenerate with -scale); per-spot recentring (no-op); chasing the 0.4 px centroid floor (sampling, not -recoverable). **Parked (rotation-side):** the 3D rocking-curve sum / one-shot -post-refinement, a `min_partiality` default for high-multiplicity data, and -partiality-aware variance weighting — the path to XDS-grade ISa, but a separate axis from -the intensity work. diff --git a/image_analysis/pixel_refinement/METHODS.md b/image_analysis/pixel_refinement/METHODS.md deleted file mode 100644 index 1c1fd9e1..00000000 --- a/image_analysis/pixel_refinement/METHODS.md +++ /dev/null @@ -1,214 +0,0 @@ -# PixelRefine — methods - -`PixelRefine` is the still-image integrator. It integrates the Bragg reflections of one -image by **profile fitting against a reference intensity set** $I^\mathrm{ref}$ (e.g. -`F_calc` from a deposited model, or the current merged estimate in an EM-style outer loop) -and returns already-scaled intensities. It is an **intensity-wise** operation: the -detector geometry (orientation, cell, beam, distance) is taken as fixed — it was refined -upstream by `XtalOptimizer` (`IndexAndRefine::RefineGeometryIfNeeded`) — and PixelRefine -only measures the spot shape and fits the per-image scale. - -The objective is the factored per-reflection likelihood of `FACTORED_MODEL.md`, **Terms 1 -and 2**. This note records the equations and the reasons behind each design choice. - -Throughout, a reflection's shoebox is a small box of raw detector pixels $I_p$ with a -local flat background $B$; the area-normalised tangential profile at pixel $p$ is $P_p$, -and $v_p$ is the variance used to weight pixel $p$. - ---- - -## 0. The forward model - -The recorded amplitude of a still reflection is the profile-fit amplitude - -$$ -J \;=\; \frac{\sum_p P_p\,(I_p - B)/v_p}{\sum_p P_p^{2}/v_p}, -\qquad -\operatorname{var}(J) = \frac{1}{\sum_p P_p^{2}/v_p}. -$$ - -The full (rotation-equivalent) intensity is recovered by dividing out the factors a still -does not record, - -$$ -I \;=\; \frac{J}{p\,B_\mathrm{DW}\,\mathrm{pol}},\qquad -p = \exp\!\left(-\frac{\epsilon_r^{2}}{R_{0,\mathrm{eff}}^{2}}\right),\quad -B_\mathrm{DW}=\exp\!\left(-\frac{B_\mathrm{fac}}{4 d^{2}}\right), -$$ - -where the **partiality** $p$ is the fraction of the mosaic block crossing the Ewald -sphere, $\epsilon_r$ is the radial excitation error, $R_0$ the radial (rocking) width, and -$\mathrm{pol}$ the polarisation correction. The tangential profile is a separable, -area-normalised Gaussian of width $R_1$: - -$$ -P_p = \frac{1}{\pi R_1^{2}}\exp\!\left(-\frac{\epsilon_{t,p}^{2}}{R_1^{2}}\right). -$$ - -A finite **X-ray bandwidth** thickens the Ewald shell radially, adding a fixed, -resolution-dependent term to the radial width, $R_{0,\mathrm{eff}}^2 = R_0^2 + -(b\lambda)^2/(2d^4)$ ($b$ = relative bandwidth; the pink-beam / DMM signature). $b=0$ is a -monochromatic no-op. - ---- - -## 1. De-biased variance (the load-bearing fix) - -**Symptom.** Mean intensities went **negative** in the high-resolution shells -($\langle I/\sigma\rangle$ down to $-12$), and the per-image scale $G$ collapsed to $0$ on -most images, dropping ~80 % of observations. - -**Cause.** The extraction weighted each pixel by its **observed** count, $v_p = I_p$. A -down-fluctuated background pixel ($I_p < B$) then gets a small $v_p$, hence a large -$w_p=1/v_p$, and its contribution $P_p(I_p-B)/v_p < 0$ is large in magnitude. Summed over -the many empty shoebox pixels this drags $J$ below zero — the *inverse-observed-count* -(Poisson-on-data) bias, worst where the true signal is weakest (high resolution). - -**Fix.** For background-limited reflections the correct variance is the local background, -**constant over the shoebox**, $v_p = \max(B,1)$, giving the unbiased uniform-variance -estimator $J = \sum_p P_p (I_p-B)/\sum_p P_p^2$. This turned $\langle I/\sigma\rangle$ -positive at all resolutions and stopped the scale collapse. - ---- - -## 2. Prediction band and multiplicity - -A reflection is given a shoebox only when it lies within a radial band of the Ewald -sphere, $\bigl|\,|S_{hkl}| - 1/\lambda\,\bigr| \le \delta$. For randomly oriented stills -the number of images on which a given $hkl$ qualifies is $\propto \delta$. The original -$\delta = 5\times10^{-4}\,\text{Å}^{-1}$ was 4–6× tighter than a box integrator, giving 4× -fewer observations per reflection. Widening to $\delta = 2\times10^{-3}\,\text{Å}^{-1}$ -(`ewald_dist_cutoff`) restores the multiplicity; the partiality $p$ downweights the -slightly-off-Ewald tails it admits. (Widening is only safe with the de-biased variance of -§1 and the factored objective of §§3–4 — with a per-pixel geometry fit it diverged.) - ---- - -## 3. Term 2 — measured per-resolution profile width $R_1$ - -$R_1$ is **measured, not fitted**. Fitting $R_1$ inside a per-image least squares is -degenerate with the scale $G$ (a narrower profile and a larger scale trade off), and that -degeneracy slides the per-image scale and wrecks the merge. But a **second moment** is -normalised by the total intensity, so it carries shape information *decoupled from scale*: - -$$ -R_1^2 = 2\,\langle \epsilon_t^2\rangle, -\qquad -\langle \epsilon_t^2\rangle = \frac{\sum_p (I_p-B)\,\epsilon_{t,p}^2}{\sum_p (I_p-B)} . -$$ - -We bin the strong spots ($\mathrm{signif}\ge 5$) by resolution ($1/d^2$, 6 bins) and take -the **median** $\langle\epsilon_t^2\rangle$ per bin, so each reflection integrates with the -$R_1$ of its resolution shell (low-res spots are tight; high-res anisotropic streaks are -wider). Weak spots fall back to the global $R_1$. Measuring the width rather than fitting -it is what makes profile-width refinement stable — and it is a selling point: the mask -adapts to the data per shell. - ---- - -## 4. Term 1 — the intensity / scaling residual - -The per-pixel least squares is replaced by **one residual per reflection**: the profile-fit -amplitude $J$ (using the Term-2 $R_1$) should equal the scaled reference, - -$$ -r_h = \frac{J_h - G\,B_\mathrm{DW}\,p_h\,\mathrm{pol}_h\,I^\mathrm{ref}_h}{\sigma_{J,h}}, -\qquad -L = \sum_h r_h^2 + \text{(scale prior)} . -$$ - -Only the per-image scale $G$ and Debye–Waller $B$ are optimised; geometry and $R$ are -fixed. Three consequences: - -* **Integration and scaling become one objective.** $J$ *is* the integrated intensity and - the residual *is* the scaling residual. -* **The empty-pixel problem disappears by construction.** Empty pixels enter only through - $J$ (with ~zero profile weight); they make no residual of their own and cannot dominate. -* **Fisher weighting puts the reference at maximum leverage.** $\sigma_J$ uses the - *model-expected* variance $v_p = B + \max(J,0)\,P_p$ (background plus expected signal from - $I^\mathrm{ref}$), not the observed counts — so a strong *expected* reflection observed - absent is penalised, and a noise spike with low $I^\mathrm{ref}$ gets no weight. - -The scale is regularised towards 1 with a data-scaled weight $w_G=\sqrt{N_\mathrm{refl}/ -\sigma_G}$ (mirrors `ScaleOnTheFly`) so weakly-measured images cannot drift and scramble -the merge. - -**Geometry is not refined here.** PixelRefine's earlier per-image geometry refinement -(regularised orientation LSQ, signal-weighting, a global orientation/cell sweep) was -removed: on true stills the predictions are already good (radial centroid error ≈ 0, -tangential ≈ a 0.4 px sampling floor that is *not* a recoverable misprediction), and per-image -geometry refinement only overfit the sparse signal. Geometry is the job of `XtalOptimizer`. - ---- - -## 5. Background estimator — mean, not median (both integrators) - -**Symptom.** Pushed past the true resolution limit, the no-signal shells reported -$\langle I/\sigma\rangle\approx4\text{–}6$ at $\mathrm{CC}_{1/2}\approx0$ — confident "data" -where there is none. Present in *both* PixelRefine and the classical `BraggIntegrate2D`. - -**Cause.** Both used the **median** of the background ring. For a right-skewed (Poisson) -background $\operatorname{median}(B) < \mathbb{E}[B]$, so subtraction under-subtracts by a -tiny but *coherent* per-pixel offset that grows over an $n_\mathrm{pix}$ peak and a -multiplicity-$m$ merge into a fake $\langle I/\sigma\rangle\propto\sqrt{m}$, worst where the -real signal is weakest. - -**Fix.** Use the **mean** of the ring (spot cores and saturation sentinels already -excluded). $\langle I/\sigma\rangle$ then collapses to ~0 wherever $\mathrm{CC}\approx0$ and -the honest resolution limit becomes visible. This was the single largest contributor to -untrustworthy σ — a one-line change in each integrator. - ---- - -## 6. Error model (global $a,b$; XDS form) - -Counting statistics under-estimate the variance of strong reflections, which carry -systematic errors proportional to $I$, not $\sqrt{I}$. The standard correction inflates the -variance with a **global** two-parameter model, applied at the merge level so both -integrators benefit: - -$$ -\sigma'^{\,2} = a\,\sigma^{2} + (b\,\langle I\rangle)^{2}, -\qquad -\mathrm{ISa} = \frac{1}{b} = \lim_{I\to\infty}\frac{I}{\sigma'} . -$$ - -The $I^2$ term uses the reflection **mean** $\langle I\rangle$ (not the per-observation -$I_i$, which would bias the merged mean and collapse CC); $a,b$ are fit from the spread of -symmetry equivalents with a relative ($1/\mathrm{dev}^4$) weight so the strong bins (which -fix $b$) do not swamp the weak bins (which fix $a$). `jfjoch_process` prints the model and -ISa. - ---- - -## Results (lysozyme rotation crystal `fixed_master.h5`, treated as stills, 1.7 Å) - -| Configuration | N_obs | $\langle I/\sigma\rangle$ | CC$_{1/2}$ | CC$_\mathrm{ref}$ | -|---|---:|---:|---:|---:| -| Baseline per-pixel loss | 799 k | 7.2 | erratic, →0 at 1.7 Å | erratic | -| **Factored Terms 1+2 (this model)** | 1.22 M | 10.7 | **84–92 % flat** | **77–92 % flat** | - -The factored objective turns the erratic, high-res-collapsing per-pixel result into flat -~90 % CC$_{1/2}$/CC$_\mathrm{ref}$ to 1.7 Å — matching the proper rotation integration path -from the *stills* path. (See `FINDINGS-2026-06.md`.) - ---- - -## Default recipe - -§§1–4 are PixelRefine-specific; §§5–6 act at the integration/merge level and apply to the -classical route too. - -| Field / behaviour | Default | Section | -|---|---|---| -| fit/extraction variance | local background $B$ | 1 | -| `ewald_dist_cutoff` | $2\times10^{-3}\,\text{Å}^{-1}$ | 2 | -| tangential width $R_1$ | measured per resolution shell | 3 | -| objective | per-reflection intensity residual, Fisher-weighted | 4 | -| refined parameters | per-image $G$ (and $B$); geometry fixed | 4 | -| `scale_reg_sigma` | 2.0 | 4 | -| local background estimator | **mean** of the ring | 5 | -| merge error model | global $a,b$ (ISa printed) | 6 | - -`ewald_dist_cutoff` (multiplicity vs. cost) and `bandwidth` (Si vs. DMM) are the two knobs -worth setting per dataset. diff --git a/image_analysis/pixel_refinement/PixelRefine.cpp b/image_analysis/pixel_refinement/PixelRefine.cpp deleted file mode 100644 index 59d92de2..00000000 --- a/image_analysis/pixel_refinement/PixelRefine.cpp +++ /dev/null @@ -1,708 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#include "../../common/JFJochMath.h" -#include "PixelRefine.h" - -#include -#include -#include -#include -#include - -#include -#include - -namespace { - -// Per-pixel observation, in *raw* detector counts (no per-pixel solid-angle or -// polarization correction - same units the "normal" integrator works in; the -// per-reflection polarization correction is applied via ReflGroup::pol). -struct PixelObs { - double x, y; // detector pixel coordinate - double Iobs; // raw pixel value (signal + background) - double Ibkg; // local background estimate (per-shoebox level, raw counts) -}; - -// One reflection together with the pixels of its shoebox. -struct ReflGroup { - int h, k, l; - double d; - double Itrue; // reference intensity (held fixed) - double R_bw_sq; // bandwidth radial-width^2 contribution (0 = monochromatic) - double pol; // per-reflection polarization correction (raw = true * pol) - double Ibkg; // local flat background (raw counts, constant over the shoebox) - double predicted_x, predicted_y; - double R1_eff = 0.0; // tangential profile width to use (Term 2) - std::vector pixels; -}; - -// Median of a vector (in place, partially reorders it). -double MedianInPlace(std::vector &v) { - if (v.empty()) - return 0.0; - const size_t mid = v.size() / 2; - std::nth_element(v.begin(), v.begin() + mid, v.end()); - if (v.size() % 2 == 1) - return v[mid]; - const double hi = v[mid]; - std::nth_element(v.begin(), v.begin() + mid - 1, v.begin() + mid); - return 0.5 * (v[mid - 1] + hi); -} - -// Mask marking the *core* (radius `radius`) of every predicted spot, so that the -// local-background sampling of one reflection never picks up a neighbouring -// reflection's signal. Same idea as BraggIntegrate2D::BuildReflectionMask. -std::vector BuildSpotMask(const std::vector &predicted, int nrefl, - size_t xpixel, size_t ypixel, int radius) { - std::vector mask(xpixel * ypixel, 0); - const double r_sq = static_cast(radius) * radius; - for (int i = 0; i < nrefl; ++i) { - const auto &r = predicted[i]; - const int cx = static_cast(std::lround(r.predicted_x)); - const int cy = static_cast(std::lround(r.predicted_y)); - const int x0 = std::max(0, cx - radius); - const int x1 = std::min(static_cast(xpixel) - 1, cx + radius); - const int y0 = std::max(0, cy - radius); - const int y1 = std::min(static_cast(ypixel) - 1, cy + radius); - for (int y = y0; y <= y1; ++y) { - for (int x = x0; x <= x1; ++x) { - const double dx = x - r.predicted_x; - const double dy = y - r.predicted_y; - if (dx * dx + dy * dy <= r_sq) - mask[static_cast(xpixel) * y + x] = 1; - } - } - } - return mask; -} - -// Square shoebox bounds (inclusive) around a predicted spot, clamped to the -// detector. The centre is rounded to the nearest pixel with std::lround so the -// signal box is centred identically to the spot-core mask (BuildSpotMask) and -// the local-background ring (EstimateLocalBackground), which also lround. -struct ShoeboxBox { int min_x, max_x, min_y, max_y; }; -ShoeboxBox ShoeboxBounds(double px, double py, int radius, size_t xpixel, size_t ypixel) { - const int cx = static_cast(std::lround(px)); - const int cy = static_cast(std::lround(py)); - return { - std::max(cx - radius, 0), - std::min(cx + radius, static_cast(xpixel) - 1), - std::max(cy - radius, 0), - std::min(cy + radius, static_cast(ypixel) - 1) - }; -} - -// Local flat background around one shoebox, in raw detector counts. Samples the -// square ring shoebox_radius < max(|dx|,|dy|) <= bkg_outer_radius centred on the -// spot, dropping pixels that belong to any spot core (spot_mask) or carry a -// masked/saturated sentinel, and returns their MEAN. (The median, used previously, -// sits below the mean for a skewed background and so biases the subtracted intensity -// positive.) Mirrors the local-background region of BraggIntegrate2D. -template -bool EstimateLocalBackground(const T *image, - const std::vector &spot_mask, - size_t xpixel, size_t ypixel, - double cx, double cy, - int shoebox_radius, int bkg_outer_radius, - double &bkg_mean) { - const int icx = static_cast(std::lround(cx)); - const int icy = static_cast(std::lround(cy)); - const int x0 = std::max(0, icx - bkg_outer_radius); - const int x1 = std::min(static_cast(xpixel) - 1, icx + bkg_outer_radius); - const int y0 = std::max(0, icy - bkg_outer_radius); - const int y1 = std::min(static_cast(ypixel) - 1, icy + bkg_outer_radius); - - std::vector vals; - vals.reserve(static_cast((x1 - x0 + 1) * (y1 - y0 + 1))); - for (int y = y0; y <= y1; ++y) { - for (int x = x0; x <= x1; ++x) { - // Skip the square shoebox core: that is signal, not background. - if (std::abs(x - icx) <= shoebox_radius && std::abs(y - icy) <= shoebox_radius) - continue; - const size_t np = static_cast(xpixel) * y + x; - if (spot_mask[np]) - continue; - const T raw = image[np]; - if (raw == std::numeric_limits::max()) - continue; - if (std::is_signed_v && raw == std::numeric_limits::min()) - continue; - vals.push_back(static_cast(raw)); - } - } - - if (vals.size() < 5) - return false; - - // Mean background, NOT median. The median of a right-skewed (Poisson) background sits - // below the mean, so subtracting it under-subtracts and biases every weak integrated - // intensity positive - which averages up over multiplicity into fake in the - // no-signal high-resolution shells. The mean is unbiased; modern fast detectors put - // few zingers in the ring, so it is not robustified here. - double sum = 0.0; - for (const double v : vals) - sum += v; - bkg_mean = sum / static_cast(vals.size()); - return true; -} - -// Per-pixel: map a detector pixel through the current geometry into the -// reference reciprocal frame. Cheap (a few trig + one rotation); depends on the -// pixel and the detector geometry, not on the lattice. -template -void ObservedRecip(const T *beam, const T *distance_mm, const T *detector_rot, - double obs_x, double obs_y, - double pixel_size, double inv_lambda, - Eigen::Matrix &e_obs_recip) { - // PyFAI convention (left-handed for rot1/rot2): rot3 = 0 assumed. - const T c1 = ceres::cos(detector_rot[0]); - const T s1 = ceres::sin(detector_rot[0]); - const T c2 = ceres::cos(detector_rot[1]); - const T s2 = ceres::sin(detector_rot[1]); - - const T det_x = (T(obs_x) - beam[0]) * T(pixel_size); - const T det_y = (T(obs_y) - beam[1]) * T(pixel_size); - const T det_z = T(distance_mm[0]); - - const T t1_x = c1 * det_x + s1 * det_z; - const T t1_y = det_y; - const T t1_z = -s1 * det_x + c1 * det_z; - - const T x = t1_x; - const T y = c2 * t1_y + s2 * t1_z; - const T z = -s2 * t1_y + c2 * t1_z; - - const T inv_norm = T(1) / ceres::sqrt(x * x + y * y + z * z); - - T recip_raw[3] = { - x * inv_norm * T(inv_lambda), - y * inv_norm * T(inv_lambda), - (z * inv_norm - T(1.0)) * T(inv_lambda) - }; - e_obs_recip = Eigen::Matrix(recip_raw[0], recip_raw[1], recip_raw[2]); -} - -// Per-reflection: predicted node g_hkl, |g_hkl|^2, and the Ewald-sphere normal. -// (a0,a1,a2) are the three real-space lattice column vectors in the lab frame -// (latt.Vec0/1/2()), used exactly as indexed: PixelRefine does not refine the cell, -// so there is no symmetry manifold to constrain - a general (triclinic) reciprocal -// inverse reproduces every crystal system from the actual cell. Depends only on the -// lattice and hkl, so for a whole shoebox it is computed once. -bool PredictedNode(const double *a0, const double *a1, const double *a2, - double exp_h, double exp_k, double exp_l, double inv_lambda, - Eigen::Vector3d &e_pred_recip, - Eigen::Vector3d &n_radial, double &q_sq) { - const Eigen::Vector3d A(a0[0], a0[1], a0[2]); - const Eigen::Vector3d Bv(a1[0], a1[1], a1[2]); - const Eigen::Vector3d C(a2[0], a2[1], a2[2]); - - const Eigen::Vector3d BxC = Bv.cross(C); - const Eigen::Vector3d CxA = C.cross(A); - const Eigen::Vector3d AxB = A.cross(Bv); - - const double Vol = A.dot(BxC); - if (std::abs(Vol) < 1e-12) - return false; - const double invV = 1.0 / Vol; - - e_pred_recip = (BxC * exp_h + CxA * exp_k + AxB * exp_l) * invV; - q_sq = e_pred_recip.squaredNorm(); - - // Ewald sphere centre at -k_i = (0,0,-inv_lambda); radial normal at g_hkl. - const Eigen::Vector3d S_pred( - e_pred_recip[0], e_pred_recip[1], e_pred_recip[2] + inv_lambda); - const double S_pred_norm = S_pred.norm(); - if (S_pred_norm < 1e-10) - return false; - - n_radial = S_pred / S_pred_norm; - return true; -} - -// Geometry terms for one shoebox pixel under a FIXED geometry (PixelRefine no -// longer refines geometry, so this is a plain double evaluation, not a Ceres cost): -// q_sq = |g_hkl|^2 (predicted node, for the B-factor) -// eps_radial = deviation along the Ewald normal (the partiality direction) -// eps_tang_sq = squared deviation in the tangent plane (the profile direction) -bool GeometryProbe(double obs_x, double obs_y, double lambda, double pixel_size, - int h, int k, int l, - const double beam[2], double dist_mm, const double detector_rot[2], - const double p0[3], const double p1[3], const double p2[3], - double &q_sq, double &eps_radial, double &eps_tang_sq) { - const double inv_lambda = 1.0 / lambda; - Eigen::Vector3d e_obs; - ObservedRecip(beam, &dist_mm, detector_rot, obs_x, obs_y, pixel_size, inv_lambda, e_obs); - - Eigen::Vector3d e_pred, n_radial; - if (!PredictedNode(p0, p1, p2, h, k, l, inv_lambda, e_pred, n_radial, q_sq)) - return false; - - const Eigen::Vector3d delta_q = e_obs - e_pred; - eps_radial = delta_q.dot(n_radial); - eps_tang_sq = (delta_q - eps_radial * n_radial).squaredNorm(); - return true; -} - -// Pulls a scalar parameter towards an expected value with a fixed weight (the -// data-scaled prior). Identical in spirit to ScaleOnTheFly's regularizer: it is what -// keeps the per-image scale G from wandering on weakly-constrained images and -// scrambling the cross-image merge. -struct ScalarRegularizer { - ScalarRegularizer(double weight, double expected) : weight(weight), expected(expected) {} - template - bool operator()(const T *p, T *residual) const { - residual[0] = T(weight) * (p[0] - T(expected)); - return true; - } - double weight; - double expected; -}; - -// Term 1 of the factored likelihood (FACTORED_MODEL.md): the per-reflection -// *intensity* (0th-moment) residual. The profile-fit amplitude J should equal the -// scaled reference J_model = G * exp(-B/4d^2) * partiality * pol * I_ref. One scalar -// residual per reflection, weighted by the model-expected (Fisher) sigma_J. This is -// the scaling residual - integration and scaling become one objective, and the empty -// pixels (which make no residual of their own) stop dominating the fit. Geometry is -// fixed, so J, partiality and sigma_J are constants and only G and B are free. -struct IntensityResidual { - IntensityResidual(double J, double sigma_J, double partiality, double pol, - double I_ref, double inv_4d2) - : J(J), inv_sigma(1.0 / sigma_J), partiality(partiality), pol(pol), - I_ref(I_ref), inv_4d2(inv_4d2) {} - template - bool operator()(const T *const G, const T *const B, T *residual) const { - const T B_term = ceres::exp(-B[0] * T(inv_4d2)); - const T J_model = G[0] * B_term * T(partiality) * T(pol) * T(I_ref); - residual[0] = (J_model - T(J)) * T(inv_sigma); - return true; - } - double J, inv_sigma, partiality, pol, I_ref, inv_4d2; -}; - -} // namespace - -PixelRefine::PixelRefine(const DiffractionExperiment &experiment, - const std::vector &reference) - : xpixel(experiment.GetXPixelsNum()), - ypixel(experiment.GetYPixelsNum()), - experiment(experiment), - hkl_key_generator(experiment.GetScalingSettings().GetMergeFriedel(), - experiment.GetSpaceGroupNumber().value_or(1)) { - for (const auto &ref: reference) - reference_data[hkl_key_generator(ref)] = ref.I; -} - -void PixelRefine::BuildParameterBlocks(const PixelRefineData &data, - double beam[2], double &dist_mm, - double detector_rot[2], - double latt_vec0[3], double latt_vec1[3], double latt_vec2[3]) const { - beam[0] = data.geom.GetBeamX_pxl(); - beam[1] = data.geom.GetBeamY_pxl(); - dist_mm = data.geom.GetDetectorDistance_mm(); - detector_rot[0] = data.geom.GetPoniRot1_rad(); - detector_rot[1] = data.geom.GetPoniRot2_rad(); - - // The three real-space lattice columns in the lab frame, used as-is. PixelRefine - // does not refine the cell, so there is no symmetry manifold to constrain; the - // indexed cell is taken exactly, with no re-idealisation to ideal symmetry. - const Coord &a = data.latt.Vec0(); - const Coord &b = data.latt.Vec1(); - const Coord &c = data.latt.Vec2(); - for (int i = 0; i < 3; ++i) { - latt_vec0[i] = a[i]; - latt_vec1[i] = b[i]; - latt_vec2[i] = c[i]; - } -} - -template -void PixelRefine::Run(const T *image, - BraggPrediction &prediction, - PixelRefineData &data) { - data.solved = false; - data.reflections.clear(); - - const double lambda = data.geom.GetWavelength_A(); - const double pixel_size = data.geom.GetPixelSize_mm(); - - const BraggPredictionSettings settings_prediction{ - .high_res_A = experiment.GetBraggIntegrationSettings().GetDMinLimit_A(), - .ewald_dist_cutoff = static_cast(data.ewald_dist_cutoff), - .max_hkl = 100, - .centering = data.centering, - .bandwidth_sigma = static_cast(data.bandwidth) // relative Δλ/λ sigma - }; - - const int radius = data.shoebox_radius; - const int bkg_outer_radius = std::max(radius + 1, data.bkg_outer_radius); - - // Per-reflection polarization correction (raw = true * pol), evaluated once at - // the predicted spot - same handling as BraggIntegrate2D. Identity if disabled. - const auto pol_factor = experiment.GetPolarizationFactor(); - auto polarization = [&](double x, double y) -> double { - if (!pol_factor) - return 1.0; - return data.geom.CalcAzIntPolarizationCorr(static_cast(x), static_cast(y), - pol_factor.value()); - }; - - // Bandwidth radial-width^2 (in the code's R = sqrt(2)*sigma convention): - // R_bw^2 = (b*lambda)^2 / (2 d^4), b = relative bandwidth (sigma). - // A fixed per-reflection constant; data.bandwidth == 0 -> monochromatic no-op. - const double bw = data.bandwidth; - auto bandwidth_radial_sq = [&](double d) -> double { - if (bw <= 0.0 || d <= 0.0) - return 0.0; - const double bl = bw * lambda; - return bl * bl / (2.0 * d * d * d * d); - }; - - // Geometry is FIXED here: orientation/cell/detector were already refined upstream - // by XtalOptimizer (IndexAndRefine::RefineGeometryIfNeeded). PixelRefine is an - // intensity-only operation - it predicts shoeboxes with this geometry, measures the - // tangential profile width, and fits the per-image scale G (and B) to the reference. - double beam[2], dist_mm, detector_rot[2]; - double latt_vec0[3], latt_vec1[3], latt_vec2[3]; - BuildParameterBlocks(data, beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2); - - // ---- 1. Predict shoeboxes for the current geometry ------------------------ - DiffractionExperiment exp_iter = experiment; - exp_iter.BeamX_pxl(data.geom.GetBeamX_pxl()) - .BeamY_pxl(data.geom.GetBeamY_pxl()) - .DetectorDistance_mm(data.geom.GetDetectorDistance_mm()) - .PoniRot1_rad(data.geom.GetPoniRot1_rad()) - .PoniRot2_rad(data.geom.GetPoniRot2_rad()); - const int nrefl = prediction.Calc(exp_iter, data.latt, settings_prediction); - - // ---- 2. Collect per-reflection shoebox pixels + local background ---------- - // GetReflections() returns the full pre-sized buffer; only the first nrefl - // entries are valid for this image. A spot-core mask over ALL predictions keeps - // each reflection's background ring from picking up a neighbour's signal. - const auto &predicted = prediction.GetReflections(); - const auto spot_mask = BuildSpotMask(predicted, nrefl, xpixel, ypixel, radius); - - std::vector groups; - for (int ri = 0; ri < nrefl; ++ri) { - const auto &refl = predicted[ri]; - const auto hkl = hkl_key_generator(refl); - if (!reference_data.contains(hkl)) - continue; - - // Local flat background from the ring around the shoebox (raw counts). If we - // cannot estimate a clean local background the reflection is dropped, exactly - // as BraggIntegrate2D marks it unobserved when too few background pixels survive. - double Ibkg = 0.0; - if (!EstimateLocalBackground(image, spot_mask, xpixel, ypixel, - refl.predicted_x, refl.predicted_y, - radius, bkg_outer_radius, Ibkg)) - continue; - - ReflGroup g; - g.h = refl.h; - g.k = refl.k; - g.l = refl.l; - g.d = refl.d; - g.Itrue = reference_data[hkl]; - g.R_bw_sq = bandwidth_radial_sq(refl.d); - g.pol = polarization(refl.predicted_x, refl.predicted_y); - g.Ibkg = Ibkg; - g.predicted_x = refl.predicted_x; - g.predicted_y = refl.predicted_y; - - const auto box = ShoeboxBounds(refl.predicted_x, refl.predicted_y, radius, xpixel, ypixel); - for (int y = box.min_y; y <= box.max_y; ++y) { - for (int x = box.min_x; x <= box.max_x; ++x) { - const size_t npixel = xpixel * y + x; - // Skip sentinel (masked / saturated) pixels. - if (image[npixel] == std::numeric_limits::max()) - continue; - if (std::is_signed_v && (image[npixel] == std::numeric_limits::min())) - continue; - g.pixels.push_back({static_cast(x), static_cast(y), - static_cast(image[npixel]), Ibkg}); - } - } - if (!g.pixels.empty()) - groups.push_back(std::move(g)); - } - if (groups.empty()) - return; - - // ---- 3. Term 2: per-resolution tangential profile width R1 ---------------- - // R1 = sqrt(2*) from the intensity-weighted tangential second moment of - // the strong spots, binned by resolution (low res small spots, high res larger). - // A *shape* statistic, normalised by the total intensity, so it is decoupled from - // the per-image scale - which is what makes measuring it (rather than fitting it, - // where it is degenerate with G) stable. Weak spots fall back to the global R[1]. - for (auto &g : groups) - g.R1_eff = data.R[1]; - { - constexpr int n_bins = 6; - const double box_px = (2.0 * radius + 1.0) * (2.0 * radius + 1.0); - double s2min = 1e30, s2max = 0.0; - for (const auto &g : groups) { - const double s2 = 1.0 / (g.d * g.d); - s2min = std::min(s2min, s2); - s2max = std::max(s2max, s2); - } - const double span = std::max(s2max - s2min, 1e-12); - auto bin_of = [&](double d) { - return std::clamp(static_cast((1.0 / (d * d) - s2min) / span * n_bins), 0, n_bins - 1); - }; - std::vector> bin_M2(n_bins); - for (const auto &g : groups) { - double sw = 0.0, sw_et2 = 0.0; - for (const auto &px : g.pixels) { - double q_sq, eps_r, eps_t_sq; - if (!GeometryProbe(px.x, px.y, lambda, pixel_size, g.h, g.k, g.l, - beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2, - q_sq, eps_r, eps_t_sq)) - continue; - const double w = std::max(px.Iobs - g.Ibkg, 0.0); - sw += w; - sw_et2 += w * eps_t_sq; - } - if (sw <= 0.0) - continue; - const double signif = sw / std::sqrt(std::max(box_px * g.Ibkg, 1.0)); - if (signif >= 5.0) // only well-measured spots define the shape - bin_M2[bin_of(g.d)].push_back(sw_et2 / sw); - } - std::vector bin_R1(n_bins, data.R[1]); - for (int b = 0; b < n_bins; ++b) - if (bin_M2[b].size() >= 5) { - const double r1 = data.r1_multiplier * std::sqrt(2.0 * std::max(MedianInPlace(bin_M2[b]), 0.0)); - if (std::isfinite(r1) && r1 > 1e-4) - bin_R1[b] = std::clamp(r1, 1e-4, 0.05); - } - for (auto &g : groups) - g.R1_eff = bin_R1[bin_of(g.d)]; - } - - // ---- 4. Term 1: one intensity residual per reflection; fit G (and B) ------ - // J / partiality / sigma_J are computed here as constants (geometry & R fixed), - // and only the per-image scale G and Debye-Waller B are optimised. - ceres::Problem problem; - size_t n_blocks = 0; - const double R0 = data.R[0]; - for (const auto &g : groups) { - const double R1 = g.R1_eff; // Term 2: per-resolution profile width - if (!(R1 > 0.0) || !(R0 > 0.0)) - continue; - double num = 0.0, den = 0.0, rad = 0.0; - std::vector> pt_sig; // (P_t, Iobs-Bg) for the Fisher pass - pt_sig.reserve(g.pixels.size()); - for (const auto &px : g.pixels) { - double q_sq, eps_r, eps_t_sq; - if (!GeometryProbe(px.x, px.y, lambda, pixel_size, g.h, g.k, g.l, - beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2, - q_sq, eps_r, eps_t_sq)) - continue; - const double P_t = std::exp(-eps_t_sq / (R1 * R1)) / (PI * R1 * R1); - const double R0_eff_sq = R0 * R0 + g.R_bw_sq; - const double P_rad = std::exp(-eps_r * eps_r / R0_eff_sq); - const double v = std::max(g.Ibkg, 1.0); - const double sig = px.Iobs - g.Ibkg; - num += P_t * sig / v; - den += P_t * P_t / v; - rad += P_rad * P_t * P_t / v; - pt_sig.emplace_back(P_t, sig); - } - if (!(den > 0.0)) - continue; - const double J = num / den; - const double partiality = rad / den; - // Model-expected (Fisher) variance: v_p = background + expected signal J*P_t, - // not the per-pixel observed counts (which down-bias) - so the weight tracks - // information, and an expected-strong reflection that is absent hurts. - double den_f = 0.0; - for (const auto &[P_t, sig] : pt_sig) { - const double v_f = std::max(g.Ibkg + std::max(J, 0.0) * P_t, 1.0); - den_f += P_t * P_t / v_f; - } - const double sigma_J = std::sqrt(1.0 / std::max(den_f, 1e-30)); - const double inv_4d2 = (g.d > 0.0) ? 1.0 / (4.0 * g.d * g.d) : 0.0; - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityResidual(J, sigma_J, partiality, g.pol, g.Itrue, inv_4d2)); - problem.AddResidualBlock(cost, nullptr, &data.scale_factor, &data.B_factor); - ++n_blocks; - } - data.residual_count = n_blocks; - if (n_blocks == 0) - return; - - // G >= 0; B fixed unless requested; G regularized -> 1 with weight sqrt(n/sigma) - // (mirrors ScaleOnTheFly) so weakly-measured images cannot drift and scramble the merge. - problem.SetParameterLowerBound(&data.scale_factor, 0, 0.0); - if (!data.refine_B) - problem.SetParameterBlockConstant(&data.B_factor); - if (data.scale_reg_sigma > 0.0) { - const double w = std::sqrt(static_cast(groups.size()) / data.scale_reg_sigma); - auto *reg = new ceres::AutoDiffCostFunction( - new ScalarRegularizer(w, 1.0)); - problem.AddResidualBlock(reg, nullptr, &data.scale_factor); - } - - ceres::Solver::Options options; - options.linear_solver_type = ceres::DENSE_QR; - options.minimizer_progress_to_stdout = false; - options.logging_type = ceres::LoggingType::SILENT; - options.max_solver_time_in_seconds = data.max_time_s; - options.num_threads = 1; - ceres::Solver::Summary summary; - ceres::Solve(options, &problem, &summary); - data.final_cost = summary.final_cost; - data.solved = summary.IsSolutionUsable(); - - // ---- 5. Extract integrated reflections ------------------------------------ - // Profile fitting gives the recorded amplitude (fitting the tangential profile P_t - // against the background-subtracted pixels): - // J = sum_p[ P_t,p (Iobs_p - Ibkg)/v_p ] / sum_p[ P_t,p^2 / v_p ] - // ~ G * Itrue * B_term * partiality * pol (recorded raw counts) - // var(J) = 1 / sum_p[ P_t,p^2 / v_p ] - // - // Output split (Merge multiplies r.I * image_scale_corr and weights by - // 1/(sigma*image_scale_corr)^2): - // r.I = J / (B_term * partiality * pol) = G * Itrue - // r.sigma = sqrt(var(J)) / (B_term * partiality * pol) - // r.partiality = profile-weighted P_radial in (0,1] (the rocking fraction) - // r.completeness = live/total tangential profile in (0,1] (detector clipping) - // r.image_scale_corr = 1/G (per-image scale ONLY) - // so r.I * image_scale_corr = Itrue: B, partiality and polarization live on the - // intensity, G lives on image_scale_corr - one clean meaning per field. We walk the - // full (unclamped) shoebox once: every grid point feeds the total tangential profile - // (completeness denominator); points that are real, live pixels also feed the fit. - data.reflections.reserve(groups.size()); - for (const auto &g : groups) { - const int cx = static_cast(std::lround(g.predicted_x)); - const int cy = static_cast(std::lround(g.predicted_y)); - const double B_term = std::exp(-data.B_factor / (4.0 * g.d * g.d)); - - double num = 0.0, den = 0.0, bkg_sum = 0.0, radial_sum = 0.0; - double prof_live = 0.0, prof_full = 0.0; // tangential profile: captured / total - size_t n = 0; - - for (int y = cy - radius; y <= cy + radius; ++y) { - for (int x = cx - radius; x <= cx + radius; ++x) { - double q_sq, eps_r, eps_t_sq; - if (!GeometryProbe(static_cast(x), static_cast(y), - lambda, pixel_size, g.h, g.k, g.l, - beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2, - q_sq, eps_r, eps_t_sq)) - continue; - if (!(data.R[0] > 0.0) || !(g.R1_eff > 0.0)) - continue; - - // Tangential profile shape (area-normalized) -> the fit template, using the - // per-reflection R1_eff (Term 2). - const double R1 = g.R1_eff; - const double P_t = std::exp(-eps_t_sq / (R1 * R1)) / (PI * R1 * R1); - prof_full += P_t; // whole shoebox, on- or off-detector - - // Only real, unmasked detector pixels carry signal. - if (x < 0 || x >= static_cast(xpixel) || y < 0 || y >= static_cast(ypixel)) - continue; - const size_t np = static_cast(xpixel) * y + x; - if (image[np] == std::numeric_limits::max()) - continue; - if (std::is_signed_v && image[np] == std::numeric_limits::min()) - continue; - - const double Iobs = static_cast(image[np]); // raw counts - // Background-limited variance (constant over the shoebox): weighting by - // the observed count biases the amplitude negative where signal is weakest. - const double v = std::max(g.Ibkg, 1.0); - - // Peak-normalized radial factor (the partiality), in (0,1]. MUST use the - // same P_t^2/v weights as the amplitude, else an R0_eff-dependent (hence - // resolution-dependent) factor is left behind in r.I. - const double R0_eff_sq = data.R[0] * data.R[0] + g.R_bw_sq; - const double P_radial = std::exp(-eps_r * eps_r / R0_eff_sq); - - const double w = P_t * P_t / v; - num += P_t * (Iobs - g.Ibkg) / v; - den += w; - radial_sum += P_radial * w; - prof_live += P_t; - bkg_sum += g.Ibkg; - ++n; - } - } - - Reflection r{}; - r.h = g.h; - r.k = g.k; - r.l = g.l; - r.d = static_cast(g.d); - r.predicted_x = static_cast(g.predicted_x); - r.predicted_y = static_cast(g.predicted_y); - r.observed_x = NAN; - r.observed_y = NAN; - r.rlp = 1.0f; - r.partiality = (den > 0.0) ? static_cast(radial_sum / den) : 1.0f; - r.completeness = (prof_full > 0.0) ? static_cast(prof_live / prof_full) : 1.0f; - - if (den > 0.0 && n > 0) { - const double I_amp = num / den; // ~ G*Itrue*B_term*partiality*pol - const double sigma_amp = std::sqrt(1.0 / den); - const double corr = static_cast(r.partiality) * B_term * g.pol; - r.bkg = static_cast(bkg_sum / static_cast(n)); - r.observed = true; - - if (corr > 0.0 && data.scale_factor > 0.0) { - r.I = static_cast(I_amp / corr); // = G*Itrue - r.sigma = static_cast(sigma_amp / corr); - r.image_scale_corr = static_cast(1.0 / data.scale_factor); // G only - } else { - r.I = static_cast(I_amp); - r.sigma = static_cast(sigma_amp); - r.image_scale_corr = NAN; - } - } else { - r.I = 0.0f; - r.sigma = NAN; - r.bkg = 0.0f; - r.observed = false; - } - data.reflections.push_back(r); - } - - // ---- 6. Per-image CC vs reference (diagnostic) ---------------------------- - // Pearson CC of the scaled estimate (r.I * image_scale_corr = Itrue_est) against - // the reference intensities, over the matched reflections. - { - double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0; - size_t cn = 0; - for (const auto &r : data.reflections) { - if (!r.observed || !std::isfinite(r.I) || !std::isfinite(r.image_scale_corr)) - continue; - const auto it = reference_data.find(hkl_key_generator(r)); - if (it == reference_data.end()) - continue; - const double x = static_cast(r.I) * static_cast(r.image_scale_corr); - const double y = it->second; - if (!std::isfinite(x) || !std::isfinite(y)) - continue; - sx += x; sy += y; sxx += x * x; syy += y * y; sxy += x * y; ++cn; - } - data.cc = NAN; - data.cc_n = static_cast(cn); - if (cn >= 3) { - const double nd = static_cast(cn); - const double cov = sxy - sx * sy / nd; - const double vx = sxx - sx * sx / nd; - const double vy = syy - sy * sy / nd; - if (vx > 0.0 && vy > 0.0) - data.cc = cov / std::sqrt(vx * vy); - } - } -} - -template void PixelRefine::Run(const int8_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const int16_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const int32_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const uint8_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const uint16_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const uint32_t *, BraggPrediction &, PixelRefineData &); diff --git a/image_analysis/pixel_refinement/PixelRefine.h b/image_analysis/pixel_refinement/PixelRefine.h deleted file mode 100644 index b180e1de..00000000 --- a/image_analysis/pixel_refinement/PixelRefine.h +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#pragma once - -#include "../bragg_prediction/BraggPrediction.h" -#include "../common/DiffractionExperiment.h" -#include "../scale_merge/HKLKey.h" - -// ============================================================================= -// PixelRefine — reference-driven profile-fit integration + scaling for stills -// ============================================================================= -// -// PixelRefine is the still-image integrator: given a reference set of merged -// intensities I_ref (the current best hypothesis for each reflection's full -// intensity), it integrates one image and returns already-scaled intensities. It -// is an *intensity-wise* operation - the detector geometry is taken as fixed (it -// was refined upstream by XtalOptimizer in IndexAndRefine::RefineGeometryIfNeeded); -// PixelRefine does not touch orientation, cell or detector parameters. -// -// The objective is the factored per-reflection likelihood of FACTORED_MODEL.md, -// Terms 1 + 2: -// -// Term 2 (shape) — for each resolution shell, the tangential profile width R1 is -// *measured* from the intensity-weighted second moment of the strong spots: -// R1 = sqrt(2*). A second moment is normalised by the total intensity, -// so it is decoupled from the per-image scale - which is why measuring R1 is -// stable where *fitting* it (degenerate with G) is not. -// -// Term 1 (intensity / scaling) — one residual per reflection: the profile-fit -// amplitude J (using the Term-2 R1) should equal the scaled reference -// J_model = G * exp(-B/4d^2) * partiality * pol * I_ref, -// weighted by the model-expected (Fisher) sigma_J. Only the per-image scale G -// and Debye-Waller B are optimised. Integration and scaling become one objective; -// the many empty shoebox pixels enter only through J (with ~zero profile weight) -// instead of dominating a per-pixel loss. -// -// I_ref is NOT refined here - it is a fixed hypothesis for the pass. The intended -// outer loop is EM-like: run PixelRefine on every image against the current I_ref, -// re-merge to a new I_ref, repeat. -// -// Forward model per pixel (raw detector counts, no per-pixel solid-angle/Lorentz -// weighting - same units as the classical integrator): -// signal = G * I_ref * B_term * P_radial * P_tangential * pol , + I_bkg -// B_term = exp(-B |q|^2 / 4) (Debye-Waller) -// P_radial = exp(-eps_r^2 / R0_eff^2) (still partiality, <= 1) -// P_tangential = exp(-eps_t^2 / R1^2) / (pi R1^2) (area-normalized profile) -// where eps_r / eps_t are the radial / tangential deviations of the pixel from the -// predicted node, and pol is the per-reflection polarization correction. -// -// X-ray bandwidth (optional): a finite bandwidth thickens the Ewald shell radially, -// adding a fixed, resolution-dependent term to R0 that grows like 1/d^2 (the -// pink-beam/DMM signature): R0_eff^2 = R0^2 + (b*lambda)^2/(2 d^4). b = 0 (the -// default) is a monochromatic no-op; set it for DMM-type data, leave it for Si. -// ============================================================================= - -struct PixelRefineData { - // --- model state (input as initial guess, output as refined result) --- - DiffractionGeometry geom; // fixed (refined upstream by XtalOptimizer) - CrystalLattice latt; // fixed - char centering = 'P'; - - double B_factor = 0.0; // Debye-Waller B (A^2), refined - double scale_factor = 1.0; // overall per-image scale G, refined - double R[2] = {0.005, 0.005}; // R[0] = radial (partiality) width; R[1] = fallback - // tangential profile width before Term 2 measures it (A^-1) - bool refine_B = true; // refine the per-image B-factor along with G - - // Term 2 measures the physical tangential width R1, but the *integration* profile must - // be generous (XDS-style: integrate over ~6 sigma) or a tight template centred on the - // prediction sits off the ~0.4 px centroid-floor scatter and underestimates the - // intensity (validated on the jet R-free: 0.34 with the raw width -> 0.26 at x6). The - // measured R1 is multiplied by this before use. - double r1_multiplier = 6.0; - - // Relative X-ray bandwidth (sigma of dlambda/lambda), e.g. ~0.004 for a 1% FWHM - // DMM, ~1e-4 for Si(111). Adds a resolution-dependent radial broadening to R[0]. - // 0 = monochromatic (the term switches off entirely). - double bandwidth = 0.0; - - // Per-image scale G is regularized towards 1 with weight sqrt(n_refl/scale_reg_sigma) - // (mirrors ScaleOnTheFly). Without this the unconstrained G wanders on weakly - // measured images and 1/G scrambles the cross-image merge. <= 0 disables. - double scale_reg_sigma = 2.0; - - // Radial Ewald-sphere acceptance band for prediction (A^-1): a reflection is given - // a shoebox when ||S|-1/lambda| <= this. Widened from the on-sphere default towards - // the integrator's profile radius so slightly-misaligned high-resolution reflections - // are still integrated (multiplicity), while the partiality downweights their tails. - double ewald_dist_cutoff = 0.0020; - - double max_time_s = 5.0; - int shoebox_radius = 3; // half-size of the per-reflection signal box - // Half-size of the local-background sampling box. Background is the MEAN of the ring - // shoebox_radius < |dx|,|dy| <= bkg_outer_radius (excluding spot cores), like - // BraggIntegrate2D. Must be > shoebox_radius. - int bkg_outer_radius = 6; - - // --- output --- - std::vector reflections; // profile-fitted, scaled integration result - bool solved = false; - double final_cost = NAN; - size_t residual_count = 0; - double cc = NAN; // per-image CC of scaled intensities vs reference - int64_t cc_n = 0; // number of reflections in the CC -}; - -class PixelRefine { - const size_t xpixel, ypixel; - const DiffractionExperiment &experiment; - - const HKLKeyGenerator hkl_key_generator; - std::map reference_data; - - // Fills the fixed geometry (beam, distance, detector tilt) and the three - // real-space lattice column vectors from the current model state, for the - // per-pixel geometry evaluation. - void BuildParameterBlocks(const PixelRefineData &data, - double beam[2], double &dist_mm, - double detector_rot[2], - double latt_vec0[3], double latt_vec1[3], double latt_vec2[3]) const; -public: - PixelRefine(const DiffractionExperiment &experiment, - const std::vector &reference); - - // The BraggPrediction is supplied per call (it is mutated): this keeps a single - // PixelRefine instance usable from several threads, each passing its own prediction - // buffer. Only `data` is written; PixelRefine state is const. The image is in raw - // detector counts (masked/saturated pixels carry the type sentinel); background is - // estimated locally per shoebox from the image itself. - template - void Run(const T *image, - BraggPrediction &prediction, - PixelRefineData &data); -}; diff --git a/process/JFJochProcess.cpp b/process/JFJochProcess.cpp index 59070af3..ab7e9d47 100644 --- a/process/JFJochProcess.cpp +++ b/process/JFJochProcess.cpp @@ -409,12 +409,9 @@ ProcessResult JFJochProcess::Run(JFJochProcessObserver *observer) { }; phase("Scaling and merging"); - const bool pixel_refine_path = - experiment_.GetIndexingSettings().GetGeomRefinementAlgorithm() == GeomRefinementAlgorithmEnum::PixelRefine; - - // ScaleOnTheFly is only for the classical, no-reference path; with a reference (or - // PixelRefine) each image is already scaled, so we merge directly. - if (config_.reference_data.empty() && !pixel_refine_path) { + // ScaleOnTheFly self-scaling is only for the no-reference path; with a reference each image + // is already scaled against it during the per-image pass, so we merge directly. + if (config_.reference_data.empty()) { logger.Info("Running scaling ..."); ScalingResult scale_result(0); double t_merge_all = 0.0, t_scale = 0.0; diff --git a/process/JFJochProcessCommandLine.cpp b/process/JFJochProcessCommandLine.cpp index 50586ae3..caa165ec 100644 --- a/process/JFJochProcessCommandLine.cpp +++ b/process/JFJochProcessCommandLine.cpp @@ -36,7 +36,6 @@ namespace { switch (r) { case GeomRefinementAlgorithmEnum::None: return "none"; case GeomRefinementAlgorithmEnum::OrientationOnly: return "orientation"; - case GeomRefinementAlgorithmEnum::PixelRefine: return "pixelrefine"; case GeomRefinementAlgorithmEnum::BeamCenter: default: return "beam_and_lattice"; } diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index dfc1310e..762f2af7 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -44,7 +44,7 @@ void print_usage() { std::cout << " -X, --indexing-algorithm Indexing algorithm (FFBIDX|FFT|FFTW|Auto|None)" << std::endl; std::cout << " -S, --space-group Space group number - used for both indexing and scaling" << std::endl; std::cout << " -C, --unit-cell Fix reference unit cell: \"a,b,c,alpha,beta,gamma\"" << std::endl; - std::cout << " -r, --refine Geometry refinement algorithm (none|orientation|beam_and_lattice|pixelrefine)" << std::endl; + std::cout << " -r, --refine Geometry refinement algorithm (none|orientation|beam_and_lattice)" << std::endl; std::cout << std::endl; std::cout << " Scaling and merging" << std::endl; @@ -65,11 +65,10 @@ void print_usage() { std::cout << " --reference-column