diff --git a/common/BraggIntegrationSettings.cpp b/common/BraggIntegrationSettings.cpp index d9353b86..d69a0028 100644 --- a/common/BraggIntegrationSettings.cpp +++ b/common/BraggIntegrationSettings.cpp @@ -81,17 +81,6 @@ float BraggIntegrationSettings::GetR1() const { return r_1; } -float BraggIntegrationSettings::GetProfileMultiplier() const { - return profile_multiplier; -} - -BraggIntegrationSettings &BraggIntegrationSettings::ProfileMultiplier(float input) { - if (input <= 0.0f) - throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Profile multiplier must be positive"); - profile_multiplier = input; - return *this; -} - float BraggIntegrationSettings::GetR2() const { return r_2; } diff --git a/common/BraggIntegrationSettings.h b/common/BraggIntegrationSettings.h index 0b35c7ee..073a5d30 100644 --- a/common/BraggIntegrationSettings.h +++ b/common/BraggIntegrationSettings.h @@ -20,10 +20,6 @@ class BraggIntegrationSettings { float d_min_limit_A = 1.0; std::optional fixed_profile_radius; float minimum_sigma_in_regards_to_i = 0.02; - // PixelRefine only: the measured (physical) tangential profile width is multiplied by - // this for the integration template, so the aperture is generous (XDS-style ~6 sigma) - // and tolerant of the centroid-floor scatter. Ignored by the classical integrator. - float profile_multiplier = 6.0; public: BraggIntegrationSettings& R1(float input); @@ -31,7 +27,6 @@ public: BraggIntegrationSettings& R3(float input); BraggIntegrationSettings& DMinLimit_A(float input); BraggIntegrationSettings& FixedProfileRadius_recipA(std::optional input); - BraggIntegrationSettings& ProfileMultiplier(float input); BraggIntegrationSettings& Integrator(IntegratorMode input); @@ -39,7 +34,6 @@ public: [[nodiscard]] float GetR1() const; [[nodiscard]] float GetR2() const; [[nodiscard]] float GetR3() const; - [[nodiscard]] float GetProfileMultiplier() const; [[nodiscard]] std::optional GetFixedProfileRadius_recipA() const; [[nodiscard]] float GetDMinLimit_A() const; diff --git a/common/IndexingSettings.h b/common/IndexingSettings.h index c428f02e..6e900440 100644 --- a/common/IndexingSettings.h +++ b/common/IndexingSettings.h @@ -6,7 +6,7 @@ #include enum class IndexingAlgorithmEnum {FFBIDX, FFT, FFTW, Auto, None}; -enum class GeomRefinementAlgorithmEnum {None, OrientationOnly, BeamCenter, PixelRefine}; +enum class GeomRefinementAlgorithmEnum {None, OrientationOnly, BeamCenter}; class IndexingSettings { IndexingAlgorithmEnum algorithm; diff --git a/image_analysis/CMakeLists.txt b/image_analysis/CMakeLists.txt index 57d3b884..7a9f08e0 100644 --- a/image_analysis/CMakeLists.txt +++ b/image_analysis/CMakeLists.txt @@ -52,6 +52,5 @@ ADD_SUBDIRECTORY(scale_merge) ADD_SUBDIRECTORY(image_preprocessing) ADD_SUBDIRECTORY(azint) ADD_SUBDIRECTORY(roi) -ADD_SUBDIRECTORY(pixel_refinement) -TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochROIIntegration JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge JFJochPixelRefine gemmi) +TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochAzIntEngine JFJochROIIntegration JFJochImagePreprocessing JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement JFJochScaleMerge gemmi) diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index f47fb4f9..64a394f0 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -206,10 +206,6 @@ void IndexAndRefine::RefineGeometryIfNeeded(DataMessage &msg, IndexAndRefine::In XtalOptimizerRotationOnly(data, msg.spots, 0.05); break; case GeomRefinementAlgorithmEnum::BeamCenter: - case GeomRefinementAlgorithmEnum::PixelRefine: - // PixelRefine still benefits from the classical beam-center + cell - // refinement as a starting point; the pixel-level refinement runs - // later, during integration. if (XtalOptimizer(data, {msg.spots})) { outcome.experiment.BeamX_pxl(data.geom.GetBeamX_pxl()) .BeamY_pxl(data.geom.GetBeamY_pxl()); @@ -323,34 +319,20 @@ void IndexAndRefine::QuickPredictAndIntegrate(DataMessage &msg, .bandwidth_sigma = experiment.GetBandwidthFWHM().value_or(0.0f) / 2.3548f }; - // Select the integration path: classical 2D integration, or the experimental - // PixelRefine joint geometry/profile/scale refinement (which also produces the - // integrated reflections that flow into the normal save/merge). - const bool use_pixel_refine = - experiment.GetIndexingSettings().GetGeomRefinementAlgorithm() == GeomRefinementAlgorithmEnum::PixelRefine - && !pixel_reference_.empty(); + // Predict, then integrate with the selected integrator (box-sum or profile-fit). + auto pred_start_time = std::chrono::steady_clock::now(); + auto nrefl = prediction.Calc(outcome.experiment, latt, settings_prediction); + auto pred_end_time = std::chrono::steady_clock::now(); + msg.bragg_prediction_time_s = std::chrono::duration(pred_end_time - pred_start_time).count(); - if (use_pixel_refine) { - auto integration_start_time = std::chrono::steady_clock::now(); - PixelRefineIntegrate(msg, image, prediction, outcome, i_outcome); - msg.integrated_reflections = i_outcome.reflections.size(); - auto integration_end_time = std::chrono::steady_clock::now(); - msg.integration_time_s = std::chrono::duration(integration_end_time - integration_start_time).count(); - } else { - auto pred_start_time = std::chrono::steady_clock::now(); - auto nrefl = prediction.Calc(outcome.experiment, latt, settings_prediction); - auto pred_end_time = std::chrono::steady_clock::now(); - msg.bragg_prediction_time_s = std::chrono::duration(pred_end_time - pred_start_time).count(); - - auto integration_start_time = std::chrono::steady_clock::now(); - i_outcome.reflections = - outcome.experiment.GetBraggIntegrationSettings().GetIntegrator() == IntegratorMode::BoxSum - ? BraggIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number) - : ProfileIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number); - msg.integrated_reflections = i_outcome.reflections.size(); - auto integration_end_time = std::chrono::steady_clock::now(); - msg.integration_time_s = std::chrono::duration(integration_end_time - integration_start_time).count(); - } + auto integration_start_time = std::chrono::steady_clock::now(); + i_outcome.reflections = + outcome.experiment.GetBraggIntegrationSettings().GetIntegrator() == IntegratorMode::BoxSum + ? BraggIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number) + : ProfileIntegrate2D(outcome.experiment, image, prediction.GetReflections(), nrefl, msg.number); + msg.integrated_reflections = i_outcome.reflections.size(); + auto integration_end_time = std::chrono::steady_clock::now(); + msg.integration_time_s = std::chrono::duration(integration_end_time - integration_start_time).count(); constexpr size_t kMaxReflections = 10000; if (i_outcome.reflections.size() > kMaxReflections) { @@ -371,10 +353,7 @@ void IndexAndRefine::QuickPredictAndIntegrate(DataMessage &msg, CalcISigma(msg, i_outcome.reflections); CalcWilsonBFactor(msg, i_outcome.reflections); - // PixelRefine produces already-scaled reflections; only the classical path - // needs the separate ScaleOnTheFly step. - if (!use_pixel_refine) - ScaleImage(msg, i_outcome); + ScaleImage(msg, i_outcome); // Copy reflections to outgoing message msg.reflections = i_outcome.reflections; @@ -434,7 +413,6 @@ std::optional IndexAndRefine::FinalizeRotationIndexing() IndexAndRefine &IndexAndRefine::ReferenceIntensities(std::vector &reference) { scaling_engine = std::make_unique(experiment, reference); - pixel_reference_ = reference; // kept for the experimental PixelRefine path return *this; } @@ -454,71 +432,6 @@ void IndexAndRefine::ScaleImage(DataMessage &msg, IntegrationOutcome& outcome) { msg.image_scale_time_s = std::chrono::duration(scaling_end_time - scaling_start_time).count(); } -bool IndexAndRefine::PixelRefineIntegrate(DataMessage &msg, - const CompressedImage &image, - BraggPrediction &prediction, - const IndexAndRefine::IndexingOutcome &outcome, - IntegrationOutcome &i_outcome) { - if (!outcome.lattice_candidate) - return false; - - // Build the engine once (lazy). - std::call_once(pixel_refine_once_, [&] { - pixel_refine_ = std::make_unique(experiment, pixel_reference_); - }); - if (!pixel_refine_) - return false; - - PixelRefineData prd; - prd.geom = outcome.experiment.GetDiffractionGeometry(); - prd.latt = *outcome.lattice_candidate; - prd.centering = outcome.symmetry.centering; - if (const auto bw = experiment.GetBandwidthFWHM()) - prd.bandwidth = bw.value() / 2.3548; // FWHM -> sigma - - // Signal-box radius from the shared integration setting (same knob as BraggIntegrate2D). - prd.shoebox_radius = static_cast(std::lround(experiment.GetBraggIntegrationSettings().GetR1())); - prd.r1_multiplier = experiment.GetBraggIntegrationSettings().GetProfileMultiplier(); - - std::vector buffer; - const uint8_t *ptr = image.GetUncompressedPtr(buffer); - switch (image.GetMode()) { - case CompressedImageMode::Int8: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Int16: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Int32: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Uint8: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Uint16: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - case CompressedImageMode::Uint32: - pixel_refine_->Run(reinterpret_cast(ptr), prediction, prd); break; - default: - return false; - } - - // PixelRefine output flows into the normal save/merge path: the refined - // geometry/lattice and the already-scaled reflections become the outcome. - i_outcome.reflections = std::move(prd.reflections); - i_outcome.geom = prd.geom; - i_outcome.latt = prd.latt; - i_outcome.image_scale_g = static_cast(prd.scale_factor); - i_outcome.image_scale_b_factor_Ang2 = static_cast(prd.B_factor); - if (std::isfinite(prd.cc)) { - i_outcome.image_scale_cc = static_cast(prd.cc); - i_outcome.image_scale_cc_n = prd.cc_n; - } - - msg.image_scale_factor = static_cast(prd.scale_factor); - msg.image_scale_cc = i_outcome.image_scale_cc; - if (prd.B_factor != 0.0) - msg.image_scale_b_factor = static_cast(prd.B_factor); - - return true; -} - ScalingResult IndexAndRefine::ScaleAllImages(const std::vector &reference, size_t nthreads) { ScaleOnTheFly scaling(experiment, reference); scaling.Scale(integration_outcome, nthreads); diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index 5f42194d..b8d2a0ed 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -11,7 +11,6 @@ #include "../common/AzimuthalIntegrationMapping.h" #include "../common/AzimuthalIntegrationProfile.h" #include "bragg_prediction/BraggPrediction.h" -#include "pixel_refinement/PixelRefine.h" #include "indexing/IndexerThreadPool.h" #include "lattice_search/LatticeSearch.h" #include "rotation_indexer/RotationIndexer.h" @@ -70,18 +69,6 @@ class IndexAndRefine { std::unique_ptr scaling_engine; void ScaleImage(DataMessage &msg, IntegrationOutcome& outcome); - // Experimental PixelRefine integration path (selected via - // GeomRefinementAlgorithmEnum::PixelRefine). Needs reference intensities; the - // engine is built lazily on first use (when the azimuthal mapping is known) - // and is safe to share across threads (prediction is supplied per call). - std::vector pixel_reference_; - std::unique_ptr pixel_refine_; - std::once_flag pixel_refine_once_; - bool PixelRefineIntegrate(DataMessage &msg, - const CompressedImage &image, - BraggPrediction &prediction, - const IndexingOutcome &outcome, - IntegrationOutcome &i_outcome); std::optional RotationAngle(int64_t image) const; // mid-exposure angle for the indexer public: IndexAndRefine(const DiffractionExperiment &x, IndexerThreadPool *indexer); diff --git a/image_analysis/LoadFCalcFromMtz.cpp b/image_analysis/LoadFCalcFromMtz.cpp index 2061b9d0..a618538b 100644 --- a/image_analysis/LoadFCalcFromMtz.cpp +++ b/image_analysis/LoadFCalcFromMtz.cpp @@ -18,7 +18,7 @@ namespace { // Reference intensities can come from a calculated structure factor F-model (squared to an // intensity), or from a merged/observed mean-intensity column (used directly - this is what lets -// PixelRefine be self-seeded from the data's own previous merge). column_with_one_of_labels would +// reference-based scaling be self-seeded from the data's own previous merge). column_with_one_of_labels would // hide which label matched, so the priority list is walked explicitly to record the choice. const gemmi::Mtz::Column* SelectDefaultColumn(const gemmi::Mtz& mtz, bool& square) { if (const auto* col = mtz.column_with_label("F-model", nullptr, 'F')) { diff --git a/image_analysis/LoadFCalcFromMtz.h b/image_analysis/LoadFCalcFromMtz.h index 3785ad79..284867e2 100644 --- a/image_analysis/LoadFCalcFromMtz.h +++ b/image_analysis/LoadFCalcFromMtz.h @@ -39,7 +39,7 @@ struct ReferenceMtzData { // Load reference reflections from an MTZ. With no column requested the smart default is used: // a calculated structure factor F-model (squared to an intensity), else a merged/observed // intensity column (IMEAN/I/IOBS/...), else any mean-intensity (J) column - this also lets -// PixelRefine be self-seeded from the data's own previous merge. A requested column overrides +// reference-based scaling be self-seeded from the data's own previous merge. A requested column overrides // the default (an 'F'-type column is squared, a 'J'-type used directly). Throws if the requested // column is missing or no usable column exists. ReferenceMtzData LoadReferenceMtz(const std::string& path, diff --git a/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md b/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md index f0e7120f..3a772db2 100644 --- a/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md +++ b/image_analysis/bragg_integration/NEXTGEN_INTEGRATOR.md @@ -264,19 +264,34 @@ worked is **2D profile-fit integration**; the keepers are **Gaussian profile-fit --- -## "Could PixelRefine be rebuilt as integrator + scaling?" +## Lineage: PixelRefine (removed 2026-06-25) -Yes, and this is the clean decomposition that `ProfileIntegrate2D` realizes. PixelRefine does three -things; only the third needs a reference: -1. **Profile** — measure the tangential width per shell from strong spots (`PixelRefine.cpp:433`). - Reference-free. -2. **Extraction** — `J = Σ P(I−B)/v ÷ Σ P²/v`, de-biased variance, plus radial partiality - (`PixelRefine.cpp:491-525`). Reference-free. -3. **Scaling** — `J ≈ G·B_DW·p·pol·I_ref` (`PixelRefine.cpp:365`). **The only step using the reference.** +`ProfileIntegrate2D` is the surviving half of an earlier experimental still-integrator, **PixelRefine**, +which did three things bundled together: a reference-free **profile + extraction** (`J = Σ P(I−B)/v ÷ +Σ P²/v`, de-biased variance, plus a radial partiality) and a reference-based **per-image scaling** +(`J ≈ G·B_DW·p·pol·I_ref`). The decomposition observation was that the first two are independent of the +reference and identical in spirit to what a profile-fitting integrator does — so they were lifted out as +`ProfileIntegrate2D`, while scaling stayed in the shared `ScaleOnTheFly` (which already does reference +scaling when a reference is supplied: `IndexAndRefine::ReferenceIntensities` builds a +`ScaleOnTheFly(experiment, reference)` and the per-image pass calls it on whatever the integrator +produced — **integrator-agnostic**). -So **integrator = steps 1+2** (= `ProfileIntegrate2D`), **scaling = step 3** (or the reference-free -`ScaleOnTheFly`). The decomposition also **fixes the rot3d incompatibility**: whole-PixelRefine emits -`image_scale_corr = 1/G` with no rotation partiality (which gave garbage σ with `-P rot3d`), whereas the -decomposed integrator emits `partiality` like `BraggIntegrate2D`, so `ScaleOnTheFly` + `Combine3D` + -merge consume it unchanged. PixelRefine then becomes `ProfileIntegrate2D + reference ScaleOnTheFly`, and -the experimental coupling disappears. +**The decomposition won, so PixelRefine was deleted.** On the LysozymeJet5 serial stills, the default +Gaussian path (`ProfileIntegrate2D` + the same reference scaling) **matched or beat** whole-PixelRefine +in every per-shell CC½ (overall 95.7% vs 91.9%), ISa (1.6 vs 1.2) and R-meas (98.5% vs 175%), with +CCref a tie — same indexing, same uniques. PixelRefine collected ~1.8× more observations (a wider Ewald +band) but they were noisier and didn't help. Whole-PixelRefine was also incompatible with the rot3d +combine (it emitted `image_scale_corr = 1/G` with no rotation partiality → garbage σ), which the +decomposed integrator avoids by emitting `partiality` like `BraggIntegrate2D`. + +**Lessons inherited from PixelRefine (kept here so they survive the deletion):** +- **Mean (not median) background** in both integrators — the median of a skewed background + under-subtracts and biases weak intensities positive (fake ⟨I/σ⟩ at CC≈0). Single biggest + "untrustworthy σ" cause. +- **De-biased variance** (weight by background, not observed counts) — fixed negative high-res ⟨I/σ⟩ + and scale collapse. +- **A tight profile loses to a generous box for stills** — the ~0.4-px centroid-undersampling floor of + ~2×2 spots is a real sampling floor, not a fixable misprediction; recentring/adaptive-width add noise. + The lever is matched weighting + de-biased variance, never a tight aperture. +- **Per-image R (profile-width) refinement is futile** — G↔R is intrinsically degenerate; refine G/B + freely but never R per image (it slides the per-image scale and wrecks the merge). diff --git a/image_analysis/bragg_integration/ProfileIntegrate2D.h b/image_analysis/bragg_integration/ProfileIntegrate2D.h index 14a501c3..6cba3e98 100644 --- a/image_analysis/bragg_integration/ProfileIntegrate2D.h +++ b/image_analysis/bragg_integration/ProfileIntegrate2D.h @@ -8,11 +8,11 @@ // ============================================================================= // // A drop-in alternative to BraggIntegrate2D that replaces uniform box summation with -// profile-fitted extraction - the part of PixelRefine that does NOT need reference -// intensities (PixelRefine = this extraction + a reference-based scaling step; here only -// the extraction is kept). See NEXTGEN_INTEGRATOR.md for the rationale: the residual ~4x -// R-meas/ISa gap to XDS is the box-sum method (a fixed disk captures a width-dependent -// fraction of each spot -> ~18% multiplicative per-observation floor on strong reflections). +// profile-fitted extraction (no reference intensities needed). See NEXTGEN_INTEGRATOR.md for +// the rationale: the residual ~4x R-meas/ISa gap to XDS is the box-sum method (a fixed disk +// captures a width-dependent fraction of each spot -> ~18% multiplicative per-observation floor +// on strong reflections), and for the lineage (this is the extraction half of the former, +// now-removed PixelRefine, which beat whole-PixelRefine on the serial test). // // Output is a vector with I, sigma, partiality, d - IDENTICAL in shape to // BraggIntegrate2D - so ScaleOnTheFly, Combine3D (-P rot3d) and the merge consume it @@ -30,9 +30,8 @@ // variance v = B + max(I,0)*P (iterate), sigma = sqrt(1/sum P^2/v). Carry the rotation // partiality exactly as BraggIntegrate2D does. // -// Staging: v1 = measured-R1 Gaussian (lifts PixelRefine's extraction out, proves the -// decomposition + rot3d-combine compatibility); v2 = empirical per-shell (the expected win); -// v3 = empirical per detector-region x shell (XDS-grade). +// Staging: v1 = measured-R1 Gaussian (the keeper); v2 = empirical per-shell; v3 = empirical per +// detector-region x shell (XDS-grade). // // Selected by BraggIntegrationSettings::Integrator: ProfileGaussian (the DEFAULT, v1) or // ProfileEmpirical (v2); BoxSum (BraggIntegrate2D) is the fallback. jfjoch_process exposes it as diff --git a/image_analysis/bragg_prediction/BraggPrediction.cpp b/image_analysis/bragg_prediction/BraggPrediction.cpp index 70ebea71..38ce1943 100644 --- a/image_analysis/bragg_prediction/BraggPrediction.cpp +++ b/image_analysis/bragg_prediction/BraggPrediction.cpp @@ -86,9 +86,9 @@ int BraggPrediction::Calc(const DiffractionExperiment &experiment, const Crystal // Energy bandwidth thickens the Ewald shell radially: at the // diffraction condition |S|-1/λ shifts by recip_z·(Δλ/λ), i.e. - // σ_bw = |recip_z|·bandwidth_sigma (= bλ/2d², identical to - // PixelRefine's R_bw). Broaden the acceptance window in quadrature so - // high-resolution shells (smeared most, ∝1/d²) are not clipped. + // σ_bw = |recip_z|·bandwidth_sigma (= bλ/2d²). Broaden the acceptance + // window in quadrature so high-resolution shells (smeared most, ∝1/d²) + // are not clipped. float radial_cutoff = settings.ewald_dist_cutoff; if (settings.bandwidth_sigma > 0.0f) { const float bw_tol = kBandwidthCutoffSigmas * settings.bandwidth_sigma * std::fabs(recip_z); diff --git a/image_analysis/pixel_refinement/CMakeLists.txt b/image_analysis/pixel_refinement/CMakeLists.txt deleted file mode 100644 index 5e94b562..00000000 --- a/image_analysis/pixel_refinement/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -ADD_LIBRARY(JFJochPixelRefine PixelRefine.cpp PixelRefine.h) -TARGET_LINK_LIBRARIES(JFJochPixelRefine JFJochBraggPrediction JFJochCommon JFJochScaleMerge JFJochGeomRefinement ceres_static) diff --git a/image_analysis/pixel_refinement/FACTORED_MODEL.md b/image_analysis/pixel_refinement/FACTORED_MODEL.md deleted file mode 100644 index cb398977..00000000 --- a/image_analysis/pixel_refinement/FACTORED_MODEL.md +++ /dev/null @@ -1,142 +0,0 @@ -# A factored likelihood for joint integration + scaling + geometry - -**Status: Terms 1+2 implemented and shipping as the PixelRefine default** (see -`PixelRefine.cpp`, `METHODS.md`, `FINDINGS-2026-06.md`); Term 3 (geometry) and the -priors/NN extensions of §4 remain future work. Goal: replace the per-pixel least-squares -of PixelRefine with a per-*reflection* likelihood that fuses profile-fit integration, -scaling against the reference, and geometry refinement into one differentiable -objective — the foundation for priors (Bayesian) and learned components (NN), and the -thing that dissolves the empty-pixel and parameter-degeneracy problems by construction -rather than by patching. - -## 0. Notation - -Per image, parameters `θ`: scale `G`, Debye-Waller `B`, orientation + cell (geometry), -profile width `R1` (tangential, possibly a 2×2 tensor), partiality width `R0` -(radial/mosaicity; `R0_eff² = R0² + R_bw²`, `R_bw² = (bλ)²/2d⁴` *known* from bandwidth). -Per reflection `h`: reference intensity `I_ref` (the hypothesis), resolution `d`, -predicted centre `c_pred`, partiality `p = exp(−ε_r²/R0_eff²)`, polarisation `pol`, -`B_term = exp(−B/4d²)`, shoebox pixels `{I_p}` with mean local background `Bg`, and the -area-normalised tangential profile template `P_p = P_tang(ε_t,p; R1)`. - -## 1. The factorisation principle - -A reflection's shoebox carries three (to first order) **orthogonal** pieces of -information — the 0th, 1st and 2nd moments of its intensity distribution: - -| moment | statistic | constrains | -|---|---|---| -| 0th — total | profile-fit amplitude `J` | scale chain `G, B` (and `p`) | -| 1st — position | centroid `c_obs` | geometry (orientation; radial→distance/cell) | -| 2nd — shape | second moment `M₂` | profile width `R1` (and anisotropy) | - -The current per-pixel residual mixes all three into one objective over shared pixels — -*that* is what couples the parameters (measured G–R0 ≈ −0.46, G–R1 ≈ +0.51) and lets the -many empty pixels dominate. Residual-ing each **moment** against its model instead gives -a block-diagonal Jacobian: the couplings vanish because each statistic carries one -parameter block's information. - -## 2. The three residual terms - -### 2.1 Intensity / scaling residual (one scalar per reflection) - -Optimal (Diamond) profile-fit amplitude and its model: -``` -J = Σ_p w_p P_p (I_p − Bg) / Σ_p w_p P_p² w_p = 1/v_p -J_model = G · B_term · p · pol · I_ref -r¹_h = (J − J_model) / σ_J -``` -`J` is ~invariant to `R1` (a well-sampled spot integrates to the same total whatever -width is assumed) → **R1 leaves this residual**. Empty pixels make no residual; they -enter only through `J` with ~zero profile weight → **the empty-pixel problem is gone by -construction.** This residual *is* the scaling residual — integration and scaling are now -one objective. - -### 2.2 Shape residual (constrains R1; decoupled from scale) - -``` -M₂_obs = Σ_p (I_p − Bg) ε_t,p² / Σ_p (I_p − Bg) (intensity-weighted variance, Å⁻²) -M₂_model = R1² / 2 (variance of exp(−ε_t²/R1²)) -r²_h = (M₂_obs − M₂_model) / σ_M2 -``` -A moment is normalised by the total → **scale-invariant → `∂r²/∂G = 0`**. The G↔R1 -degeneracy disappears. Anisotropic extension: use the 2×2 moment tensor -`Σ(I−Bg)(ε_t⊗ε_t)/Σ(I−Bg)` vs `diag(R1a²/2, R1b²/2)` → elliptical R1 (the DMM streak). -Weak spots have huge `σ_M2` → contribute ~nothing → R1 is set by strong spots -automatically (and may be made `R1(d)` per resolution). - -### 2.3 Position residual (constrains geometry; decoupled from scale and shape) - -``` -c_obs = Σ_p (I_p − Bg)(x_p, y_p) / Σ_p (I_p − Bg) -r³_h = (c_obs − c_pred(geometry)) / σ_c (2-vector; split radial / tangential) -``` -Centroid is scale- and width-invariant → `∂r³/∂G = ∂r³/∂R1 ≈ 0`. The **radial** component -constrains distance/cell, the **tangential** constrains orientation — exactly the split -the diagnostic measured (radial≈0 = no distance error; tangential∝radius = orientation). - -## 3. Fisher / expected-variance weighting (makes it a likelihood) - -Every `σ` uses the **model-expected** variance, never observed counts — this is what -makes strong *expected* reflections carry the information and makes the model "feel pain -when something that should be there is not": -``` -v_p = Bg + J_model · P_p (background + expected signal from I_ref, not I_obs) -σ_J² = 1 / Σ_p (P_p² / v_p) -σ_M2 ≈ M₂ · √(2 / N_eff), σ_c ≈ R1 / √(N_eff), N_eff = (Σ(I−Bg))² / Σ v_p -``` -Fisher information about `G` from term 1 is `∝ (B_term·p·pol·I_ref)² / σ_J²` — driven by -`I_ref`, so a noise spike (high counts, low `I_ref`) gets *no* weight while a strong -expected reflection observed absent (`J≈0`, large residual, moderate `σ_J`) gets a large -penalty. The reference enters at maximum leverage: it sets both the target and the weight. - -## 4. Joint objective and priors - -``` -L(θ) = Σ_h [ (r¹_h)² + (r²_h)² + |r³_h|² ] + priors -``` -No free λ if the σ's are correct — the relative weighting *is* the Fisher information. -Priors are the Bayesian hooks and the principled degeneracy breaks: -- **R0 (partiality/mosaicity) is GLOBAL + prior.** R0 multiplies `J_model` (`p`), so it is - still degenerate with the per-image `G` *within term 1* — the one degeneracy the - factorisation does **not** remove. Resolve it physically, not with a directional G prior - (which would bias every output intensity): `R0 ~ N(mosaicity, σ)`, `R_bw` fixed from the - known bandwidth, and `R0` fit **globally** (one per crystal, from many reflections' - partiality distribution) so per-image G can't trade against it. -- orientation `~ N(spot-centroid, σ)`; `G ~ N(1, σ_G)` or tied to the beam monitor; - distance `~ N(nominal, σ_L)` (loose, since serial/jet alignment is poorly constrained). -- Optional Bayesian intensities: treat `I_true` as a parameter with the reference as its - prior → posterior over intensities, not point estimates. - -## 5. Why the degeneracies vanish (Jacobian structure) - -`Jᵀ W J` is approximately block-diagonal in `(G,B,p | R1 | geometry)`: -``` -∂r¹/∂{G,B,p} ≠ 0 ; ∂r¹/∂R1 ≈ 0 ; ∂r¹/∂geom ≈ 0 -∂r²/∂R1 ≠ 0 ; ∂r²/∂G = 0 ; ∂r²/∂geom ≈ 0 -∂r³/∂geom ≠ 0 ; ∂r³/∂G = 0 ; ∂r³/∂R1 ≈ 0 -``` -So G↔R1 (+0.51) and all the cross-couplings drop to ~0 by construction. Only **G↔R0** -survives (R0 is a scale-multiplier, not a shape), handled by the global+physical prior of -§4. The degeneracies we measured were artifacts of projecting all information onto a -single per-pixel residual. - -## 6. Implementation notes - -- Per reflection: 1 (intensity) + 1 (shape) + 2 (position) residuals = 4, vs ~49 per-pixel - residuals → **cheaper**, and Ceres autodiffs the moment formulas through the pixels. -- The per-pixel forward model still *defines* `P_tang`, `p`, etc.; the **loss** moves to - the moments. -- Geometry (term 3) can run as the global sweep we have (it already maximises a - position/CC objective); terms 1–2 are the per-image photometry. Or solve all three - jointly per image with the global R0/mosaicity shared across images (two-level fit). -- Drop-in path: keep the current extraction, add the three residuals as a new objective - behind a flag, compare against the per-pixel loss on both test crystals. - -## 7. Why this serves the goal - -It is one differentiable likelihood, factored along the physics, that (a) maximises use of -the reference (target + Fisher weight), (b) is the substrate for priors / posteriors over -intensities (Bayesian), and (c) lets any term — profile `P`, partiality `p`, corrections — -be replaced by a learned function trained through the same likelihood. That is the -qualitative move XDS-style empirical profile fitting cannot make. diff --git a/image_analysis/pixel_refinement/FINDINGS-2026-06.md b/image_analysis/pixel_refinement/FINDINGS-2026-06.md deleted file mode 100644 index a7345de4..00000000 --- a/image_analysis/pixel_refinement/FINDINGS-2026-06.md +++ /dev/null @@ -1,122 +0,0 @@ -# PixelRefine — findings, June 2026 - -A record of the main results from the still-integration investigation, validated on two -lysozyme P4₃2₁2 datasets indexed against the same reference (`6G8A_refine_001.mtz`): - -* **Crystal 1 — serial jet stills** (`LysozymeJet5-…`, MicroMAX DMM ~1 % bandwidth). -* **Crystal 2 — rotation crystal** (`fixed_master.h5`, 1800 × 0.2°, Si mono), run *as - stills* as a stress test, and as rotation for reference. XDS output of this crystal - (`CORRECT.LP`, `XDS_ASCII.HKL`) is the external ground truth. - ---- - -## 1. The factored model is a qualitative win (crystal 2, stills, 1.7 Å) - -Replacing the per-pixel least squares with the factored Terms 1+2 (intensity residual + -measured per-resolution $R_1$) turns an erratic, high-res-collapsing result into a flat one: - -| | N_obs | ⟨I/σ⟩ | CC₁/₂ (per shell) | CCref (per shell) | -|---|---:|---:|---|---| -| baseline per-pixel loss | 799 k | 7.2 | 75→81→56→32→…→0 % | erratic, →0 | -| **factored Terms 1+2** | 1.22 M | 10.7 | **84–92 %, flat to 1.7 Å** | **77–92 %, flat** | - -This *reaches the proper rotation (`-R -P rot`) path's quality from the stills path.* The -mechanism is clean and on-thesis: **Term 1 lifts the per-image scale CC vs the reference -from 0.09 → 0.40** (median 0.01 → 0.39) — the per-pixel loss produced near-random per-image -scales; the factored objective makes each image's scale agree with the reference, so the -merge coheres. The reference enters at maximum leverage (it sets both the target and the -Fisher weight). - -**Term 3 (per-spot recentring) is a no-op** on both crystals (93.5 vs 93.6 % CC₁/₂): the -generous integration aperture already contains the spot, so shifting the spotlight by the -sub-pixel prediction offset does not change the integrated total. It was removed; the -position residual's value can only come as a *geometry* term (refine orientation/distance -from the centroid), which is XtalOptimizer's job, not a per-spot mask shift. - ---- - -## 2. The residual centroid offset is a sampling floor, not a recoverable error - -On the jet, predictions land on the spot to within a ~0.4 px tangential scatter. Three -independent cuts show this is the **centroid undersampling floor of the ~2×2 spots**, not a -geometry error or a shape effect: - -* **Flat with intensity.** Binned by significance (≈6σ vs ≈39σ, a 6× span) the tangential - centroid offset is 0.41 → 0.36 px (median). A background-limited centroid would shrink - ∝ 1/significance (≈6×); it does not, so it is a real ~0.35 px floor, not counting noise. -* **Peak is a *worse* predictor than the centroid** (sub-pixel parabolic mode: 0.52 vs 0.36 - px at high signif). If an asymmetric tail were dragging the centroid off a correct peak, - the peak would be *better* — it is not. So there is **no coherent shape asymmetry** to - model; the offset is a zero-mean per-spot scatter from sub-pixel phase aliasing of a spot - only ~2 px wide. -* **Radial centroid offset ≈ 0.01–0.02 px, flat** → no distance/parallax error (parallax is - radial and grows outward). - -Consequences: recentring cannot rescue weak reflections (their prediction is already -centred; the 0.4 px is irreducible sampling scatter), which is *why* a generous box beats a -tight profile mask. A *symmetric* non-Gaussian shape would not move the centroid at all — it -would show up only in Term 2 ($R_1$), not in a position term. - ---- - -## 3. The σ gap to XDS is fulls-vs-partials, not intensities - -XDS reports **ISa = 28.3** (asymptotic relative error 3.5 %); our stills path reports -**ISa ≈ 1.1** and the rotation path **≈ 1.6**. The decisive clue is in `XDS_ASCII.HKL`: the -`PEAK` column (fraction of each reflection captured) is **≈ 100 % for 97 % of reflections**, -and the header gives mosaicity 0.091° < 0.2° oscillation. So **these are full reflections**, -recorded over the 1–3 frames each rocking curve spans — not partials. - -* **XDS** profile-fits in 3D (the third axis is the rotation/rocking direction) and *sums* - the rocking curve with profile weights → one full per reflection, counting-limited σ. -* **jfjoch `-P rot`** integrates a 2D shoebox *per frame* and recovers each full by - *dividing* by the rocking-curve fraction $R_j$ — a cheap approximation of 3D integration. - That division injects per-observation noise (it amplifies each frame's background noise, - pays N independent backgrounds, and carries a random per-observation partiality error). - -Crucially, **ISa and merged-intensity accuracy are different axes, decoupled by -multiplicity.** Our merged intensities are correct (§4), because ~60–240× multiplicity -averages the per-observation noise down; ISa measures the per-*observation* precision, which -multiplicity cannot improve. So "right intensities, wrong σ" is not a contradiction. - -**A cheap probe confirms it.** Raising `--min-partiality` from 0.02 → 0.5 on crystal 2 -(rotation) lifts ISa **1.6 → 3.8** at *zero* completeness cost (high multiplicity) and with -CCref flat — and the high-res shells *improve*. The default 0.02 keeps deep-tail partials -(2 % of a reflection, scaled back ×50) that were over-weighted and polluting the merge. So -of the ~17× rotation gap, **~2.8× is tunable tail-weighting** and **~6× is structural** (the -2D-divide vs 3D-sum difference) — the structural part needs a real rocking-curve sum, not a -knob. - ---- - -## 4. Our intensities are XDS-grade — the limit is σ, not I - -Direct CC of merged intensities (both reduced to the 4/mmm asymmetric unit; 98.8 % of -reflections matched, no manual reindex): - -| vs XDS (CC on merged I, to 1.2 Å) | overall | at 1.2 Å | -|---|---|---| -| **PixelRefine stills** (factored) | **95.9 %** | 96.9 % | -| rotation `-P rot` (min_partiality 0.3) | 98.8 % | 98.5 % | - -PixelRefine-stills intensities track XDS at **95–98 %, flat to the 1.2 Å diffraction -limit**, only ~3 % behind full rotation integration. This is exactly what the CC₁/₂→CC_true -relation predicts (stills CC₁/₂≈0.88 ⇒ 0.967), so it is real, carried by the huge stills -multiplicity averaging per-observation noise down. **Conclusion: the intensity estimation is -right; the remaining gap to XDS is entirely the per-observation σ / partiality axis** — the -3D rocking-curve integration frontier (`FACTORED_MODEL.md` §4), deliberately parked. - ---- - -## What is fixed vs. parked - -**Landed (and now the default model):** mean (not median) background in both integrators; -de-biased (background-limited) variance; widened Ewald prediction band; the factored -Terms 1+2 as the only PixelRefine objective; the global XDS-form merge error model with ISa. - -**Diagnosed dead-ends (do not re-litigate):** per-image $R$ refinement (degenerate with -scale); per-spot recentring (no-op); chasing the 0.4 px centroid floor (sampling, not -recoverable). **Parked (rotation-side):** the 3D rocking-curve sum / one-shot -post-refinement, a `min_partiality` default for high-multiplicity data, and -partiality-aware variance weighting — the path to XDS-grade ISa, but a separate axis from -the intensity work. diff --git a/image_analysis/pixel_refinement/METHODS.md b/image_analysis/pixel_refinement/METHODS.md deleted file mode 100644 index 1c1fd9e1..00000000 --- a/image_analysis/pixel_refinement/METHODS.md +++ /dev/null @@ -1,214 +0,0 @@ -# PixelRefine — methods - -`PixelRefine` is the still-image integrator. It integrates the Bragg reflections of one -image by **profile fitting against a reference intensity set** $I^\mathrm{ref}$ (e.g. -`F_calc` from a deposited model, or the current merged estimate in an EM-style outer loop) -and returns already-scaled intensities. It is an **intensity-wise** operation: the -detector geometry (orientation, cell, beam, distance) is taken as fixed — it was refined -upstream by `XtalOptimizer` (`IndexAndRefine::RefineGeometryIfNeeded`) — and PixelRefine -only measures the spot shape and fits the per-image scale. - -The objective is the factored per-reflection likelihood of `FACTORED_MODEL.md`, **Terms 1 -and 2**. This note records the equations and the reasons behind each design choice. - -Throughout, a reflection's shoebox is a small box of raw detector pixels $I_p$ with a -local flat background $B$; the area-normalised tangential profile at pixel $p$ is $P_p$, -and $v_p$ is the variance used to weight pixel $p$. - ---- - -## 0. The forward model - -The recorded amplitude of a still reflection is the profile-fit amplitude - -$$ -J \;=\; \frac{\sum_p P_p\,(I_p - B)/v_p}{\sum_p P_p^{2}/v_p}, -\qquad -\operatorname{var}(J) = \frac{1}{\sum_p P_p^{2}/v_p}. -$$ - -The full (rotation-equivalent) intensity is recovered by dividing out the factors a still -does not record, - -$$ -I \;=\; \frac{J}{p\,B_\mathrm{DW}\,\mathrm{pol}},\qquad -p = \exp\!\left(-\frac{\epsilon_r^{2}}{R_{0,\mathrm{eff}}^{2}}\right),\quad -B_\mathrm{DW}=\exp\!\left(-\frac{B_\mathrm{fac}}{4 d^{2}}\right), -$$ - -where the **partiality** $p$ is the fraction of the mosaic block crossing the Ewald -sphere, $\epsilon_r$ is the radial excitation error, $R_0$ the radial (rocking) width, and -$\mathrm{pol}$ the polarisation correction. The tangential profile is a separable, -area-normalised Gaussian of width $R_1$: - -$$ -P_p = \frac{1}{\pi R_1^{2}}\exp\!\left(-\frac{\epsilon_{t,p}^{2}}{R_1^{2}}\right). -$$ - -A finite **X-ray bandwidth** thickens the Ewald shell radially, adding a fixed, -resolution-dependent term to the radial width, $R_{0,\mathrm{eff}}^2 = R_0^2 + -(b\lambda)^2/(2d^4)$ ($b$ = relative bandwidth; the pink-beam / DMM signature). $b=0$ is a -monochromatic no-op. - ---- - -## 1. De-biased variance (the load-bearing fix) - -**Symptom.** Mean intensities went **negative** in the high-resolution shells -($\langle I/\sigma\rangle$ down to $-12$), and the per-image scale $G$ collapsed to $0$ on -most images, dropping ~80 % of observations. - -**Cause.** The extraction weighted each pixel by its **observed** count, $v_p = I_p$. A -down-fluctuated background pixel ($I_p < B$) then gets a small $v_p$, hence a large -$w_p=1/v_p$, and its contribution $P_p(I_p-B)/v_p < 0$ is large in magnitude. Summed over -the many empty shoebox pixels this drags $J$ below zero — the *inverse-observed-count* -(Poisson-on-data) bias, worst where the true signal is weakest (high resolution). - -**Fix.** For background-limited reflections the correct variance is the local background, -**constant over the shoebox**, $v_p = \max(B,1)$, giving the unbiased uniform-variance -estimator $J = \sum_p P_p (I_p-B)/\sum_p P_p^2$. This turned $\langle I/\sigma\rangle$ -positive at all resolutions and stopped the scale collapse. - ---- - -## 2. Prediction band and multiplicity - -A reflection is given a shoebox only when it lies within a radial band of the Ewald -sphere, $\bigl|\,|S_{hkl}| - 1/\lambda\,\bigr| \le \delta$. For randomly oriented stills -the number of images on which a given $hkl$ qualifies is $\propto \delta$. The original -$\delta = 5\times10^{-4}\,\text{Å}^{-1}$ was 4–6× tighter than a box integrator, giving 4× -fewer observations per reflection. Widening to $\delta = 2\times10^{-3}\,\text{Å}^{-1}$ -(`ewald_dist_cutoff`) restores the multiplicity; the partiality $p$ downweights the -slightly-off-Ewald tails it admits. (Widening is only safe with the de-biased variance of -§1 and the factored objective of §§3–4 — with a per-pixel geometry fit it diverged.) - ---- - -## 3. Term 2 — measured per-resolution profile width $R_1$ - -$R_1$ is **measured, not fitted**. Fitting $R_1$ inside a per-image least squares is -degenerate with the scale $G$ (a narrower profile and a larger scale trade off), and that -degeneracy slides the per-image scale and wrecks the merge. But a **second moment** is -normalised by the total intensity, so it carries shape information *decoupled from scale*: - -$$ -R_1^2 = 2\,\langle \epsilon_t^2\rangle, -\qquad -\langle \epsilon_t^2\rangle = \frac{\sum_p (I_p-B)\,\epsilon_{t,p}^2}{\sum_p (I_p-B)} . -$$ - -We bin the strong spots ($\mathrm{signif}\ge 5$) by resolution ($1/d^2$, 6 bins) and take -the **median** $\langle\epsilon_t^2\rangle$ per bin, so each reflection integrates with the -$R_1$ of its resolution shell (low-res spots are tight; high-res anisotropic streaks are -wider). Weak spots fall back to the global $R_1$. Measuring the width rather than fitting -it is what makes profile-width refinement stable — and it is a selling point: the mask -adapts to the data per shell. - ---- - -## 4. Term 1 — the intensity / scaling residual - -The per-pixel least squares is replaced by **one residual per reflection**: the profile-fit -amplitude $J$ (using the Term-2 $R_1$) should equal the scaled reference, - -$$ -r_h = \frac{J_h - G\,B_\mathrm{DW}\,p_h\,\mathrm{pol}_h\,I^\mathrm{ref}_h}{\sigma_{J,h}}, -\qquad -L = \sum_h r_h^2 + \text{(scale prior)} . -$$ - -Only the per-image scale $G$ and Debye–Waller $B$ are optimised; geometry and $R$ are -fixed. Three consequences: - -* **Integration and scaling become one objective.** $J$ *is* the integrated intensity and - the residual *is* the scaling residual. -* **The empty-pixel problem disappears by construction.** Empty pixels enter only through - $J$ (with ~zero profile weight); they make no residual of their own and cannot dominate. -* **Fisher weighting puts the reference at maximum leverage.** $\sigma_J$ uses the - *model-expected* variance $v_p = B + \max(J,0)\,P_p$ (background plus expected signal from - $I^\mathrm{ref}$), not the observed counts — so a strong *expected* reflection observed - absent is penalised, and a noise spike with low $I^\mathrm{ref}$ gets no weight. - -The scale is regularised towards 1 with a data-scaled weight $w_G=\sqrt{N_\mathrm{refl}/ -\sigma_G}$ (mirrors `ScaleOnTheFly`) so weakly-measured images cannot drift and scramble -the merge. - -**Geometry is not refined here.** PixelRefine's earlier per-image geometry refinement -(regularised orientation LSQ, signal-weighting, a global orientation/cell sweep) was -removed: on true stills the predictions are already good (radial centroid error ≈ 0, -tangential ≈ a 0.4 px sampling floor that is *not* a recoverable misprediction), and per-image -geometry refinement only overfit the sparse signal. Geometry is the job of `XtalOptimizer`. - ---- - -## 5. Background estimator — mean, not median (both integrators) - -**Symptom.** Pushed past the true resolution limit, the no-signal shells reported -$\langle I/\sigma\rangle\approx4\text{–}6$ at $\mathrm{CC}_{1/2}\approx0$ — confident "data" -where there is none. Present in *both* PixelRefine and the classical `BraggIntegrate2D`. - -**Cause.** Both used the **median** of the background ring. For a right-skewed (Poisson) -background $\operatorname{median}(B) < \mathbb{E}[B]$, so subtraction under-subtracts by a -tiny but *coherent* per-pixel offset that grows over an $n_\mathrm{pix}$ peak and a -multiplicity-$m$ merge into a fake $\langle I/\sigma\rangle\propto\sqrt{m}$, worst where the -real signal is weakest. - -**Fix.** Use the **mean** of the ring (spot cores and saturation sentinels already -excluded). $\langle I/\sigma\rangle$ then collapses to ~0 wherever $\mathrm{CC}\approx0$ and -the honest resolution limit becomes visible. This was the single largest contributor to -untrustworthy σ — a one-line change in each integrator. - ---- - -## 6. Error model (global $a,b$; XDS form) - -Counting statistics under-estimate the variance of strong reflections, which carry -systematic errors proportional to $I$, not $\sqrt{I}$. The standard correction inflates the -variance with a **global** two-parameter model, applied at the merge level so both -integrators benefit: - -$$ -\sigma'^{\,2} = a\,\sigma^{2} + (b\,\langle I\rangle)^{2}, -\qquad -\mathrm{ISa} = \frac{1}{b} = \lim_{I\to\infty}\frac{I}{\sigma'} . -$$ - -The $I^2$ term uses the reflection **mean** $\langle I\rangle$ (not the per-observation -$I_i$, which would bias the merged mean and collapse CC); $a,b$ are fit from the spread of -symmetry equivalents with a relative ($1/\mathrm{dev}^4$) weight so the strong bins (which -fix $b$) do not swamp the weak bins (which fix $a$). `jfjoch_process` prints the model and -ISa. - ---- - -## Results (lysozyme rotation crystal `fixed_master.h5`, treated as stills, 1.7 Å) - -| Configuration | N_obs | $\langle I/\sigma\rangle$ | CC$_{1/2}$ | CC$_\mathrm{ref}$ | -|---|---:|---:|---:|---:| -| Baseline per-pixel loss | 799 k | 7.2 | erratic, →0 at 1.7 Å | erratic | -| **Factored Terms 1+2 (this model)** | 1.22 M | 10.7 | **84–92 % flat** | **77–92 % flat** | - -The factored objective turns the erratic, high-res-collapsing per-pixel result into flat -~90 % CC$_{1/2}$/CC$_\mathrm{ref}$ to 1.7 Å — matching the proper rotation integration path -from the *stills* path. (See `FINDINGS-2026-06.md`.) - ---- - -## Default recipe - -§§1–4 are PixelRefine-specific; §§5–6 act at the integration/merge level and apply to the -classical route too. - -| Field / behaviour | Default | Section | -|---|---|---| -| fit/extraction variance | local background $B$ | 1 | -| `ewald_dist_cutoff` | $2\times10^{-3}\,\text{Å}^{-1}$ | 2 | -| tangential width $R_1$ | measured per resolution shell | 3 | -| objective | per-reflection intensity residual, Fisher-weighted | 4 | -| refined parameters | per-image $G$ (and $B$); geometry fixed | 4 | -| `scale_reg_sigma` | 2.0 | 4 | -| local background estimator | **mean** of the ring | 5 | -| merge error model | global $a,b$ (ISa printed) | 6 | - -`ewald_dist_cutoff` (multiplicity vs. cost) and `bandwidth` (Si vs. DMM) are the two knobs -worth setting per dataset. diff --git a/image_analysis/pixel_refinement/PixelRefine.cpp b/image_analysis/pixel_refinement/PixelRefine.cpp deleted file mode 100644 index 59d92de2..00000000 --- a/image_analysis/pixel_refinement/PixelRefine.cpp +++ /dev/null @@ -1,708 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#include "../../common/JFJochMath.h" -#include "PixelRefine.h" - -#include -#include -#include -#include -#include - -#include -#include - -namespace { - -// Per-pixel observation, in *raw* detector counts (no per-pixel solid-angle or -// polarization correction - same units the "normal" integrator works in; the -// per-reflection polarization correction is applied via ReflGroup::pol). -struct PixelObs { - double x, y; // detector pixel coordinate - double Iobs; // raw pixel value (signal + background) - double Ibkg; // local background estimate (per-shoebox level, raw counts) -}; - -// One reflection together with the pixels of its shoebox. -struct ReflGroup { - int h, k, l; - double d; - double Itrue; // reference intensity (held fixed) - double R_bw_sq; // bandwidth radial-width^2 contribution (0 = monochromatic) - double pol; // per-reflection polarization correction (raw = true * pol) - double Ibkg; // local flat background (raw counts, constant over the shoebox) - double predicted_x, predicted_y; - double R1_eff = 0.0; // tangential profile width to use (Term 2) - std::vector pixels; -}; - -// Median of a vector (in place, partially reorders it). -double MedianInPlace(std::vector &v) { - if (v.empty()) - return 0.0; - const size_t mid = v.size() / 2; - std::nth_element(v.begin(), v.begin() + mid, v.end()); - if (v.size() % 2 == 1) - return v[mid]; - const double hi = v[mid]; - std::nth_element(v.begin(), v.begin() + mid - 1, v.begin() + mid); - return 0.5 * (v[mid - 1] + hi); -} - -// Mask marking the *core* (radius `radius`) of every predicted spot, so that the -// local-background sampling of one reflection never picks up a neighbouring -// reflection's signal. Same idea as BraggIntegrate2D::BuildReflectionMask. -std::vector BuildSpotMask(const std::vector &predicted, int nrefl, - size_t xpixel, size_t ypixel, int radius) { - std::vector mask(xpixel * ypixel, 0); - const double r_sq = static_cast(radius) * radius; - for (int i = 0; i < nrefl; ++i) { - const auto &r = predicted[i]; - const int cx = static_cast(std::lround(r.predicted_x)); - const int cy = static_cast(std::lround(r.predicted_y)); - const int x0 = std::max(0, cx - radius); - const int x1 = std::min(static_cast(xpixel) - 1, cx + radius); - const int y0 = std::max(0, cy - radius); - const int y1 = std::min(static_cast(ypixel) - 1, cy + radius); - for (int y = y0; y <= y1; ++y) { - for (int x = x0; x <= x1; ++x) { - const double dx = x - r.predicted_x; - const double dy = y - r.predicted_y; - if (dx * dx + dy * dy <= r_sq) - mask[static_cast(xpixel) * y + x] = 1; - } - } - } - return mask; -} - -// Square shoebox bounds (inclusive) around a predicted spot, clamped to the -// detector. The centre is rounded to the nearest pixel with std::lround so the -// signal box is centred identically to the spot-core mask (BuildSpotMask) and -// the local-background ring (EstimateLocalBackground), which also lround. -struct ShoeboxBox { int min_x, max_x, min_y, max_y; }; -ShoeboxBox ShoeboxBounds(double px, double py, int radius, size_t xpixel, size_t ypixel) { - const int cx = static_cast(std::lround(px)); - const int cy = static_cast(std::lround(py)); - return { - std::max(cx - radius, 0), - std::min(cx + radius, static_cast(xpixel) - 1), - std::max(cy - radius, 0), - std::min(cy + radius, static_cast(ypixel) - 1) - }; -} - -// Local flat background around one shoebox, in raw detector counts. Samples the -// square ring shoebox_radius < max(|dx|,|dy|) <= bkg_outer_radius centred on the -// spot, dropping pixels that belong to any spot core (spot_mask) or carry a -// masked/saturated sentinel, and returns their MEAN. (The median, used previously, -// sits below the mean for a skewed background and so biases the subtracted intensity -// positive.) Mirrors the local-background region of BraggIntegrate2D. -template -bool EstimateLocalBackground(const T *image, - const std::vector &spot_mask, - size_t xpixel, size_t ypixel, - double cx, double cy, - int shoebox_radius, int bkg_outer_radius, - double &bkg_mean) { - const int icx = static_cast(std::lround(cx)); - const int icy = static_cast(std::lround(cy)); - const int x0 = std::max(0, icx - bkg_outer_radius); - const int x1 = std::min(static_cast(xpixel) - 1, icx + bkg_outer_radius); - const int y0 = std::max(0, icy - bkg_outer_radius); - const int y1 = std::min(static_cast(ypixel) - 1, icy + bkg_outer_radius); - - std::vector vals; - vals.reserve(static_cast((x1 - x0 + 1) * (y1 - y0 + 1))); - for (int y = y0; y <= y1; ++y) { - for (int x = x0; x <= x1; ++x) { - // Skip the square shoebox core: that is signal, not background. - if (std::abs(x - icx) <= shoebox_radius && std::abs(y - icy) <= shoebox_radius) - continue; - const size_t np = static_cast(xpixel) * y + x; - if (spot_mask[np]) - continue; - const T raw = image[np]; - if (raw == std::numeric_limits::max()) - continue; - if (std::is_signed_v && raw == std::numeric_limits::min()) - continue; - vals.push_back(static_cast(raw)); - } - } - - if (vals.size() < 5) - return false; - - // Mean background, NOT median. The median of a right-skewed (Poisson) background sits - // below the mean, so subtracting it under-subtracts and biases every weak integrated - // intensity positive - which averages up over multiplicity into fake in the - // no-signal high-resolution shells. The mean is unbiased; modern fast detectors put - // few zingers in the ring, so it is not robustified here. - double sum = 0.0; - for (const double v : vals) - sum += v; - bkg_mean = sum / static_cast(vals.size()); - return true; -} - -// Per-pixel: map a detector pixel through the current geometry into the -// reference reciprocal frame. Cheap (a few trig + one rotation); depends on the -// pixel and the detector geometry, not on the lattice. -template -void ObservedRecip(const T *beam, const T *distance_mm, const T *detector_rot, - double obs_x, double obs_y, - double pixel_size, double inv_lambda, - Eigen::Matrix &e_obs_recip) { - // PyFAI convention (left-handed for rot1/rot2): rot3 = 0 assumed. - const T c1 = ceres::cos(detector_rot[0]); - const T s1 = ceres::sin(detector_rot[0]); - const T c2 = ceres::cos(detector_rot[1]); - const T s2 = ceres::sin(detector_rot[1]); - - const T det_x = (T(obs_x) - beam[0]) * T(pixel_size); - const T det_y = (T(obs_y) - beam[1]) * T(pixel_size); - const T det_z = T(distance_mm[0]); - - const T t1_x = c1 * det_x + s1 * det_z; - const T t1_y = det_y; - const T t1_z = -s1 * det_x + c1 * det_z; - - const T x = t1_x; - const T y = c2 * t1_y + s2 * t1_z; - const T z = -s2 * t1_y + c2 * t1_z; - - const T inv_norm = T(1) / ceres::sqrt(x * x + y * y + z * z); - - T recip_raw[3] = { - x * inv_norm * T(inv_lambda), - y * inv_norm * T(inv_lambda), - (z * inv_norm - T(1.0)) * T(inv_lambda) - }; - e_obs_recip = Eigen::Matrix(recip_raw[0], recip_raw[1], recip_raw[2]); -} - -// Per-reflection: predicted node g_hkl, |g_hkl|^2, and the Ewald-sphere normal. -// (a0,a1,a2) are the three real-space lattice column vectors in the lab frame -// (latt.Vec0/1/2()), used exactly as indexed: PixelRefine does not refine the cell, -// so there is no symmetry manifold to constrain - a general (triclinic) reciprocal -// inverse reproduces every crystal system from the actual cell. Depends only on the -// lattice and hkl, so for a whole shoebox it is computed once. -bool PredictedNode(const double *a0, const double *a1, const double *a2, - double exp_h, double exp_k, double exp_l, double inv_lambda, - Eigen::Vector3d &e_pred_recip, - Eigen::Vector3d &n_radial, double &q_sq) { - const Eigen::Vector3d A(a0[0], a0[1], a0[2]); - const Eigen::Vector3d Bv(a1[0], a1[1], a1[2]); - const Eigen::Vector3d C(a2[0], a2[1], a2[2]); - - const Eigen::Vector3d BxC = Bv.cross(C); - const Eigen::Vector3d CxA = C.cross(A); - const Eigen::Vector3d AxB = A.cross(Bv); - - const double Vol = A.dot(BxC); - if (std::abs(Vol) < 1e-12) - return false; - const double invV = 1.0 / Vol; - - e_pred_recip = (BxC * exp_h + CxA * exp_k + AxB * exp_l) * invV; - q_sq = e_pred_recip.squaredNorm(); - - // Ewald sphere centre at -k_i = (0,0,-inv_lambda); radial normal at g_hkl. - const Eigen::Vector3d S_pred( - e_pred_recip[0], e_pred_recip[1], e_pred_recip[2] + inv_lambda); - const double S_pred_norm = S_pred.norm(); - if (S_pred_norm < 1e-10) - return false; - - n_radial = S_pred / S_pred_norm; - return true; -} - -// Geometry terms for one shoebox pixel under a FIXED geometry (PixelRefine no -// longer refines geometry, so this is a plain double evaluation, not a Ceres cost): -// q_sq = |g_hkl|^2 (predicted node, for the B-factor) -// eps_radial = deviation along the Ewald normal (the partiality direction) -// eps_tang_sq = squared deviation in the tangent plane (the profile direction) -bool GeometryProbe(double obs_x, double obs_y, double lambda, double pixel_size, - int h, int k, int l, - const double beam[2], double dist_mm, const double detector_rot[2], - const double p0[3], const double p1[3], const double p2[3], - double &q_sq, double &eps_radial, double &eps_tang_sq) { - const double inv_lambda = 1.0 / lambda; - Eigen::Vector3d e_obs; - ObservedRecip(beam, &dist_mm, detector_rot, obs_x, obs_y, pixel_size, inv_lambda, e_obs); - - Eigen::Vector3d e_pred, n_radial; - if (!PredictedNode(p0, p1, p2, h, k, l, inv_lambda, e_pred, n_radial, q_sq)) - return false; - - const Eigen::Vector3d delta_q = e_obs - e_pred; - eps_radial = delta_q.dot(n_radial); - eps_tang_sq = (delta_q - eps_radial * n_radial).squaredNorm(); - return true; -} - -// Pulls a scalar parameter towards an expected value with a fixed weight (the -// data-scaled prior). Identical in spirit to ScaleOnTheFly's regularizer: it is what -// keeps the per-image scale G from wandering on weakly-constrained images and -// scrambling the cross-image merge. -struct ScalarRegularizer { - ScalarRegularizer(double weight, double expected) : weight(weight), expected(expected) {} - template - bool operator()(const T *p, T *residual) const { - residual[0] = T(weight) * (p[0] - T(expected)); - return true; - } - double weight; - double expected; -}; - -// Term 1 of the factored likelihood (FACTORED_MODEL.md): the per-reflection -// *intensity* (0th-moment) residual. The profile-fit amplitude J should equal the -// scaled reference J_model = G * exp(-B/4d^2) * partiality * pol * I_ref. One scalar -// residual per reflection, weighted by the model-expected (Fisher) sigma_J. This is -// the scaling residual - integration and scaling become one objective, and the empty -// pixels (which make no residual of their own) stop dominating the fit. Geometry is -// fixed, so J, partiality and sigma_J are constants and only G and B are free. -struct IntensityResidual { - IntensityResidual(double J, double sigma_J, double partiality, double pol, - double I_ref, double inv_4d2) - : J(J), inv_sigma(1.0 / sigma_J), partiality(partiality), pol(pol), - I_ref(I_ref), inv_4d2(inv_4d2) {} - template - bool operator()(const T *const G, const T *const B, T *residual) const { - const T B_term = ceres::exp(-B[0] * T(inv_4d2)); - const T J_model = G[0] * B_term * T(partiality) * T(pol) * T(I_ref); - residual[0] = (J_model - T(J)) * T(inv_sigma); - return true; - } - double J, inv_sigma, partiality, pol, I_ref, inv_4d2; -}; - -} // namespace - -PixelRefine::PixelRefine(const DiffractionExperiment &experiment, - const std::vector &reference) - : xpixel(experiment.GetXPixelsNum()), - ypixel(experiment.GetYPixelsNum()), - experiment(experiment), - hkl_key_generator(experiment.GetScalingSettings().GetMergeFriedel(), - experiment.GetSpaceGroupNumber().value_or(1)) { - for (const auto &ref: reference) - reference_data[hkl_key_generator(ref)] = ref.I; -} - -void PixelRefine::BuildParameterBlocks(const PixelRefineData &data, - double beam[2], double &dist_mm, - double detector_rot[2], - double latt_vec0[3], double latt_vec1[3], double latt_vec2[3]) const { - beam[0] = data.geom.GetBeamX_pxl(); - beam[1] = data.geom.GetBeamY_pxl(); - dist_mm = data.geom.GetDetectorDistance_mm(); - detector_rot[0] = data.geom.GetPoniRot1_rad(); - detector_rot[1] = data.geom.GetPoniRot2_rad(); - - // The three real-space lattice columns in the lab frame, used as-is. PixelRefine - // does not refine the cell, so there is no symmetry manifold to constrain; the - // indexed cell is taken exactly, with no re-idealisation to ideal symmetry. - const Coord &a = data.latt.Vec0(); - const Coord &b = data.latt.Vec1(); - const Coord &c = data.latt.Vec2(); - for (int i = 0; i < 3; ++i) { - latt_vec0[i] = a[i]; - latt_vec1[i] = b[i]; - latt_vec2[i] = c[i]; - } -} - -template -void PixelRefine::Run(const T *image, - BraggPrediction &prediction, - PixelRefineData &data) { - data.solved = false; - data.reflections.clear(); - - const double lambda = data.geom.GetWavelength_A(); - const double pixel_size = data.geom.GetPixelSize_mm(); - - const BraggPredictionSettings settings_prediction{ - .high_res_A = experiment.GetBraggIntegrationSettings().GetDMinLimit_A(), - .ewald_dist_cutoff = static_cast(data.ewald_dist_cutoff), - .max_hkl = 100, - .centering = data.centering, - .bandwidth_sigma = static_cast(data.bandwidth) // relative Δλ/λ sigma - }; - - const int radius = data.shoebox_radius; - const int bkg_outer_radius = std::max(radius + 1, data.bkg_outer_radius); - - // Per-reflection polarization correction (raw = true * pol), evaluated once at - // the predicted spot - same handling as BraggIntegrate2D. Identity if disabled. - const auto pol_factor = experiment.GetPolarizationFactor(); - auto polarization = [&](double x, double y) -> double { - if (!pol_factor) - return 1.0; - return data.geom.CalcAzIntPolarizationCorr(static_cast(x), static_cast(y), - pol_factor.value()); - }; - - // Bandwidth radial-width^2 (in the code's R = sqrt(2)*sigma convention): - // R_bw^2 = (b*lambda)^2 / (2 d^4), b = relative bandwidth (sigma). - // A fixed per-reflection constant; data.bandwidth == 0 -> monochromatic no-op. - const double bw = data.bandwidth; - auto bandwidth_radial_sq = [&](double d) -> double { - if (bw <= 0.0 || d <= 0.0) - return 0.0; - const double bl = bw * lambda; - return bl * bl / (2.0 * d * d * d * d); - }; - - // Geometry is FIXED here: orientation/cell/detector were already refined upstream - // by XtalOptimizer (IndexAndRefine::RefineGeometryIfNeeded). PixelRefine is an - // intensity-only operation - it predicts shoeboxes with this geometry, measures the - // tangential profile width, and fits the per-image scale G (and B) to the reference. - double beam[2], dist_mm, detector_rot[2]; - double latt_vec0[3], latt_vec1[3], latt_vec2[3]; - BuildParameterBlocks(data, beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2); - - // ---- 1. Predict shoeboxes for the current geometry ------------------------ - DiffractionExperiment exp_iter = experiment; - exp_iter.BeamX_pxl(data.geom.GetBeamX_pxl()) - .BeamY_pxl(data.geom.GetBeamY_pxl()) - .DetectorDistance_mm(data.geom.GetDetectorDistance_mm()) - .PoniRot1_rad(data.geom.GetPoniRot1_rad()) - .PoniRot2_rad(data.geom.GetPoniRot2_rad()); - const int nrefl = prediction.Calc(exp_iter, data.latt, settings_prediction); - - // ---- 2. Collect per-reflection shoebox pixels + local background ---------- - // GetReflections() returns the full pre-sized buffer; only the first nrefl - // entries are valid for this image. A spot-core mask over ALL predictions keeps - // each reflection's background ring from picking up a neighbour's signal. - const auto &predicted = prediction.GetReflections(); - const auto spot_mask = BuildSpotMask(predicted, nrefl, xpixel, ypixel, radius); - - std::vector groups; - for (int ri = 0; ri < nrefl; ++ri) { - const auto &refl = predicted[ri]; - const auto hkl = hkl_key_generator(refl); - if (!reference_data.contains(hkl)) - continue; - - // Local flat background from the ring around the shoebox (raw counts). If we - // cannot estimate a clean local background the reflection is dropped, exactly - // as BraggIntegrate2D marks it unobserved when too few background pixels survive. - double Ibkg = 0.0; - if (!EstimateLocalBackground(image, spot_mask, xpixel, ypixel, - refl.predicted_x, refl.predicted_y, - radius, bkg_outer_radius, Ibkg)) - continue; - - ReflGroup g; - g.h = refl.h; - g.k = refl.k; - g.l = refl.l; - g.d = refl.d; - g.Itrue = reference_data[hkl]; - g.R_bw_sq = bandwidth_radial_sq(refl.d); - g.pol = polarization(refl.predicted_x, refl.predicted_y); - g.Ibkg = Ibkg; - g.predicted_x = refl.predicted_x; - g.predicted_y = refl.predicted_y; - - const auto box = ShoeboxBounds(refl.predicted_x, refl.predicted_y, radius, xpixel, ypixel); - for (int y = box.min_y; y <= box.max_y; ++y) { - for (int x = box.min_x; x <= box.max_x; ++x) { - const size_t npixel = xpixel * y + x; - // Skip sentinel (masked / saturated) pixels. - if (image[npixel] == std::numeric_limits::max()) - continue; - if (std::is_signed_v && (image[npixel] == std::numeric_limits::min())) - continue; - g.pixels.push_back({static_cast(x), static_cast(y), - static_cast(image[npixel]), Ibkg}); - } - } - if (!g.pixels.empty()) - groups.push_back(std::move(g)); - } - if (groups.empty()) - return; - - // ---- 3. Term 2: per-resolution tangential profile width R1 ---------------- - // R1 = sqrt(2*) from the intensity-weighted tangential second moment of - // the strong spots, binned by resolution (low res small spots, high res larger). - // A *shape* statistic, normalised by the total intensity, so it is decoupled from - // the per-image scale - which is what makes measuring it (rather than fitting it, - // where it is degenerate with G) stable. Weak spots fall back to the global R[1]. - for (auto &g : groups) - g.R1_eff = data.R[1]; - { - constexpr int n_bins = 6; - const double box_px = (2.0 * radius + 1.0) * (2.0 * radius + 1.0); - double s2min = 1e30, s2max = 0.0; - for (const auto &g : groups) { - const double s2 = 1.0 / (g.d * g.d); - s2min = std::min(s2min, s2); - s2max = std::max(s2max, s2); - } - const double span = std::max(s2max - s2min, 1e-12); - auto bin_of = [&](double d) { - return std::clamp(static_cast((1.0 / (d * d) - s2min) / span * n_bins), 0, n_bins - 1); - }; - std::vector> bin_M2(n_bins); - for (const auto &g : groups) { - double sw = 0.0, sw_et2 = 0.0; - for (const auto &px : g.pixels) { - double q_sq, eps_r, eps_t_sq; - if (!GeometryProbe(px.x, px.y, lambda, pixel_size, g.h, g.k, g.l, - beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2, - q_sq, eps_r, eps_t_sq)) - continue; - const double w = std::max(px.Iobs - g.Ibkg, 0.0); - sw += w; - sw_et2 += w * eps_t_sq; - } - if (sw <= 0.0) - continue; - const double signif = sw / std::sqrt(std::max(box_px * g.Ibkg, 1.0)); - if (signif >= 5.0) // only well-measured spots define the shape - bin_M2[bin_of(g.d)].push_back(sw_et2 / sw); - } - std::vector bin_R1(n_bins, data.R[1]); - for (int b = 0; b < n_bins; ++b) - if (bin_M2[b].size() >= 5) { - const double r1 = data.r1_multiplier * std::sqrt(2.0 * std::max(MedianInPlace(bin_M2[b]), 0.0)); - if (std::isfinite(r1) && r1 > 1e-4) - bin_R1[b] = std::clamp(r1, 1e-4, 0.05); - } - for (auto &g : groups) - g.R1_eff = bin_R1[bin_of(g.d)]; - } - - // ---- 4. Term 1: one intensity residual per reflection; fit G (and B) ------ - // J / partiality / sigma_J are computed here as constants (geometry & R fixed), - // and only the per-image scale G and Debye-Waller B are optimised. - ceres::Problem problem; - size_t n_blocks = 0; - const double R0 = data.R[0]; - for (const auto &g : groups) { - const double R1 = g.R1_eff; // Term 2: per-resolution profile width - if (!(R1 > 0.0) || !(R0 > 0.0)) - continue; - double num = 0.0, den = 0.0, rad = 0.0; - std::vector> pt_sig; // (P_t, Iobs-Bg) for the Fisher pass - pt_sig.reserve(g.pixels.size()); - for (const auto &px : g.pixels) { - double q_sq, eps_r, eps_t_sq; - if (!GeometryProbe(px.x, px.y, lambda, pixel_size, g.h, g.k, g.l, - beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2, - q_sq, eps_r, eps_t_sq)) - continue; - const double P_t = std::exp(-eps_t_sq / (R1 * R1)) / (PI * R1 * R1); - const double R0_eff_sq = R0 * R0 + g.R_bw_sq; - const double P_rad = std::exp(-eps_r * eps_r / R0_eff_sq); - const double v = std::max(g.Ibkg, 1.0); - const double sig = px.Iobs - g.Ibkg; - num += P_t * sig / v; - den += P_t * P_t / v; - rad += P_rad * P_t * P_t / v; - pt_sig.emplace_back(P_t, sig); - } - if (!(den > 0.0)) - continue; - const double J = num / den; - const double partiality = rad / den; - // Model-expected (Fisher) variance: v_p = background + expected signal J*P_t, - // not the per-pixel observed counts (which down-bias) - so the weight tracks - // information, and an expected-strong reflection that is absent hurts. - double den_f = 0.0; - for (const auto &[P_t, sig] : pt_sig) { - const double v_f = std::max(g.Ibkg + std::max(J, 0.0) * P_t, 1.0); - den_f += P_t * P_t / v_f; - } - const double sigma_J = std::sqrt(1.0 / std::max(den_f, 1e-30)); - const double inv_4d2 = (g.d > 0.0) ? 1.0 / (4.0 * g.d * g.d) : 0.0; - auto *cost = new ceres::AutoDiffCostFunction( - new IntensityResidual(J, sigma_J, partiality, g.pol, g.Itrue, inv_4d2)); - problem.AddResidualBlock(cost, nullptr, &data.scale_factor, &data.B_factor); - ++n_blocks; - } - data.residual_count = n_blocks; - if (n_blocks == 0) - return; - - // G >= 0; B fixed unless requested; G regularized -> 1 with weight sqrt(n/sigma) - // (mirrors ScaleOnTheFly) so weakly-measured images cannot drift and scramble the merge. - problem.SetParameterLowerBound(&data.scale_factor, 0, 0.0); - if (!data.refine_B) - problem.SetParameterBlockConstant(&data.B_factor); - if (data.scale_reg_sigma > 0.0) { - const double w = std::sqrt(static_cast(groups.size()) / data.scale_reg_sigma); - auto *reg = new ceres::AutoDiffCostFunction( - new ScalarRegularizer(w, 1.0)); - problem.AddResidualBlock(reg, nullptr, &data.scale_factor); - } - - ceres::Solver::Options options; - options.linear_solver_type = ceres::DENSE_QR; - options.minimizer_progress_to_stdout = false; - options.logging_type = ceres::LoggingType::SILENT; - options.max_solver_time_in_seconds = data.max_time_s; - options.num_threads = 1; - ceres::Solver::Summary summary; - ceres::Solve(options, &problem, &summary); - data.final_cost = summary.final_cost; - data.solved = summary.IsSolutionUsable(); - - // ---- 5. Extract integrated reflections ------------------------------------ - // Profile fitting gives the recorded amplitude (fitting the tangential profile P_t - // against the background-subtracted pixels): - // J = sum_p[ P_t,p (Iobs_p - Ibkg)/v_p ] / sum_p[ P_t,p^2 / v_p ] - // ~ G * Itrue * B_term * partiality * pol (recorded raw counts) - // var(J) = 1 / sum_p[ P_t,p^2 / v_p ] - // - // Output split (Merge multiplies r.I * image_scale_corr and weights by - // 1/(sigma*image_scale_corr)^2): - // r.I = J / (B_term * partiality * pol) = G * Itrue - // r.sigma = sqrt(var(J)) / (B_term * partiality * pol) - // r.partiality = profile-weighted P_radial in (0,1] (the rocking fraction) - // r.completeness = live/total tangential profile in (0,1] (detector clipping) - // r.image_scale_corr = 1/G (per-image scale ONLY) - // so r.I * image_scale_corr = Itrue: B, partiality and polarization live on the - // intensity, G lives on image_scale_corr - one clean meaning per field. We walk the - // full (unclamped) shoebox once: every grid point feeds the total tangential profile - // (completeness denominator); points that are real, live pixels also feed the fit. - data.reflections.reserve(groups.size()); - for (const auto &g : groups) { - const int cx = static_cast(std::lround(g.predicted_x)); - const int cy = static_cast(std::lround(g.predicted_y)); - const double B_term = std::exp(-data.B_factor / (4.0 * g.d * g.d)); - - double num = 0.0, den = 0.0, bkg_sum = 0.0, radial_sum = 0.0; - double prof_live = 0.0, prof_full = 0.0; // tangential profile: captured / total - size_t n = 0; - - for (int y = cy - radius; y <= cy + radius; ++y) { - for (int x = cx - radius; x <= cx + radius; ++x) { - double q_sq, eps_r, eps_t_sq; - if (!GeometryProbe(static_cast(x), static_cast(y), - lambda, pixel_size, g.h, g.k, g.l, - beam, dist_mm, detector_rot, latt_vec0, latt_vec1, latt_vec2, - q_sq, eps_r, eps_t_sq)) - continue; - if (!(data.R[0] > 0.0) || !(g.R1_eff > 0.0)) - continue; - - // Tangential profile shape (area-normalized) -> the fit template, using the - // per-reflection R1_eff (Term 2). - const double R1 = g.R1_eff; - const double P_t = std::exp(-eps_t_sq / (R1 * R1)) / (PI * R1 * R1); - prof_full += P_t; // whole shoebox, on- or off-detector - - // Only real, unmasked detector pixels carry signal. - if (x < 0 || x >= static_cast(xpixel) || y < 0 || y >= static_cast(ypixel)) - continue; - const size_t np = static_cast(xpixel) * y + x; - if (image[np] == std::numeric_limits::max()) - continue; - if (std::is_signed_v && image[np] == std::numeric_limits::min()) - continue; - - const double Iobs = static_cast(image[np]); // raw counts - // Background-limited variance (constant over the shoebox): weighting by - // the observed count biases the amplitude negative where signal is weakest. - const double v = std::max(g.Ibkg, 1.0); - - // Peak-normalized radial factor (the partiality), in (0,1]. MUST use the - // same P_t^2/v weights as the amplitude, else an R0_eff-dependent (hence - // resolution-dependent) factor is left behind in r.I. - const double R0_eff_sq = data.R[0] * data.R[0] + g.R_bw_sq; - const double P_radial = std::exp(-eps_r * eps_r / R0_eff_sq); - - const double w = P_t * P_t / v; - num += P_t * (Iobs - g.Ibkg) / v; - den += w; - radial_sum += P_radial * w; - prof_live += P_t; - bkg_sum += g.Ibkg; - ++n; - } - } - - Reflection r{}; - r.h = g.h; - r.k = g.k; - r.l = g.l; - r.d = static_cast(g.d); - r.predicted_x = static_cast(g.predicted_x); - r.predicted_y = static_cast(g.predicted_y); - r.observed_x = NAN; - r.observed_y = NAN; - r.rlp = 1.0f; - r.partiality = (den > 0.0) ? static_cast(radial_sum / den) : 1.0f; - r.completeness = (prof_full > 0.0) ? static_cast(prof_live / prof_full) : 1.0f; - - if (den > 0.0 && n > 0) { - const double I_amp = num / den; // ~ G*Itrue*B_term*partiality*pol - const double sigma_amp = std::sqrt(1.0 / den); - const double corr = static_cast(r.partiality) * B_term * g.pol; - r.bkg = static_cast(bkg_sum / static_cast(n)); - r.observed = true; - - if (corr > 0.0 && data.scale_factor > 0.0) { - r.I = static_cast(I_amp / corr); // = G*Itrue - r.sigma = static_cast(sigma_amp / corr); - r.image_scale_corr = static_cast(1.0 / data.scale_factor); // G only - } else { - r.I = static_cast(I_amp); - r.sigma = static_cast(sigma_amp); - r.image_scale_corr = NAN; - } - } else { - r.I = 0.0f; - r.sigma = NAN; - r.bkg = 0.0f; - r.observed = false; - } - data.reflections.push_back(r); - } - - // ---- 6. Per-image CC vs reference (diagnostic) ---------------------------- - // Pearson CC of the scaled estimate (r.I * image_scale_corr = Itrue_est) against - // the reference intensities, over the matched reflections. - { - double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0; - size_t cn = 0; - for (const auto &r : data.reflections) { - if (!r.observed || !std::isfinite(r.I) || !std::isfinite(r.image_scale_corr)) - continue; - const auto it = reference_data.find(hkl_key_generator(r)); - if (it == reference_data.end()) - continue; - const double x = static_cast(r.I) * static_cast(r.image_scale_corr); - const double y = it->second; - if (!std::isfinite(x) || !std::isfinite(y)) - continue; - sx += x; sy += y; sxx += x * x; syy += y * y; sxy += x * y; ++cn; - } - data.cc = NAN; - data.cc_n = static_cast(cn); - if (cn >= 3) { - const double nd = static_cast(cn); - const double cov = sxy - sx * sy / nd; - const double vx = sxx - sx * sx / nd; - const double vy = syy - sy * sy / nd; - if (vx > 0.0 && vy > 0.0) - data.cc = cov / std::sqrt(vx * vy); - } - } -} - -template void PixelRefine::Run(const int8_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const int16_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const int32_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const uint8_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const uint16_t *, BraggPrediction &, PixelRefineData &); -template void PixelRefine::Run(const uint32_t *, BraggPrediction &, PixelRefineData &); diff --git a/image_analysis/pixel_refinement/PixelRefine.h b/image_analysis/pixel_refinement/PixelRefine.h deleted file mode 100644 index b180e1de..00000000 --- a/image_analysis/pixel_refinement/PixelRefine.h +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute -// SPDX-License-Identifier: GPL-3.0-only - -#pragma once - -#include "../bragg_prediction/BraggPrediction.h" -#include "../common/DiffractionExperiment.h" -#include "../scale_merge/HKLKey.h" - -// ============================================================================= -// PixelRefine — reference-driven profile-fit integration + scaling for stills -// ============================================================================= -// -// PixelRefine is the still-image integrator: given a reference set of merged -// intensities I_ref (the current best hypothesis for each reflection's full -// intensity), it integrates one image and returns already-scaled intensities. It -// is an *intensity-wise* operation - the detector geometry is taken as fixed (it -// was refined upstream by XtalOptimizer in IndexAndRefine::RefineGeometryIfNeeded); -// PixelRefine does not touch orientation, cell or detector parameters. -// -// The objective is the factored per-reflection likelihood of FACTORED_MODEL.md, -// Terms 1 + 2: -// -// Term 2 (shape) — for each resolution shell, the tangential profile width R1 is -// *measured* from the intensity-weighted second moment of the strong spots: -// R1 = sqrt(2*). A second moment is normalised by the total intensity, -// so it is decoupled from the per-image scale - which is why measuring R1 is -// stable where *fitting* it (degenerate with G) is not. -// -// Term 1 (intensity / scaling) — one residual per reflection: the profile-fit -// amplitude J (using the Term-2 R1) should equal the scaled reference -// J_model = G * exp(-B/4d^2) * partiality * pol * I_ref, -// weighted by the model-expected (Fisher) sigma_J. Only the per-image scale G -// and Debye-Waller B are optimised. Integration and scaling become one objective; -// the many empty shoebox pixels enter only through J (with ~zero profile weight) -// instead of dominating a per-pixel loss. -// -// I_ref is NOT refined here - it is a fixed hypothesis for the pass. The intended -// outer loop is EM-like: run PixelRefine on every image against the current I_ref, -// re-merge to a new I_ref, repeat. -// -// Forward model per pixel (raw detector counts, no per-pixel solid-angle/Lorentz -// weighting - same units as the classical integrator): -// signal = G * I_ref * B_term * P_radial * P_tangential * pol , + I_bkg -// B_term = exp(-B |q|^2 / 4) (Debye-Waller) -// P_radial = exp(-eps_r^2 / R0_eff^2) (still partiality, <= 1) -// P_tangential = exp(-eps_t^2 / R1^2) / (pi R1^2) (area-normalized profile) -// where eps_r / eps_t are the radial / tangential deviations of the pixel from the -// predicted node, and pol is the per-reflection polarization correction. -// -// X-ray bandwidth (optional): a finite bandwidth thickens the Ewald shell radially, -// adding a fixed, resolution-dependent term to R0 that grows like 1/d^2 (the -// pink-beam/DMM signature): R0_eff^2 = R0^2 + (b*lambda)^2/(2 d^4). b = 0 (the -// default) is a monochromatic no-op; set it for DMM-type data, leave it for Si. -// ============================================================================= - -struct PixelRefineData { - // --- model state (input as initial guess, output as refined result) --- - DiffractionGeometry geom; // fixed (refined upstream by XtalOptimizer) - CrystalLattice latt; // fixed - char centering = 'P'; - - double B_factor = 0.0; // Debye-Waller B (A^2), refined - double scale_factor = 1.0; // overall per-image scale G, refined - double R[2] = {0.005, 0.005}; // R[0] = radial (partiality) width; R[1] = fallback - // tangential profile width before Term 2 measures it (A^-1) - bool refine_B = true; // refine the per-image B-factor along with G - - // Term 2 measures the physical tangential width R1, but the *integration* profile must - // be generous (XDS-style: integrate over ~6 sigma) or a tight template centred on the - // prediction sits off the ~0.4 px centroid-floor scatter and underestimates the - // intensity (validated on the jet R-free: 0.34 with the raw width -> 0.26 at x6). The - // measured R1 is multiplied by this before use. - double r1_multiplier = 6.0; - - // Relative X-ray bandwidth (sigma of dlambda/lambda), e.g. ~0.004 for a 1% FWHM - // DMM, ~1e-4 for Si(111). Adds a resolution-dependent radial broadening to R[0]. - // 0 = monochromatic (the term switches off entirely). - double bandwidth = 0.0; - - // Per-image scale G is regularized towards 1 with weight sqrt(n_refl/scale_reg_sigma) - // (mirrors ScaleOnTheFly). Without this the unconstrained G wanders on weakly - // measured images and 1/G scrambles the cross-image merge. <= 0 disables. - double scale_reg_sigma = 2.0; - - // Radial Ewald-sphere acceptance band for prediction (A^-1): a reflection is given - // a shoebox when ||S|-1/lambda| <= this. Widened from the on-sphere default towards - // the integrator's profile radius so slightly-misaligned high-resolution reflections - // are still integrated (multiplicity), while the partiality downweights their tails. - double ewald_dist_cutoff = 0.0020; - - double max_time_s = 5.0; - int shoebox_radius = 3; // half-size of the per-reflection signal box - // Half-size of the local-background sampling box. Background is the MEAN of the ring - // shoebox_radius < |dx|,|dy| <= bkg_outer_radius (excluding spot cores), like - // BraggIntegrate2D. Must be > shoebox_radius. - int bkg_outer_radius = 6; - - // --- output --- - std::vector reflections; // profile-fitted, scaled integration result - bool solved = false; - double final_cost = NAN; - size_t residual_count = 0; - double cc = NAN; // per-image CC of scaled intensities vs reference - int64_t cc_n = 0; // number of reflections in the CC -}; - -class PixelRefine { - const size_t xpixel, ypixel; - const DiffractionExperiment &experiment; - - const HKLKeyGenerator hkl_key_generator; - std::map reference_data; - - // Fills the fixed geometry (beam, distance, detector tilt) and the three - // real-space lattice column vectors from the current model state, for the - // per-pixel geometry evaluation. - void BuildParameterBlocks(const PixelRefineData &data, - double beam[2], double &dist_mm, - double detector_rot[2], - double latt_vec0[3], double latt_vec1[3], double latt_vec2[3]) const; -public: - PixelRefine(const DiffractionExperiment &experiment, - const std::vector &reference); - - // The BraggPrediction is supplied per call (it is mutated): this keeps a single - // PixelRefine instance usable from several threads, each passing its own prediction - // buffer. Only `data` is written; PixelRefine state is const. The image is in raw - // detector counts (masked/saturated pixels carry the type sentinel); background is - // estimated locally per shoebox from the image itself. - template - void Run(const T *image, - BraggPrediction &prediction, - PixelRefineData &data); -}; diff --git a/process/JFJochProcess.cpp b/process/JFJochProcess.cpp index 59070af3..ab7e9d47 100644 --- a/process/JFJochProcess.cpp +++ b/process/JFJochProcess.cpp @@ -409,12 +409,9 @@ ProcessResult JFJochProcess::Run(JFJochProcessObserver *observer) { }; phase("Scaling and merging"); - const bool pixel_refine_path = - experiment_.GetIndexingSettings().GetGeomRefinementAlgorithm() == GeomRefinementAlgorithmEnum::PixelRefine; - - // ScaleOnTheFly is only for the classical, no-reference path; with a reference (or - // PixelRefine) each image is already scaled, so we merge directly. - if (config_.reference_data.empty() && !pixel_refine_path) { + // ScaleOnTheFly self-scaling is only for the no-reference path; with a reference each image + // is already scaled against it during the per-image pass, so we merge directly. + if (config_.reference_data.empty()) { logger.Info("Running scaling ..."); ScalingResult scale_result(0); double t_merge_all = 0.0, t_scale = 0.0; diff --git a/process/JFJochProcessCommandLine.cpp b/process/JFJochProcessCommandLine.cpp index 50586ae3..caa165ec 100644 --- a/process/JFJochProcessCommandLine.cpp +++ b/process/JFJochProcessCommandLine.cpp @@ -36,7 +36,6 @@ namespace { switch (r) { case GeomRefinementAlgorithmEnum::None: return "none"; case GeomRefinementAlgorithmEnum::OrientationOnly: return "orientation"; - case GeomRefinementAlgorithmEnum::PixelRefine: return "pixelrefine"; case GeomRefinementAlgorithmEnum::BeamCenter: default: return "beam_and_lattice"; } diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index dfc1310e..762f2af7 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -44,7 +44,7 @@ void print_usage() { std::cout << " -X, --indexing-algorithm Indexing algorithm (FFBIDX|FFT|FFTW|Auto|None)" << std::endl; std::cout << " -S, --space-group Space group number - used for both indexing and scaling" << std::endl; std::cout << " -C, --unit-cell Fix reference unit cell: \"a,b,c,alpha,beta,gamma\"" << std::endl; - std::cout << " -r, --refine Geometry refinement algorithm (none|orientation|beam_and_lattice|pixelrefine)" << std::endl; + std::cout << " -r, --refine Geometry refinement algorithm (none|orientation|beam_and_lattice)" << std::endl; std::cout << std::endl; std::cout << " Scaling and merging" << std::endl; @@ -65,11 +65,10 @@ void print_usage() { std::cout << " --reference-column