From 091b3808a4e65dbac2686e0115f9961dffb33f1c Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Tue, 17 Feb 2026 19:54:37 +0100 Subject: [PATCH] ScaleAndMerge: Add statistics --- image_analysis/scale_merge/ScaleAndMerge.cpp | 923 +++++++++---------- 1 file changed, 420 insertions(+), 503 deletions(-) diff --git a/image_analysis/scale_merge/ScaleAndMerge.cpp b/image_analysis/scale_merge/ScaleAndMerge.cpp index 7410c236..8c6c63ca 100644 --- a/image_analysis/scale_merge/ScaleAndMerge.cpp +++ b/image_analysis/scale_merge/ScaleAndMerge.cpp @@ -19,230 +19,233 @@ #include "../../common/ResolutionShells.h" namespace { + struct HKLKey { + int64_t h = 0; + int64_t k = 0; + int64_t l = 0; + bool is_positive = true; // only relevant if opt.merge_friedel == false -struct HKLKey { - int64_t h = 0; - int64_t k = 0; - int64_t l = 0; - bool is_positive = true; // only relevant if opt.merge_friedel == false - - bool operator==(const HKLKey& o) const noexcept { - return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive; - } -}; - -struct HKLKeyHash { - size_t operator()(const HKLKey& key) const noexcept { - auto mix = [](uint64_t x) { - x ^= x >> 33; - x *= 0xff51afd7ed558ccdULL; - x ^= x >> 33; - x *= 0xc4ceb9fe1a85ec53ULL; - x ^= x >> 33; - return x; - }; - const uint64_t a = static_cast(key.h); - const uint64_t b = static_cast(key.k); - const uint64_t c = static_cast(key.l); - const uint64_t d = static_cast(key.is_positive ? 1 : 0); - return static_cast(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3)); - } -}; - -inline int RoundImageId(float image_number, double rounding_step) { - if (!(rounding_step > 0.0)) - rounding_step = 1.0; - const double x = static_cast(image_number) / rounding_step; - const double r = std::round(x) * rounding_step; - return static_cast(std::llround(r / rounding_step)); -} - -inline double SafeSigma(double s, double min_sigma) { - if (!std::isfinite(s) || s <= 0.0) - return min_sigma; - return std::max(s, min_sigma); -} - -inline double SafeD(double d) { - if (!std::isfinite(d) || d <= 0.0) - return std::numeric_limits::quiet_NaN(); - return d; -} - -inline int SafeToInt(int64_t x) { - if (x < std::numeric_limits::min() || x > std::numeric_limits::max()) - throw std::out_of_range("HKL index out of int range for Gemmi"); - return static_cast(x); -} - -inline double SafeInv(double x, double fallback) { - if (!std::isfinite(x) || x == 0.0) - return fallback; - return 1.0 / x; -} - -inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) { - HKLKey key{}; - key.h = r.h; - key.k = r.k; - key.l = r.l; - key.is_positive = true; - - if (!opt.space_group.has_value()) { - if (!opt.merge_friedel) { - const HKLKey neg{-r.h, -r.k, -r.l, true}; - const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l); - if (!pos) { - key.h = -key.h; - key.k = -key.k; - key.l = -key.l; - key.is_positive = false; - } + bool operator==(const HKLKey &o) const noexcept { + return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive; } + }; + + struct HKLKeyHash { + size_t operator()(const HKLKey &key) const noexcept { + auto mix = [](uint64_t x) { + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return x; + }; + const uint64_t a = static_cast(key.h); + const uint64_t b = static_cast(key.k); + const uint64_t c = static_cast(key.l); + const uint64_t d = static_cast(key.is_positive ? 1 : 0); + return static_cast(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3)); + } + }; + + inline int RoundImageId(float image_number, double rounding_step) { + if (!(rounding_step > 0.0)) + rounding_step = 1.0; + const double x = static_cast(image_number) / rounding_step; + const double r = std::round(x) * rounding_step; + return static_cast(std::llround(r / rounding_step)); + } + + inline double SafeSigma(double s, double min_sigma) { + if (!std::isfinite(s) || s <= 0.0) + return min_sigma; + return std::max(s, min_sigma); + } + + inline double SafeD(double d) { + if (!std::isfinite(d) || d <= 0.0) + return std::numeric_limits::quiet_NaN(); + return d; + } + + inline int SafeToInt(int64_t x) { + if (x < std::numeric_limits::min() || x > std::numeric_limits::max()) + throw std::out_of_range("HKL index out of int range for Gemmi"); + return static_cast(x); + } + + inline double SafeInv(double x, double fallback) { + if (!std::isfinite(x) || x == 0.0) + return fallback; + return 1.0 / x; + } + + inline HKLKey CanonicalizeHKLKey(const Reflection &r, const ScaleMergeOptions &opt) { + HKLKey key{}; + key.h = r.h; + key.k = r.k; + key.l = r.l; + key.is_positive = true; + + if (!opt.space_group.has_value()) { + if (!opt.merge_friedel) { + const HKLKey neg{-r.h, -r.k, -r.l, true}; + const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l); + if (!pos) { + key.h = -key.h; + key.k = -key.k; + key.l = -key.l; + key.is_positive = false; + } + } + return key; + } + + const gemmi::SpaceGroup &sg = *opt.space_group; + const gemmi::GroupOps gops = sg.operations(); + const gemmi::ReciprocalAsu rasu(&sg); + + const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}}; + const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); + + key.h = asu_hkl[0]; + key.k = asu_hkl[1]; + key.l = asu_hkl[2]; + key.is_positive = opt.merge_friedel ? true : sign_plus; return key; } - const gemmi::SpaceGroup& sg = *opt.space_group; - const gemmi::GroupOps gops = sg.operations(); - const gemmi::ReciprocalAsu rasu(&sg); - - const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}}; - const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); - - key.h = asu_hkl[0]; - key.k = asu_hkl[1]; - key.l = asu_hkl[2]; - key.is_positive = opt.merge_friedel ? true : sign_plus; - return key; -} - -/// CrystFEL-like log-scaling residual -/// -/// residual = w * [ ln(I_obs) - ln(G) - ln(partiality) - ln(lp) - ln(I_true) ] -/// -/// Only observations with I_obs > 0 should be fed in (the caller skips the rest). -/// G and I_true are constrained to be positive via Ceres lower bounds. -struct LogIntensityResidual { - LogIntensityResidual(const Reflection& r, double sigma_obs, double wedge_deg, bool refine_partiality) - : log_Iobs_(std::log(std::max(static_cast(r.I), 1e-30))), - weight_(SafeInv(sigma_obs / std::max(static_cast(r.I), 1e-30), 1.0)), - delta_phi_(r.delta_phi_deg), - log_lp_(std::log(std::max(SafeInv(static_cast(r.rlp), 1.0), 1e-30))), - half_wedge_(wedge_deg / 2.0), - c1_(r.zeta / std::sqrt(2.0)), - partiality_(r.partiality), - refine_partiality_(refine_partiality) {} - - template - bool operator()(const T* const G, - const T* const mosaicity, - const T* const Itrue, - T* residual) const { - T partiality; - if (refine_partiality_ && mosaicity[0] != 0.0) { - const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0]; - const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0]; - partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); - } else { - partiality = T(partiality_); + /// CrystFEL-like log-scaling residual + /// + /// residual = w * [ ln(I_obs) - ln(G) - ln(partiality) - ln(lp) - ln(I_true) ] + /// + /// Only observations with I_obs > 0 should be fed in (the caller skips the rest). + /// G and I_true are constrained to be positive via Ceres lower bounds. + struct LogIntensityResidual { + LogIntensityResidual(const Reflection &r, double sigma_obs, double wedge_deg, bool refine_partiality) + : log_Iobs_(std::log(std::max(static_cast(r.I), 1e-30))), + weight_(SafeInv(sigma_obs / std::max(static_cast(r.I), 1e-30), 1.0)), + delta_phi_(r.delta_phi_deg), + log_lp_(std::log(std::max(SafeInv(static_cast(r.rlp), 1.0), 1e-30))), + half_wedge_(wedge_deg / 2.0), + c1_(r.zeta / std::sqrt(2.0)), + partiality_(r.partiality), + refine_partiality_(refine_partiality) { } - // Clamp partiality away from zero so log is safe - const T min_p = T(1e-30); - if (partiality < min_p) - partiality = min_p; + template + bool operator()(const T *const G, + const T *const mosaicity, + const T *const Itrue, + T *residual) const { + T partiality; + if (refine_partiality_ && mosaicity[0] != 0.0) { + const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0]; + const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0]; + partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); + } else { + partiality = T(partiality_); + } - // ln(I_pred) = ln(G) + ln(partiality) + ln(lp) + ln(Itrue) - const T log_Ipred = ceres::log(G[0]) + ceres::log(partiality) + T(log_lp_) + ceres::log(Itrue[0]); - residual[0] = (log_Ipred - T(log_Iobs_)) * T(weight_); - return true; - } + // Clamp partiality away from zero so log is safe + const T min_p = T(1e-30); + if (partiality < min_p) + partiality = min_p; - double log_Iobs_; - double weight_; // w_i ≈ I_obs / sigma_obs (relative weight in log-space) - double delta_phi_; - double log_lp_; - double half_wedge_; - double c1_; - double partiality_; - bool refine_partiality_; -}; + // ln(I_pred) = ln(G) + ln(partiality) + ln(lp) + ln(Itrue) + const T log_Ipred = ceres::log(G[0]) + ceres::log(partiality) + T(log_lp_) + ceres::log(Itrue[0]); + residual[0] = (log_Ipred - T(log_Iobs_)) * T(weight_); + return true; + } -struct IntensityResidual { - IntensityResidual(const Reflection &r, double sigma_obs, double wedge_deg, bool refine_partiality) - : Iobs_(static_cast(r.I)), - weight_(SafeInv(sigma_obs, 1.0)), - delta_phi_(r.delta_phi_deg), - lp_(SafeInv(static_cast(r.rlp), 1.0)), - half_wedge_(wedge_deg / 2.0), - c1_(r.zeta / std::sqrt(2.0)), - partiality_(r.partiality), - refine_partiality_(refine_partiality) {} + double log_Iobs_; + double weight_; // w_i ≈ I_obs / sigma_obs (relative weight in log-space) + double delta_phi_; + double log_lp_; + double half_wedge_; + double c1_; + double partiality_; + bool refine_partiality_; + }; - template - bool operator()(const T *const G, - const T *const mosaicity, - const T *const Itrue, - T *residual) const { - T partiality; - if (refine_partiality_ && mosaicity[0] != 0.0) { - const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0]; - const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0]; - partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); - } else - partiality = T(partiality_); + struct IntensityResidual { + IntensityResidual(const Reflection &r, double sigma_obs, double wedge_deg, bool refine_partiality) + : Iobs_(static_cast(r.I)), + weight_(SafeInv(sigma_obs, 1.0)), + delta_phi_(r.delta_phi_deg), + lp_(SafeInv(static_cast(r.rlp), 1.0)), + half_wedge_(wedge_deg / 2.0), + c1_(r.zeta / std::sqrt(2.0)), + partiality_(r.partiality), + refine_partiality_(refine_partiality) { + } - const T Ipred = G[0] * partiality * T(lp_) * Itrue[0]; - residual[0] = (Ipred - T(Iobs_)) * T(weight_); - return true; - } + template + bool operator()(const T *const G, + const T *const mosaicity, + const T *const Itrue, + T *residual) const { + T partiality; + if (refine_partiality_ && mosaicity[0] != 0.0) { + const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0]; + const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0]; + partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); + } else + partiality = T(partiality_); - double Iobs_; - double weight_; - double delta_phi_; - double lp_; - double half_wedge_; - double c1_; - double partiality_; - bool refine_partiality_; -}; + const T Ipred = G[0] * partiality * T(lp_) * Itrue[0]; + residual[0] = (Ipred - T(Iobs_)) * T(weight_); + return true; + } -struct ScaleRegularizationResidual { - explicit ScaleRegularizationResidual(double sigma_k) - : inv_sigma_(SafeInv(sigma_k, 1.0)) {} + double Iobs_; + double weight_; + double delta_phi_; + double lp_; + double half_wedge_; + double c1_; + double partiality_; + bool refine_partiality_; + }; - template - bool operator()(const T* const k, T* residual) const { - residual[0] = (k[0] - T(1.0)) * T(inv_sigma_); - return true; - } + struct ScaleRegularizationResidual { + explicit ScaleRegularizationResidual(double sigma_k) + : inv_sigma_(SafeInv(sigma_k, 1.0)) { + } - double inv_sigma_; -}; - - struct SmoothnessRegularizationResidual { - explicit SmoothnessRegularizationResidual(double sigma) - : inv_sigma_(SafeInv(sigma, 1.0)) {} - template - bool operator()(const T* const k0, - const T* const k1, - const T* const k2, - T* residual) const { - residual[0] = (ceres::log(k0[0]) + ceres::log(k2[0]) - T(2.0) * ceres::log(k1[0])) * T(inv_sigma_); + template + bool operator()(const T *const k, T *residual) const { + residual[0] = (k[0] - T(1.0)) * T(inv_sigma_); return true; } double inv_sigma_; }; + struct SmoothnessRegularizationResidual { + explicit SmoothnessRegularizationResidual(double sigma) + : inv_sigma_(SafeInv(sigma, 1.0)) { + } + + template + bool operator()(const T *const k0, + const T *const k1, + const T *const k2, + T *residual) const { + residual[0] = (ceres::log(k0[0]) + ceres::log(k2[0]) - T(2.0) * ceres::log(k1[0])) * T(inv_sigma_); + return true; + } + + double inv_sigma_; + }; } // namespace -ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& observations, - const ScaleMergeOptions& opt) { +ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector &observations, + const ScaleMergeOptions &opt) { ScaleMergeResult out; struct ObsRef { - const Reflection* r = nullptr; + const Reflection *r = nullptr; int img_id = 0; int img_slot = -1; int hkl_slot = -1; @@ -258,7 +261,7 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob std::unordered_map hklToSlot; hklToSlot.reserve(observations.size()); - for (const auto& r : observations) { + for (const auto &r: observations) { const double d = SafeD(r.d); if (!std::isfinite(d)) continue; @@ -314,7 +317,7 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob out.image_scale_g.assign(nimg, 1.0); out.image_ids.assign(nimg, 0); - for (const auto& kv : imgIdToSlot) { + for (const auto &kv: imgIdToSlot) { out.image_ids[kv.second] = kv.first; } @@ -330,12 +333,12 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob // Initialize Itrue from per-HKL median of observed intensities { - std::vector> per_hkl_I(nhkl); - for (const auto& o : obs) { + std::vector > per_hkl_I(nhkl); + for (const auto &o: obs) { per_hkl_I[o.hkl_slot].push_back(static_cast(o.r->I)); } for (int h = 0; h < nhkl; ++h) { - auto& v = per_hkl_I[h]; + auto &v = per_hkl_I[h]; if (v.empty()) { Itrue[h] = std::max(opt.min_sigma, 1e-6); continue; @@ -354,7 +357,7 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob std::vector is_valid_hkl_slot(nhkl, false); - for (const auto& o : obs) { + for (const auto &o: obs) { size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0; if (opt.log_scaling_residual) { @@ -362,7 +365,7 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob if (o.r->I <= 0.0f) continue; - auto* cost = new ceres::AutoDiffCostFunction( + auto *cost = new ceres::AutoDiffCostFunction( new LogIntensityResidual(*o.r, o.sigma, opt.wedge_deg, refine_partiality)); problem.AddResidualBlock(cost, @@ -372,7 +375,7 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob &Itrue[o.hkl_slot]); is_valid_hkl_slot[o.hkl_slot] = true; } else { - auto* cost = new ceres::AutoDiffCostFunction( + auto *cost = new ceres::AutoDiffCostFunction( new IntensityResidual(*o.r, o.sigma, opt.wedge_deg, refine_partiality)); problem.AddResidualBlock(cost, @@ -457,7 +460,7 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob out.mosaicity_deg[i] = opt.per_image_mosaicity ? mosaicity[i] : mosaicity[0]; std::vector slotToHKL(nhkl); - for (const auto& kv : hklToSlot) { + for (const auto &kv: hklToSlot) { slotToHKL[kv.second] = kv.first; } @@ -473,14 +476,14 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob // Populate d from median of observations per HKL { - std::vector> per_hkl_d(nhkl); - for (const auto& o : obs) { + std::vector > per_hkl_d(nhkl); + for (const auto &o: obs) { const double d_val = static_cast(o.r->d); if (std::isfinite(d_val) && d_val > 0.0) per_hkl_d[o.hkl_slot].push_back(d_val); } for (int h = 0; h < nhkl; ++h) { - auto& v = per_hkl_d[h]; + auto &v = per_hkl_d[h]; if (!v.empty()) { std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); out.merged[h].d = v[v.size() / 2]; @@ -490,324 +493,238 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob std::cout << summary.FullReport() << std::endl; - // Merge (XDS/XSCALE style: inverse-variance weighted mean) - { - struct HKLAccum { - double sum_wI = 0.0; // Σ(w_i * I_i) - double sum_w = 0.0; // Σ(w_i) - double sum_wI2 = 0.0; // Σ(w_i * I_i²) for internal variance - int count = 0; - }; - std::vector accum(nhkl); + // ---- Compute corrected observations once (used for both merging and statistics) ---- + struct CorrectedObs { + int hkl_slot; + double I_corr; + double sigma_corr; + }; + std::vector corr_obs; + corr_obs.reserve(obs.size()); + { const double half_wedge = opt.wedge_deg / 2.0; - for (const auto& o : obs) { - const Reflection& r = *o.r; + for (const auto &o: obs) { + const Reflection &r = *o.r; const double lp = SafeInv(static_cast(r.rlp), 1.0); const double G_i = g[o.img_slot]; // Compute partiality with refined mosaicity double partiality; - size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0; + const size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0; if (refine_partiality && mosaicity[mos_slot] > 0.0) { - double c1 = r.zeta / std::sqrt(2.0); - double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot]; - double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot]; + const double c1 = r.zeta / std::sqrt(2.0); + const double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot]; + const double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot]; partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0; } else { partiality = r.partiality; } - if (partiality > opt.min_partiality_for_merge) { - const double correction = G_i * partiality * lp; - if (correction <= 0.0) continue; + if (partiality <= opt.min_partiality_for_merge) + continue; + const double correction = G_i * partiality * lp; + if (correction <= 0.0) + continue; - // Corrected intensity and propagated sigma - const double I_corr = static_cast(r.I) / correction; - const double sigma_corr = o.sigma / correction; - - // XDS/XSCALE style: weight = 1/σ² - const double w = 1.0 / (sigma_corr * sigma_corr); - - auto& a = accum[o.hkl_slot]; - a.sum_wI += w * I_corr; - a.sum_w += w; - a.sum_wI2 += w * I_corr * I_corr; - a.count++; - } - } - - // Populate merged reflections - for (int h = 0; h < nhkl; ++h) { - const auto& a = accum[h]; - - if (a.sum_w > 0.0) { - // XDS/XSCALE style: weighted mean - out.merged[h].I = a.sum_wI / a.sum_w; - - // Standard error of weighted mean: σ = 1/√(Σw) - double sigma_stat = std::sqrt(1.0 / a.sum_w); - - if (a.count >= 2) { - // XDS/XSCALE style: use internal consistency to estimate sigma - // σ² = (1/(n-1)) * Σw_i(I_i - )² / Σw_i - const double var_internal = a.sum_wI2 - (a.sum_wI * a.sum_wI) / a.sum_w; - const double sigma_int = std::sqrt(var_internal / (a.sum_w * (a.count - 1))); - - // Take the larger of statistical and internal sigma - // (XDS approach: if data is worse than expected, use internal estimate) - out.merged[h].sigma = std::max(sigma_stat, sigma_int); - } else { - // Single observation: use propagated sigma - out.merged[h].sigma = sigma_stat; - } - } - // else: I and sigma stay at initialized values (Itrue[h] and 0.0) + corr_obs.push_back({ + o.hkl_slot, + static_cast(r.I) / correction, + o.sigma / correction + }); } } - // ---- Compute per-shell merging statistics ---- - { - constexpr int kStatShells = 10; + // ---- Merge (XDS/XSCALE style: inverse-variance weighted mean) ---- + { + struct HKLAccum { + double sum_wI = 0.0; + double sum_w = 0.0; + double sum_wI2 = 0.0; + int count = 0; + }; + std::vector accum(nhkl); - // Determine d-range from merged reflections - float stat_d_min = std::numeric_limits::max(); - float stat_d_max = 0.0f; - for (int h = 0; h < nhkl; ++h) { - const auto d = static_cast(out.merged[h].d); - if (std::isfinite(d) && d > 0.0f) { - if (d < stat_d_min) stat_d_min = d; - if (d > stat_d_max) stat_d_max = d; - } + for (const auto &co: corr_obs) { + const double w = 1.0 / (co.sigma_corr * co.sigma_corr); + auto &a = accum[co.hkl_slot]; + a.sum_wI += w * co.I_corr; + a.sum_w += w; + a.sum_wI2 += w * co.I_corr * co.I_corr; + a.count++; + } + + for (int h = 0; h < nhkl; ++h) { + const auto &a = accum[h]; + if (a.sum_w <= 0.0) + continue; + + out.merged[h].I = a.sum_wI / a.sum_w; + const double sigma_stat = std::sqrt(1.0 / a.sum_w); + + if (a.count >= 2) { + const double var_internal = a.sum_wI2 - (a.sum_wI * a.sum_wI) / a.sum_w; + const double sigma_int = std::sqrt(var_internal / (a.sum_w * (a.count - 1))); + out.merged[h].sigma = std::max(sigma_stat, sigma_int); + } else { + out.merged[h].sigma = sigma_stat; } + } + } - if (stat_d_min < stat_d_max && stat_d_min > 0.0f) { - const float d_min_pad = stat_d_min * 0.999f; - const float d_max_pad = stat_d_max * 1.001f; - ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells); + // ---- Compute per-shell merging statistics ---- + { + constexpr int kStatShells = 10; - const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq(); - const auto shell_min_res = stat_shells.GetShellMinRes(); - - // Assign each unique reflection to a shell - std::vector hkl_shell(nhkl, -1); - for (int h = 0; h < nhkl; ++h) { - const auto d = static_cast(out.merged[h].d); - if (std::isfinite(d) && d > 0.0f) { - auto s = stat_shells.GetShell(d); - if (s.has_value()) - hkl_shell[h] = s.value(); - } - } - - // Build corrected observations per HKL (same correction logic as merge) - struct CorrObs { - int hkl_slot; - int shell; - double I_corr; - }; - std::vector corr_obs; - corr_obs.reserve(obs.size()); - - const double half_wedge = opt.wedge_deg / 2.0; - - for (const auto& o : obs) { - if (hkl_shell[o.hkl_slot] < 0) - continue; - - const Reflection& r = *o.r; - const double lp = SafeInv(static_cast(r.rlp), 1.0); - const double G_i = g[o.img_slot]; - - double partiality; - size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0; - if (refine_partiality && mosaicity[mos_slot] > 0.0) { - double c1 = r.zeta / std::sqrt(2.0); - double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot]; - double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot]; - partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0; - } else { - partiality = r.partiality; - } - - if (partiality <= opt.min_partiality_for_merge) - continue; - const double correction = G_i * partiality * lp; - if (correction <= 0.0) - continue; - - CorrObs co; - co.hkl_slot = o.hkl_slot; - co.shell = hkl_shell[o.hkl_slot]; - co.I_corr = static_cast(r.I) / correction; - corr_obs.push_back(co); - } - - // Per-HKL: collect corrected intensities for Rmeas - struct HKLStats { - double sum_I = 0.0; - int n = 0; - std::vector I_list; - }; - std::vector per_hkl(nhkl); - - for (const auto& co : corr_obs) { - auto& hs = per_hkl[co.hkl_slot]; - hs.sum_I += co.I_corr; - hs.n += 1; - hs.I_list.push_back(co.I_corr); - } - - // Compute per-shell statistics - out.statistics.shells.resize(kStatShells); - - // Accumulators per shell - struct ShellAccum { - int total_obs = 0; - std::unordered_set unique_hkls; - double rmeas_num = 0.0; // Σ sqrt(n/(n-1)) * Σ|I_i - | - double rmeas_den = 0.0; // Σ Σ I_i - double sum_i_over_sig = 0.0; - int n_merged_with_sigma = 0; - }; - std::vector shell_acc(kStatShells); - - // Accumulate per-HKL contributions to each shell - for (int h = 0; h < nhkl; ++h) { - if (hkl_shell[h] < 0) - continue; - const int s = hkl_shell[h]; - auto& sa = shell_acc[s]; - const auto& hs = per_hkl[h]; - if (hs.n == 0) - continue; - - sa.unique_hkls.insert(h); - sa.total_obs += hs.n; - - const double mean_I = hs.sum_I / static_cast(hs.n); - - // Rmeas numerator: sqrt(n/(n-1)) * Σ_i |I_i - | - if (hs.n >= 2) { - double sum_abs_dev = 0.0; - for (double Ii : hs.I_list) - sum_abs_dev += std::abs(Ii - mean_I); - sa.rmeas_num += std::sqrt(static_cast(hs.n) / (hs.n - 1.0)) * sum_abs_dev; - } - - // Rmeas denominator: Σ_i I_i (use absolute values for robustness) - for (double Ii : hs.I_list) - sa.rmeas_den += std::abs(Ii); - - // from merged reflection - if (out.merged[h].sigma > 0.0) { - sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma; - sa.n_merged_with_sigma += 1; - } - } - - // Completeness: enumerate ASU reflections per shell if we have space group + unit cell - std::vector possible_per_shell(kStatShells, 0); - int total_possible = 0; - bool have_completeness = false; - - if (opt.space_group.has_value()) { - // Try to build a gemmi UnitCell from the merged reflections' d-spacings - // We need the actual unit cell. Check if any merged reflection has d > 0. - // Actually, we need the real unit cell parameters. We can get them from - // the observations' HKL + d to deduce the metric tensor, but that's complex. - // Instead, we compute d from the unit cell if the caller set it via space_group. - // For now, we count unique reflections in the ASU by brute-force enumeration. - - // We'll try to infer cell parameters from the merged d-spacings + HKL. - // Simpler: use the already-available merged reflections d-spacings to get - // max h, k, l and enumerate. This is a practical approximation. - - const gemmi::SpaceGroup& sg = *opt.space_group; - const gemmi::GroupOps gops = sg.operations(); - const gemmi::ReciprocalAsu rasu(&sg); - - // Find max indices from merged data - int max_h = 0, max_k = 0, max_l = 0; - for (int idx = 0; idx < nhkl; ++idx) { - max_h = std::max(max_h, std::abs(out.merged[idx].h)); - max_k = std::max(max_k, std::abs(out.merged[idx].k)); - max_l = std::max(max_l, std::abs(out.merged[idx].l)); - } - - // We need a metric tensor to compute d-spacing from HKL. - // Estimate reciprocal metric from the merged data using least-squares. - // For simplicity, use the unit cell from the first observation's d + HKL - // This is getting complex - let's use a simpler approach: - // Build a map from HKL key -> d for merged reflections, then for completeness - // just count how many ASU reflections exist at each resolution. - // We enumerate all HKL in the box [-max_h..max_h] x [-max_k..max_k] x [-max_l..max_l], - // check if they're in the ASU, compute d from the observation data. - - // Actually the simplest robust approach: count all ASU HKLs that have d in - // the range of each shell. We need d(hkl) which requires the metric tensor. - // Since we don't have direct access to unit cell parameters here, skip completeness - // or let the caller fill it in. - - // Mark completeness as unavailable for now - the caller (jfjoch_process) can - // fill it in if it has the unit cell. - have_completeness = false; - } - - // Fill output statistics - for (int s = 0; s < kStatShells; ++s) { - auto& ss = out.statistics.shells[s]; - const auto& sa = shell_acc[s]; - - ss.mean_one_over_d2 = shell_mean_1_d2[s]; - // Shell boundaries: shell 0 goes from d_max_pad to shell_min_res[0], etc. - ss.d_min = shell_min_res[s]; - ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1]; - - ss.total_observations = sa.total_obs; - ss.unique_reflections = static_cast(sa.unique_hkls.size()); - ss.rmeas = (sa.rmeas_den > 0.0) ? (sa.rmeas_num / sa.rmeas_den) : 0.0; - ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0) - ? (sa.sum_i_over_sig / sa.n_merged_with_sigma) : 0.0; - ss.possible_reflections = possible_per_shell[s]; - ss.completeness = (have_completeness && possible_per_shell[s] > 0) - ? static_cast(sa.unique_hkls.size()) / possible_per_shell[s] : 0.0; - } - - // Overall statistics - { - auto& ov = out.statistics.overall; - ov.d_min = stat_d_min; - ov.d_max = stat_d_max; - ov.mean_one_over_d2 = 0.0f; - - int total_obs_all = 0; - std::unordered_set all_unique; - double rmeas_num_all = 0.0, rmeas_den_all = 0.0; - double sum_isig_all = 0.0; - int n_isig_all = 0; - - for (int s = 0; s < kStatShells; ++s) { - const auto& sa = shell_acc[s]; - total_obs_all += sa.total_obs; - all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end()); - rmeas_num_all += sa.rmeas_num; - rmeas_den_all += sa.rmeas_den; - sum_isig_all += sa.sum_i_over_sig; - n_isig_all += sa.n_merged_with_sigma; - } - - ov.total_observations = total_obs_all; - ov.unique_reflections = static_cast(all_unique.size()); - ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0; - ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0; - ov.possible_reflections = total_possible; - ov.completeness = (have_completeness && total_possible > 0) - ? static_cast(all_unique.size()) / total_possible : 0.0; - } + float stat_d_min = std::numeric_limits::max(); + float stat_d_max = 0.0f; + for (int h = 0; h < nhkl; ++h) { + const auto d = static_cast(out.merged[h].d); + if (std::isfinite(d) && d > 0.0f) { + stat_d_min = std::min(stat_d_min, d); + stat_d_max = std::max(stat_d_max, d); } } + if (stat_d_min < stat_d_max && stat_d_min > 0.0f) { + const float d_min_pad = stat_d_min * 0.999f; + const float d_max_pad = stat_d_max * 1.001f; + ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells); + + const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq(); + const auto shell_min_res = stat_shells.GetShellMinRes(); + + // Assign each unique reflection to a shell + std::vector hkl_shell(nhkl, -1); + for (int h = 0; h < nhkl; ++h) { + const auto d = static_cast(out.merged[h].d); + if (std::isfinite(d) && d > 0.0f) { + auto s = stat_shells.GetShell(d); + if (s.has_value()) + hkl_shell[h] = s.value(); + } + } + + // Per-HKL: collect corrected intensities for Rmeas + struct HKLStats { + double sum_I = 0.0; + int n = 0; + std::vector I_list; + }; + std::vector per_hkl(nhkl); + + for (const auto &co: corr_obs) { + if (hkl_shell[co.hkl_slot] < 0) + continue; + auto &hs = per_hkl[co.hkl_slot]; + hs.sum_I += co.I_corr; + hs.n += 1; + hs.I_list.push_back(co.I_corr); + } + + // Accumulators per shell + struct ShellAccum { + int total_obs = 0; + std::unordered_set unique_hkls; + double rmeas_num = 0.0; + double rmeas_den = 0.0; + double sum_i_over_sig = 0.0; + int n_merged_with_sigma = 0; + }; + std::vector shell_acc(kStatShells); + + for (int h = 0; h < nhkl; ++h) { + if (hkl_shell[h] < 0) + continue; + const int s = hkl_shell[h]; + auto &sa = shell_acc[s]; + const auto &hs = per_hkl[h]; + if (hs.n == 0) + continue; + + sa.unique_hkls.insert(h); + sa.total_obs += hs.n; + + const double mean_I = hs.sum_I / static_cast(hs.n); + + if (hs.n >= 2) { + double sum_abs_dev = 0.0; + for (double Ii: hs.I_list) + sum_abs_dev += std::abs(Ii - mean_I); + sa.rmeas_num += std::sqrt(static_cast(hs.n) / (hs.n - 1.0)) * sum_abs_dev; + } + + for (double Ii: hs.I_list) + sa.rmeas_den += std::abs(Ii); + + if (out.merged[h].sigma > 0.0) { + sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma; + sa.n_merged_with_sigma += 1; + } + } + + // Completeness (not yet available without unit cell) + std::vector possible_per_shell(kStatShells, 0); + int total_possible = 0; + bool have_completeness = false; + + // Fill output statistics + out.statistics.shells.resize(kStatShells); + for (int s = 0; s < kStatShells; ++s) { + auto &ss = out.statistics.shells[s]; + const auto &sa = shell_acc[s]; + + ss.mean_one_over_d2 = shell_mean_1_d2[s]; + ss.d_min = shell_min_res[s]; + ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1]; + + ss.total_observations = sa.total_obs; + ss.unique_reflections = static_cast(sa.unique_hkls.size()); + ss.rmeas = (sa.rmeas_den > 0.0) ? (sa.rmeas_num / sa.rmeas_den) : 0.0; + ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0) + ? (sa.sum_i_over_sig / sa.n_merged_with_sigma) + : 0.0; + ss.possible_reflections = possible_per_shell[s]; + ss.completeness = (have_completeness && possible_per_shell[s] > 0) + ? static_cast(sa.unique_hkls.size()) / possible_per_shell[s] + : 0.0; + } + + // Overall statistics + { + auto &ov = out.statistics.overall; + ov.d_min = stat_d_min; + ov.d_max = stat_d_max; + ov.mean_one_over_d2 = 0.0f; + + int total_obs_all = 0; + std::unordered_set all_unique; + double rmeas_num_all = 0.0, rmeas_den_all = 0.0; + double sum_isig_all = 0.0; + int n_isig_all = 0; + + for (int s = 0; s < kStatShells; ++s) { + const auto &sa = shell_acc[s]; + total_obs_all += sa.total_obs; + all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end()); + rmeas_num_all += sa.rmeas_num; + rmeas_den_all += sa.rmeas_den; + sum_isig_all += sa.sum_i_over_sig; + n_isig_all += sa.n_merged_with_sigma; + } + + ov.total_observations = total_obs_all; + ov.unique_reflections = static_cast(all_unique.size()); + ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0; + ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0; + ov.possible_reflections = total_possible; + ov.completeness = (have_completeness && total_possible > 0) + ? static_cast(all_unique.size()) / total_possible + : 0.0; + } + } + } + return out; }