From 0ed72eb2eaeb768ef8ffa9a1168fc6470d31ed7d Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Tue, 17 Feb 2026 19:46:28 +0100 Subject: [PATCH] ScaleAndMerge: Add statistics --- image_analysis/scale_merge/ScaleAndMerge.cpp | 244 +++++++++++++++++++ image_analysis/scale_merge/ScaleAndMerge.h | 22 ++ tools/jfjoch_process.cpp | 43 +++- 3 files changed, 305 insertions(+), 4 deletions(-) diff --git a/image_analysis/scale_merge/ScaleAndMerge.cpp b/image_analysis/scale_merge/ScaleAndMerge.cpp index b7914de8..7410c236 100644 --- a/image_analysis/scale_merge/ScaleAndMerge.cpp +++ b/image_analysis/scale_merge/ScaleAndMerge.cpp @@ -16,6 +16,8 @@ #include #include +#include "../../common/ResolutionShells.h" + namespace { struct HKLKey { @@ -565,5 +567,247 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& ob } } + // ---- Compute per-shell merging statistics ---- + { + constexpr int kStatShells = 10; + + // Determine d-range from merged reflections + float stat_d_min = std::numeric_limits::max(); + float stat_d_max = 0.0f; + for (int h = 0; h < nhkl; ++h) { + const auto d = static_cast(out.merged[h].d); + if (std::isfinite(d) && d > 0.0f) { + if (d < stat_d_min) stat_d_min = d; + if (d > stat_d_max) stat_d_max = d; + } + } + + if (stat_d_min < stat_d_max && stat_d_min > 0.0f) { + const float d_min_pad = stat_d_min * 0.999f; + const float d_max_pad = stat_d_max * 1.001f; + ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells); + + const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq(); + const auto shell_min_res = stat_shells.GetShellMinRes(); + + // Assign each unique reflection to a shell + std::vector hkl_shell(nhkl, -1); + for (int h = 0; h < nhkl; ++h) { + const auto d = static_cast(out.merged[h].d); + if (std::isfinite(d) && d > 0.0f) { + auto s = stat_shells.GetShell(d); + if (s.has_value()) + hkl_shell[h] = s.value(); + } + } + + // Build corrected observations per HKL (same correction logic as merge) + struct CorrObs { + int hkl_slot; + int shell; + double I_corr; + }; + std::vector corr_obs; + corr_obs.reserve(obs.size()); + + const double half_wedge = opt.wedge_deg / 2.0; + + for (const auto& o : obs) { + if (hkl_shell[o.hkl_slot] < 0) + continue; + + const Reflection& r = *o.r; + const double lp = SafeInv(static_cast(r.rlp), 1.0); + const double G_i = g[o.img_slot]; + + double partiality; + size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0; + if (refine_partiality && mosaicity[mos_slot] > 0.0) { + double c1 = r.zeta / std::sqrt(2.0); + double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot]; + double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot]; + partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0; + } else { + partiality = r.partiality; + } + + if (partiality <= opt.min_partiality_for_merge) + continue; + const double correction = G_i * partiality * lp; + if (correction <= 0.0) + continue; + + CorrObs co; + co.hkl_slot = o.hkl_slot; + co.shell = hkl_shell[o.hkl_slot]; + co.I_corr = static_cast(r.I) / correction; + corr_obs.push_back(co); + } + + // Per-HKL: collect corrected intensities for Rmeas + struct HKLStats { + double sum_I = 0.0; + int n = 0; + std::vector I_list; + }; + std::vector per_hkl(nhkl); + + for (const auto& co : corr_obs) { + auto& hs = per_hkl[co.hkl_slot]; + hs.sum_I += co.I_corr; + hs.n += 1; + hs.I_list.push_back(co.I_corr); + } + + // Compute per-shell statistics + out.statistics.shells.resize(kStatShells); + + // Accumulators per shell + struct ShellAccum { + int total_obs = 0; + std::unordered_set unique_hkls; + double rmeas_num = 0.0; // Σ sqrt(n/(n-1)) * Σ|I_i - | + double rmeas_den = 0.0; // Σ Σ I_i + double sum_i_over_sig = 0.0; + int n_merged_with_sigma = 0; + }; + std::vector shell_acc(kStatShells); + + // Accumulate per-HKL contributions to each shell + for (int h = 0; h < nhkl; ++h) { + if (hkl_shell[h] < 0) + continue; + const int s = hkl_shell[h]; + auto& sa = shell_acc[s]; + const auto& hs = per_hkl[h]; + if (hs.n == 0) + continue; + + sa.unique_hkls.insert(h); + sa.total_obs += hs.n; + + const double mean_I = hs.sum_I / static_cast(hs.n); + + // Rmeas numerator: sqrt(n/(n-1)) * Σ_i |I_i - | + if (hs.n >= 2) { + double sum_abs_dev = 0.0; + for (double Ii : hs.I_list) + sum_abs_dev += std::abs(Ii - mean_I); + sa.rmeas_num += std::sqrt(static_cast(hs.n) / (hs.n - 1.0)) * sum_abs_dev; + } + + // Rmeas denominator: Σ_i I_i (use absolute values for robustness) + for (double Ii : hs.I_list) + sa.rmeas_den += std::abs(Ii); + + // from merged reflection + if (out.merged[h].sigma > 0.0) { + sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma; + sa.n_merged_with_sigma += 1; + } + } + + // Completeness: enumerate ASU reflections per shell if we have space group + unit cell + std::vector possible_per_shell(kStatShells, 0); + int total_possible = 0; + bool have_completeness = false; + + if (opt.space_group.has_value()) { + // Try to build a gemmi UnitCell from the merged reflections' d-spacings + // We need the actual unit cell. Check if any merged reflection has d > 0. + // Actually, we need the real unit cell parameters. We can get them from + // the observations' HKL + d to deduce the metric tensor, but that's complex. + // Instead, we compute d from the unit cell if the caller set it via space_group. + // For now, we count unique reflections in the ASU by brute-force enumeration. + + // We'll try to infer cell parameters from the merged d-spacings + HKL. + // Simpler: use the already-available merged reflections d-spacings to get + // max h, k, l and enumerate. This is a practical approximation. + + const gemmi::SpaceGroup& sg = *opt.space_group; + const gemmi::GroupOps gops = sg.operations(); + const gemmi::ReciprocalAsu rasu(&sg); + + // Find max indices from merged data + int max_h = 0, max_k = 0, max_l = 0; + for (int idx = 0; idx < nhkl; ++idx) { + max_h = std::max(max_h, std::abs(out.merged[idx].h)); + max_k = std::max(max_k, std::abs(out.merged[idx].k)); + max_l = std::max(max_l, std::abs(out.merged[idx].l)); + } + + // We need a metric tensor to compute d-spacing from HKL. + // Estimate reciprocal metric from the merged data using least-squares. + // For simplicity, use the unit cell from the first observation's d + HKL + // This is getting complex - let's use a simpler approach: + // Build a map from HKL key -> d for merged reflections, then for completeness + // just count how many ASU reflections exist at each resolution. + // We enumerate all HKL in the box [-max_h..max_h] x [-max_k..max_k] x [-max_l..max_l], + // check if they're in the ASU, compute d from the observation data. + + // Actually the simplest robust approach: count all ASU HKLs that have d in + // the range of each shell. We need d(hkl) which requires the metric tensor. + // Since we don't have direct access to unit cell parameters here, skip completeness + // or let the caller fill it in. + + // Mark completeness as unavailable for now - the caller (jfjoch_process) can + // fill it in if it has the unit cell. + have_completeness = false; + } + + // Fill output statistics + for (int s = 0; s < kStatShells; ++s) { + auto& ss = out.statistics.shells[s]; + const auto& sa = shell_acc[s]; + + ss.mean_one_over_d2 = shell_mean_1_d2[s]; + // Shell boundaries: shell 0 goes from d_max_pad to shell_min_res[0], etc. + ss.d_min = shell_min_res[s]; + ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1]; + + ss.total_observations = sa.total_obs; + ss.unique_reflections = static_cast(sa.unique_hkls.size()); + ss.rmeas = (sa.rmeas_den > 0.0) ? (sa.rmeas_num / sa.rmeas_den) : 0.0; + ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0) + ? (sa.sum_i_over_sig / sa.n_merged_with_sigma) : 0.0; + ss.possible_reflections = possible_per_shell[s]; + ss.completeness = (have_completeness && possible_per_shell[s] > 0) + ? static_cast(sa.unique_hkls.size()) / possible_per_shell[s] : 0.0; + } + + // Overall statistics + { + auto& ov = out.statistics.overall; + ov.d_min = stat_d_min; + ov.d_max = stat_d_max; + ov.mean_one_over_d2 = 0.0f; + + int total_obs_all = 0; + std::unordered_set all_unique; + double rmeas_num_all = 0.0, rmeas_den_all = 0.0; + double sum_isig_all = 0.0; + int n_isig_all = 0; + + for (int s = 0; s < kStatShells; ++s) { + const auto& sa = shell_acc[s]; + total_obs_all += sa.total_obs; + all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end()); + rmeas_num_all += sa.rmeas_num; + rmeas_den_all += sa.rmeas_den; + sum_isig_all += sa.sum_i_over_sig; + n_isig_all += sa.n_merged_with_sigma; + } + + ov.total_observations = total_obs_all; + ov.unique_reflections = static_cast(all_unique.size()); + ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0; + ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0; + ov.possible_reflections = total_possible; + ov.completeness = (have_completeness && total_possible > 0) + ? static_cast(all_unique.size()) / total_possible : 0.0; + } + } + } + return out; } diff --git a/image_analysis/scale_merge/ScaleAndMerge.h b/image_analysis/scale_merge/ScaleAndMerge.h index f2c2f35e..7cf9a3c4 100644 --- a/image_analysis/scale_merge/ScaleAndMerge.h +++ b/image_analysis/scale_merge/ScaleAndMerge.h @@ -59,6 +59,25 @@ struct MergedReflection { double d = 0.0; }; +/// Per-resolution-shell merging statistics +struct MergeStatisticsShell { + float d_min = 0.0f; ///< High-resolution limit of this shell (Å) + float d_max = 0.0f; ///< Low-resolution limit of this shell (Å) + float mean_one_over_d2 = 0; ///< Mean 1/d² in shell + int total_observations = 0; ///< Total number of (corrected) observations + int unique_reflections = 0; ///< Number of unique HKLs with observations + double rmeas = 0.0; ///< Redundancy-independent merging R-factor + double mean_i_over_sigma = 0.0; ///< Mean I/σ(I) of the merged reflections + double completeness = 0.0; ///< Fraction of possible reflections observed (0 if unknown) + int possible_reflections = 0;///< Theoretical number of reflections in this shell (0 if unknown) +}; + +/// Overall + per-shell merging statistics +struct MergeStatistics { + std::vector shells; + MergeStatisticsShell overall; ///< Statistics over all shells combined +}; + struct ScaleMergeResult { std::vector merged; std::vector image_scale_g; @@ -66,6 +85,9 @@ struct ScaleMergeResult { // One mosaicity value per image (degrees). std::vector mosaicity_deg; + + /// Per-shell and overall merging statistics (populated after merging) + MergeStatistics statistics; }; ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& observations, diff --git a/tools/jfjoch_process.cpp b/tools/jfjoch_process.cpp index 4836b7e9..54ebc810 100644 --- a/tools/jfjoch_process.cpp +++ b/tools/jfjoch_process.cpp @@ -450,10 +450,45 @@ int main(int argc, char **argv) { double scale_time = std::chrono::duration(scale_end - scale_start).count(); if (scale_result) { - logger.Info("Scaling completed in {:.2f} s ({} unique reflections, {} images)", - scale_time, - scale_result->merged.size(), - scale_result->image_ids.size()); + +// ... existing code ... + logger.Info("Scaling completed in {:.2f} s ({} unique reflections, {} images)", + scale_time, + scale_result->merged.size(), + scale_result->image_ids.size()); + + // Print resolution-shell statistics table + { + const auto& stats = scale_result->statistics; + logger.Info(""); + logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>10s}", + "d_min", "N_obs", "N_uniq", "Rmeas", "", "Complete"); + logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}", + "", "", "", "", "", ""); + for (const auto& sh : stats.shells) { + if (sh.unique_reflections == 0) + continue; + std::string compl_str = (sh.completeness > 0.0) + ? fmt::format("{:8.1f}%", sh.completeness * 100.0) + : " N/A"; + logger.Info(" {:8.2f} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}", + sh.d_min, sh.total_observations, sh.unique_reflections, + sh.rmeas * 100, sh.mean_i_over_sigma, compl_str); + } + // Overall + { + const auto& ov = stats.overall; + logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}", + "", "", "", "", "", ""); + std::string compl_str = (ov.completeness > 0.0) + ? fmt::format("{:8.1f}%", ov.completeness * 100.0) + : " N/A"; + logger.Info(" {:>8s} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}", + "Overall", ov.total_observations, ov.unique_reflections, + ov.rmeas * 100, ov.mean_i_over_sigma, compl_str); + } + logger.Info(""); + } // Write image.dat (image_id mosaicity_deg K) {