ScaleAndMerge: Add statistics

This commit is contained in:
2026-02-17 19:46:28 +01:00
parent be5dff5370
commit 0ed72eb2ea
3 changed files with 305 additions and 4 deletions

View File

@@ -16,6 +16,8 @@
#include <utility>
#include <vector>
#include "../../common/ResolutionShells.h"
namespace {
struct HKLKey {
@@ -565,5 +567,247 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& ob
}
}
// ---- Compute per-shell merging statistics ----
{
constexpr int kStatShells = 10;
// Determine d-range from merged reflections
float stat_d_min = std::numeric_limits<float>::max();
float stat_d_max = 0.0f;
for (int h = 0; h < nhkl; ++h) {
const auto d = static_cast<float>(out.merged[h].d);
if (std::isfinite(d) && d > 0.0f) {
if (d < stat_d_min) stat_d_min = d;
if (d > stat_d_max) stat_d_max = d;
}
}
if (stat_d_min < stat_d_max && stat_d_min > 0.0f) {
const float d_min_pad = stat_d_min * 0.999f;
const float d_max_pad = stat_d_max * 1.001f;
ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells);
const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq();
const auto shell_min_res = stat_shells.GetShellMinRes();
// Assign each unique reflection to a shell
std::vector<int32_t> hkl_shell(nhkl, -1);
for (int h = 0; h < nhkl; ++h) {
const auto d = static_cast<float>(out.merged[h].d);
if (std::isfinite(d) && d > 0.0f) {
auto s = stat_shells.GetShell(d);
if (s.has_value())
hkl_shell[h] = s.value();
}
}
// Build corrected observations per HKL (same correction logic as merge)
struct CorrObs {
int hkl_slot;
int shell;
double I_corr;
};
std::vector<CorrObs> corr_obs;
corr_obs.reserve(obs.size());
const double half_wedge = opt.wedge_deg / 2.0;
for (const auto& o : obs) {
if (hkl_shell[o.hkl_slot] < 0)
continue;
const Reflection& r = *o.r;
const double lp = SafeInv(static_cast<double>(r.rlp), 1.0);
const double G_i = g[o.img_slot];
double partiality;
size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0;
if (refine_partiality && mosaicity[mos_slot] > 0.0) {
double c1 = r.zeta / std::sqrt(2.0);
double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot];
double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot];
partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0;
} else {
partiality = r.partiality;
}
if (partiality <= opt.min_partiality_for_merge)
continue;
const double correction = G_i * partiality * lp;
if (correction <= 0.0)
continue;
CorrObs co;
co.hkl_slot = o.hkl_slot;
co.shell = hkl_shell[o.hkl_slot];
co.I_corr = static_cast<double>(r.I) / correction;
corr_obs.push_back(co);
}
// Per-HKL: collect corrected intensities for Rmeas
struct HKLStats {
double sum_I = 0.0;
int n = 0;
std::vector<double> I_list;
};
std::vector<HKLStats> per_hkl(nhkl);
for (const auto& co : corr_obs) {
auto& hs = per_hkl[co.hkl_slot];
hs.sum_I += co.I_corr;
hs.n += 1;
hs.I_list.push_back(co.I_corr);
}
// Compute per-shell statistics
out.statistics.shells.resize(kStatShells);
// Accumulators per shell
struct ShellAccum {
int total_obs = 0;
std::unordered_set<int> unique_hkls;
double rmeas_num = 0.0; // Σ sqrt(n/(n-1)) * Σ|I_i - <I>|
double rmeas_den = 0.0; // Σ Σ I_i
double sum_i_over_sig = 0.0;
int n_merged_with_sigma = 0;
};
std::vector<ShellAccum> shell_acc(kStatShells);
// Accumulate per-HKL contributions to each shell
for (int h = 0; h < nhkl; ++h) {
if (hkl_shell[h] < 0)
continue;
const int s = hkl_shell[h];
auto& sa = shell_acc[s];
const auto& hs = per_hkl[h];
if (hs.n == 0)
continue;
sa.unique_hkls.insert(h);
sa.total_obs += hs.n;
const double mean_I = hs.sum_I / static_cast<double>(hs.n);
// Rmeas numerator: sqrt(n/(n-1)) * Σ_i |I_i - <I>|
if (hs.n >= 2) {
double sum_abs_dev = 0.0;
for (double Ii : hs.I_list)
sum_abs_dev += std::abs(Ii - mean_I);
sa.rmeas_num += std::sqrt(static_cast<double>(hs.n) / (hs.n - 1.0)) * sum_abs_dev;
}
// Rmeas denominator: Σ_i I_i (use absolute values for robustness)
for (double Ii : hs.I_list)
sa.rmeas_den += std::abs(Ii);
// <I/σ> from merged reflection
if (out.merged[h].sigma > 0.0) {
sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma;
sa.n_merged_with_sigma += 1;
}
}
// Completeness: enumerate ASU reflections per shell if we have space group + unit cell
std::vector<int> possible_per_shell(kStatShells, 0);
int total_possible = 0;
bool have_completeness = false;
if (opt.space_group.has_value()) {
// Try to build a gemmi UnitCell from the merged reflections' d-spacings
// We need the actual unit cell. Check if any merged reflection has d > 0.
// Actually, we need the real unit cell parameters. We can get them from
// the observations' HKL + d to deduce the metric tensor, but that's complex.
// Instead, we compute d from the unit cell if the caller set it via space_group.
// For now, we count unique reflections in the ASU by brute-force enumeration.
// We'll try to infer cell parameters from the merged d-spacings + HKL.
// Simpler: use the already-available merged reflections d-spacings to get
// max h, k, l and enumerate. This is a practical approximation.
const gemmi::SpaceGroup& sg = *opt.space_group;
const gemmi::GroupOps gops = sg.operations();
const gemmi::ReciprocalAsu rasu(&sg);
// Find max indices from merged data
int max_h = 0, max_k = 0, max_l = 0;
for (int idx = 0; idx < nhkl; ++idx) {
max_h = std::max(max_h, std::abs(out.merged[idx].h));
max_k = std::max(max_k, std::abs(out.merged[idx].k));
max_l = std::max(max_l, std::abs(out.merged[idx].l));
}
// We need a metric tensor to compute d-spacing from HKL.
// Estimate reciprocal metric from the merged data using least-squares.
// For simplicity, use the unit cell from the first observation's d + HKL
// This is getting complex - let's use a simpler approach:
// Build a map from HKL key -> d for merged reflections, then for completeness
// just count how many ASU reflections exist at each resolution.
// We enumerate all HKL in the box [-max_h..max_h] x [-max_k..max_k] x [-max_l..max_l],
// check if they're in the ASU, compute d from the observation data.
// Actually the simplest robust approach: count all ASU HKLs that have d in
// the range of each shell. We need d(hkl) which requires the metric tensor.
// Since we don't have direct access to unit cell parameters here, skip completeness
// or let the caller fill it in.
// Mark completeness as unavailable for now - the caller (jfjoch_process) can
// fill it in if it has the unit cell.
have_completeness = false;
}
// Fill output statistics
for (int s = 0; s < kStatShells; ++s) {
auto& ss = out.statistics.shells[s];
const auto& sa = shell_acc[s];
ss.mean_one_over_d2 = shell_mean_1_d2[s];
// Shell boundaries: shell 0 goes from d_max_pad to shell_min_res[0], etc.
ss.d_min = shell_min_res[s];
ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1];
ss.total_observations = sa.total_obs;
ss.unique_reflections = static_cast<int>(sa.unique_hkls.size());
ss.rmeas = (sa.rmeas_den > 0.0) ? (sa.rmeas_num / sa.rmeas_den) : 0.0;
ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0)
? (sa.sum_i_over_sig / sa.n_merged_with_sigma) : 0.0;
ss.possible_reflections = possible_per_shell[s];
ss.completeness = (have_completeness && possible_per_shell[s] > 0)
? static_cast<double>(sa.unique_hkls.size()) / possible_per_shell[s] : 0.0;
}
// Overall statistics
{
auto& ov = out.statistics.overall;
ov.d_min = stat_d_min;
ov.d_max = stat_d_max;
ov.mean_one_over_d2 = 0.0f;
int total_obs_all = 0;
std::unordered_set<int> all_unique;
double rmeas_num_all = 0.0, rmeas_den_all = 0.0;
double sum_isig_all = 0.0;
int n_isig_all = 0;
for (int s = 0; s < kStatShells; ++s) {
const auto& sa = shell_acc[s];
total_obs_all += sa.total_obs;
all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end());
rmeas_num_all += sa.rmeas_num;
rmeas_den_all += sa.rmeas_den;
sum_isig_all += sa.sum_i_over_sig;
n_isig_all += sa.n_merged_with_sigma;
}
ov.total_observations = total_obs_all;
ov.unique_reflections = static_cast<int>(all_unique.size());
ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0;
ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0;
ov.possible_reflections = total_possible;
ov.completeness = (have_completeness && total_possible > 0)
? static_cast<double>(all_unique.size()) / total_possible : 0.0;
}
}
}
return out;
}

View File

@@ -59,6 +59,25 @@ struct MergedReflection {
double d = 0.0;
};
/// Per-resolution-shell merging statistics
struct MergeStatisticsShell {
float d_min = 0.0f; ///< High-resolution limit of this shell (Å)
float d_max = 0.0f; ///< Low-resolution limit of this shell (Å)
float mean_one_over_d2 = 0; ///< Mean 1/d² in shell
int total_observations = 0; ///< Total number of (corrected) observations
int unique_reflections = 0; ///< Number of unique HKLs with observations
double rmeas = 0.0; ///< Redundancy-independent merging R-factor
double mean_i_over_sigma = 0.0; ///< Mean I/σ(I) of the merged reflections
double completeness = 0.0; ///< Fraction of possible reflections observed (0 if unknown)
int possible_reflections = 0;///< Theoretical number of reflections in this shell (0 if unknown)
};
/// Overall + per-shell merging statistics
struct MergeStatistics {
std::vector<MergeStatisticsShell> shells;
MergeStatisticsShell overall; ///< Statistics over all shells combined
};
struct ScaleMergeResult {
std::vector<MergedReflection> merged;
std::vector<double> image_scale_g;
@@ -66,6 +85,9 @@ struct ScaleMergeResult {
// One mosaicity value per image (degrees).
std::vector<double> mosaicity_deg;
/// Per-shell and overall merging statistics (populated after merging)
MergeStatistics statistics;
};
ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& observations,

View File

@@ -450,10 +450,45 @@ int main(int argc, char **argv) {
double scale_time = std::chrono::duration<double>(scale_end - scale_start).count();
if (scale_result) {
logger.Info("Scaling completed in {:.2f} s ({} unique reflections, {} images)",
scale_time,
scale_result->merged.size(),
scale_result->image_ids.size());
// ... existing code ...
logger.Info("Scaling completed in {:.2f} s ({} unique reflections, {} images)",
scale_time,
scale_result->merged.size(),
scale_result->image_ids.size());
// Print resolution-shell statistics table
{
const auto& stats = scale_result->statistics;
logger.Info("");
logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>10s}",
"d_min", "N_obs", "N_uniq", "Rmeas", "<I/sig>", "Complete");
logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}",
"", "", "", "", "", "");
for (const auto& sh : stats.shells) {
if (sh.unique_reflections == 0)
continue;
std::string compl_str = (sh.completeness > 0.0)
? fmt::format("{:8.1f}%", sh.completeness * 100.0)
: " N/A";
logger.Info(" {:8.2f} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}",
sh.d_min, sh.total_observations, sh.unique_reflections,
sh.rmeas * 100, sh.mean_i_over_sigma, compl_str);
}
// Overall
{
const auto& ov = stats.overall;
logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}",
"", "", "", "", "", "");
std::string compl_str = (ov.completeness > 0.0)
? fmt::format("{:8.1f}%", ov.completeness * 100.0)
: " N/A";
logger.Info(" {:>8s} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}",
"Overall", ov.total_observations, ov.unique_reflections,
ov.rmeas * 100, ov.mean_i_over_sigma, compl_str);
}
logger.Info("");
}
// Write image.dat (image_id mosaicity_deg K)
{