ScaleAndMerge: Add statistics
This commit is contained in:
@@ -16,6 +16,8 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../../common/ResolutionShells.h"
|
||||
|
||||
namespace {
|
||||
|
||||
struct HKLKey {
|
||||
@@ -565,5 +567,247 @@ ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& ob
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Compute per-shell merging statistics ----
|
||||
{
|
||||
constexpr int kStatShells = 10;
|
||||
|
||||
// Determine d-range from merged reflections
|
||||
float stat_d_min = std::numeric_limits<float>::max();
|
||||
float stat_d_max = 0.0f;
|
||||
for (int h = 0; h < nhkl; ++h) {
|
||||
const auto d = static_cast<float>(out.merged[h].d);
|
||||
if (std::isfinite(d) && d > 0.0f) {
|
||||
if (d < stat_d_min) stat_d_min = d;
|
||||
if (d > stat_d_max) stat_d_max = d;
|
||||
}
|
||||
}
|
||||
|
||||
if (stat_d_min < stat_d_max && stat_d_min > 0.0f) {
|
||||
const float d_min_pad = stat_d_min * 0.999f;
|
||||
const float d_max_pad = stat_d_max * 1.001f;
|
||||
ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells);
|
||||
|
||||
const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq();
|
||||
const auto shell_min_res = stat_shells.GetShellMinRes();
|
||||
|
||||
// Assign each unique reflection to a shell
|
||||
std::vector<int32_t> hkl_shell(nhkl, -1);
|
||||
for (int h = 0; h < nhkl; ++h) {
|
||||
const auto d = static_cast<float>(out.merged[h].d);
|
||||
if (std::isfinite(d) && d > 0.0f) {
|
||||
auto s = stat_shells.GetShell(d);
|
||||
if (s.has_value())
|
||||
hkl_shell[h] = s.value();
|
||||
}
|
||||
}
|
||||
|
||||
// Build corrected observations per HKL (same correction logic as merge)
|
||||
struct CorrObs {
|
||||
int hkl_slot;
|
||||
int shell;
|
||||
double I_corr;
|
||||
};
|
||||
std::vector<CorrObs> corr_obs;
|
||||
corr_obs.reserve(obs.size());
|
||||
|
||||
const double half_wedge = opt.wedge_deg / 2.0;
|
||||
|
||||
for (const auto& o : obs) {
|
||||
if (hkl_shell[o.hkl_slot] < 0)
|
||||
continue;
|
||||
|
||||
const Reflection& r = *o.r;
|
||||
const double lp = SafeInv(static_cast<double>(r.rlp), 1.0);
|
||||
const double G_i = g[o.img_slot];
|
||||
|
||||
double partiality;
|
||||
size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0;
|
||||
if (refine_partiality && mosaicity[mos_slot] > 0.0) {
|
||||
double c1 = r.zeta / std::sqrt(2.0);
|
||||
double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot];
|
||||
double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot];
|
||||
partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0;
|
||||
} else {
|
||||
partiality = r.partiality;
|
||||
}
|
||||
|
||||
if (partiality <= opt.min_partiality_for_merge)
|
||||
continue;
|
||||
const double correction = G_i * partiality * lp;
|
||||
if (correction <= 0.0)
|
||||
continue;
|
||||
|
||||
CorrObs co;
|
||||
co.hkl_slot = o.hkl_slot;
|
||||
co.shell = hkl_shell[o.hkl_slot];
|
||||
co.I_corr = static_cast<double>(r.I) / correction;
|
||||
corr_obs.push_back(co);
|
||||
}
|
||||
|
||||
// Per-HKL: collect corrected intensities for Rmeas
|
||||
struct HKLStats {
|
||||
double sum_I = 0.0;
|
||||
int n = 0;
|
||||
std::vector<double> I_list;
|
||||
};
|
||||
std::vector<HKLStats> per_hkl(nhkl);
|
||||
|
||||
for (const auto& co : corr_obs) {
|
||||
auto& hs = per_hkl[co.hkl_slot];
|
||||
hs.sum_I += co.I_corr;
|
||||
hs.n += 1;
|
||||
hs.I_list.push_back(co.I_corr);
|
||||
}
|
||||
|
||||
// Compute per-shell statistics
|
||||
out.statistics.shells.resize(kStatShells);
|
||||
|
||||
// Accumulators per shell
|
||||
struct ShellAccum {
|
||||
int total_obs = 0;
|
||||
std::unordered_set<int> unique_hkls;
|
||||
double rmeas_num = 0.0; // Σ sqrt(n/(n-1)) * Σ|I_i - <I>|
|
||||
double rmeas_den = 0.0; // Σ Σ I_i
|
||||
double sum_i_over_sig = 0.0;
|
||||
int n_merged_with_sigma = 0;
|
||||
};
|
||||
std::vector<ShellAccum> shell_acc(kStatShells);
|
||||
|
||||
// Accumulate per-HKL contributions to each shell
|
||||
for (int h = 0; h < nhkl; ++h) {
|
||||
if (hkl_shell[h] < 0)
|
||||
continue;
|
||||
const int s = hkl_shell[h];
|
||||
auto& sa = shell_acc[s];
|
||||
const auto& hs = per_hkl[h];
|
||||
if (hs.n == 0)
|
||||
continue;
|
||||
|
||||
sa.unique_hkls.insert(h);
|
||||
sa.total_obs += hs.n;
|
||||
|
||||
const double mean_I = hs.sum_I / static_cast<double>(hs.n);
|
||||
|
||||
// Rmeas numerator: sqrt(n/(n-1)) * Σ_i |I_i - <I>|
|
||||
if (hs.n >= 2) {
|
||||
double sum_abs_dev = 0.0;
|
||||
for (double Ii : hs.I_list)
|
||||
sum_abs_dev += std::abs(Ii - mean_I);
|
||||
sa.rmeas_num += std::sqrt(static_cast<double>(hs.n) / (hs.n - 1.0)) * sum_abs_dev;
|
||||
}
|
||||
|
||||
// Rmeas denominator: Σ_i I_i (use absolute values for robustness)
|
||||
for (double Ii : hs.I_list)
|
||||
sa.rmeas_den += std::abs(Ii);
|
||||
|
||||
// <I/σ> from merged reflection
|
||||
if (out.merged[h].sigma > 0.0) {
|
||||
sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma;
|
||||
sa.n_merged_with_sigma += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Completeness: enumerate ASU reflections per shell if we have space group + unit cell
|
||||
std::vector<int> possible_per_shell(kStatShells, 0);
|
||||
int total_possible = 0;
|
||||
bool have_completeness = false;
|
||||
|
||||
if (opt.space_group.has_value()) {
|
||||
// Try to build a gemmi UnitCell from the merged reflections' d-spacings
|
||||
// We need the actual unit cell. Check if any merged reflection has d > 0.
|
||||
// Actually, we need the real unit cell parameters. We can get them from
|
||||
// the observations' HKL + d to deduce the metric tensor, but that's complex.
|
||||
// Instead, we compute d from the unit cell if the caller set it via space_group.
|
||||
// For now, we count unique reflections in the ASU by brute-force enumeration.
|
||||
|
||||
// We'll try to infer cell parameters from the merged d-spacings + HKL.
|
||||
// Simpler: use the already-available merged reflections d-spacings to get
|
||||
// max h, k, l and enumerate. This is a practical approximation.
|
||||
|
||||
const gemmi::SpaceGroup& sg = *opt.space_group;
|
||||
const gemmi::GroupOps gops = sg.operations();
|
||||
const gemmi::ReciprocalAsu rasu(&sg);
|
||||
|
||||
// Find max indices from merged data
|
||||
int max_h = 0, max_k = 0, max_l = 0;
|
||||
for (int idx = 0; idx < nhkl; ++idx) {
|
||||
max_h = std::max(max_h, std::abs(out.merged[idx].h));
|
||||
max_k = std::max(max_k, std::abs(out.merged[idx].k));
|
||||
max_l = std::max(max_l, std::abs(out.merged[idx].l));
|
||||
}
|
||||
|
||||
// We need a metric tensor to compute d-spacing from HKL.
|
||||
// Estimate reciprocal metric from the merged data using least-squares.
|
||||
// For simplicity, use the unit cell from the first observation's d + HKL
|
||||
// This is getting complex - let's use a simpler approach:
|
||||
// Build a map from HKL key -> d for merged reflections, then for completeness
|
||||
// just count how many ASU reflections exist at each resolution.
|
||||
// We enumerate all HKL in the box [-max_h..max_h] x [-max_k..max_k] x [-max_l..max_l],
|
||||
// check if they're in the ASU, compute d from the observation data.
|
||||
|
||||
// Actually the simplest robust approach: count all ASU HKLs that have d in
|
||||
// the range of each shell. We need d(hkl) which requires the metric tensor.
|
||||
// Since we don't have direct access to unit cell parameters here, skip completeness
|
||||
// or let the caller fill it in.
|
||||
|
||||
// Mark completeness as unavailable for now - the caller (jfjoch_process) can
|
||||
// fill it in if it has the unit cell.
|
||||
have_completeness = false;
|
||||
}
|
||||
|
||||
// Fill output statistics
|
||||
for (int s = 0; s < kStatShells; ++s) {
|
||||
auto& ss = out.statistics.shells[s];
|
||||
const auto& sa = shell_acc[s];
|
||||
|
||||
ss.mean_one_over_d2 = shell_mean_1_d2[s];
|
||||
// Shell boundaries: shell 0 goes from d_max_pad to shell_min_res[0], etc.
|
||||
ss.d_min = shell_min_res[s];
|
||||
ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1];
|
||||
|
||||
ss.total_observations = sa.total_obs;
|
||||
ss.unique_reflections = static_cast<int>(sa.unique_hkls.size());
|
||||
ss.rmeas = (sa.rmeas_den > 0.0) ? (sa.rmeas_num / sa.rmeas_den) : 0.0;
|
||||
ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0)
|
||||
? (sa.sum_i_over_sig / sa.n_merged_with_sigma) : 0.0;
|
||||
ss.possible_reflections = possible_per_shell[s];
|
||||
ss.completeness = (have_completeness && possible_per_shell[s] > 0)
|
||||
? static_cast<double>(sa.unique_hkls.size()) / possible_per_shell[s] : 0.0;
|
||||
}
|
||||
|
||||
// Overall statistics
|
||||
{
|
||||
auto& ov = out.statistics.overall;
|
||||
ov.d_min = stat_d_min;
|
||||
ov.d_max = stat_d_max;
|
||||
ov.mean_one_over_d2 = 0.0f;
|
||||
|
||||
int total_obs_all = 0;
|
||||
std::unordered_set<int> all_unique;
|
||||
double rmeas_num_all = 0.0, rmeas_den_all = 0.0;
|
||||
double sum_isig_all = 0.0;
|
||||
int n_isig_all = 0;
|
||||
|
||||
for (int s = 0; s < kStatShells; ++s) {
|
||||
const auto& sa = shell_acc[s];
|
||||
total_obs_all += sa.total_obs;
|
||||
all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end());
|
||||
rmeas_num_all += sa.rmeas_num;
|
||||
rmeas_den_all += sa.rmeas_den;
|
||||
sum_isig_all += sa.sum_i_over_sig;
|
||||
n_isig_all += sa.n_merged_with_sigma;
|
||||
}
|
||||
|
||||
ov.total_observations = total_obs_all;
|
||||
ov.unique_reflections = static_cast<int>(all_unique.size());
|
||||
ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0;
|
||||
ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0;
|
||||
ov.possible_reflections = total_possible;
|
||||
ov.completeness = (have_completeness && total_possible > 0)
|
||||
? static_cast<double>(all_unique.size()) / total_possible : 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -59,6 +59,25 @@ struct MergedReflection {
|
||||
double d = 0.0;
|
||||
};
|
||||
|
||||
/// Per-resolution-shell merging statistics
|
||||
struct MergeStatisticsShell {
|
||||
float d_min = 0.0f; ///< High-resolution limit of this shell (Å)
|
||||
float d_max = 0.0f; ///< Low-resolution limit of this shell (Å)
|
||||
float mean_one_over_d2 = 0; ///< Mean 1/d² in shell
|
||||
int total_observations = 0; ///< Total number of (corrected) observations
|
||||
int unique_reflections = 0; ///< Number of unique HKLs with observations
|
||||
double rmeas = 0.0; ///< Redundancy-independent merging R-factor
|
||||
double mean_i_over_sigma = 0.0; ///< Mean I/σ(I) of the merged reflections
|
||||
double completeness = 0.0; ///< Fraction of possible reflections observed (0 if unknown)
|
||||
int possible_reflections = 0;///< Theoretical number of reflections in this shell (0 if unknown)
|
||||
};
|
||||
|
||||
/// Overall + per-shell merging statistics
|
||||
struct MergeStatistics {
|
||||
std::vector<MergeStatisticsShell> shells;
|
||||
MergeStatisticsShell overall; ///< Statistics over all shells combined
|
||||
};
|
||||
|
||||
struct ScaleMergeResult {
|
||||
std::vector<MergedReflection> merged;
|
||||
std::vector<double> image_scale_g;
|
||||
@@ -66,6 +85,9 @@ struct ScaleMergeResult {
|
||||
|
||||
// One mosaicity value per image (degrees).
|
||||
std::vector<double> mosaicity_deg;
|
||||
|
||||
/// Per-shell and overall merging statistics (populated after merging)
|
||||
MergeStatistics statistics;
|
||||
};
|
||||
|
||||
ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& observations,
|
||||
|
||||
@@ -450,10 +450,45 @@ int main(int argc, char **argv) {
|
||||
double scale_time = std::chrono::duration<double>(scale_end - scale_start).count();
|
||||
|
||||
if (scale_result) {
|
||||
logger.Info("Scaling completed in {:.2f} s ({} unique reflections, {} images)",
|
||||
scale_time,
|
||||
scale_result->merged.size(),
|
||||
scale_result->image_ids.size());
|
||||
|
||||
// ... existing code ...
|
||||
logger.Info("Scaling completed in {:.2f} s ({} unique reflections, {} images)",
|
||||
scale_time,
|
||||
scale_result->merged.size(),
|
||||
scale_result->image_ids.size());
|
||||
|
||||
// Print resolution-shell statistics table
|
||||
{
|
||||
const auto& stats = scale_result->statistics;
|
||||
logger.Info("");
|
||||
logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>10s}",
|
||||
"d_min", "N_obs", "N_uniq", "Rmeas", "<I/sig>", "Complete");
|
||||
logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}",
|
||||
"", "", "", "", "", "");
|
||||
for (const auto& sh : stats.shells) {
|
||||
if (sh.unique_reflections == 0)
|
||||
continue;
|
||||
std::string compl_str = (sh.completeness > 0.0)
|
||||
? fmt::format("{:8.1f}%", sh.completeness * 100.0)
|
||||
: " N/A";
|
||||
logger.Info(" {:8.2f} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}",
|
||||
sh.d_min, sh.total_observations, sh.unique_reflections,
|
||||
sh.rmeas * 100, sh.mean_i_over_sigma, compl_str);
|
||||
}
|
||||
// Overall
|
||||
{
|
||||
const auto& ov = stats.overall;
|
||||
logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->10s}",
|
||||
"", "", "", "", "", "");
|
||||
std::string compl_str = (ov.completeness > 0.0)
|
||||
? fmt::format("{:8.1f}%", ov.completeness * 100.0)
|
||||
: " N/A";
|
||||
logger.Info(" {:>8s} {:8d} {:8d} {:8.3f}% {:8.1f} {:>10s}",
|
||||
"Overall", ov.total_observations, ov.unique_reflections,
|
||||
ov.rmeas * 100, ov.mean_i_over_sigma, compl_str);
|
||||
}
|
||||
logger.Info("");
|
||||
}
|
||||
|
||||
// Write image.dat (image_id mosaicity_deg K)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user