// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "ScaleAndMerge.h"
#include <ceres/ceres.h>
#include <thread>
#include <iostream>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "../../common/ResolutionShells.h"
namespace {
struct HKLKey {
int64_t h = 0;
int64_t k = 0;
int64_t l = 0;
bool is_positive = true; // only relevant if opt.merge_friedel == false
bool operator==(const HKLKey& o) const noexcept {
return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive;
}
};
struct HKLKeyHash {
size_t operator()(const HKLKey& key) const noexcept {
        // 64-bit mixer using the MurmurHash3 fmix64 finalizer constants
        auto mix = [](uint64_t x) {
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
x ^= x >> 33;
x *= 0xc4ceb9fe1a85ec53ULL;
x ^= x >> 33;
return x;
};
const uint64_t a = static_cast<uint64_t>(key.h);
const uint64_t b = static_cast<uint64_t>(key.k);
const uint64_t c = static_cast<uint64_t>(key.l);
const uint64_t d = static_cast<uint64_t>(key.is_positive ? 1 : 0);
return static_cast<size_t>(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3));
}
};
// Map a (possibly fractional) image number to an integer bin index,
// where one bin spans `rounding_step` images.
inline int RoundImageId(float image_number, double rounding_step) {
    if (!(rounding_step > 0.0))
        rounding_step = 1.0;
    return static_cast<int>(std::llround(static_cast<double>(image_number) / rounding_step));
}
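// Worked example (hypothetical values): with rounding_step = 2.0,
//   RoundImageId(10.6f, 2.0) -> llround(10.6 / 2.0) = llround(5.3) = 5
//   RoundImageId(11.0f, 2.0) -> llround(5.5) = 6  (halves round away from zero)
// so images 10.6 and 11.0 fall into different two-image-wide bins.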
inline double SafeSigma(double s, double min_sigma) {
if (!std::isfinite(s) || s <= 0.0)
return min_sigma;
return std::max(s, min_sigma);
}
inline double SafeD(double d) {
if (!std::isfinite(d) || d <= 0.0)
return std::numeric_limits<double>::quiet_NaN();
return d;
}
inline int SafeToInt(int64_t x) {
if (x < std::numeric_limits<int>::min() || x > std::numeric_limits<int>::max())
throw std::out_of_range("HKL index out of int range for Gemmi");
return static_cast<int>(x);
}
inline double SafeInv(double x, double fallback) {
if (!std::isfinite(x) || x == 0.0)
return fallback;
return 1.0 / x;
}
inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) {
HKLKey key{};
key.h = r.h;
key.k = r.k;
key.l = r.l;
key.is_positive = true;
if (!opt.space_group.has_value()) {
if (!opt.merge_friedel) {
const HKLKey neg{-r.h, -r.k, -r.l, true};
const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l);
if (!pos) {
key.h = -key.h;
key.k = -key.k;
key.l = -key.l;
key.is_positive = false;
}
}
return key;
}
const gemmi::SpaceGroup& sg = *opt.space_group;
const gemmi::GroupOps gops = sg.operations();
const gemmi::ReciprocalAsu rasu(&sg);
const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}};
const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops);
key.h = asu_hkl[0];
key.k = asu_hkl[1];
key.l = asu_hkl[2];
key.is_positive = opt.merge_friedel ? true : sign_plus;
return key;
}
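// Worked example (hypothetical indices) for the no-space-group branch above,
// with opt.merge_friedel == false: r = (-1, 2, 3) has Friedel mate (1, -2, -3),
// which is lexicographically larger, so the key becomes
// {h = 1, k = -2, l = -3, is_positive = false}. The mate itself maps to the
// same (h, k, l) with is_positive = true, so the Friedel pair stays separate;
// with opt.merge_friedel == true both map to a single key and are merged.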
/// CrystFEL-like log-scaling residual
///
/// residual = w * [ ln(G) + ln(partiality) + ln(lp) + ln(I_true) - ln(I_obs) ]
///
/// Only observations with I_obs > 0 may be fed in (the caller skips the rest).
/// G and I_true are constrained to be positive via Ceres lower bounds.
struct LogIntensityResidual {
LogIntensityResidual(const Reflection& r, double sigma_obs, double wedge_deg, bool refine_partiality)
: log_Iobs_(std::log(std::max(static_cast<double>(r.I), 1e-30))),
weight_(SafeInv(sigma_obs / std::max(static_cast<double>(r.I), 1e-30), 1.0)),
delta_phi_(r.delta_phi_deg),
log_lp_(std::log(std::max(SafeInv(static_cast<double>(r.rlp), 1.0), 1e-30))),
half_wedge_(wedge_deg / 2.0),
c1_(r.zeta / std::sqrt(2.0)),
partiality_(r.partiality),
refine_partiality_(refine_partiality) {}
template<typename T>
bool operator()(const T* const G,
const T* const mosaicity,
const T* const Itrue,
T* residual) const {
T partiality;
if (refine_partiality_ && mosaicity[0] != 0.0) {
const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0];
const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0];
partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0);
} else {
partiality = T(partiality_);
}
// Clamp partiality away from zero so log is safe
const T min_p = T(1e-30);
if (partiality < min_p)
partiality = min_p;
// ln(I_pred) = ln(G) + ln(partiality) + ln(lp) + ln(Itrue)
const T log_Ipred = ceres::log(G[0]) + ceres::log(partiality) + T(log_lp_) + ceres::log(Itrue[0]);
residual[0] = (log_Ipred - T(log_Iobs_)) * T(weight_);
return true;
}
double log_Iobs_;
double weight_; // w_i ≈ I_obs / sigma_obs (relative weight in log-space)
double delta_phi_;
double log_lp_;
double half_wedge_;
double c1_;
double partiality_;
bool refine_partiality_;
};
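// Note on the erf-based partiality used by both residual structs: with
// c1 = ζ / sqrt(2) (ζ = r.zeta, η = mosaicity, Δφ = r.delta_phi_deg, w = wedge_deg),
//   P = [erf((Δφ + w/2)·c1/η) - erf((Δφ - w/2)·c1/η)] / 2
// equals Φ((Δφ + w/2)·ζ/η) - Φ((Δφ - w/2)·ζ/η), where Φ is the standard
// normal CDF (Φ(x) = (1 + erf(x/√2)) / 2). P is therefore the fraction of a
// Gaussian rocking curve of width η/ζ (in φ) covered by the wedge
// [Δφ - w/2, Δφ + w/2]: a reflection centered in a wide wedge gives P → 1,
// one far outside the wedge gives P → 0.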
struct IntensityResidual {
IntensityResidual(const Reflection &r, double sigma_obs, double wedge_deg, bool refine_partiality)
: Iobs_(static_cast<double>(r.I)),
weight_(SafeInv(sigma_obs, 1.0)),
delta_phi_(r.delta_phi_deg),
lp_(SafeInv(static_cast<double>(r.rlp), 1.0)),
half_wedge_(wedge_deg / 2.0),
c1_(r.zeta / std::sqrt(2.0)),
partiality_(r.partiality),
refine_partiality_(refine_partiality) {}
template<typename T>
bool operator()(const T *const G,
const T *const mosaicity,
const T *const Itrue,
T *residual) const {
T partiality;
if (refine_partiality_ && mosaicity[0] != 0.0) {
const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0];
const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0];
partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0);
} else
partiality = T(partiality_);
const T Ipred = G[0] * partiality * T(lp_) * Itrue[0];
residual[0] = (Ipred - T(Iobs_)) * T(weight_);
return true;
}
double Iobs_;
double weight_;
double delta_phi_;
double lp_;
double half_wedge_;
double c1_;
double partiality_;
bool refine_partiality_;
};
// Pulls a scale factor toward 1.0 with strength 1/sigma_k.
// Note: not currently referenced in ScaleAndMergeReflectionsCeres below.
struct ScaleRegularizationResidual {
explicit ScaleRegularizationResidual(double sigma_k)
: inv_sigma_(SafeInv(sigma_k, 1.0)) {}
template <typename T>
bool operator()(const T* const k, T* residual) const {
residual[0] = (k[0] - T(1.0)) * T(inv_sigma_);
return true;
}
double inv_sigma_;
};
// Second-difference-of-logs smoothness penalty over three consecutive
// parameters: residual = [ln k0 - 2·ln k1 + ln k2] / σ. Assumes k > 0.
struct SmoothnessRegularizationResidual {
explicit SmoothnessRegularizationResidual(double sigma)
: inv_sigma_(SafeInv(sigma, 1.0)) {}
template <typename T>
bool operator()(const T* const k0,
const T* const k1,
const T* const k2,
T* residual) const {
residual[0] = (ceres::log(k0[0]) + ceres::log(k2[0]) - T(2.0) * ceres::log(k1[0])) * T(inv_sigma_);
return true;
}
double inv_sigma_;
};
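// The second difference of logs vanishes exactly when k1 = sqrt(k0 * k2), i.e.
// when three consecutive factors form a geometric progression, so the penalty
// damps jumps while allowing smooth exponential drift. Worked example
// (hypothetical values): k = (1.0, 1.1, 1.21) gives ln 1.0 + ln 1.21 - 2·ln 1.1 = 0,
// while k = (1.0, 1.5, 1.0) gives -2·ln 1.5 ≈ -0.81, scaled by 1/σ = 20 for σ = 0.05.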
} // namespace
ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& observations,
const ScaleMergeOptions& opt) {
ScaleMergeResult out;
struct ObsRef {
const Reflection* r = nullptr;
int img_id = 0;
int img_slot = -1;
int hkl_slot = -1;
double sigma = 0.0;
};
std::vector<ObsRef> obs;
obs.reserve(observations.size());
std::unordered_map<int, int> imgIdToSlot;
imgIdToSlot.reserve(256);
std::unordered_map<HKLKey, int, HKLKeyHash> hklToSlot;
hklToSlot.reserve(observations.size());
for (const auto& r : observations) {
const double d = SafeD(r.d);
if (!std::isfinite(d))
continue;
if (!std::isfinite(r.I))
continue;
if (!std::isfinite(r.zeta) || r.zeta <= 0.0f)
continue;
if (!std::isfinite(r.rlp) || r.rlp == 0.0f)
continue;
const double sigma = SafeSigma(static_cast<double>(r.sigma), opt.min_sigma);
const int img_id = RoundImageId(r.image_number, opt.image_number_rounding);
int img_slot;
{
auto it = imgIdToSlot.find(img_id);
if (it == imgIdToSlot.end()) {
img_slot = static_cast<int>(imgIdToSlot.size());
imgIdToSlot.emplace(img_id, img_slot);
} else {
img_slot = it->second;
}
}
int hkl_slot;
try {
const HKLKey key = CanonicalizeHKLKey(r, opt);
auto it = hklToSlot.find(key);
if (it == hklToSlot.end()) {
hkl_slot = static_cast<int>(hklToSlot.size());
hklToSlot.emplace(key, hkl_slot);
} else {
hkl_slot = it->second;
}
} catch (...) {
continue;
}
ObsRef o;
o.r = &r;
o.img_id = img_id;
o.img_slot = img_slot;
o.hkl_slot = hkl_slot;
o.sigma = sigma;
obs.push_back(o);
}
const int nimg = static_cast<int>(imgIdToSlot.size());
const int nhkl = static_cast<int>(hklToSlot.size());
out.image_scale_g.assign(nimg, 1.0);
out.image_ids.assign(nimg, 0);
for (const auto& kv : imgIdToSlot) {
out.image_ids[kv.second] = kv.first;
}
std::vector<double> g(nimg, 1.0);
std::vector<double> Itrue(nhkl, 0.0);
    // Mosaicity parameters: one slot per image; when opt.per_image_mosaicity is
    // false, only slot 0 is used and the remaining slots keep their initial value.
    std::vector<double> mosaicity(nimg, opt.mosaicity_init_deg);
    for (int i = 0; i < nimg && i < static_cast<int>(opt.mosaicity_init_deg_vec.size()); ++i) {
if (opt.mosaicity_init_deg_vec[i] > 0.0)
mosaicity[i] = opt.mosaicity_init_deg_vec[i];
}
// Initialize Itrue from per-HKL median of observed intensities
{
std::vector<std::vector<double>> per_hkl_I(nhkl);
for (const auto& o : obs) {
per_hkl_I[o.hkl_slot].push_back(static_cast<double>(o.r->I));
}
for (int h = 0; h < nhkl; ++h) {
auto& v = per_hkl_I[h];
if (v.empty()) {
Itrue[h] = std::max(opt.min_sigma, 1e-6);
continue;
}
std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
double med = v[v.size() / 2];
if (!std::isfinite(med) || med <= opt.min_sigma)
med = opt.min_sigma;
Itrue[h] = med;
}
}
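    // Note: for an even-sized list this picks the upper median, e.g. for
    // intensities {1, 5, 9, 7} std::nth_element places 7 at index 2 (the value
    // at that position in sorted order), which is a fine robust starting point.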
ceres::Problem problem;
const bool refine_partiality = opt.wedge_deg > 0.0;
std::vector<bool> is_valid_hkl_slot(nhkl, false);
    for (const auto& o : obs) {
        const size_t mos_slot = opt.per_image_mosaicity ? static_cast<size_t>(o.img_slot) : 0;
        ceres::CostFunction* cost;
        if (opt.log_scaling_residual) {
            // The log residual requires a strictly positive observed intensity
            if (o.r->I <= 0.0f)
                continue;
            cost = new ceres::AutoDiffCostFunction<LogIntensityResidual, 1, 1, 1, 1>(
                    new LogIntensityResidual(*o.r, o.sigma, opt.wedge_deg, refine_partiality));
        } else {
            cost = new ceres::AutoDiffCostFunction<IntensityResidual, 1, 1, 1, 1>(
                    new IntensityResidual(*o.r, o.sigma, opt.wedge_deg, refine_partiality));
        }
        problem.AddResidualBlock(cost,
                                 nullptr,
                                 &g[o.img_slot],
                                 &mosaicity[mos_slot],
                                 &Itrue[o.hkl_slot]);
        is_valid_hkl_slot[o.hkl_slot] = true;
    }
if (opt.smoothen_g) {
for (int i = 0; i < nimg - 2; ++i) {
auto *cost = new ceres::AutoDiffCostFunction<SmoothnessRegularizationResidual, 1, 1, 1, 1>(
new SmoothnessRegularizationResidual(0.05));
problem.AddResidualBlock(cost, nullptr,
&g[i],
&g[i + 1],
&g[i + 2]);
}
}
if (opt.smoothen_mos && opt.per_image_mosaicity) {
for (int i = 0; i < nimg - 2; ++i) {
auto *cost = new ceres::AutoDiffCostFunction<SmoothnessRegularizationResidual, 1, 1, 1, 1>(
new SmoothnessRegularizationResidual(0.05));
problem.AddResidualBlock(cost, nullptr,
&mosaicity[i],
&mosaicity[i + 1],
&mosaicity[i + 2]);
}
}
    // Scale factors must always stay positive. Guard with HasParameterBlock():
    // with the log residual, an image whose observations all have I <= 0 never
    // enters the problem, and setting a bound on an absent block aborts Ceres.
    for (int i = 0; i < nimg; ++i) {
        if (problem.HasParameterBlock(&g[i]))
            problem.SetParameterLowerBound(&g[i], 0, 1e-12);
    }
    // For the log residual, Itrue must also stay positive
    if (opt.log_scaling_residual) {
        for (int h = 0; h < nhkl; ++h) {
            if (is_valid_hkl_slot[h])
                problem.SetParameterLowerBound(&Itrue[h], 0, 1e-12);
        }
    }
    // Mosaicity refinement + bounds. Skip blocks that never entered the problem,
    // and avoid forming &mosaicity[0] when the vector is empty (nimg == 0).
    const int n_mos = opt.per_image_mosaicity ? nimg : std::min(nimg, 1);
    for (int i = 0; i < n_mos; ++i) {
        if (!problem.HasParameterBlock(&mosaicity[i]))
            continue;
        if (!opt.refine_mosaicity) {
            problem.SetParameterBlockConstant(&mosaicity[i]);
        } else {
            problem.SetParameterLowerBound(&mosaicity[i], 0, opt.mosaicity_min_deg);
            problem.SetParameterUpperBound(&mosaicity[i], 0, opt.mosaicity_max_deg);
        }
    }
// use all available threads
unsigned int hw = std::thread::hardware_concurrency();
if (hw == 0)
hw = 1; // fallback
ceres::Solver::Options options;
options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;
options.minimizer_progress_to_stdout = true;
options.max_num_iterations = opt.max_num_iterations;
options.max_solver_time_in_seconds = opt.max_solver_time_s;
options.num_threads = static_cast<int>(hw);
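    // Each residual couples one image-scale, one mosaicity and one Itrue block,
    // so the Jacobian is very sparse; SPARSE_NORMAL_CHOLESKY exploits that
    // structure (it needs Ceres built with a sparse backend, e.g. SuiteSparse
    // or Eigen's sparse Cholesky).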
ceres::Solver::Summary summary;
ceres::Solve(options, &problem, &summary);
// --- Export per-image results ---
for (int i = 0; i < nimg; ++i)
out.image_scale_g[i] = g[i];
out.mosaicity_deg.resize(nimg);
for (int i = 0; i < nimg; ++i)
out.mosaicity_deg[i] = opt.per_image_mosaicity ? mosaicity[i] : mosaicity[0];
std::vector<HKLKey> slotToHKL(nhkl);
for (const auto& kv : hklToSlot) {
slotToHKL[kv.second] = kv.first;
}
out.merged.resize(nhkl);
for (int h = 0; h < nhkl; ++h) {
out.merged[h].h = slotToHKL[h].h;
out.merged[h].k = slotToHKL[h].k;
out.merged[h].l = slotToHKL[h].l;
out.merged[h].I = Itrue[h];
out.merged[h].sigma = 0.0;
out.merged[h].d = 0.0;
}
// Populate d from median of observations per HKL
{
std::vector<std::vector<double>> per_hkl_d(nhkl);
for (const auto& o : obs) {
const double d_val = static_cast<double>(o.r->d);
if (std::isfinite(d_val) && d_val > 0.0)
per_hkl_d[o.hkl_slot].push_back(d_val);
}
for (int h = 0; h < nhkl; ++h) {
auto& v = per_hkl_d[h];
if (!v.empty()) {
std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
out.merged[h].d = v[v.size() / 2];
}
}
}
std::cout << summary.FullReport() << std::endl;
// Merge (XDS/XSCALE style: inverse-variance weighted mean)
{
struct HKLAccum {
double sum_wI = 0.0; // Σ(w_i * I_i)
double sum_w = 0.0; // Σ(w_i)
double sum_wI2 = 0.0; // Σ(w_i * I_i²) for internal variance
int count = 0;
};
std::vector<HKLAccum> accum(nhkl);
const double half_wedge = opt.wedge_deg / 2.0;
for (const auto& o : obs) {
const Reflection& r = *o.r;
const double lp = SafeInv(static_cast<double>(r.rlp), 1.0);
const double G_i = g[o.img_slot];
// Compute partiality with refined mosaicity
double partiality;
size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0;
if (refine_partiality && mosaicity[mos_slot] > 0.0) {
double c1 = r.zeta / std::sqrt(2.0);
double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot];
double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot];
partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0;
} else {
partiality = r.partiality;
}
if (partiality > opt.min_partiality_for_merge) {
const double correction = G_i * partiality * lp;
if (correction <= 0.0) continue;
// Corrected intensity and propagated sigma
const double I_corr = static_cast<double>(r.I) / correction;
const double sigma_corr = o.sigma / correction;
// XDS/XSCALE style: weight = 1/σ²
const double w = 1.0 / (sigma_corr * sigma_corr);
auto& a = accum[o.hkl_slot];
a.sum_wI += w * I_corr;
a.sum_w += w;
a.sum_wI2 += w * I_corr * I_corr;
a.count++;
}
}
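        // Worked example (hypothetical numbers) of the accumulation above:
        // two corrected observations I = 100 ± 10 and I = 120 ± 20 give weights
        // w = 1/σ² = 0.01 and 0.0025, hence sum_wI = 1.3, sum_w = 0.0125,
        // weighted mean <I> = 1.3 / 0.0125 = 104 and σ_stat = 1/√0.0125 ≈ 8.94:
        // the better-measured observation dominates the merge.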
// Populate merged reflections
for (int h = 0; h < nhkl; ++h) {
const auto& a = accum[h];
if (a.sum_w > 0.0) {
// XDS/XSCALE style: weighted mean
out.merged[h].I = a.sum_wI / a.sum_w;
// Standard error of weighted mean: σ = 1/√(Σw)
double sigma_stat = std::sqrt(1.0 / a.sum_w);
                if (a.count >= 2) {
                    // XDS/XSCALE style: use internal consistency to estimate sigma
                    //   σ² = (1/(n-1)) * Σw_i(I_i - <I>)² / Σw_i
                    // Clamp at zero: floating-point cancellation can make the
                    // weighted sum of squared deviations slightly negative.
                    const double var_internal =
                            std::max(0.0, a.sum_wI2 - (a.sum_wI * a.sum_wI) / a.sum_w);
                    const double sigma_int = std::sqrt(var_internal / (a.sum_w * (a.count - 1)));
                    // Take the larger of the statistical and internal sigma
                    // (XDS approach: if the data scatter more than expected, use the internal estimate)
                    out.merged[h].sigma = std::max(sigma_stat, sigma_int);
} else {
// Single observation: use propagated sigma
out.merged[h].sigma = sigma_stat;
}
}
// else: I and sigma stay at initialized values (Itrue[h] and 0.0)
}
}
// ---- Compute per-shell merging statistics ----
{
constexpr int kStatShells = 10;
// Determine d-range from merged reflections
float stat_d_min = std::numeric_limits<float>::max();
float stat_d_max = 0.0f;
for (int h = 0; h < nhkl; ++h) {
const auto d = static_cast<float>(out.merged[h].d);
if (std::isfinite(d) && d > 0.0f) {
if (d < stat_d_min) stat_d_min = d;
if (d > stat_d_max) stat_d_max = d;
}
}
if (stat_d_min < stat_d_max && stat_d_min > 0.0f) {
const float d_min_pad = stat_d_min * 0.999f;
const float d_max_pad = stat_d_max * 1.001f;
ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells);
const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq();
const auto shell_min_res = stat_shells.GetShellMinRes();
// Assign each unique reflection to a shell
std::vector<int32_t> hkl_shell(nhkl, -1);
for (int h = 0; h < nhkl; ++h) {
const auto d = static_cast<float>(out.merged[h].d);
if (std::isfinite(d) && d > 0.0f) {
auto s = stat_shells.GetShell(d);
if (s.has_value())
hkl_shell[h] = s.value();
}
}
// Build corrected observations per HKL (same correction logic as merge)
struct CorrObs {
int hkl_slot;
int shell;
double I_corr;
};
std::vector<CorrObs> corr_obs;
corr_obs.reserve(obs.size());
const double half_wedge = opt.wedge_deg / 2.0;
for (const auto& o : obs) {
if (hkl_shell[o.hkl_slot] < 0)
continue;
const Reflection& r = *o.r;
const double lp = SafeInv(static_cast<double>(r.rlp), 1.0);
const double G_i = g[o.img_slot];
double partiality;
size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0;
if (refine_partiality && mosaicity[mos_slot] > 0.0) {
double c1 = r.zeta / std::sqrt(2.0);
double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot];
double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot];
partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0;
} else {
partiality = r.partiality;
}
if (partiality <= opt.min_partiality_for_merge)
continue;
const double correction = G_i * partiality * lp;
if (correction <= 0.0)
continue;
CorrObs co;
co.hkl_slot = o.hkl_slot;
co.shell = hkl_shell[o.hkl_slot];
co.I_corr = static_cast<double>(r.I) / correction;
corr_obs.push_back(co);
}
// Per-HKL: collect corrected intensities for Rmeas
struct HKLStats {
double sum_I = 0.0;
int n = 0;
std::vector<double> I_list;
};
std::vector<HKLStats> per_hkl(nhkl);
for (const auto& co : corr_obs) {
auto& hs = per_hkl[co.hkl_slot];
hs.sum_I += co.I_corr;
hs.n += 1;
hs.I_list.push_back(co.I_corr);
}
// Compute per-shell statistics
out.statistics.shells.resize(kStatShells);
// Accumulators per shell
struct ShellAccum {
int total_obs = 0;
std::unordered_set<int> unique_hkls;
double rmeas_num = 0.0; // Σ sqrt(n/(n-1)) * Σ|I_i - <I>|
double rmeas_den = 0.0; // Σ Σ I_i
double sum_i_over_sig = 0.0;
int n_merged_with_sigma = 0;
};
std::vector<ShellAccum> shell_acc(kStatShells);
// Accumulate per-HKL contributions to each shell
for (int h = 0; h < nhkl; ++h) {
if (hkl_shell[h] < 0)
continue;
const int s = hkl_shell[h];
auto& sa = shell_acc[s];
const auto& hs = per_hkl[h];
if (hs.n == 0)
continue;
sa.unique_hkls.insert(h);
sa.total_obs += hs.n;
const double mean_I = hs.sum_I / static_cast<double>(hs.n);
// Rmeas numerator: sqrt(n/(n-1)) * Σ_i |I_i - <I>|
if (hs.n >= 2) {
double sum_abs_dev = 0.0;
for (double Ii : hs.I_list)
sum_abs_dev += std::abs(Ii - mean_I);
sa.rmeas_num += std::sqrt(static_cast<double>(hs.n) / (hs.n - 1.0)) * sum_abs_dev;
}
// Rmeas denominator: Σ_i I_i (use absolute values for robustness)
for (double Ii : hs.I_list)
sa.rmeas_den += std::abs(Ii);
// <I/σ> from merged reflection
if (out.merged[h].sigma > 0.0) {
sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma;
sa.n_merged_with_sigma += 1;
}
}
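            // Worked example (hypothetical numbers) for the Rmeas terms above:
            // one HKL measured twice with corrected I = 90 and 110 has <I> = 100,
            // so Σ|I_i - <I>| = 20 and the multiplicity factor is √(2/1) ≈ 1.414,
            // contributing ≈ 28.3 to rmeas_num and 200 to rmeas_den, i.e.
            // Rmeas ≈ 0.14 if this were the only reflection in the shell.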
            // Completeness would require enumerating every theoretically possible
            // ASU reflection per shell, and computing d(hkl) for that needs the
            // unit cell (metric tensor), which is not available in this function.
            // Leave the counts at zero and completeness unset; the caller
            // (e.g. jfjoch_process) can fill them in when it knows the cell.
            std::vector<int> possible_per_shell(kStatShells, 0);
            int total_possible = 0;
            const bool have_completeness = false;
// Fill output statistics
for (int s = 0; s < kStatShells; ++s) {
auto& ss = out.statistics.shells[s];
const auto& sa = shell_acc[s];
ss.mean_one_over_d2 = shell_mean_1_d2[s];
// Shell boundaries: shell 0 goes from d_max_pad to shell_min_res[0], etc.
ss.d_min = shell_min_res[s];
ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1];
ss.total_observations = sa.total_obs;
ss.unique_reflections = static_cast<int>(sa.unique_hkls.size());
ss.rmeas = (sa.rmeas_den > 0.0) ? (sa.rmeas_num / sa.rmeas_den) : 0.0;
ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0)
? (sa.sum_i_over_sig / sa.n_merged_with_sigma) : 0.0;
ss.possible_reflections = possible_per_shell[s];
ss.completeness = (have_completeness && possible_per_shell[s] > 0)
? static_cast<double>(sa.unique_hkls.size()) / possible_per_shell[s] : 0.0;
}
// Overall statistics
{
auto& ov = out.statistics.overall;
ov.d_min = stat_d_min;
ov.d_max = stat_d_max;
                ov.mean_one_over_d2 = 0.0f; // not aggregated into a single overall value
int total_obs_all = 0;
std::unordered_set<int> all_unique;
double rmeas_num_all = 0.0, rmeas_den_all = 0.0;
double sum_isig_all = 0.0;
int n_isig_all = 0;
for (int s = 0; s < kStatShells; ++s) {
const auto& sa = shell_acc[s];
total_obs_all += sa.total_obs;
all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end());
rmeas_num_all += sa.rmeas_num;
rmeas_den_all += sa.rmeas_den;
sum_isig_all += sa.sum_i_over_sig;
n_isig_all += sa.n_merged_with_sigma;
}
ov.total_observations = total_obs_all;
ov.unique_reflections = static_cast<int>(all_unique.size());
ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0;
ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0;
ov.possible_reflections = total_possible;
ov.completeness = (have_completeness && total_possible > 0)
? static_cast<double>(all_unique.size()) / total_possible : 0.0;
}
}
}
return out;
}