// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only

#include "ScaleAndMerge.h"

#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <thread>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include <ceres/ceres.h>

#include "../../common/ResolutionShells.h"

namespace {
|
||
|
||
/// Canonical Miller-index key used to group symmetry-equivalent observations.
struct HKLKey {
    int64_t h = 0;
    int64_t k = 0;
    int64_t l = 0;
    bool is_positive = true; // only relevant if opt.merge_friedel == false

    /// Keys match only when all three indices and the Friedel sign agree.
    bool operator==(const HKLKey& rhs) const noexcept {
        return std::tie(h, k, l, is_positive) == std::tie(rhs.h, rhs.k, rhs.l, rhs.is_positive);
    }
};

struct HKLKeyHash {
|
||
size_t operator()(const HKLKey& key) const noexcept {
|
||
auto mix = [](uint64_t x) {
|
||
x ^= x >> 33;
|
||
x *= 0xff51afd7ed558ccdULL;
|
||
x ^= x >> 33;
|
||
x *= 0xc4ceb9fe1a85ec53ULL;
|
||
x ^= x >> 33;
|
||
return x;
|
||
};
|
||
const uint64_t a = static_cast<uint64_t>(key.h);
|
||
const uint64_t b = static_cast<uint64_t>(key.k);
|
||
const uint64_t c = static_cast<uint64_t>(key.l);
|
||
const uint64_t d = static_cast<uint64_t>(key.is_positive ? 1 : 0);
|
||
return static_cast<size_t>(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3));
|
||
}
|
||
};
|
||
|
||
/// Quantize a (possibly fractional) image number onto a grid of size
/// `rounding_step` and return the resulting integer bucket index.
///
/// @param image_number  raw image number from the observation
/// @param rounding_step bucket width; non-positive or NaN falls back to 1.0
/// @return round(image_number / rounding_step), half away from zero
inline int RoundImageId(float image_number, double rounding_step) {
    if (!(rounding_step > 0.0))
        rounding_step = 1.0; // defensive fallback; also catches NaN steps
    // Round the scaled value directly. The previous version multiplied by
    // the step and divided it back out before llround, a pure round-trip
    // that only injected floating-point noise.
    const double x = static_cast<double>(image_number) / rounding_step;
    return static_cast<int>(std::llround(x));
}

/// Sanitize an observed sigma: non-finite or non-positive values collapse to
/// `min_sigma`; valid values are clamped from below by `min_sigma`.
inline double SafeSigma(double s, double min_sigma) {
    const bool usable = std::isfinite(s) && s > 0.0;
    return usable ? std::max(s, min_sigma) : min_sigma;
}

/// Validate a d-spacing: anything non-finite or non-positive becomes NaN so
/// downstream std::isfinite() checks reject the observation.
inline double SafeD(double d) {
    const bool valid = std::isfinite(d) && d > 0.0;
    return valid ? d : std::numeric_limits<double>::quiet_NaN();
}

/// Narrow a 64-bit Miller index to int, throwing std::out_of_range when the
/// value does not fit (Gemmi's Miller type stores plain ints).
inline int SafeToInt(int64_t x) {
    constexpr int64_t lo = std::numeric_limits<int>::min();
    constexpr int64_t hi = std::numeric_limits<int>::max();
    if (x >= lo && x <= hi)
        return static_cast<int>(x);
    throw std::out_of_range("HKL index out of int range for Gemmi");
}

/// Reciprocal with a fallback: returns 1/x, or `fallback` when x is zero or
/// not finite (keeps Inf/NaN out of weights and corrections).
inline double SafeInv(double x, double fallback) {
    const bool invertible = std::isfinite(x) && x != 0.0;
    return invertible ? 1.0 / x : fallback;
}

/// Map a reflection's Miller indices to a canonical key so that
/// symmetry-equivalent observations land in the same merge bucket.
///
/// With a space group: Gemmi's reciprocal-space ASU mapping is used, and the
/// Friedel sign is kept only when opt.merge_friedel == false.
/// Without a space group: only Friedel mates (hkl vs -h-k-l) are unified,
/// picking the lexicographically larger triple as the representative.
///
/// May throw std::out_of_range (via SafeToInt) when an index does not fit in
/// an int; the caller catches and skips such observations.
inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) {
    HKLKey key{};
    key.h = r.h;
    key.k = r.k;
    key.l = r.l;
    key.is_positive = true;

    if (!opt.space_group.has_value()) {
        if (!opt.merge_friedel) {
            // No symmetry info: canonicalize only over the Friedel pair.
            const HKLKey neg{-r.h, -r.k, -r.l, true};
            const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l);
            if (!pos) {
                // Flip to the representative of the pair and record the sign.
                key.h = -key.h;
                key.k = -key.k;
                key.l = -key.l;
                key.is_positive = false;
            }
        }
        return key;
    }

    const gemmi::SpaceGroup& sg = *opt.space_group;
    const gemmi::GroupOps gops = sg.operations();
    const gemmi::ReciprocalAsu rasu(&sg);

    // SafeToInt throws if a 64-bit index cannot be represented as int.
    const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}};
    const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops);

    key.h = asu_hkl[0];
    key.k = asu_hkl[1];
    key.l = asu_hkl[2];
    // When Friedel mates are merged, the sign is irrelevant and forced to '+'.
    key.is_positive = opt.merge_friedel ? true : sign_plus;
    return key;
}

/// CrystFEL-like log-scaling residual
///
/// residual = w * [ ln(I_obs) - ln(G) - ln(partiality) - ln(lp) - ln(I_true) ]
///
/// Only observations with I_obs > 0 should be fed in (the caller skips the rest).
/// G and I_true are constrained to be positive via Ceres lower bounds.
struct LogIntensityResidual {
    // sigma_obs: sanitized observed sigma (caller applied SafeSigma).
    // wedge_deg: full rotation wedge per image; half of it enters the model.
    // refine_partiality: when false (or mosaicity == 0) the stored static
    //   partiality from the observation is used instead of the erf model.
    LogIntensityResidual(const Reflection& r, double sigma_obs, double wedge_deg, bool refine_partiality)
        : log_Iobs_(std::log(std::max(static_cast<double>(r.I), 1e-30))),
          weight_(SafeInv(sigma_obs / std::max(static_cast<double>(r.I), 1e-30), 1.0)),
          delta_phi_(r.delta_phi_deg),
          log_lp_(std::log(std::max(SafeInv(static_cast<double>(r.rlp), 1.0), 1e-30))),
          half_wedge_(wedge_deg / 2.0),
          c1_(r.zeta / std::sqrt(2.0)),
          partiality_(r.partiality),
          refine_partiality_(refine_partiality) {}

    template<typename T>
    bool operator()(const T* const G,
                    const T* const mosaicity,
                    const T* const Itrue,
                    T* residual) const {
        T partiality;
        if (refine_partiality_ && mosaicity[0] != 0.0) {
            // Gaussian rocking-curve model: fraction of the reflection swept
            // by the wedge, expressed through the error function.
            const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0];
            const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0];
            partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0);
        } else {
            partiality = T(partiality_);
        }

        // Clamp partiality away from zero so log is safe
        const T min_p = T(1e-30);
        if (partiality < min_p)
            partiality = min_p;

        // ln(I_pred) = ln(G) + ln(partiality) + ln(lp) + ln(Itrue)
        const T log_Ipred = ceres::log(G[0]) + ceres::log(partiality) + T(log_lp_) + ceres::log(Itrue[0]);
        residual[0] = (log_Ipred - T(log_Iobs_)) * T(weight_);
        return true;
    }

    double log_Iobs_;        // ln(max(I_obs, 1e-30))
    double weight_;          // w_i ≈ I_obs / sigma_obs (relative weight in log-space)
    double delta_phi_;       // angular offset of reflection from image center (deg)
    double log_lp_;          // ln of the Lorentz-polarization correction (1/rlp)
    double half_wedge_;      // half of the rotation wedge (deg)
    double c1_;              // zeta / sqrt(2), precomputed erf-argument factor
    double partiality_;      // static partiality used when not refining
    bool refine_partiality_;
};

/// Linear-intensity scaling residual (alternative to LogIntensityResidual).
///
/// residual = w * [ G * partiality * lp * I_true - I_obs ],  w = 1 / sigma_obs
///
/// Unlike the log variant, this also accepts non-positive observed intensities.
struct IntensityResidual {
    // Parameters mirror LogIntensityResidual; see that struct for details.
    IntensityResidual(const Reflection &r, double sigma_obs, double wedge_deg, bool refine_partiality)
        : Iobs_(static_cast<double>(r.I)),
          weight_(SafeInv(sigma_obs, 1.0)),
          delta_phi_(r.delta_phi_deg),
          lp_(SafeInv(static_cast<double>(r.rlp), 1.0)),
          half_wedge_(wedge_deg / 2.0),
          c1_(r.zeta / std::sqrt(2.0)),
          partiality_(r.partiality),
          refine_partiality_(refine_partiality) {}

    template<typename T>
    bool operator()(const T *const G,
                    const T *const mosaicity,
                    const T *const Itrue,
                    T *residual) const {
        T partiality;
        if (refine_partiality_ && mosaicity[0] != 0.0) {
            // Same Gaussian rocking-curve (erf) model as LogIntensityResidual.
            const T arg_plus = T(delta_phi_ + half_wedge_) * T(c1_) / mosaicity[0];
            const T arg_minus = T(delta_phi_ - half_wedge_) * T(c1_) / mosaicity[0];
            partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0);
        } else
            partiality = T(partiality_);

        const T Ipred = G[0] * partiality * T(lp_) * Itrue[0];
        residual[0] = (Ipred - T(Iobs_)) * T(weight_);
        return true;
    }

    double Iobs_;            // observed intensity (may be <= 0)
    double weight_;          // 1 / sigma_obs
    double delta_phi_;       // angular offset from image center (deg)
    double lp_;              // Lorentz-polarization correction (1/rlp)
    double half_wedge_;      // half of the rotation wedge (deg)
    double c1_;              // zeta / sqrt(2)
    double partiality_;      // static partiality used when not refining
    bool refine_partiality_;
};

struct ScaleRegularizationResidual {
|
||
explicit ScaleRegularizationResidual(double sigma_k)
|
||
: inv_sigma_(SafeInv(sigma_k, 1.0)) {}
|
||
|
||
template <typename T>
|
||
bool operator()(const T* const k, T* residual) const {
|
||
residual[0] = (k[0] - T(1.0)) * T(inv_sigma_);
|
||
return true;
|
||
}
|
||
|
||
double inv_sigma_;
|
||
};
|
||
|
||
/// Second-difference smoothness prior over three consecutive per-image
/// parameters (scales or mosaicities), applied in log space:
///
///   residual = [ln(k0) - 2*ln(k1) + ln(k2)] / sigma
///
/// Penalizing the discrete second derivative makes neighboring images vary
/// smoothly without constraining their absolute level.
struct SmoothnessRegularizationResidual {
    explicit SmoothnessRegularizationResidual(double sigma)
        : inv_sigma_(SafeInv(sigma, 1.0)) {}
    template <typename T>
    bool operator()(const T* const k0,
                    const T* const k1,
                    const T* const k2,
                    T* residual) const {
        // NOTE(review): assumes all three parameters are strictly positive
        // (the caller sets lower bounds on scales; mosaicity bounds are
        // presumed positive) -- ln of a non-positive value is undefined.
        residual[0] = (ceres::log(k0[0]) + ceres::log(k2[0]) - T(2.0) * ceres::log(k1[0])) * T(inv_sigma_);
        return true;
    }

    double inv_sigma_; // 1 / sigma (falls back to 1.0 for degenerate sigma)
};

} // namespace
|
||
|
||
/// Scale and merge integrated reflection observations with Ceres.
///
/// Pipeline:
///   1. Filter observations (finite d, I, zeta, rlp) and assign each one an
///      image slot (rounded image number) and a unique-HKL slot (ASU-mapped).
///   2. Jointly refine per-image scale factors G, per-image (or global)
///      mosaicity, and per-HKL "true" intensities, using either a log-space
///      or linear intensity residual plus optional smoothness priors over
///      neighboring images.
///   3. Merge corrected observations per unique HKL with inverse-variance
///      weights (XDS/XSCALE style) and compute per-shell statistics
///      (Rmeas, <I/sigma>, multiplicity).
///
/// Completeness is left at 0 -- the unit cell is not available here, so the
/// caller has to fill it in.
ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& observations,
                                               const ScaleMergeOptions& opt) {
    ScaleMergeResult out;

    // Lightweight view of one accepted observation plus its slot assignments.
    struct ObsRef {
        const Reflection* r = nullptr; // points into `observations`
        int img_id = 0;                // rounded image number
        int img_slot = -1;             // dense index into per-image arrays
        int hkl_slot = -1;             // dense index into per-unique-HKL arrays
        double sigma = 0.0;            // sanitized sigma (>= opt.min_sigma)
    };

    std::vector<ObsRef> obs;
    obs.reserve(observations.size());

    std::unordered_map<int, int> imgIdToSlot;
    imgIdToSlot.reserve(256);

    std::unordered_map<HKLKey, int, HKLKeyHash> hklToSlot;
    hklToSlot.reserve(observations.size());

    // --- Pass 1: filter observations and assign image / unique-HKL slots ---
    for (const auto& r : observations) {
        const double d = SafeD(r.d);
        if (!std::isfinite(d))
            continue;
        if (!std::isfinite(r.I))
            continue;

        // zeta and rlp enter as divisors in the partiality / LP corrections.
        if (!std::isfinite(r.zeta) || r.zeta <= 0.0f)
            continue;
        if (!std::isfinite(r.rlp) || r.rlp == 0.0f)
            continue;

        const double sigma = SafeSigma(static_cast<double>(r.sigma), opt.min_sigma);

        const int img_id = RoundImageId(r.image_number, opt.image_number_rounding);

        // Image slots are assigned in first-appearance order.
        int img_slot;
        {
            auto it = imgIdToSlot.find(img_id);
            if (it == imgIdToSlot.end()) {
                img_slot = static_cast<int>(imgIdToSlot.size());
                imgIdToSlot.emplace(img_id, img_slot);
            } else {
                img_slot = it->second;
            }
        }

        int hkl_slot;
        try {
            const HKLKey key = CanonicalizeHKLKey(r, opt);
            auto it = hklToSlot.find(key);
            if (it == hklToSlot.end()) {
                hkl_slot = static_cast<int>(hklToSlot.size());
                hklToSlot.emplace(key, hkl_slot);
            } else {
                hkl_slot = it->second;
            }
        } catch (...) {
            // SafeToInt throws for out-of-range indices; drop the observation.
            continue;
        }

        ObsRef o;
        o.r = &r;
        o.img_id = img_id;
        o.img_slot = img_slot;
        o.hkl_slot = hkl_slot;
        o.sigma = sigma;
        obs.push_back(o);
    }

    const int nimg = static_cast<int>(imgIdToSlot.size());
    const int nhkl = static_cast<int>(hklToSlot.size());

    out.image_scale_g.assign(nimg, 1.0);
    out.image_ids.assign(nimg, 0);

    for (const auto& kv : imgIdToSlot) {
        out.image_ids[kv.second] = kv.first;
    }

    // Refined parameters: per-image scale and per-unique-HKL intensity.
    std::vector<double> g(nimg, 1.0);
    std::vector<double> Itrue(nhkl, 0.0);

    // Mosaicity: always per-image
    // (only slot 0 is used when opt.per_image_mosaicity is false).
    std::vector<double> mosaicity(nimg, opt.mosaicity_init_deg);
    for (int i = 0; i < nimg && i < opt.mosaicity_init_deg_vec.size(); ++i) {
        if (opt.mosaicity_init_deg_vec[i] > 0.0)
            mosaicity[i] = opt.mosaicity_init_deg_vec[i];
    }

    // Initialize Itrue from per-HKL median of observed intensities
    {
        std::vector<std::vector<double>> per_hkl_I(nhkl);
        for (const auto& o : obs) {
            per_hkl_I[o.hkl_slot].push_back(static_cast<double>(o.r->I));
        }
        for (int h = 0; h < nhkl; ++h) {
            auto& v = per_hkl_I[h];
            if (v.empty()) {
                Itrue[h] = std::max(opt.min_sigma, 1e-6);
                continue;
            }
            // nth_element yields the upper median without a full sort.
            std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
            double med = v[v.size() / 2];
            if (!std::isfinite(med) || med <= opt.min_sigma)
                med = opt.min_sigma;
            Itrue[h] = med;
        }
    }

    ceres::Problem problem;

    // Partiality is only modeled when a rotation wedge is given.
    const bool refine_partiality = opt.wedge_deg > 0.0;

    // Tracks which HKL slots actually received a residual block
    // (log residual skips non-positive intensities).
    std::vector<bool> is_valid_hkl_slot(nhkl, false);

    // --- Build one residual block per accepted observation ---
    for (const auto& o : obs) {
        size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0;

        if (opt.log_scaling_residual) {
            // Log residual requires positive I_obs
            if (o.r->I <= 0.0f)
                continue;

            auto* cost = new ceres::AutoDiffCostFunction<LogIntensityResidual, 1, 1, 1, 1>(
                new LogIntensityResidual(*o.r, o.sigma, opt.wedge_deg, refine_partiality));

            problem.AddResidualBlock(cost,
                                     nullptr,
                                     &g[o.img_slot],
                                     &mosaicity[mos_slot],
                                     &Itrue[o.hkl_slot]);
            is_valid_hkl_slot[o.hkl_slot] = true;
        } else {
            auto* cost = new ceres::AutoDiffCostFunction<IntensityResidual, 1, 1, 1, 1>(
                new IntensityResidual(*o.r, o.sigma, opt.wedge_deg, refine_partiality));

            problem.AddResidualBlock(cost,
                                     nullptr,
                                     &g[o.img_slot],
                                     &mosaicity[mos_slot],
                                     &Itrue[o.hkl_slot]);
            is_valid_hkl_slot[o.hkl_slot] = true;
        }
    }

    // Second-difference smoothness prior over consecutive image scales.
    // NOTE(review): slots follow first-appearance order of image ids, which is
    // assumed to match acquisition order -- confirm for shuffled input.
    if (opt.smoothen_g) {
        for (int i = 0; i < nimg - 2; ++i) {
            auto *cost = new ceres::AutoDiffCostFunction<SmoothnessRegularizationResidual, 1, 1, 1, 1>(
                new SmoothnessRegularizationResidual(0.05));

            problem.AddResidualBlock(cost, nullptr,
                                     &g[i],
                                     &g[i + 1],
                                     &g[i + 2]);
        }
    }

    // Same smoothness prior for per-image mosaicity.
    if (opt.smoothen_mos && opt.per_image_mosaicity) {
        for (int i = 0; i < nimg - 2; ++i) {
            auto *cost = new ceres::AutoDiffCostFunction<SmoothnessRegularizationResidual, 1, 1, 1, 1>(
                new SmoothnessRegularizationResidual(0.05));

            problem.AddResidualBlock(cost, nullptr,
                                     &mosaicity[i],
                                     &mosaicity[i + 1],
                                     &mosaicity[i + 2]);
        }
    }

    // Scaling factors must be always positive
    for (int i = 0; i < nimg; ++i)
        problem.SetParameterLowerBound(&g[i], 0, 1e-12);


    // For log residual, Itrue must stay positive
    if (opt.log_scaling_residual) {
        for (int h = 0; h < nhkl; ++h) {
            if (is_valid_hkl_slot[h])
                problem.SetParameterLowerBound(&Itrue[h], 0, 1e-12);
        }
    }

    // Mosaicity refinement + bounds
    if (!opt.refine_mosaicity) {
        for (int i = 0; i < (opt.per_image_mosaicity ? nimg : 1); ++i)
            problem.SetParameterBlockConstant(&mosaicity[i]);
    } else {
        for (int i = 0; i < (opt.per_image_mosaicity ? nimg : 1); ++i) {
            problem.SetParameterLowerBound(&mosaicity[i], 0, opt.mosaicity_min_deg);
            problem.SetParameterUpperBound(&mosaicity[i], 0, opt.mosaicity_max_deg);
        }
    }

    // use all available threads
    unsigned int hw = std::thread::hardware_concurrency();
    if (hw == 0)
        hw = 1; // fallback

    ceres::Solver::Options options;

    options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;
    options.minimizer_progress_to_stdout = true;
    options.max_num_iterations = opt.max_num_iterations;
    options.max_solver_time_in_seconds = opt.max_solver_time_s;
    options.num_threads = static_cast<int>(hw);

    ceres::Solver::Summary summary;
    ceres::Solve(options, &problem, &summary);

    // --- Export per-image results ---
    for (int i = 0; i < nimg; ++i)
        out.image_scale_g[i] = g[i];

    out.mosaicity_deg.resize(nimg);
    for (int i = 0; i < nimg; ++i)
        out.mosaicity_deg[i] = opt.per_image_mosaicity ? mosaicity[i] : mosaicity[0];

    // Invert the HKL -> slot map so merged output carries Miller indices.
    std::vector<HKLKey> slotToHKL(nhkl);
    for (const auto& kv : hklToSlot) {
        slotToHKL[kv.second] = kv.first;
    }

    out.merged.resize(nhkl);
    for (int h = 0; h < nhkl; ++h) {
        out.merged[h].h = slotToHKL[h].h;
        out.merged[h].k = slotToHKL[h].k;
        out.merged[h].l = slotToHKL[h].l;
        out.merged[h].I = Itrue[h]; // fallback; overwritten by the weighted merge below
        out.merged[h].sigma = 0.0;
        out.merged[h].d = 0.0;
    }

    // Populate d from median of observations per HKL
    {
        std::vector<std::vector<double>> per_hkl_d(nhkl);
        for (const auto& o : obs) {
            const double d_val = static_cast<double>(o.r->d);
            if (std::isfinite(d_val) && d_val > 0.0)
                per_hkl_d[o.hkl_slot].push_back(d_val);
        }
        for (int h = 0; h < nhkl; ++h) {
            auto& v = per_hkl_d[h];
            if (!v.empty()) {
                std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
                out.merged[h].d = v[v.size() / 2];
            }
        }
    }

    std::cout << summary.FullReport() << std::endl;

    // Merge (XDS/XSCALE style: inverse-variance weighted mean)
    {
        struct HKLAccum {
            double sum_wI = 0.0;  // Σ(w_i * I_i)
            double sum_w = 0.0;   // Σ(w_i)
            double sum_wI2 = 0.0; // Σ(w_i * I_i²) for internal variance
            int count = 0;
        };
        std::vector<HKLAccum> accum(nhkl);

        const double half_wedge = opt.wedge_deg / 2.0;

        for (const auto& o : obs) {
            const Reflection& r = *o.r;
            const double lp = SafeInv(static_cast<double>(r.rlp), 1.0);
            const double G_i = g[o.img_slot];

            // Compute partiality with refined mosaicity
            // (mirrors the erf model inside the residual functors).
            double partiality;
            size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0;
            if (refine_partiality && mosaicity[mos_slot] > 0.0) {
                double c1 = r.zeta / std::sqrt(2.0);
                double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot];
                double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot];
                partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0;
            } else {
                partiality = r.partiality;
            }

            if (partiality > opt.min_partiality_for_merge) {
                const double correction = G_i * partiality * lp;
                if (correction <= 0.0) continue;

                // Corrected intensity and propagated sigma
                const double I_corr = static_cast<double>(r.I) / correction;
                const double sigma_corr = o.sigma / correction;

                // XDS/XSCALE style: weight = 1/σ²
                const double w = 1.0 / (sigma_corr * sigma_corr);

                auto& a = accum[o.hkl_slot];
                a.sum_wI += w * I_corr;
                a.sum_w += w;
                a.sum_wI2 += w * I_corr * I_corr;
                a.count++;
            }
        }

        // Populate merged reflections
        for (int h = 0; h < nhkl; ++h) {
            const auto& a = accum[h];

            if (a.sum_w > 0.0) {
                // XDS/XSCALE style: weighted mean
                out.merged[h].I = a.sum_wI / a.sum_w;

                // Standard error of weighted mean: σ = 1/√(Σw)
                double sigma_stat = std::sqrt(1.0 / a.sum_w);

                if (a.count >= 2) {
                    // XDS/XSCALE style: use internal consistency to estimate sigma
                    // σ² = (1/(n-1)) * Σw_i(I_i - <I>)² / Σw_i
                    const double var_internal = a.sum_wI2 - (a.sum_wI * a.sum_wI) / a.sum_w;
                    const double sigma_int = std::sqrt(var_internal / (a.sum_w * (a.count - 1)));

                    // Take the larger of statistical and internal sigma
                    // (XDS approach: if data is worse than expected, use internal estimate)
                    out.merged[h].sigma = std::max(sigma_stat, sigma_int);
                } else {
                    // Single observation: use propagated sigma
                    out.merged[h].sigma = sigma_stat;
                }
            }
            // else: I and sigma stay at initialized values (Itrue[h] and 0.0)
        }
    }

    // ---- Compute per-shell merging statistics ----
    {
        constexpr int kStatShells = 10;

        // Determine d-range from merged reflections
        float stat_d_min = std::numeric_limits<float>::max();
        float stat_d_max = 0.0f;
        for (int h = 0; h < nhkl; ++h) {
            const auto d = static_cast<float>(out.merged[h].d);
            if (std::isfinite(d) && d > 0.0f) {
                if (d < stat_d_min) stat_d_min = d;
                if (d > stat_d_max) stat_d_max = d;
            }
        }

        if (stat_d_min < stat_d_max && stat_d_min > 0.0f) {
            // Pad the range slightly so boundary reflections fall inside a shell.
            const float d_min_pad = stat_d_min * 0.999f;
            const float d_max_pad = stat_d_max * 1.001f;
            ResolutionShells stat_shells(d_min_pad, d_max_pad, kStatShells);

            const auto shell_mean_1_d2 = stat_shells.GetShellMeanOneOverResSq();
            const auto shell_min_res = stat_shells.GetShellMinRes();

            // Assign each unique reflection to a shell
            std::vector<int32_t> hkl_shell(nhkl, -1);
            for (int h = 0; h < nhkl; ++h) {
                const auto d = static_cast<float>(out.merged[h].d);
                if (std::isfinite(d) && d > 0.0f) {
                    auto s = stat_shells.GetShell(d);
                    if (s.has_value())
                        hkl_shell[h] = s.value();
                }
            }

            // Build corrected observations per HKL (same correction logic as merge)
            struct CorrObs {
                int hkl_slot;
                int shell;
                double I_corr;
            };
            std::vector<CorrObs> corr_obs;
            corr_obs.reserve(obs.size());

            const double half_wedge = opt.wedge_deg / 2.0;

            for (const auto& o : obs) {
                if (hkl_shell[o.hkl_slot] < 0)
                    continue;

                const Reflection& r = *o.r;
                const double lp = SafeInv(static_cast<double>(r.rlp), 1.0);
                const double G_i = g[o.img_slot];

                double partiality;
                size_t mos_slot = opt.per_image_mosaicity ? o.img_slot : 0;
                if (refine_partiality && mosaicity[mos_slot] > 0.0) {
                    double c1 = r.zeta / std::sqrt(2.0);
                    double arg_plus = (r.delta_phi_deg + half_wedge) * c1 / mosaicity[mos_slot];
                    double arg_minus = (r.delta_phi_deg - half_wedge) * c1 / mosaicity[mos_slot];
                    partiality = (std::erf(arg_plus) - std::erf(arg_minus)) / 2.0;
                } else {
                    partiality = r.partiality;
                }

                if (partiality <= opt.min_partiality_for_merge)
                    continue;
                const double correction = G_i * partiality * lp;
                if (correction <= 0.0)
                    continue;

                CorrObs co;
                co.hkl_slot = o.hkl_slot;
                co.shell = hkl_shell[o.hkl_slot];
                co.I_corr = static_cast<double>(r.I) / correction;
                corr_obs.push_back(co);
            }

            // Per-HKL: collect corrected intensities for Rmeas
            struct HKLStats {
                double sum_I = 0.0;
                int n = 0;
                std::vector<double> I_list;
            };
            std::vector<HKLStats> per_hkl(nhkl);

            for (const auto& co : corr_obs) {
                auto& hs = per_hkl[co.hkl_slot];
                hs.sum_I += co.I_corr;
                hs.n += 1;
                hs.I_list.push_back(co.I_corr);
            }

            // Compute per-shell statistics
            out.statistics.shells.resize(kStatShells);

            // Accumulators per shell
            struct ShellAccum {
                int total_obs = 0;
                std::unordered_set<int> unique_hkls;
                double rmeas_num = 0.0; // Σ sqrt(n/(n-1)) * Σ|I_i - <I>|
                double rmeas_den = 0.0; // Σ Σ I_i
                double sum_i_over_sig = 0.0;
                int n_merged_with_sigma = 0;
            };
            std::vector<ShellAccum> shell_acc(kStatShells);

            // Accumulate per-HKL contributions to each shell
            for (int h = 0; h < nhkl; ++h) {
                if (hkl_shell[h] < 0)
                    continue;
                const int s = hkl_shell[h];
                auto& sa = shell_acc[s];
                const auto& hs = per_hkl[h];
                if (hs.n == 0)
                    continue;

                sa.unique_hkls.insert(h);
                sa.total_obs += hs.n;

                const double mean_I = hs.sum_I / static_cast<double>(hs.n);

                // Rmeas numerator: sqrt(n/(n-1)) * Σ_i |I_i - <I>|
                if (hs.n >= 2) {
                    double sum_abs_dev = 0.0;
                    for (double Ii : hs.I_list)
                        sum_abs_dev += std::abs(Ii - mean_I);
                    sa.rmeas_num += std::sqrt(static_cast<double>(hs.n) / (hs.n - 1.0)) * sum_abs_dev;
                }

                // Rmeas denominator: Σ_i I_i (use absolute values for robustness)
                for (double Ii : hs.I_list)
                    sa.rmeas_den += std::abs(Ii);

                // <I/σ> from merged reflection
                if (out.merged[h].sigma > 0.0) {
                    sa.sum_i_over_sig += out.merged[h].I / out.merged[h].sigma;
                    sa.n_merged_with_sigma += 1;
                }
            }

            // Completeness: enumerate ASU reflections per shell if we have space group + unit cell
            std::vector<int> possible_per_shell(kStatShells, 0);
            int total_possible = 0;
            bool have_completeness = false;

            if (opt.space_group.has_value()) {
                // Counting all possible ASU reflections per shell would need
                // the unit cell (metric tensor) to compute d(hkl), which is
                // not available here. The symmetry objects and index ranges
                // below are set up for a future implementation, but for now
                // completeness stays unavailable.

                const gemmi::SpaceGroup& sg = *opt.space_group;
                const gemmi::GroupOps gops = sg.operations();
                const gemmi::ReciprocalAsu rasu(&sg);

                // Find max indices from merged data
                int max_h = 0, max_k = 0, max_l = 0;
                for (int idx = 0; idx < nhkl; ++idx) {
                    max_h = std::max(max_h, std::abs(out.merged[idx].h));
                    max_k = std::max(max_k, std::abs(out.merged[idx].k));
                    max_l = std::max(max_l, std::abs(out.merged[idx].l));
                }

                // Mark completeness as unavailable for now - the caller (jfjoch_process) can
                // fill it in if it has the unit cell.
                have_completeness = false;
            }

            // Fill output statistics
            for (int s = 0; s < kStatShells; ++s) {
                auto& ss = out.statistics.shells[s];
                const auto& sa = shell_acc[s];

                ss.mean_one_over_d2 = shell_mean_1_d2[s];
                // Shell boundaries: shell 0 goes from d_max_pad to shell_min_res[0], etc.
                ss.d_min = shell_min_res[s];
                ss.d_max = (s == 0) ? d_max_pad : shell_min_res[s - 1];

                ss.total_observations = sa.total_obs;
                ss.unique_reflections = static_cast<int>(sa.unique_hkls.size());
                ss.rmeas = (sa.rmeas_den > 0.0) ? (sa.rmeas_num / sa.rmeas_den) : 0.0;
                ss.mean_i_over_sigma = (sa.n_merged_with_sigma > 0)
                        ? (sa.sum_i_over_sig / sa.n_merged_with_sigma) : 0.0;
                ss.possible_reflections = possible_per_shell[s];
                ss.completeness = (have_completeness && possible_per_shell[s] > 0)
                        ? static_cast<double>(sa.unique_hkls.size()) / possible_per_shell[s] : 0.0;
            }

            // Overall statistics
            {
                auto& ov = out.statistics.overall;
                ov.d_min = stat_d_min;
                ov.d_max = stat_d_max;
                ov.mean_one_over_d2 = 0.0f;

                int total_obs_all = 0;
                std::unordered_set<int> all_unique;
                double rmeas_num_all = 0.0, rmeas_den_all = 0.0;
                double sum_isig_all = 0.0;
                int n_isig_all = 0;

                // Aggregate the per-shell accumulators into overall figures.
                for (int s = 0; s < kStatShells; ++s) {
                    const auto& sa = shell_acc[s];
                    total_obs_all += sa.total_obs;
                    all_unique.insert(sa.unique_hkls.begin(), sa.unique_hkls.end());
                    rmeas_num_all += sa.rmeas_num;
                    rmeas_den_all += sa.rmeas_den;
                    sum_isig_all += sa.sum_i_over_sig;
                    n_isig_all += sa.n_merged_with_sigma;
                }

                ov.total_observations = total_obs_all;
                ov.unique_reflections = static_cast<int>(all_unique.size());
                ov.rmeas = (rmeas_den_all > 0.0) ? (rmeas_num_all / rmeas_den_all) : 0.0;
                ov.mean_i_over_sigma = (n_isig_all > 0) ? (sum_isig_all / n_isig_all) : 0.0;
                ov.possible_reflections = total_possible;
                ov.completeness = (have_completeness && total_possible > 0)
                        ? static_cast<double>(all_unique.size()) / total_possible : 0.0;
            }
        }
    }

    return out;
}