Files
Jungfraujoch/image_analysis/scale_merge/ScaleAndMerge.cpp
Filip Leonarski 7f1f28f8d3
All checks were successful
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 11m35s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 13m17s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 13m37s
Build Packages / Generate python client (push) Successful in 20s
Build Packages / build:rpm (rocky8) (push) Successful in 13m59s
Build Packages / Create release (push) Has been skipped
Build Packages / Build documentation (push) Successful in 36s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 14m9s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 14m8s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 14m37s
Build Packages / build:rpm (rocky9) (push) Successful in 14m49s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 7m26s
Build Packages / Unit tests (push) Successful in 51m24s
ScaleAndMerge: Merge intensities taken from scaling - variances taken directly from least-squares (with some magic from Opus 4.6, which I don't yet understand)
2026-02-08 17:22:02 +01:00

458 lines
15 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "ScaleAndMerge.h"
#include <ceres/ceres.h>
#include <ceres/covariance.h>
#include <algorithm>
#include <cmath>
#include <limits>
#include <stdexcept>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
namespace {
struct HKLKey {
int64_t h = 0;
int64_t k = 0;
int64_t l = 0;
bool is_positive = true; // only relevant if opt.merge_friedel == false
bool operator==(const HKLKey& o) const noexcept {
return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive;
}
};
struct HKLKeyHash {
size_t operator()(const HKLKey& key) const noexcept {
auto mix = [](uint64_t x) {
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
x ^= x >> 33;
x *= 0xc4ceb9fe1a85ec53ULL;
x ^= x >> 33;
return x;
};
const uint64_t a = static_cast<uint64_t>(key.h);
const uint64_t b = static_cast<uint64_t>(key.k);
const uint64_t c = static_cast<uint64_t>(key.l);
const uint64_t d = static_cast<uint64_t>(key.is_positive ? 1 : 0);
return static_cast<size_t>(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3));
}
};
inline int RoundImageId(float image_number, double rounding_step) {
if (!(rounding_step > 0.0))
rounding_step = 1.0;
const double x = static_cast<double>(image_number) / rounding_step;
const double r = std::round(x) * rounding_step;
return static_cast<int>(std::llround(r / rounding_step));
}
inline double SafeSigma(double s, double min_sigma) {
if (!std::isfinite(s) || s <= 0.0)
return min_sigma;
return std::max(s, min_sigma);
}
inline double SafeD(double d) {
if (!std::isfinite(d) || d <= 0.0)
return std::numeric_limits<double>::quiet_NaN();
return d;
}
inline int SafeToInt(int64_t x) {
if (x < std::numeric_limits<int>::min() || x > std::numeric_limits<int>::max())
throw std::out_of_range("HKL index out of int range for Gemmi");
return static_cast<int>(x);
}
inline double SafeInv(double x, double fallback) {
if (!std::isfinite(x) || x == 0.0)
return fallback;
return 1.0 / x;
}
inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) {
HKLKey key{};
key.h = r.h;
key.k = r.k;
key.l = r.l;
key.is_positive = true;
if (!opt.space_group.has_value()) {
if (!opt.merge_friedel) {
const HKLKey neg{-r.h, -r.k, -r.l, true};
const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l);
if (!pos) {
key.h = -key.h;
key.k = -key.k;
key.l = -key.l;
key.is_positive = false;
}
}
return key;
}
const gemmi::SpaceGroup& sg = *opt.space_group;
const gemmi::GroupOps gops = sg.operations();
const gemmi::ReciprocalAsu rasu(&sg);
const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}};
const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops);
key.h = asu_hkl[0];
key.k = asu_hkl[1];
key.l = asu_hkl[2];
key.is_positive = opt.merge_friedel ? true : sign_plus;
return key;
}
struct IntensityResidual {
IntensityResidual(const Reflection& r, double sigma_obs, double wedge_deg, double s2, bool refine_b,
bool partiality_model)
: Iobs_(static_cast<double>(r.I)),
inv_sigma_(SafeInv(sigma_obs, 1.0)),
delta_phi_rad_(static_cast<double>(r.delta_phi) * M_PI / 180.0),
lp_(SafeInv(static_cast<double>(r.rlp), 1.0)),
half_wedge_rad_(wedge_deg * M_PI / 180.0 / 2.0),
c1_(std::sqrt(2.0) * SafeInv(static_cast<double>(r.zeta), 1.0)),
s2_(s2),
refine_b_(refine_b),
partiality_model_(partiality_model) {}
template<typename T>
bool operator()(const T* const log_k,
const T* const b,
const T* const mosaicity_rad,
const T* const log_Itrue,
T* residual) const {
const T k = ceres::exp(log_k[0]);
const T Itrue = ceres::exp(log_Itrue[0]);
T partiality = T(1.0);
if (partiality_model_) {
const T arg_plus = T(delta_phi_rad_ + half_wedge_rad_) * (T(c1_) * mosaicity_rad[0]);
const T arg_minus = T(delta_phi_rad_ - half_wedge_rad_) * (T(c1_) * mosaicity_rad[0]);
partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0);
}
T wilson = T(1.0);
if (refine_b_) {
wilson = ceres::exp(-b[0] * T(s2_));
}
const T Ipred = k * wilson * partiality * T(lp_) * Itrue;
residual[0] = (Ipred - T(Iobs_)) * T(inv_sigma_);
return true;
}
double Iobs_;
double inv_sigma_;
double delta_phi_rad_;
double lp_;
double half_wedge_rad_;
double c1_;
double s2_;
bool refine_b_;
bool partiality_model_;
};
struct ScaleRegularizationResidual {
explicit ScaleRegularizationResidual(double sigma_k)
: inv_sigma_(SafeInv(sigma_k, 1.0)) {}
template <typename T>
bool operator()(const T* const log_k, T* residual) const {
const T k = ceres::exp(log_k[0]);
residual[0] = (k - T(1.0)) * T(inv_sigma_);
return true;
}
double inv_sigma_;
};
} // namespace
ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector<Reflection>& observations,
const ScaleMergeOptions& opt) {
ScaleMergeResult out;
struct ObsRef {
const Reflection* r = nullptr;
int img_id = 0;
int img_slot = -1;
int hkl_slot = -1;
double s2 = 0.0;
double sigma = 0.0;
};
std::vector<ObsRef> obs;
obs.reserve(observations.size());
std::unordered_map<int, int> imgIdToSlot;
imgIdToSlot.reserve(256);
std::unordered_map<HKLKey, int, HKLKeyHash> hklToSlot;
hklToSlot.reserve(observations.size());
for (const auto& r : observations) {
const double d = SafeD(r.d);
if (!std::isfinite(d))
continue;
if (!std::isfinite(r.I))
continue;
if (!std::isfinite(r.zeta) || r.zeta <= 0.0f)
continue;
if (!std::isfinite(r.rlp) || r.rlp == 0.0f)
continue;
const double sigma = SafeSigma(static_cast<double>(r.sigma), opt.min_sigma);
const double s2 = 1.0 / (d * d);
const int img_id = RoundImageId(r.image_number, opt.image_number_rounding);
int img_slot;
{
auto it = imgIdToSlot.find(img_id);
if (it == imgIdToSlot.end()) {
img_slot = static_cast<int>(imgIdToSlot.size());
imgIdToSlot.emplace(img_id, img_slot);
} else {
img_slot = it->second;
}
}
int hkl_slot;
try {
const HKLKey key = CanonicalizeHKLKey(r, opt);
auto it = hklToSlot.find(key);
if (it == hklToSlot.end()) {
hkl_slot = static_cast<int>(hklToSlot.size());
hklToSlot.emplace(key, hkl_slot);
} else {
hkl_slot = it->second;
}
} catch (...) {
continue;
}
ObsRef o;
o.r = &r;
o.img_id = img_id;
o.img_slot = img_slot;
o.hkl_slot = hkl_slot;
o.s2 = s2;
o.sigma = sigma;
obs.push_back(o);
}
const int nimg = static_cast<int>(imgIdToSlot.size());
const int nhkl = static_cast<int>(hklToSlot.size());
out.image_scale_k.assign(nimg, 1.0);
out.image_b_factor.assign(nimg, 0.0);
out.image_ids.assign(nimg, 0);
for (const auto& kv : imgIdToSlot) {
out.image_ids[kv.second] = kv.first;
}
std::vector<double> log_k(nimg, 0.0);
std::vector<double> b(nimg, 0.0);
std::vector<double> log_Itrue(nhkl, 0.0);
auto deg2rad = [](double deg) { return deg * M_PI / 180.0; };
// Mosaicity: always per-image
std::vector<double> mosaicity_rad(nimg, deg2rad(opt.mosaicity_init_deg));
// Initialize Itrue from per-HKL median of observed intensities
{
std::vector<std::vector<double>> per_hkl_I(nhkl);
for (const auto& o : obs) {
per_hkl_I[o.hkl_slot].push_back(static_cast<double>(o.r->I));
}
for (int h = 0; h < nhkl; ++h) {
auto& v = per_hkl_I[h];
if (v.empty()) {
log_Itrue[h] = std::log(std::max(opt.min_sigma, 1e-6));
continue;
}
std::nth_element(v.begin(), v.begin() + static_cast<long>(v.size() / 2), v.end());
double med = v[v.size() / 2];
if (!std::isfinite(med) || med <= opt.min_sigma)
med = opt.min_sigma;
log_Itrue[h] = std::log(med);
}
}
ceres::Problem problem;
const bool partiality_model = opt.wedge_deg > 0.0;
for (const auto& o : obs) {
auto* cost = new ceres::AutoDiffCostFunction<IntensityResidual, 1, 1, 1, 1, 1>(
new IntensityResidual(*o.r, o.sigma, opt.wedge_deg, o.s2, opt.refine_b_factor, partiality_model));
problem.AddResidualBlock(cost,
nullptr, // no loss function
&log_k[o.img_slot],
&b[o.img_slot],
&mosaicity_rad[o.img_slot],
&log_Itrue[o.hkl_slot]);
}
// Optional Kabsch-like regularization for k
if (opt.regularize_scale_to_one) {
for (int i = 0; i < nimg; ++i) {
auto* rcost = new ceres::AutoDiffCostFunction<ScaleRegularizationResidual, 1, 1>(
new ScaleRegularizationResidual(opt.scale_regularization_sigma));
problem.AddResidualBlock(rcost, nullptr, &log_k[i]);
}
}
// Fix gauge freedom
if (opt.fix_first_image_scale && nimg > 0) {
log_k[0] = 0.0;
problem.SetParameterBlockConstant(&log_k[0]);
}
if (!opt.refine_b_factor) {
for (int i = 0; i < nimg; ++i) {
b[i] = 0.0;
problem.SetParameterBlockConstant(&b[i]);
}
} else {
for (int i = 0; i < nimg; ++i) {
if (opt.b_min)
problem.SetParameterLowerBound(&b[i], 0, *opt.b_min);
if (opt.b_max)
problem.SetParameterUpperBound(&b[i], 0, *opt.b_max);
}
}
// Mosaicity refinement + bounds
if (!opt.refine_mosaicity) {
for (int i = 0; i < nimg; ++i)
problem.SetParameterBlockConstant(&mosaicity_rad[i]);
} else {
const std::optional<double> min_rad = opt.mosaicity_min_deg
? std::optional<double>(deg2rad(*opt.mosaicity_min_deg)) : std::nullopt;
const std::optional<double> max_rad = opt.mosaicity_max_deg
? std::optional<double>(deg2rad(*opt.mosaicity_max_deg)) : std::nullopt;
for (int i = 0; i < nimg; ++i) {
if (min_rad) problem.SetParameterLowerBound(&mosaicity_rad[i], 0, *min_rad);
if (max_rad) problem.SetParameterUpperBound(&mosaicity_rad[i], 0, *max_rad);
}
}
ceres::Solver::Options options;
options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;
options.minimizer_progress_to_stdout = false;
options.logging_type = ceres::LoggingType::SILENT;
options.max_num_iterations = opt.max_num_iterations;
options.max_solver_time_in_seconds = opt.max_solver_time_s;
ceres::Solver::Summary summary;
ceres::Solve(options, &problem, &summary);
// --- Export per-image results ---
for (int i = 0; i < nimg; ++i) {
out.image_scale_k[i] = std::exp(log_k[i]);
out.image_b_factor[i] = opt.refine_b_factor ? b[i] : 0.0;
}
out.mosaicity_deg.resize(nimg);
for (int i = 0; i < nimg; ++i)
out.mosaicity_deg[i] = mosaicity_rad[i] * 180.0 / M_PI;
// --- Compute goodness-of-fit (reduced chi-squared) ---
const int n_obs = static_cast<int>(obs.size());
// Count free parameters: nhkl log_Itrue + per-image (log_k + b + mosaicity) minus fixed ones
int n_params = nhkl;
for (int i = 0; i < nimg; ++i) {
if (!(opt.fix_first_image_scale && i == 0))
n_params += 1; // log_k
if (opt.refine_b_factor)
n_params += 1; // b
if (opt.refine_mosaicity)
n_params += 1; // mosaicity
}
const double half_wedge_rad = opt.wedge_deg * M_PI / 180.0 / 2.0;
double sum_r2 = 0.0;
for (const auto& o : obs) {
const int i = o.img_slot;
const int h = o.hkl_slot;
const double ki = std::exp(log_k[i]);
const double atten = opt.refine_b_factor ? std::exp(-b[i] * o.s2) : 1.0;
const double Itrue = std::exp(log_Itrue[h]);
const double zeta = static_cast<double>(o.r->zeta);
const double mosa_i = mosaicity_rad[i];
const double c1 = std::sqrt(2.0) * (mosa_i / zeta);
const double delta_phi_rad = static_cast<double>(o.r->delta_phi) * M_PI / 180.0;
const double partiality = partiality_model
? (std::erf((delta_phi_rad + half_wedge_rad) * c1) - std::erf((delta_phi_rad - half_wedge_rad) * c1)) / 2.0
: 1.0;
const double lp = SafeInv(static_cast<double>(o.r->rlp), 1.0);
const double Ipred = ki * atten * partiality * lp * Itrue;
const double resid = (Ipred - static_cast<double>(o.r->I)) / o.sigma;
sum_r2 += resid * resid;
}
const double gof2 = (n_obs > n_params) ? sum_r2 / static_cast<double>(n_obs - n_params) : 1.0;
out.gof2 = gof2;
// --- Covariance: σ(I_true) from (J^T W J)^{-1} scaled by GoF² ---
std::vector<HKLKey> slotToHKL(nhkl);
for (const auto& kv : hklToSlot) {
slotToHKL[kv.second] = kv.first;
}
out.merged.resize(nhkl);
for (int h = 0; h < nhkl; ++h) {
out.merged[h].h = slotToHKL[h].h;
out.merged[h].k = slotToHKL[h].k;
out.merged[h].l = slotToHKL[h].l;
out.merged[h].I = std::exp(log_Itrue[h]);
out.merged[h].sigma = std::numeric_limits<double>::quiet_NaN();
}
ceres::Covariance::Options cov_options;
cov_options.algorithm_type = ceres::SPARSE_QR;
ceres::Covariance covariance(cov_options);
std::vector<std::pair<const double*, const double*>> covariance_blocks;
covariance_blocks.reserve(nhkl);
for (int h = 0; h < nhkl; ++h) {
covariance_blocks.emplace_back(&log_Itrue[h], &log_Itrue[h]);
}
if (covariance.Compute(covariance_blocks, &problem)) {
for (int h = 0; h < nhkl; ++h) {
double var_log_I = 0.0;
covariance.GetCovarianceBlock(&log_Itrue[h], &log_Itrue[h], &var_log_I);
// σ(I) = I * sqrt( var(log I) * GoF² )
const double Itrue = std::exp(log_Itrue[h]);
out.merged[h].sigma = Itrue * std::sqrt(var_log_I * gof2);
}
}
return out;
}