// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "ScaleAndMerge.h" #include #include #include #include #include #include #include #include #include namespace { struct HKLKey { int64_t h = 0; int64_t k = 0; int64_t l = 0; bool is_positive = true; // only relevant if opt.merge_friedel == false bool operator==(const HKLKey& o) const noexcept { return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive; } }; struct HKLKeyHash { size_t operator()(const HKLKey& key) const noexcept { auto mix = [](uint64_t x) { x ^= x >> 33; x *= 0xff51afd7ed558ccdULL; x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL; x ^= x >> 33; return x; }; const uint64_t a = static_cast(key.h); const uint64_t b = static_cast(key.k); const uint64_t c = static_cast(key.l); const uint64_t d = static_cast(key.is_positive ? 1 : 0); return static_cast(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3)); } }; inline int RoundImageId(float image_number, double rounding_step) { if (!(rounding_step > 0.0)) rounding_step = 1.0; const double x = static_cast(image_number) / rounding_step; const double r = std::round(x) * rounding_step; return static_cast(std::llround(r / rounding_step)); } inline double SafeSigma(double s, double min_sigma) { if (!std::isfinite(s) || s <= 0.0) return min_sigma; return std::max(s, min_sigma); } inline double SafeD(double d) { if (!std::isfinite(d) || d <= 0.0) return std::numeric_limits::quiet_NaN(); return d; } inline int SafeToInt(int64_t x) { if (x < std::numeric_limits::min() || x > std::numeric_limits::max()) throw std::out_of_range("HKL index out of int range for Gemmi"); return static_cast(x); } // Canonicalize HKL according to Gemmi Reciprocal ASU if space group is provided. // If merge_friedel==true -> Friedel mates collapse (key.is_positive always true). // If merge_friedel==false -> keep I+ vs I- separate by key.is_positive. inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) { HKLKey key{}; key.h = r.h; key.k = r.k; key.l = r.l; key.is_positive = true; // If no SG provided, we can still optionally separate Friedel mates deterministically. if (!opt.space_group.has_value()) { if (!opt.merge_friedel) { const HKLKey neg{-r.h, -r.k, -r.l, true}; const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l); if (!pos) { key.h = -key.h; key.k = -key.k; key.l = -key.l; key.is_positive = false; } } return key; } const gemmi::SpaceGroup& sg = *opt.space_group; const gemmi::GroupOps gops = sg.operations(); const gemmi::ReciprocalAsu rasu(&sg); const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}}; const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); key.h = asu_hkl[0]; key.k = asu_hkl[1]; key.l = asu_hkl[2]; key.is_positive = opt.merge_friedel ? true : sign_plus; return key; } struct IntensityResidual { IntensityResidual(double Iobs, double sigma, double s2, bool refine_b) : Iobs_(Iobs), inv_sigma_(1.0 / sigma), s2_(s2), refine_b_(refine_b) {} template bool operator()(const T* const log_k, const T* const b, const T* const log_Ihkl, T* residual) const { const T k = ceres::exp(log_k[0]); const T Ihkl = ceres::exp(log_Ihkl[0]); T atten = T(1); if (refine_b_) { // I_pred = k * exp(-B*s^2) * I_hkl atten = ceres::exp(-b[0] * T(s2_)); } const T Ipred = k * atten * Ihkl; residual[0] = (Ipred - T(Iobs_)) * T(inv_sigma_); return true; } double Iobs_; double inv_sigma_; double s2_; bool refine_b_; }; } // namespace ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& observations, const ScaleMergeOptions& opt) { ScaleMergeResult out; struct ObsRef { const Reflection* r = nullptr; int img_id = 0; // rounded/quantized image id int img_slot = -1; // compact [0..nimg) int hkl_slot = -1; // compact [0..nhkl) double s2 = 0.0; double sigma = 0.0; }; std::vector obs; obs.reserve(observations.size()); std::unordered_map imgIdToSlot; imgIdToSlot.reserve(256); std::unordered_map hklToSlot; hklToSlot.reserve(observations.size()); for (const auto& r : observations) { const double d = SafeD(r.d); if (!std::isfinite(d)) continue; if (!std::isfinite(r.I)) continue; const double sigma = SafeSigma(static_cast(r.sigma), opt.min_sigma); const double s2 = 1.0 / (d * d); const int img_id = RoundImageId(r.image_number, opt.image_number_rounding); int img_slot; { auto it = imgIdToSlot.find(img_id); if (it == imgIdToSlot.end()) { img_slot = static_cast(imgIdToSlot.size()); imgIdToSlot.emplace(img_id, img_slot); } else { img_slot = it->second; } } int hkl_slot; try { const HKLKey key = CanonicalizeHKLKey(r, opt); auto it = hklToSlot.find(key); if (it == hklToSlot.end()) { hkl_slot = static_cast(hklToSlot.size()); hklToSlot.emplace(key, hkl_slot); } else { hkl_slot = it->second; } } catch (...) { continue; // skip problematic HKL } ObsRef o; o.r = &r; o.img_id = img_id; o.img_slot = img_slot; o.hkl_slot = hkl_slot; o.s2 = s2; o.sigma = sigma; obs.push_back(o); } const int nimg = static_cast(imgIdToSlot.size()); const int nhkl = static_cast(hklToSlot.size()); out.image_scale_k.assign(nimg, 1.0); out.image_b_factor.assign(nimg, 0.0); out.image_ids.assign(nimg, 0); for (const auto& kv : imgIdToSlot) { out.image_ids[kv.second] = kv.first; } std::vector log_k(nimg, 0.0); std::vector b(nimg, 0.0); std::vector log_Ihkl(nhkl, 0.0); // Initialize Ihkl from per-HKL median of observed intensities. { std::vector> per_hkl_I(nhkl); for (const auto& o : obs) { per_hkl_I[o.hkl_slot].push_back(static_cast(o.r->I)); } for (int h = 0; h < nhkl; ++h) { auto& v = per_hkl_I[h]; if (v.empty()) { log_Ihkl[h] = std::log(std::max(opt.min_sigma, 1e-6)); continue; } std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); double med = v[v.size() / 2]; if (!std::isfinite(med) || med <= opt.min_sigma) med = opt.min_sigma; log_Ihkl[h] = std::log(med); } } ceres::Problem problem; std::unique_ptr loss; if (opt.use_huber_loss) { loss = std::make_unique(opt.huber_delta); } for (const auto& o : obs) { const double Iobs = static_cast(o.r->I); auto* cost = new ceres::AutoDiffCostFunction( new IntensityResidual(Iobs, o.sigma, o.s2, opt.refine_b_factor)); problem.AddResidualBlock(cost, loss.get(), &log_k[o.img_slot], &b[o.img_slot], &log_Ihkl[o.hkl_slot]); } // Fix gauge freedom: anchor first image scale to 1.0 if (opt.fix_first_image_scale && nimg > 0) { log_k[0] = 0.0; problem.SetParameterBlockConstant(&log_k[0]); } if (!opt.refine_b_factor) { for (int i = 0; i < nimg; ++i) { b[i] = 0.0; problem.SetParameterBlockConstant(&b[i]); } } else { for (int i = 0; i < nimg; ++i) { if (opt.b_min) problem.SetParameterLowerBound(&b[i], 0, *opt.b_min); if (opt.b_max) problem.SetParameterUpperBound(&b[i], 0, *opt.b_max); } } ceres::Solver::Options options; options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY; options.minimizer_progress_to_stdout = false; options.logging_type = ceres::LoggingType::SILENT; options.max_num_iterations = opt.max_num_iterations; options.max_solver_time_in_seconds = opt.max_solver_time_s; ceres::Solver::Summary summary; ceres::Solve(options, &problem, &summary); for (int i = 0; i < nimg; ++i) { out.image_scale_k[i] = std::exp(log_k[i]); out.image_b_factor[i] = opt.refine_b_factor ? b[i] : 0.0; } // Reverse maps for merged output std::vector slotToHKL(nhkl); for (const auto& kv : hklToSlot) { slotToHKL[kv.second] = kv.first; } // crude sigma estimate from model residual scatter std::vector n_per_hkl(nhkl, 0); std::vector ss_per_hkl(nhkl, 0.0); for (const auto& o : obs) { const int i = o.img_slot; const int h = o.hkl_slot; const double k = std::exp(log_k[i]); const double atten = opt.refine_b_factor ? std::exp(-b[i] * o.s2) : 1.0; const double Ihkl = std::exp(log_Ihkl[h]); const double Ipred = k * atten * Ihkl; const double r = (Ipred - static_cast(o.r->I)); n_per_hkl[h] += 1; ss_per_hkl[h] += r * r; } out.merged.reserve(nhkl); for (int h = 0; h < nhkl; ++h) { MergedReflection m{}; m.h = slotToHKL[h].h; m.k = slotToHKL[h].k; m.l = slotToHKL[h].l; m.I = static_cast(std::exp(log_Ihkl[h])); if (n_per_hkl[h] >= 2) { const double rms = std::sqrt(ss_per_hkl[h] / static_cast(n_per_hkl[h] - 1)); m.sigma = static_cast(rms / std::sqrt(static_cast(n_per_hkl[h]))); } else { m.sigma = std::numeric_limits::quiet_NaN(); } out.merged.push_back(m); } return out; }