// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "ScaleAndMerge.h" #include #include #include #include #include #include #include #include #include #include namespace { struct HKLKey { int64_t h = 0; int64_t k = 0; int64_t l = 0; bool is_positive = true; // only relevant if opt.merge_friedel == false bool operator==(const HKLKey& o) const noexcept { return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive; } }; struct HKLKeyHash { size_t operator()(const HKLKey& key) const noexcept { auto mix = [](uint64_t x) { x ^= x >> 33; x *= 0xff51afd7ed558ccdULL; x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL; x ^= x >> 33; return x; }; const uint64_t a = static_cast(key.h); const uint64_t b = static_cast(key.k); const uint64_t c = static_cast(key.l); const uint64_t d = static_cast(key.is_positive ? 1 : 0); return static_cast(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3)); } }; inline int RoundImageId(float image_number, double rounding_step) { if (!(rounding_step > 0.0)) rounding_step = 1.0; const double x = static_cast(image_number) / rounding_step; const double r = std::round(x) * rounding_step; return static_cast(std::llround(r / rounding_step)); } inline double SafeSigma(double s, double min_sigma) { if (!std::isfinite(s) || s <= 0.0) return min_sigma; return std::max(s, min_sigma); } inline double SafeD(double d) { if (!std::isfinite(d) || d <= 0.0) return std::numeric_limits::quiet_NaN(); return d; } inline int SafeToInt(int64_t x) { if (x < std::numeric_limits::min() || x > std::numeric_limits::max()) throw std::out_of_range("HKL index out of int range for Gemmi"); return static_cast(x); } inline double SafeInv(double x, double fallback) { if (!std::isfinite(x) || x == 0.0) return fallback; return 1.0 / x; } inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) { HKLKey key{}; key.h = r.h; key.k = r.k; key.l = r.l; key.is_positive = true; if (!opt.space_group.has_value()) { if (!opt.merge_friedel) { const HKLKey neg{-r.h, -r.k, -r.l, true}; const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l); if (!pos) { key.h = -key.h; key.k = -key.k; key.l = -key.l; key.is_positive = false; } } return key; } const gemmi::SpaceGroup& sg = *opt.space_group; const gemmi::GroupOps gops = sg.operations(); const gemmi::ReciprocalAsu rasu(&sg); const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}}; const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); key.h = asu_hkl[0]; key.k = asu_hkl[1]; key.l = asu_hkl[2]; key.is_positive = opt.merge_friedel ? true : sign_plus; return key; } struct IntensityResidual { IntensityResidual(const Reflection& r, double sigma_obs, double wedge_deg, double s2, bool refine_b, bool partiality_model) : Iobs_(static_cast(r.I)), inv_sigma_(SafeInv(sigma_obs, 1.0)), delta_phi_rad_(static_cast(r.delta_phi) * M_PI / 180.0), lp_(SafeInv(static_cast(r.rlp), 1.0)), half_wedge_rad_(wedge_deg * M_PI / 180.0 / 2.0), c1_(std::sqrt(2.0) * SafeInv(static_cast(r.zeta), 1.0)), s2_(s2), refine_b_(refine_b), partiality_model_(partiality_model) {} template bool operator()(const T* const log_k, const T* const b, const T* const mosaicity_rad, const T* const log_Itrue, T* residual) const { const T k = ceres::exp(log_k[0]); const T Itrue = ceres::exp(log_Itrue[0]); T partiality = T(1.0); if (partiality_model_) { const T arg_plus = T(delta_phi_rad_ + half_wedge_rad_) * (T(c1_) * mosaicity_rad[0]); const T arg_minus = T(delta_phi_rad_ - half_wedge_rad_) * (T(c1_) * mosaicity_rad[0]); partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); } T wilson = T(1.0); if (refine_b_) { wilson = ceres::exp(-b[0] * T(s2_)); } const T Ipred = k * wilson * partiality * T(lp_) * Itrue; residual[0] = (Ipred - T(Iobs_)) * T(inv_sigma_); return true; } double Iobs_; double inv_sigma_; double delta_phi_rad_; double lp_; double half_wedge_rad_; double c1_; double s2_; bool refine_b_; bool partiality_model_; }; struct ScaleRegularizationResidual { explicit ScaleRegularizationResidual(double sigma_k) : inv_sigma_(SafeInv(sigma_k, 1.0)) {} template bool operator()(const T* const log_k, T* residual) const { const T k = ceres::exp(log_k[0]); residual[0] = (k - T(1.0)) * T(inv_sigma_); return true; } double inv_sigma_; }; } // namespace ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& observations, const ScaleMergeOptions& opt) { ScaleMergeResult out; struct ObsRef { const Reflection* r = nullptr; int img_id = 0; int img_slot = -1; int hkl_slot = -1; double s2 = 0.0; double sigma = 0.0; }; std::vector obs; obs.reserve(observations.size()); std::unordered_map imgIdToSlot; imgIdToSlot.reserve(256); std::unordered_map hklToSlot; hklToSlot.reserve(observations.size()); for (const auto& r : observations) { const double d = SafeD(r.d); if (!std::isfinite(d)) continue; if (!std::isfinite(r.I)) continue; if (!std::isfinite(r.zeta) || r.zeta <= 0.0f) continue; if (!std::isfinite(r.rlp) || r.rlp == 0.0f) continue; const double sigma = SafeSigma(static_cast(r.sigma), opt.min_sigma); const double s2 = 1.0 / (d * d); const int img_id = RoundImageId(r.image_number, opt.image_number_rounding); int img_slot; { auto it = imgIdToSlot.find(img_id); if (it == imgIdToSlot.end()) { img_slot = static_cast(imgIdToSlot.size()); imgIdToSlot.emplace(img_id, img_slot); } else { img_slot = it->second; } } int hkl_slot; try { const HKLKey key = CanonicalizeHKLKey(r, opt); auto it = hklToSlot.find(key); if (it == hklToSlot.end()) { hkl_slot = static_cast(hklToSlot.size()); hklToSlot.emplace(key, hkl_slot); } else { hkl_slot = it->second; } } catch (...) { continue; } ObsRef o; o.r = &r; o.img_id = img_id; o.img_slot = img_slot; o.hkl_slot = hkl_slot; o.s2 = s2; o.sigma = sigma; obs.push_back(o); } const int nimg = static_cast(imgIdToSlot.size()); const int nhkl = static_cast(hklToSlot.size()); out.image_scale_k.assign(nimg, 1.0); out.image_b_factor.assign(nimg, 0.0); out.image_ids.assign(nimg, 0); for (const auto& kv : imgIdToSlot) { out.image_ids[kv.second] = kv.first; } std::vector log_k(nimg, 0.0); std::vector b(nimg, 0.0); std::vector log_Itrue(nhkl, 0.0); auto deg2rad = [](double deg) { return deg * M_PI / 180.0; }; // Mosaicity: always per-image std::vector mosaicity_rad(nimg, deg2rad(opt.mosaicity_init_deg)); // Initialize Itrue from per-HKL median of observed intensities { std::vector> per_hkl_I(nhkl); for (const auto& o : obs) { per_hkl_I[o.hkl_slot].push_back(static_cast(o.r->I)); } for (int h = 0; h < nhkl; ++h) { auto& v = per_hkl_I[h]; if (v.empty()) { log_Itrue[h] = std::log(std::max(opt.min_sigma, 1e-6)); continue; } std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); double med = v[v.size() / 2]; if (!std::isfinite(med) || med <= opt.min_sigma) med = opt.min_sigma; log_Itrue[h] = std::log(med); } } ceres::Problem problem; const bool partiality_model = opt.wedge_deg > 0.0; for (const auto& o : obs) { auto* cost = new ceres::AutoDiffCostFunction( new IntensityResidual(*o.r, o.sigma, opt.wedge_deg, o.s2, opt.refine_b_factor, partiality_model)); problem.AddResidualBlock(cost, nullptr, // no loss function &log_k[o.img_slot], &b[o.img_slot], &mosaicity_rad[o.img_slot], &log_Itrue[o.hkl_slot]); } // Optional Kabsch-like regularization for k if (opt.regularize_scale_to_one) { for (int i = 0; i < nimg; ++i) { auto* rcost = new ceres::AutoDiffCostFunction( new ScaleRegularizationResidual(opt.scale_regularization_sigma)); problem.AddResidualBlock(rcost, nullptr, &log_k[i]); } } // Fix gauge freedom if (opt.fix_first_image_scale && nimg > 0) { log_k[0] = 0.0; problem.SetParameterBlockConstant(&log_k[0]); } if (!opt.refine_b_factor) { for (int i = 0; i < nimg; ++i) { b[i] = 0.0; problem.SetParameterBlockConstant(&b[i]); } } else { for (int i = 0; i < nimg; ++i) { if (opt.b_min) problem.SetParameterLowerBound(&b[i], 0, *opt.b_min); if (opt.b_max) problem.SetParameterUpperBound(&b[i], 0, *opt.b_max); } } // Mosaicity refinement + bounds if (!opt.refine_mosaicity) { for (int i = 0; i < nimg; ++i) problem.SetParameterBlockConstant(&mosaicity_rad[i]); } else { const std::optional min_rad = opt.mosaicity_min_deg ? std::optional(deg2rad(*opt.mosaicity_min_deg)) : std::nullopt; const std::optional max_rad = opt.mosaicity_max_deg ? std::optional(deg2rad(*opt.mosaicity_max_deg)) : std::nullopt; for (int i = 0; i < nimg; ++i) { if (min_rad) problem.SetParameterLowerBound(&mosaicity_rad[i], 0, *min_rad); if (max_rad) problem.SetParameterUpperBound(&mosaicity_rad[i], 0, *max_rad); } } ceres::Solver::Options options; options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY; options.minimizer_progress_to_stdout = false; options.logging_type = ceres::LoggingType::SILENT; options.max_num_iterations = opt.max_num_iterations; options.max_solver_time_in_seconds = opt.max_solver_time_s; ceres::Solver::Summary summary; ceres::Solve(options, &problem, &summary); // --- Export per-image results --- for (int i = 0; i < nimg; ++i) { out.image_scale_k[i] = std::exp(log_k[i]); out.image_b_factor[i] = opt.refine_b_factor ? b[i] : 0.0; } out.mosaicity_deg.resize(nimg); for (int i = 0; i < nimg; ++i) out.mosaicity_deg[i] = mosaicity_rad[i] * 180.0 / M_PI; // --- Compute goodness-of-fit (reduced chi-squared) --- const int n_obs = static_cast(obs.size()); // Count free parameters: nhkl log_Itrue + per-image (log_k + b + mosaicity) minus fixed ones int n_params = nhkl; for (int i = 0; i < nimg; ++i) { if (!(opt.fix_first_image_scale && i == 0)) n_params += 1; // log_k if (opt.refine_b_factor) n_params += 1; // b if (opt.refine_mosaicity) n_params += 1; // mosaicity } const double half_wedge_rad = opt.wedge_deg * M_PI / 180.0 / 2.0; double sum_r2 = 0.0; for (const auto& o : obs) { const int i = o.img_slot; const int h = o.hkl_slot; const double ki = std::exp(log_k[i]); const double atten = opt.refine_b_factor ? std::exp(-b[i] * o.s2) : 1.0; const double Itrue = std::exp(log_Itrue[h]); const double zeta = static_cast(o.r->zeta); const double mosa_i = mosaicity_rad[i]; const double c1 = std::sqrt(2.0) * (mosa_i / zeta); const double delta_phi_rad = static_cast(o.r->delta_phi) * M_PI / 180.0; const double partiality = partiality_model ? (std::erf((delta_phi_rad + half_wedge_rad) * c1) - std::erf((delta_phi_rad - half_wedge_rad) * c1)) / 2.0 : 1.0; const double lp = SafeInv(static_cast(o.r->rlp), 1.0); const double Ipred = ki * atten * partiality * lp * Itrue; const double resid = (Ipred - static_cast(o.r->I)) / o.sigma; sum_r2 += resid * resid; } const double gof2 = (n_obs > n_params) ? sum_r2 / static_cast(n_obs - n_params) : 1.0; out.gof2 = gof2; // --- Covariance: σ(I_true) from (J^T W J)^{-1} scaled by GoF² --- std::vector slotToHKL(nhkl); for (const auto& kv : hklToSlot) { slotToHKL[kv.second] = kv.first; } out.merged.resize(nhkl); for (int h = 0; h < nhkl; ++h) { out.merged[h].h = slotToHKL[h].h; out.merged[h].k = slotToHKL[h].k; out.merged[h].l = slotToHKL[h].l; out.merged[h].I = std::exp(log_Itrue[h]); out.merged[h].sigma = std::numeric_limits::quiet_NaN(); } ceres::Covariance::Options cov_options; cov_options.algorithm_type = ceres::SPARSE_QR; ceres::Covariance covariance(cov_options); std::vector> covariance_blocks; covariance_blocks.reserve(nhkl); for (int h = 0; h < nhkl; ++h) { covariance_blocks.emplace_back(&log_Itrue[h], &log_Itrue[h]); } if (covariance.Compute(covariance_blocks, &problem)) { for (int h = 0; h < nhkl; ++h) { double var_log_I = 0.0; covariance.GetCovarianceBlock(&log_Itrue[h], &log_Itrue[h], &var_log_I); // σ(I) = I * sqrt( var(log I) * GoF² ) const double Itrue = std::exp(log_Itrue[h]); out.merged[h].sigma = Itrue * std::sqrt(var_log_I * gof2); } } return out; }