// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "ScaleAndMerge.h" #include #include #include #include #include #include #include #include #include #include namespace { struct HKLKey { int64_t h = 0; int64_t k = 0; int64_t l = 0; bool is_positive = true; // only relevant if opt.merge_friedel == false bool operator==(const HKLKey& o) const noexcept { return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive; } }; struct HKLKeyHash { size_t operator()(const HKLKey& key) const noexcept { auto mix = [](uint64_t x) { x ^= x >> 33; x *= 0xff51afd7ed558ccdULL; x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL; x ^= x >> 33; return x; }; const uint64_t a = static_cast(key.h); const uint64_t b = static_cast(key.k); const uint64_t c = static_cast(key.l); const uint64_t d = static_cast(key.is_positive ? 1 : 0); return static_cast(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3)); } }; inline int RoundImageId(float image_number, double rounding_step) { if (!(rounding_step > 0.0)) rounding_step = 1.0; const double x = static_cast(image_number) / rounding_step; const double r = std::round(x) * rounding_step; return static_cast(std::llround(r / rounding_step)); } inline double SafeSigma(double s, double min_sigma) { if (!std::isfinite(s) || s <= 0.0) return min_sigma; return std::max(s, min_sigma); } inline double SafeD(double d) { if (!std::isfinite(d) || d <= 0.0) return std::numeric_limits::quiet_NaN(); return d; } inline int SafeToInt(int64_t x) { if (x < std::numeric_limits::min() || x > std::numeric_limits::max()) throw std::out_of_range("HKL index out of int range for Gemmi"); return static_cast(x); } inline double SafeInv(double x, double fallback) { if (!std::isfinite(x) || x == 0.0) return fallback; return 1.0 / x; } inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) { HKLKey key{}; key.h = r.h; key.k = r.k; key.l = r.l; key.is_positive = true; if (!opt.space_group.has_value()) { if (!opt.merge_friedel) { const HKLKey neg{-r.h, -r.k, -r.l, true}; const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l); if (!pos) { key.h = -key.h; key.k = -key.k; key.l = -key.l; key.is_positive = false; } } return key; } const gemmi::SpaceGroup& sg = *opt.space_group; const gemmi::GroupOps gops = sg.operations(); const gemmi::ReciprocalAsu rasu(&sg); const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}}; const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); key.h = asu_hkl[0]; key.k = asu_hkl[1]; key.l = asu_hkl[2]; key.is_positive = opt.merge_friedel ? true : sign_plus; return key; } struct IntensityResidual { IntensityResidual(const Reflection& r, double sigma_obs, double wedge_deg, bool refine_partiality) : Iobs_(static_cast(r.I)), inv_sigma_(SafeInv(sigma_obs, 1.0)), delta_phi_rad_(static_cast(r.delta_phi) * M_PI / 180.0), lp_(SafeInv(static_cast(r.rlp), 1.0)), half_wedge_rad_(wedge_deg * M_PI / 180.0 / 2.0), c1_(r.zeta / std::sqrt(2.0)), partiality_(r.partiality), refine_partiality_(refine_partiality) {} template bool operator()(const T* const k, const T* const mosaicity_rad, const T* const Itrue, T* residual) const { T partiality; if (refine_partiality_ && mosaicity_rad[0] != 0.0) { const T arg_plus = T(delta_phi_rad_ + half_wedge_rad_) * T(c1_) / mosaicity_rad[0]; const T arg_minus = T(delta_phi_rad_ - half_wedge_rad_) * T(c1_) / mosaicity_rad[0]; partiality = (ceres::erf(arg_plus) - ceres::erf(arg_minus)) / T(2.0); } else partiality = T(partiality_); const T Ipred = k[0] * partiality * T(lp_) * Itrue[0]; residual[0] = (Ipred - T(Iobs_)) * T(inv_sigma_); return true; } double Iobs_; double inv_sigma_; double delta_phi_rad_; double lp_; double half_wedge_rad_; double c1_; double partiality_; bool refine_partiality_; }; struct ScaleRegularizationResidual { explicit ScaleRegularizationResidual(double sigma_k) : inv_sigma_(SafeInv(sigma_k, 1.0)) {} template bool operator()(const T* const k, T* residual) const { residual[0] = (k[0] - T(1.0)) * T(inv_sigma_); return true; } double inv_sigma_; }; } // namespace ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& observations, const ScaleMergeOptions& opt) { ScaleMergeResult out; struct ObsRef { const Reflection* r = nullptr; int img_id = 0; int img_slot = -1; int hkl_slot = -1; double sigma = 0.0; }; std::vector obs; obs.reserve(observations.size()); std::unordered_map imgIdToSlot; imgIdToSlot.reserve(256); std::unordered_map hklToSlot; hklToSlot.reserve(observations.size()); for (const auto& r : observations) { const double d = SafeD(r.d); if (!std::isfinite(d)) continue; if (!std::isfinite(r.I)) continue; if (!std::isfinite(r.zeta) || r.zeta <= 0.0f) continue; if (!std::isfinite(r.rlp) || r.rlp == 0.0f) continue; const double sigma = SafeSigma(static_cast(r.sigma), opt.min_sigma); const int img_id = RoundImageId(r.image_number, opt.image_number_rounding); int img_slot; { auto it = imgIdToSlot.find(img_id); if (it == imgIdToSlot.end()) { img_slot = static_cast(imgIdToSlot.size()); imgIdToSlot.emplace(img_id, img_slot); } else { img_slot = it->second; } } int hkl_slot; try { const HKLKey key = CanonicalizeHKLKey(r, opt); auto it = hklToSlot.find(key); if (it == hklToSlot.end()) { hkl_slot = static_cast(hklToSlot.size()); hklToSlot.emplace(key, hkl_slot); } else { hkl_slot = it->second; } } catch (...) { continue; } ObsRef o; o.r = &r; o.img_id = img_id; o.img_slot = img_slot; o.hkl_slot = hkl_slot; o.sigma = sigma; obs.push_back(o); } const int nimg = static_cast(imgIdToSlot.size()); const int nhkl = static_cast(hklToSlot.size()); out.image_scale_k.assign(nimg, 1.0); out.image_ids.assign(nimg, 0); for (const auto& kv : imgIdToSlot) { out.image_ids[kv.second] = kv.first; } std::vector k(nimg, 1.0); std::vector Itrue(nhkl, 0.0); auto deg2rad = [](double deg) { return deg * M_PI / 180.0; }; // Mosaicity: always per-image std::vector mosaicity_rad(nimg, deg2rad(opt.mosaicity_init_deg)); // Initialize Itrue from per-HKL median of observed intensities { std::vector> per_hkl_I(nhkl); for (const auto& o : obs) { per_hkl_I[o.hkl_slot].push_back(static_cast(o.r->I)); } for (int h = 0; h < nhkl; ++h) { auto& v = per_hkl_I[h]; if (v.empty()) { Itrue[h] = std::max(opt.min_sigma, 1e-6); continue; } std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); double med = v[v.size() / 2]; if (!std::isfinite(med) || med <= opt.min_sigma) med = opt.min_sigma; Itrue[h] = med; } } ceres::Problem problem; const bool refine_partiality = opt.wedge_deg > 0.0; for (const auto& o : obs) { auto* cost = new ceres::AutoDiffCostFunction( new IntensityResidual(*o.r, o.sigma, opt.wedge_deg, refine_partiality)); problem.AddResidualBlock(cost, nullptr, // no loss function &k[o.img_slot], &mosaicity_rad[o.img_slot], &Itrue[o.hkl_slot]); } // Optional Kabsch-like regularization for k if (opt.regularize_scale_to_one) { for (int i = 0; i < nimg; ++i) { auto* rcost = new ceres::AutoDiffCostFunction( new ScaleRegularizationResidual(opt.scale_regularization_sigma)); problem.AddResidualBlock(rcost, nullptr, &k[i]); } } // Fix gauge freedom if (opt.fix_first_image_scale && nimg > 0) { k[0] = 1.0; problem.SetParameterBlockConstant(&k[0]); } // Mosaicity refinement + bounds if (!opt.refine_mosaicity) { for (int i = 0; i < nimg; ++i) problem.SetParameterBlockConstant(&mosaicity_rad[i]); } else { const double min_rad = deg2rad(opt.mosaicity_min_deg); const double max_rad = deg2rad(opt.mosaicity_max_deg); for (int i = 0; i < nimg; ++i) { problem.SetParameterLowerBound(&mosaicity_rad[i], 0, min_rad); problem.SetParameterUpperBound(&mosaicity_rad[i], 0, max_rad); } } // Force k[i] > 0 for all images (scale factors must be positive) { constexpr double k_floor = 1e-8; for (int i = 0; i < nimg; ++i) { if (!(opt.fix_first_image_scale && i == 0)) { problem.SetParameterLowerBound(&k[i], 0, k_floor); } } } ceres::Solver::Options options; options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY; options.minimizer_progress_to_stdout = false; options.logging_type = ceres::LoggingType::SILENT; options.max_num_iterations = opt.max_num_iterations; options.max_solver_time_in_seconds = opt.max_solver_time_s; ceres::Solver::Summary summary; ceres::Solve(options, &problem, &summary); // --- Export per-image results --- for (int i = 0; i < nimg; ++i) out.image_scale_k[i] = k[i]; out.mosaicity_deg.resize(nimg); for (int i = 0; i < nimg; ++i) out.mosaicity_deg[i] = mosaicity_rad[i] * 180.0 / M_PI; // --- Compute goodness-of-fit (reduced chi-squared) --- const int n_obs = static_cast(obs.size()); // Count free parameters: nhkl Itrue + per-image (k + mosaicity) minus fixed ones int n_params = nhkl; for (int i = 0; i < nimg; ++i) { if (!(opt.fix_first_image_scale && i == 0)) n_params += 1; // k if (opt.refine_mosaicity) n_params += 1; // mosaicity } const double half_wedge_rad = opt.wedge_deg * M_PI / 180.0 / 2.0; double sum_r2 = 0.0; for (const auto& o : obs) { const int i = o.img_slot; const int h = o.hkl_slot; const double ki = k[i]; const double mosa_i = mosaicity_rad[i]; const double c2 = o.r->zeta / (std::sqrt(2.0) * mosa_i); const double delta_phi_rad = static_cast(o.r->delta_phi) * M_PI / 180.0; const double partiality = refine_partiality ? (std::erf((delta_phi_rad + half_wedge_rad) * c2) - std::erf((delta_phi_rad - half_wedge_rad) * c2)) / 2.0 : o.r->partiality; const double lp = SafeInv(o.r->rlp, 1.0); const double Ipred = ki * partiality * lp * Itrue[h]; const double resid = (Ipred - static_cast(o.r->I)) / o.sigma; sum_r2 += resid * resid; } const double gof2 = (n_obs > n_params) ? sum_r2 / static_cast(n_obs - n_params) : 1.0; out.gof2 = gof2; // --- Covariance: σ(I_true) from (J^T W J)^{-1} scaled by GoF² --- std::vector slotToHKL(nhkl); for (const auto& kv : hklToSlot) { slotToHKL[kv.second] = kv.first; } out.merged.resize(nhkl); for (int h = 0; h < nhkl; ++h) { out.merged[h].h = slotToHKL[h].h; out.merged[h].k = slotToHKL[h].k; out.merged[h].l = slotToHKL[h].l; out.merged[h].I = Itrue[h]; out.merged[h].sigma = std::numeric_limits::quiet_NaN(); out.merged[h].d = 0.0; } // Populate d from median of observations per HKL { std::vector> per_hkl_d(nhkl); for (const auto& o : obs) { const double d_val = static_cast(o.r->d); if (std::isfinite(d_val) && d_val > 0.0) per_hkl_d[o.hkl_slot].push_back(d_val); } for (int h = 0; h < nhkl; ++h) { auto& v = per_hkl_d[h]; if (!v.empty()) { std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); out.merged[h].d = v[v.size() / 2]; } } } ceres::Covariance::Options cov_options; cov_options.algorithm_type = ceres::SPARSE_QR; ceres::Covariance covariance(cov_options); std::vector> covariance_blocks; covariance_blocks.reserve(nhkl); for (int h = 0; h < nhkl; ++h) { covariance_blocks.emplace_back(&Itrue[h], &Itrue[h]); } if (covariance.Compute(covariance_blocks, &problem)) { for (int h = 0; h < nhkl; ++h) { double var_I = 0.0; covariance.GetCovarianceBlock(&Itrue[h], &Itrue[h], &var_I); // σ(I) = I * sqrt( var(I) * GoF² ) out.merged[h].sigma = std::sqrt(var_I * gof2); } } return out; }