diff --git a/image_analysis/CMakeLists.txt b/image_analysis/CMakeLists.txt index b9d3f310..517844ec 100644 --- a/image_analysis/CMakeLists.txt +++ b/image_analysis/CMakeLists.txt @@ -1,3 +1,23 @@ +SET(MINIGLOG ON) +SET(PROVIDE_UNINSTALL_TARGET OFF) +SET(USE_CUDA OFF) +SET(EIGENSPARSE ON) +SET(LAPACK OFF) +SET(SUITESPARSE OFF) + +# Prevent MKL/BLAS/LAPACK from being found (guarantees no MKL) +SET(CMAKE_DISABLE_FIND_PACKAGE_MKL TRUE) +SET(CERES_THREADING_MODEL "OFF") + +FetchContent_Declare( + ceres + GIT_REPOSITORY https://github.com/ceres-solver/ceres-solver + GIT_TAG 0c70ed3 + EXCLUDE_FROM_ALL +) + +FetchContent_MakeAvailable(ceres) + ADD_LIBRARY(JFJochImageAnalysis STATIC MXAnalysisWithoutFPGA.cpp MXAnalysisWithoutFPGA.h @@ -18,5 +38,6 @@ ADD_SUBDIRECTORY(bragg_prediction) ADD_SUBDIRECTORY(indexing) ADD_SUBDIRECTORY(geom_refinement) ADD_SUBDIRECTORY(lattice_search) +ADD_SUBDIRECTORY(scale_merge) TARGET_LINK_LIBRARIES(JFJochImageAnalysis JFJochBraggPrediction JFJochBraggIntegration JFJochLatticeSearch JFJochIndexing JFJochSpotFinding JFJochCommon JFJochGeomRefinement gemmi) diff --git a/image_analysis/geom_refinement/CMakeLists.txt b/image_analysis/geom_refinement/CMakeLists.txt index e32becb5..a81b8b54 100644 --- a/image_analysis/geom_refinement/CMakeLists.txt +++ b/image_analysis/geom_refinement/CMakeLists.txt @@ -1,22 +1,3 @@ -SET(MINIGLOG ON) -SET(PROVIDE_UNINSTALL_TARGET OFF) -SET(USE_CUDA OFF) -SET(EIGENSPARSE ON) -SET(LAPACK OFF) -SET(SUITESPARSE OFF) - -# Prevent MKL/BLAS/LAPACK from being found (guarantees no MKL) -SET(CMAKE_DISABLE_FIND_PACKAGE_MKL TRUE) -SET(CERES_THREADING_MODEL "OFF") - -FetchContent_Declare( - ceres - GIT_REPOSITORY https://github.com/ceres-solver/ceres-solver - GIT_TAG 0c70ed3 - EXCLUDE_FROM_ALL -) - -FetchContent_MakeAvailable(ceres) ADD_LIBRARY(JFJochGeomRefinement STATIC RingOptimizer.cpp diff --git a/image_analysis/scale_merge/CMakeLists.txt b/image_analysis/scale_merge/CMakeLists.txt new file mode 100644 index 00000000..8b8b99d7 --- /dev/null +++ b/image_analysis/scale_merge/CMakeLists.txt @@ -0,0 +1,2 @@ +ADD_LIBRARY(JFJochScaleMerge ScaleAndMerge.cpp ScaleAndMerge.h) +TARGET_LINK_LIBRARIES(JFJochScaleMerge Ceres::ceres Eigen3::Eigen JFJochCommon) \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAndMerge.cpp b/image_analysis/scale_merge/ScaleAndMerge.cpp new file mode 100644 index 00000000..9d134fd2 --- /dev/null +++ b/image_analysis/scale_merge/ScaleAndMerge.cpp @@ -0,0 +1,347 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include "ScaleAndMerge.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +struct HKLKey { + int64_t h = 0; + int64_t k = 0; + int64_t l = 0; + bool is_positive = true; // only relevant if opt.merge_friedel == false + + bool operator==(const HKLKey& o) const noexcept { + return h == o.h && k == o.k && l == o.l && is_positive == o.is_positive; + } +}; + +struct HKLKeyHash { + size_t operator()(const HKLKey& key) const noexcept { + auto mix = [](uint64_t x) { + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return x; + }; + const uint64_t a = static_cast(key.h); + const uint64_t b = static_cast(key.k); + const uint64_t c = static_cast(key.l); + const uint64_t d = static_cast(key.is_positive ? 1 : 0); + return static_cast(mix(a) ^ (mix(b) << 1) ^ (mix(c) << 2) ^ (mix(d) << 3)); + } +}; + +inline int RoundImageId(float image_number, double rounding_step) { + if (!(rounding_step > 0.0)) + rounding_step = 1.0; + const double x = static_cast(image_number) / rounding_step; + const double r = std::round(x) * rounding_step; + return static_cast(std::llround(r / rounding_step)); +} + +inline double SafeSigma(double s, double min_sigma) { + if (!std::isfinite(s) || s <= 0.0) + return min_sigma; + return std::max(s, min_sigma); +} + +inline double SafeD(double d) { + if (!std::isfinite(d) || d <= 0.0) + return std::numeric_limits::quiet_NaN(); + return d; +} + +inline int SafeToInt(int64_t x) { + if (x < std::numeric_limits::min() || x > std::numeric_limits::max()) + throw std::out_of_range("HKL index out of int range for Gemmi"); + return static_cast(x); +} + +// Canonicalize HKL according to Gemmi Reciprocal ASU if space group is provided. +// If merge_friedel==true -> Friedel mates collapse (key.is_positive always true). +// If merge_friedel==false -> keep I+ vs I- separate by key.is_positive. +inline HKLKey CanonicalizeHKLKey(const Reflection& r, const ScaleMergeOptions& opt) { + HKLKey key{}; + key.h = r.h; + key.k = r.k; + key.l = r.l; + key.is_positive = true; + + // If no SG provided, we can still optionally separate Friedel mates deterministically. + if (!opt.space_group.has_value()) { + if (!opt.merge_friedel) { + const HKLKey neg{-r.h, -r.k, -r.l, true}; + const bool pos = std::tie(key.h, key.k, key.l) >= std::tie(neg.h, neg.k, neg.l); + if (!pos) { + key.h = -key.h; + key.k = -key.k; + key.l = -key.l; + key.is_positive = false; + } + } + return key; + } + + const gemmi::SpaceGroup& sg = *opt.space_group; + const gemmi::GroupOps gops = sg.operations(); + const gemmi::ReciprocalAsu rasu(&sg); + + const gemmi::Op::Miller in{{SafeToInt(r.h), SafeToInt(r.k), SafeToInt(r.l)}}; + const auto [asu_hkl, sign_plus] = rasu.to_asu_sign(in, gops); + + key.h = asu_hkl[0]; + key.k = asu_hkl[1]; + key.l = asu_hkl[2]; + key.is_positive = opt.merge_friedel ? true : sign_plus; + return key; +} + +struct IntensityResidual { + IntensityResidual(double Iobs, double sigma, double s2, bool refine_b) + : Iobs_(Iobs), inv_sigma_(1.0 / sigma), s2_(s2), refine_b_(refine_b) {} + + template + bool operator()(const T* const log_k, + const T* const b, + const T* const log_Ihkl, + T* residual) const { + const T k = ceres::exp(log_k[0]); + const T Ihkl = ceres::exp(log_Ihkl[0]); + + T atten = T(1); + if (refine_b_) { + // I_pred = k * exp(-B*s^2) * I_hkl + atten = ceres::exp(-b[0] * T(s2_)); + } + + const T Ipred = k * atten * Ihkl; + residual[0] = (Ipred - T(Iobs_)) * T(inv_sigma_); + return true; + } + + double Iobs_; + double inv_sigma_; + double s2_; + bool refine_b_; +}; + +} // namespace + +ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& observations, + const ScaleMergeOptions& opt) { + ScaleMergeResult out; + + struct ObsRef { + const Reflection* r = nullptr; + int img_id = 0; // rounded/quantized image id + int img_slot = -1; // compact [0..nimg) + int hkl_slot = -1; // compact [0..nhkl) + double s2 = 0.0; + double sigma = 0.0; + }; + + std::vector obs; + obs.reserve(observations.size()); + + std::unordered_map imgIdToSlot; + imgIdToSlot.reserve(256); + + std::unordered_map hklToSlot; + hklToSlot.reserve(observations.size()); + + for (const auto& r : observations) { + const double d = SafeD(r.d); + if (!std::isfinite(d)) + continue; + if (!std::isfinite(r.I)) + continue; + + const double sigma = SafeSigma(static_cast(r.sigma), opt.min_sigma); + const double s2 = 1.0 / (d * d); + + const int img_id = RoundImageId(r.image_number, opt.image_number_rounding); + + int img_slot; + { + auto it = imgIdToSlot.find(img_id); + if (it == imgIdToSlot.end()) { + img_slot = static_cast(imgIdToSlot.size()); + imgIdToSlot.emplace(img_id, img_slot); + } else { + img_slot = it->second; + } + } + + int hkl_slot; + try { + const HKLKey key = CanonicalizeHKLKey(r, opt); + auto it = hklToSlot.find(key); + if (it == hklToSlot.end()) { + hkl_slot = static_cast(hklToSlot.size()); + hklToSlot.emplace(key, hkl_slot); + } else { + hkl_slot = it->second; + } + } catch (...) { + continue; // skip problematic HKL + } + + ObsRef o; + o.r = &r; + o.img_id = img_id; + o.img_slot = img_slot; + o.hkl_slot = hkl_slot; + o.s2 = s2; + o.sigma = sigma; + obs.push_back(o); + } + + const int nimg = static_cast(imgIdToSlot.size()); + const int nhkl = static_cast(hklToSlot.size()); + + out.image_scale_k.assign(nimg, 1.0); + out.image_b_factor.assign(nimg, 0.0); + out.image_ids.assign(nimg, 0); + + for (const auto& kv : imgIdToSlot) { + out.image_ids[kv.second] = kv.first; + } + + std::vector log_k(nimg, 0.0); + std::vector b(nimg, 0.0); + std::vector log_Ihkl(nhkl, 0.0); + + // Initialize Ihkl from per-HKL median of observed intensities. + { + std::vector> per_hkl_I(nhkl); + for (const auto& o : obs) { + per_hkl_I[o.hkl_slot].push_back(static_cast(o.r->I)); + } + for (int h = 0; h < nhkl; ++h) { + auto& v = per_hkl_I[h]; + if (v.empty()) { + log_Ihkl[h] = std::log(std::max(opt.min_sigma, 1e-6)); + continue; + } + std::nth_element(v.begin(), v.begin() + static_cast(v.size() / 2), v.end()); + double med = v[v.size() / 2]; + if (!std::isfinite(med) || med <= opt.min_sigma) + med = opt.min_sigma; + log_Ihkl[h] = std::log(med); + } + } + + ceres::Problem problem; + + std::unique_ptr loss; + if (opt.use_huber_loss) { + loss = std::make_unique(opt.huber_delta); + } + + for (const auto& o : obs) { + const double Iobs = static_cast(o.r->I); + + auto* cost = new ceres::AutoDiffCostFunction( + new IntensityResidual(Iobs, o.sigma, o.s2, opt.refine_b_factor)); + + problem.AddResidualBlock(cost, + loss.get(), + &log_k[o.img_slot], + &b[o.img_slot], + &log_Ihkl[o.hkl_slot]); + } + + // Fix gauge freedom: anchor first image scale to 1.0 + if (opt.fix_first_image_scale && nimg > 0) { + log_k[0] = 0.0; + problem.SetParameterBlockConstant(&log_k[0]); + } + + if (!opt.refine_b_factor) { + for (int i = 0; i < nimg; ++i) { + b[i] = 0.0; + problem.SetParameterBlockConstant(&b[i]); + } + } else { + for (int i = 0; i < nimg; ++i) { + if (opt.b_min) + problem.SetParameterLowerBound(&b[i], 0, *opt.b_min); + if (opt.b_max) + problem.SetParameterUpperBound(&b[i], 0, *opt.b_max); + } + } + + ceres::Solver::Options options; + options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY; + options.minimizer_progress_to_stdout = false; + options.logging_type = ceres::LoggingType::SILENT; + options.max_num_iterations = opt.max_num_iterations; + options.max_solver_time_in_seconds = opt.max_solver_time_s; + + ceres::Solver::Summary summary; + ceres::Solve(options, &problem, &summary); + + for (int i = 0; i < nimg; ++i) { + out.image_scale_k[i] = std::exp(log_k[i]); + out.image_b_factor[i] = opt.refine_b_factor ? b[i] : 0.0; + } + + // Reverse maps for merged output + std::vector slotToHKL(nhkl); + for (const auto& kv : hklToSlot) { + slotToHKL[kv.second] = kv.first; + } + + // crude sigma estimate from model residual scatter + std::vector n_per_hkl(nhkl, 0); + std::vector ss_per_hkl(nhkl, 0.0); + + for (const auto& o : obs) { + const int i = o.img_slot; + const int h = o.hkl_slot; + + const double k = std::exp(log_k[i]); + const double atten = opt.refine_b_factor ? std::exp(-b[i] * o.s2) : 1.0; + const double Ihkl = std::exp(log_Ihkl[h]); + + const double Ipred = k * atten * Ihkl; + const double r = (Ipred - static_cast(o.r->I)); + + n_per_hkl[h] += 1; + ss_per_hkl[h] += r * r; + } + + out.merged.reserve(nhkl); + for (int h = 0; h < nhkl; ++h) { + MergedReflection m{}; + m.h = slotToHKL[h].h; + m.k = slotToHKL[h].k; + m.l = slotToHKL[h].l; + m.I = static_cast(std::exp(log_Ihkl[h])); + + if (n_per_hkl[h] >= 2) { + const double rms = std::sqrt(ss_per_hkl[h] / static_cast(n_per_hkl[h] - 1)); + m.sigma = static_cast(rms / std::sqrt(static_cast(n_per_hkl[h]))); + } else { + m.sigma = std::numeric_limits::quiet_NaN(); + } + + out.merged.push_back(m); + } + + return out; +} \ No newline at end of file diff --git a/image_analysis/scale_merge/ScaleAndMerge.h b/image_analysis/scale_merge/ScaleAndMerge.h new file mode 100644 index 00000000..f14658df --- /dev/null +++ b/image_analysis/scale_merge/ScaleAndMerge.h @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#pragma once + +#include +#include +#include + +#include "../../common/Reflection.h" +#include "gemmi/symmetry.hpp" + +struct ScaleMergeOptions { + bool refine_b_factor = true; + + bool use_huber_loss = true; + double huber_delta = 2.0; + + int max_num_iterations = 100; + double max_solver_time_s = 1.0; + + double image_number_rounding = 1.0; + double min_sigma = 1e-3; + + bool fix_first_image_scale = true; + + std::optional b_min = 0.0; + std::optional b_max = 200.0; + + // Symmetry canonicalization of HKL prior to merging/scaling. + // If not set, the routine uses raw HKL as-is. + std::optional space_group; + + // If true, treat Friedel mates as equivalent (merge anomalous pairs). + // If false, keep them separate by including a sign flag in the HKL key. + bool merge_friedel = true; +}; + +struct MergedReflection { + int h; + int k; + int l; + double I; + double sigma; +}; + +struct ScaleMergeResult { + std::vector merged; + std::vector image_scale_k; + std::vector image_b_factor; + std::vector image_ids; +}; + +ScaleMergeResult ScaleAndMergeReflectionsCeres(const std::vector& observations, + const ScaleMergeOptions& opt = {});