diff --git a/common/Reflection.h b/common/Reflection.h index 219c2763..1f755dd0 100644 --- a/common/Reflection.h +++ b/common/Reflection.h @@ -6,6 +6,8 @@ #include #include +#include + #include "SpotToSave.h" struct Reflection { @@ -29,11 +31,13 @@ struct Reflection { }; struct MergedReflection { - int32_t h; - int32_t k; - int32_t l; - float I; - float sigma; + int32_t h = 0; + int32_t k = 0; + int32_t l = 0; + float I = NAN; + float sigma = NAN; + float I_anom[2] = {NAN, NAN}; + float sigma_anom[2] = {NAN, NAN}; float d = 0.0; }; diff --git a/image_analysis/IndexAndRefine.cpp b/image_analysis/IndexAndRefine.cpp index 253d338c..9b749e89 100644 --- a/image_analysis/IndexAndRefine.cpp +++ b/image_analysis/IndexAndRefine.cpp @@ -281,11 +281,14 @@ void IndexAndRefine::ScaleImage(size_t n, ScaleOnTheFly &scaling, ScalingResult } ScalingResult IndexAndRefine::ScaleAllImages(size_t nthreads) { - auto merge_result = MergeReflections(reflections, experiment); - ScaleOnTheFly scaling(merge_result.merged, experiment); + auto merge_result = MergeAll(experiment, reflections); + ScaleOnTheFly scaling(merge_result, experiment); return scaling.Scale(reflections, mosaicity, nthreads); } -MergeResult IndexAndRefine::Merge() { - return MergeReflections(reflections, experiment); +MergeResult IndexAndRefine::Merge() const { + MergeResult out; + out.merged = MergeAll(experiment, reflections); + out.statistics = MergeStats(experiment, out.merged, reflections); + return out; } diff --git a/image_analysis/IndexAndRefine.h b/image_analysis/IndexAndRefine.h index 6aad02fa..f80335e3 100644 --- a/image_analysis/IndexAndRefine.h +++ b/image_analysis/IndexAndRefine.h @@ -65,7 +65,7 @@ public: void ProcessImage(DataMessage &msg, const SpotFindingSettings &settings, const CompressedImage &image, BraggPrediction &prediction); ScalingResult ScaleAllImages(size_t nthreads = 0); - MergeResult Merge(); + MergeResult Merge() const; std::optional Finalize(); }; diff --git a/image_analysis/scale_merge/HKLKey.cpp b/image_analysis/scale_merge/HKLKey.cpp index 3f05db05..0f72d0ce 100644 --- a/image_analysis/scale_merge/HKLKey.cpp +++ b/image_analysis/scale_merge/HKLKey.cpp @@ -6,6 +6,26 @@ #include "HKLKey.h" #include "gemmi/symmetry.hpp" +uint64_t HKLKey::pack_no_anom() const { + constexpr int bits = 21; + constexpr int bias = 1 << (bits - 1); // 1,048,576 + constexpr int max_value = bias - 1; + constexpr int min_value = -bias; + constexpr std::uint64_t mask = (1ULL << bits) - 1ULL; + + if (h < min_value || h > max_value || + k < min_value || k > max_value || + l < min_value || l > max_value) { + throw std::out_of_range("HKL index outside packable range"); + } + + const std::uint64_t hh = static_cast(h + bias) & mask; + const std::uint64_t kk = static_cast(k + bias) & mask; + const std::uint64_t ll = static_cast(l + bias) & mask; + + return hh | (kk << bits) | (ll << (2 * bits)); +} + HKLKeyGenerator::HKLKeyGenerator(bool merge_friedel, int32_t space_group_number) : HKLKeyGenerator(merge_friedel, *gemmi::find_spacegroup_by_number(space_group_number)) { } @@ -57,6 +77,8 @@ bool AcceptReflection(const Reflection &r, std::optional d_min_limit) { return false; if (!std::isfinite(r.rlp) || r.rlp == 0.0f) return false; + if (!std::isfinite(r.sigma) || r.sigma <= 0.0) + return false; return true; } @@ -69,5 +91,8 @@ bool AcceptReflection(const Reflection &r, double d_min_limit) { return false; if (!std::isfinite(r.rlp) || r.rlp == 0.0f) return false; + if (!std::isfinite(r.sigma) || r.sigma <= 0.0) + return false; return true; } + diff --git a/image_analysis/scale_merge/HKLKey.h b/image_analysis/scale_merge/HKLKey.h index 8084d8e9..74af3aff 100644 --- a/image_analysis/scale_merge/HKLKey.h +++ b/image_analysis/scale_merge/HKLKey.h @@ -3,6 +3,10 @@ #pragma once +#include +#include +#include + #include #include "../../common/Reflection.h" #include "gemmi/symmetry.hpp" @@ -19,6 +23,8 @@ struct HKLKey { bool operator==(const HKLKey &o) const { return h == o.h && k == o.k && l == o.l && plus == o.plus; } + + [[nodiscard]] uint64_t pack_no_anom() const; }; class HKLKeyGenerator { diff --git a/image_analysis/scale_merge/Merge.cpp b/image_analysis/scale_merge/Merge.cpp index e5ef98b1..2cad3d64 100644 --- a/image_analysis/scale_merge/Merge.cpp +++ b/image_analysis/scale_merge/Merge.cpp @@ -6,267 +6,203 @@ #include #include #include -#include -#include -#include -#include +#include #include "../../common/ResolutionShells.h" #include "HKLKey.h" -namespace { - struct Obs { - const Reflection *r = nullptr; - int hkl = -1; - double sigma = 1.0; +std::vector MergeAll(const DiffractionExperiment &x, const std::vector > &reflections) { + auto scaling_settings = x.GetScalingSettings(); + HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1)); + const std::optional high_resolution_limit = scaling_settings.GetHighResolutionLimit_A(); + + struct Accum { + // Keep anomalous + / - together, but separate + int32_t h = 0; + int32_t k = 0; + int32_t l = 0; + float d = NAN; + double sum_wI = 0.0; + double sum_w = 0.0; + double sum_wI_anom[2] = {0.0, 0.0}; + double sum_w_anom[2] = {0.0, 0.0}; + bool present[2] = {false, false}; }; - double SafeSigma(double sigma) { - // TODO: Think about safe sigma... - if (!std::isfinite(sigma) || sigma <= 1e-3) - return 1e-3; - return sigma; - } + std::unordered_map acc; - std::vector BuildObservations(const std::vector> &observations, - const DiffractionExperiment &x, - std::vector &slot_to_hkl) { - std::map hkl_to_slot; - std::vector out; - - size_t nrefl = 0; - for (const auto &image: observations) - nrefl += image.size(); - out.reserve(nrefl); - - auto scaling_settings = x.GetScalingSettings(); - - HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1) ); - - for (const auto &image: observations) { - for (const auto &r: image) { - - if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) - continue; - if (!AcceptReflection(r, scaling_settings.GetHighResolutionLimit_A())) - continue; - - HKLKey key = key_generator(r); - - auto it = hkl_to_slot.find(key); - if (it == hkl_to_slot.end()) { - const int slot = static_cast(slot_to_hkl.size()); - it = hkl_to_slot.emplace(key, slot).first; - slot_to_hkl.push_back(key); - } - - out.push_back({ - .r = &r, - .hkl = it->second, - .sigma = SafeSigma(r.sigma) - }); - } - } - - return out; - } - - MergeResult Merge(const std::vector &slot_to_hkl, const std::vector &obs) { - struct Accum { - double sum_wI = 0.0; - double sum_w = 0.0; - double sum_wsigma2 = 0.0; - std::vector d_values; - }; - - std::vector acc(slot_to_hkl.size()); - - for (const auto &o: obs) { - const double I_corr = static_cast(o.r->I) * o.r->scaling_correction; - const double sigma_corr = o.sigma * o.r->scaling_correction; + for (const auto &image: reflections) { + for (const auto &r: image) { + if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) + continue; + if (!AcceptReflection(r, high_resolution_limit)) + continue; + const float I_corr = r.I * r.scaling_correction; + const float sigma_corr = r.sigma * r.scaling_correction; if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0) continue; - // TODO: Figure out right way to handle this - const double w = 1.0 / (sigma_corr * sigma_corr); + auto hkl = key_generator(r); + auto hkl_key = hkl.pack_no_anom(); - auto &a = acc[o.hkl]; - a.sum_wI += w * I_corr; - a.sum_w += w; - a.sum_wsigma2 += w * w * sigma_corr * sigma_corr; - if (std::isfinite(o.r->d) && o.r->d > 0.0f) - a.d_values.push_back(o.r->d); + auto it = acc.find(hkl_key); + if (it == acc.end()) + it = acc.emplace(hkl_key, Accum{ + .h = hkl.h, + .k = hkl.k, + .l = hkl.l + }).first; + int solution = hkl.plus ? 0 : 1; + + const float w = 1.0f / (sigma_corr * sigma_corr); + it->second.sum_wI += w * I_corr; + it->second.sum_w += w; + it->second.sum_wI_anom[solution] += w * I_corr; + it->second.sum_w_anom[solution] += w; + + it->second.present[solution] = true; + if (!std::isfinite(it->second.d) && std::isfinite(r.d) && r.d > 0.0f) + it->second.d = r.d; } - - MergeResult out; - out.merged.resize(slot_to_hkl.size()); - - for (int h = 0; h < static_cast(slot_to_hkl.size()); ++h) { - auto &a = acc[h]; - if (a.sum_w <= 0.0) - continue; - out.merged[h].h = slot_to_hkl[h].h; - out.merged[h].k = slot_to_hkl[h].k; - out.merged[h].l = slot_to_hkl[h].l; - out.merged[h].I = a.sum_wI / a.sum_w; - out.merged[h].sigma = std::sqrt(a.sum_wsigma2) / a.sum_w; - if (a.d_values.empty()) - continue; - - std::ranges::nth_element(a.d_values, - a.d_values.begin() + static_cast(a.d_values.size() / 2)); - out.merged[h].d = a.d_values[a.d_values.size() / 2]; - } - return out; } - void Stats(const DiffractionExperiment &x, MergeResult &out, const std::vector &obs) { - constexpr int n_shells = 10; + std::vector out; + out.reserve(acc.size()); + for (const auto &[key, accum]: acc) { + if (accum.sum_w <= 0.0) + continue; - float d_min = std::numeric_limits::max(); - float d_max = 0.0f; + float I_anom[2] = {NAN, NAN}; + float sigma_anom[2] = {NAN, NAN}; - auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A(); - for (const auto &m: out.merged) { - const auto d = static_cast(m.d); - if (!std::isfinite(d) || d <= 0.0f) - continue; - if (d_min_limit_A && d < d_min_limit_A) - continue; - - d_min = std::min(d_min, d); - d_max = std::max(d_max, d); - } - - if (!(d_min < d_max && d_min > 0.0f)) - return; - - const float d_min_pad = d_min * 0.999f; - const float d_max_pad = d_max * 1.001f; - - ResolutionShells shells(d_min_pad, d_max_pad, n_shells); - const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq(); - const auto shell_min_res = shells.GetShellMinRes(); - - std::vector hkl_shell(out.merged.size(), -1); - for (int h = 0; h < static_cast(out.merged.size()); ++h) { - auto s = shells.GetShell(out.merged[h].d); - if (s) - hkl_shell[h] = *s; - } - - struct PerHKL { - double sum_I = 0.0; - std::vector I; - }; - - std::vector per_hkl(out.merged.size()); - - for (const auto &o: obs) { - if (o.hkl < 0 || o.hkl >= static_cast(per_hkl.size())) - continue; - if (hkl_shell[o.hkl] < 0) - continue; - - const double I_corr = static_cast(o.r->I) * o.r->scaling_correction; - if (!std::isfinite(I_corr)) - continue; - - per_hkl[o.hkl].sum_I += I_corr; - per_hkl[o.hkl].I.push_back(I_corr); - } - - struct ShellAccum { - int total_obs = 0; - std::unordered_set unique; - double rmeas_num = 0.0; - double rmeas_den = 0.0; - double sum_i_over_sigma = 0.0; - int n_i_over_sigma = 0; - }; - - std::vector acc(n_shells); - - for (int h = 0; h < static_cast(per_hkl.size()); ++h) { - const int s = hkl_shell[h]; - if (s < 0 || per_hkl[h].I.empty()) - continue; - - auto &sa = acc[s]; - const auto &ph = per_hkl[h]; - const int n = static_cast(ph.I.size()); - const double mean_I = ph.sum_I / n; - - sa.unique.insert(h); - sa.total_obs += n; - - if (n >= 2) { - double sum_abs_dev = 0.0; - for (double I: ph.I) - sum_abs_dev += std::abs(I - mean_I); - - sa.rmeas_num += std::sqrt(static_cast(n) / (n - 1.0)) * sum_abs_dev; - } - - for (double I: ph.I) - sa.rmeas_den += std::abs(I); - - if (out.merged[h].sigma > 0.0) { - sa.sum_i_over_sigma += out.merged[h].I / out.merged[h].sigma; - ++sa.n_i_over_sigma; + for (int i = 0; i < 2; ++i) { + if (accum.present[i] && accum.sum_w_anom[i] > 0.0) { + I_anom[i] = static_cast(accum.sum_wI_anom[i] / accum.sum_w_anom[i]); + sigma_anom[i] = 1.0f / std::sqrt(static_cast(accum.sum_w_anom[i])); } } - - out.statistics.shells.resize(n_shells); - - for (int s = 0; s < n_shells; ++s) { - const auto &sa = acc[s]; - auto &ss = out.statistics.shells[s]; - - ss.mean_one_over_d2 = shell_mean_1_d2[s]; - ss.d_min = shell_min_res[s]; - ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1]; - ss.total_observations = sa.total_obs; - ss.unique_reflections = static_cast(sa.unique.size()); - ss.rmeas = sa.rmeas_den > 0.0 ? sa.rmeas_num / sa.rmeas_den : 0.0; - ss.mean_i_over_sigma = sa.n_i_over_sigma > 0 - ? sa.sum_i_over_sigma / sa.n_i_over_sigma - : 0.0; - } - - auto &overall = out.statistics.overall; - overall.d_min = d_min; - overall.d_max = d_max; - - std::unordered_set all_unique; - double rmeas_num = 0.0; - double rmeas_den = 0.0; - double sum_i_over_sigma = 0.0; - int n_i_over_sigma = 0; - - for (const auto &sa: acc) { - overall.total_observations += sa.total_obs; - all_unique.insert(sa.unique.begin(), sa.unique.end()); - rmeas_num += sa.rmeas_num; - rmeas_den += sa.rmeas_den; - sum_i_over_sigma += sa.sum_i_over_sigma; - n_i_over_sigma += sa.n_i_over_sigma; - } - - overall.unique_reflections = static_cast(all_unique.size()); - overall.rmeas = rmeas_den > 0.0 ? rmeas_num / rmeas_den : 0.0; - overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0; + out.emplace_back(MergedReflection{ + .h = accum.h, + .k = accum.k, + .l = accum.l, + .I = static_cast(accum.sum_wI / accum.sum_w), + .sigma = 1.0f / std::sqrt(static_cast(accum.sum_w)), + .I_anom = {I_anom[0], I_anom[1]}, + .sigma_anom = {sigma_anom[0], sigma_anom[1]}, + .d = accum.d + }); } + return out; } -MergeResult MergeReflections(const std::vector > &observations, - const DiffractionExperiment &x) { - std::vector slot_to_hkl; - auto obs = BuildObservations(observations, x, slot_to_hkl); - auto out = Merge(slot_to_hkl, obs); - Stats(x, out, obs); +MergeStatistics MergeStats(const DiffractionExperiment &x, + const std::vector &merged, + const std::vector > &reflections) { + + constexpr int n_shells = 10; + + float d_min = std::numeric_limits::max(); + float d_max = 0.0f; + + auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A(); + for (const auto &m: merged) { + if (!std::isfinite(m.d) || m.d <= 0.0f) + continue; + if (d_min_limit_A && m.d < d_min_limit_A) + continue; + + d_min = std::min(d_min, m.d); + d_max = std::max(d_max, m.d); + } + + if (!(d_min < d_max && d_min > 0.0f)) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Error in resolution calculation"); + + const float d_min_pad = d_min * 0.999f; + const float d_max_pad = d_max * 1.001f; + + ResolutionShells shells(d_min_pad, d_max_pad, n_shells); + const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq(); + const auto shell_min_res = shells.GetShellMinRes(); + + struct ShellAccum { + int total_obs = 0; + int unique = 0; + double sum_i_over_sigma = 0.0; + int n_i_over_sigma = 0; + }; + + std::vector acc(n_shells); + + for (const auto &m: merged) { + const auto shell = shells.GetShell(m.d); + if (!shell.has_value()) + continue; + + const int s = *shell; + if (s >= 0 && s < n_shells) { + if (std::isfinite(m.I) && std::isfinite(m.sigma) && m.sigma > 0.0) { + acc[s].unique++; + acc[s].sum_i_over_sigma += m.I / m.sigma; + ++acc[s].n_i_over_sigma; + } + + } + } + + for (const auto &image: reflections) { + for (const auto &r: image) { + if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction)) + continue; + + if (!AcceptReflection(r, d_min_limit_A)) + continue; + + const auto shell = shells.GetShell(r.d); + if (!shell.has_value()) + continue; + const int s = *shell; + if (s >= 0 && s < n_shells) + acc[s].total_obs++; + } + } + + MergeStatistics out; + out.shells.resize(n_shells); + + for (int s = 0; s < n_shells; ++s) { + const auto &sa = acc[s]; + auto &ss = out.shells[s]; + + ss.mean_one_over_d2 = shell_mean_1_d2[s]; + ss.d_min = shell_min_res[s]; + ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1]; + ss.total_observations = sa.total_obs; + ss.unique_reflections = sa.unique; + ss.mean_i_over_sigma = sa.n_i_over_sigma > 0 + ? sa.sum_i_over_sigma / sa.n_i_over_sigma + : 0.0; + } + + auto &overall = out.overall; + overall.d_min = d_min; + overall.d_max = d_max; + + int all_unique = 0; + double sum_i_over_sigma = 0.0; + int n_i_over_sigma = 0; + + for (const auto &sa: acc) { + overall.total_observations += sa.total_obs; + all_unique += sa.unique; + sum_i_over_sigma += sa.sum_i_over_sigma; + n_i_over_sigma += sa.n_i_over_sigma; + } + + overall.unique_reflections = all_unique; + overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0; return out; } diff --git a/image_analysis/scale_merge/Merge.h b/image_analysis/scale_merge/Merge.h index 95908919..3ed79f83 100644 --- a/image_analysis/scale_merge/Merge.h +++ b/image_analysis/scale_merge/Merge.h @@ -18,7 +18,6 @@ struct MergeStatisticsShell { float mean_one_over_d2 = 0; int total_observations = 0; int unique_reflections = 0; - double rmeas = 0.0; double mean_i_over_sigma = 0.0; }; @@ -33,5 +32,9 @@ struct MergeResult { MergeStatistics statistics; }; -MergeResult MergeReflections(const std::vector > &observations, - const DiffractionExperiment &x); +std::vector MergeAll(const DiffractionExperiment &x, + const std::vector > &reflections); + +MergeStatistics MergeStats(const DiffractionExperiment &x, + const std::vector &merged, + const std::vector > &reflections); \ No newline at end of file