diff --git a/image_analysis/scale_merge/SearchSpaceGroup.cpp b/image_analysis/scale_merge/SearchSpaceGroup.cpp index d9ca14ca..ebb72f98 100644 --- a/image_analysis/scale_merge/SearchSpaceGroup.cpp +++ b/image_analysis/scale_merge/SearchSpaceGroup.cpp @@ -1,6 +1,10 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + #include "SearchSpaceGroup.h" #include +#include #include #include #include @@ -51,7 +55,7 @@ namespace { MergedHKLKey CanonicalizeMergedHKL(int h, int k, int l, bool merge_friedel) { MergedHKLKey key{h, k, l, true}; - if (merge_friedel) + if (!merge_friedel) return key; const std::tuple pos{h, k, l}; @@ -177,6 +181,15 @@ namespace { return out; } + struct ResolutionBinAccumulator { + double d_min_A = std::numeric_limits::infinity(); + double d_max_A = 0.0; + double absent_sum = 0.0; + int absent_count = 0; + double allowed_sum = 0.0; + int allowed_count = 0; + }; + SpaceGroupAbsenceScore ScoreSystematicAbsences( const gemmi::GroupOps& gops, const std::vector& merged, @@ -187,27 +200,144 @@ namespace { if (!opt.test_systematic_absences) return out; - double sum_i_over_sigma = 0.0; + int n_bins = opt.absence_resolution_bins; + if (n_bins <= 0) + n_bins = 1; + + double min_inv_d2 = std::numeric_limits::infinity(); + double max_inv_d2 = -std::numeric_limits::infinity(); for (const auto& r : merged) { if (!ReflectionPassesFilters(r, opt)) continue; - const gemmi::Op::Miller hkl{{r.h, r.k, r.l}}; - if (!gops.is_systematically_absent(hkl)) - continue; - - out.violating_reflections += 1; - sum_i_over_sigma += std::max(0.0, r.I / r.sigma); + const double inv_d2 = 1.0 / (r.d * r.d); + min_inv_d2 = std::min(min_inv_d2, inv_d2); + max_inv_d2 = std::max(max_inv_d2, inv_d2); } - if (out.violating_reflections > 0) - out.mean_i_over_sigma = sum_i_over_sigma / out.violating_reflections; - else - out.mean_i_over_sigma = 0.0; + if (!std::isfinite(min_inv_d2) || !std::isfinite(max_inv_d2)) + return out; + + if (max_inv_d2 < min_inv_d2) + std::swap(max_inv_d2, min_inv_d2); + + std::vector bins(static_cast(n_bins)); + + auto bin_index = [&](double d) { + if (n_bins == 1 || max_inv_d2 <= min_inv_d2) + return 0; + + const double inv_d2 = 1.0 / (d * d); + const double t = (inv_d2 - min_inv_d2) / (max_inv_d2 - min_inv_d2); + int idx = static_cast(t * n_bins); + if (idx < 0) + idx = 0; + if (idx >= n_bins) + idx = n_bins - 1; + return idx; + }; + + for (const auto& r : merged) { + if (!ReflectionPassesFilters(r, opt)) + continue; + + const int idx = bin_index(r.d); + auto& bin = bins[static_cast(idx)]; + bin.d_min_A = std::min(bin.d_min_A, r.d); + bin.d_max_A = std::max(bin.d_max_A, r.d); + + const double i_over_sigma = std::max(0.0, r.I / r.sigma); + const gemmi::Op::Miller hkl{{r.h, r.k, r.l}}; + + if (gops.is_systematically_absent(hkl)) { + bin.absent_sum += i_over_sigma; + bin.absent_count += 1; + out.absent_reflections += 1; + } else { + bin.allowed_sum += i_over_sigma; + bin.allowed_count += 1; + out.allowed_reflections += 1; + } + } + + double global_absent_sum = 0.0; + double global_allowed_sum = 0.0; + double weighted_ratio_sum = 0.0; + int weighted_ratio_weight = 0; + double worst_ratio = 0.0; + bool any_decision_bin_failed = false; + + out.bins.reserve(bins.size()); + + for (const auto& bin : bins) { + SpaceGroupAbsenceBinScore bin_score; + if (std::isfinite(bin.d_min_A)) + bin_score.d_min_A = bin.d_min_A; + if (bin.d_max_A > 0.0) + bin_score.d_max_A = bin.d_max_A; + + bin_score.absent_reflections = bin.absent_count; + bin_score.allowed_reflections = bin.allowed_count; + + if (bin.absent_count > 0) + bin_score.mean_absent_i_over_sigma = bin.absent_sum / static_cast(bin.absent_count); + if (bin.allowed_count > 0) + bin_score.mean_allowed_i_over_sigma = bin.allowed_sum / static_cast(bin.allowed_count); + + global_absent_sum += bin.absent_sum; + global_allowed_sum += bin.allowed_sum; + + if (bin.absent_count >= opt.min_absent_reflections_per_bin && + bin.allowed_count >= opt.min_allowed_reflections_per_bin && + bin_score.mean_allowed_i_over_sigma > 0.0) { + + bin_score.used_for_decision = true; + bin_score.absent_to_allowed_ratio = + bin_score.mean_absent_i_over_sigma / bin_score.mean_allowed_i_over_sigma; + bin_score.accepted = + bin_score.absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin; + + out.compared_bins += 1; + if (bin_score.accepted) + out.accepted_bins += 1; + else + any_decision_bin_failed = true; + + weighted_ratio_sum += bin_score.absent_to_allowed_ratio * bin.absent_count; + weighted_ratio_weight += bin.absent_count; + worst_ratio = std::max(worst_ratio, bin_score.absent_to_allowed_ratio); + } + + out.bins.push_back(bin_score); + } + + if (out.absent_reflections > 0) + out.mean_absent_i_over_sigma = global_absent_sum / static_cast(out.absent_reflections); + if (out.allowed_reflections > 0) + out.mean_allowed_i_over_sigma = global_allowed_sum / static_cast(out.allowed_reflections); + + if (weighted_ratio_weight > 0) { + out.weighted_absent_to_allowed_ratio = weighted_ratio_sum / static_cast(weighted_ratio_weight); + out.worst_absent_to_allowed_ratio = worst_ratio; + out.accepted = + !any_decision_bin_failed && + out.weighted_absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio; + } else if (out.absent_reflections == 0) { + out.weighted_absent_to_allowed_ratio = 0.0; + out.worst_absent_to_allowed_ratio = 0.0; + out.accepted = true; + } else if (out.mean_allowed_i_over_sigma > 0.0) { + const double global_ratio = out.mean_absent_i_over_sigma / out.mean_allowed_i_over_sigma; + out.weighted_absent_to_allowed_ratio = global_ratio; + out.worst_absent_to_allowed_ratio = global_ratio; + out.accepted = global_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin; + } else { + out.weighted_absent_to_allowed_ratio = std::numeric_limits::infinity(); + out.worst_absent_to_allowed_ratio = std::numeric_limits::infinity(); + out.accepted = false; + } - out.accepted = (out.violating_reflections <= opt.max_absent_violations) && - (out.mean_i_over_sigma <= opt.max_mean_absent_i_over_sigma); return out; } @@ -323,17 +453,24 @@ SearchSpaceGroupResult SearchSpaceGroup( if (a.accepted != b.accepted) return a.accepted > b.accepted; - if (a.absence_score.mean_i_over_sigma != b.absence_score.mean_i_over_sigma) - return a.absence_score.mean_i_over_sigma < b.absence_score.mean_i_over_sigma; + if (a.absence_score.weighted_absent_to_allowed_ratio != + b.absence_score.weighted_absent_to_allowed_ratio) + return a.absence_score.weighted_absent_to_allowed_ratio < + b.absence_score.weighted_absent_to_allowed_ratio; - if (a.absence_score.violating_reflections != b.absence_score.violating_reflections) - return a.absence_score.violating_reflections < b.absence_score.violating_reflections; + if (a.absence_score.worst_absent_to_allowed_ratio != + b.absence_score.worst_absent_to_allowed_ratio) + return a.absence_score.worst_absent_to_allowed_ratio < + b.absence_score.worst_absent_to_allowed_ratio; const int order_a = a.space_group.operations().derive_symmorphic().order(); const int order_b = b.space_group.operations().derive_symmorphic().order(); if (order_a != order_b) return order_a > order_b; + if (a.absence_score.absent_reflections != b.absence_score.absent_reflections) + return a.absence_score.absent_reflections > b.absence_score.absent_reflections; + if (a.accepted_operators != b.accepted_operators) return a.accepted_operators > b.accepted_operators; if (a.min_cc != b.min_cc) @@ -341,7 +478,7 @@ SearchSpaceGroupResult SearchSpaceGroup( if (a.mean_cc != b.mean_cc) return a.mean_cc > b.mean_cc; - return a.space_group.number < b.space_group.number; + return a.space_group.number > b.space_group.number; }); for (const auto& cand : result.candidates) { @@ -376,7 +513,11 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result, << " " << std::setw(8) << "Nabs" << " " - << std::setw(10) << "abs" + << std::setw(8) << "Nallow" + << " " + << std::setw(8) << "Abs/All" + << " " + << std::setw(8) << "worst" << "\n"; os << " " @@ -396,7 +537,11 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result, << " " << std::setw(8) << "--------" << " " - << std::setw(10) << "----------" + << std::setw(8) << "--------" + << " " + << std::setw(8) << "--------" + << " " + << std::setw(8) << "--------" << "\n"; const size_t n = std::min(max_candidates_to_print, result.candidates.size()); @@ -417,9 +562,15 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result, << " " << std::setw(5) << (c.absence_score.accepted ? "yes" : "no") << " " - << std::setw(8) << c.absence_score.violating_reflections + << std::setw(8) << c.absence_score.absent_reflections << " " - << std::setw(10) << std::fixed << std::setprecision(2) << c.absence_score.mean_i_over_sigma + << std::setw(8) << c.absence_score.allowed_reflections + << " " + << std::setw(8) << std::fixed << std::setprecision(3) + << c.absence_score.weighted_absent_to_allowed_ratio + << " " + << std::setw(8) << std::fixed << std::setprecision(3) + << c.absence_score.worst_absent_to_allowed_ratio << "\n"; } diff --git a/image_analysis/scale_merge/SearchSpaceGroup.h b/image_analysis/scale_merge/SearchSpaceGroup.h index b044869c..a66e0fef 100644 --- a/image_analysis/scale_merge/SearchSpaceGroup.h +++ b/image_analysis/scale_merge/SearchSpaceGroup.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + #pragma once #include @@ -14,12 +17,31 @@ struct SpaceGroupOperatorScore { bool accepted = false; }; -struct SpaceGroupAbsenceScore { - int violating_reflections = 0; - double mean_i_over_sigma = 0.0; +struct SpaceGroupAbsenceBinScore { + double d_min_A = 0.0; + double d_max_A = 0.0; + int absent_reflections = 0; + int allowed_reflections = 0; + double mean_absent_i_over_sigma = 0.0; + double mean_allowed_i_over_sigma = 0.0; + double absent_to_allowed_ratio = 0.0; + bool used_for_decision = false; bool accepted = true; }; +struct SpaceGroupAbsenceScore { + int absent_reflections = 0; + int allowed_reflections = 0; + int compared_bins = 0; + int accepted_bins = 0; + double mean_absent_i_over_sigma = 0.0; + double mean_allowed_i_over_sigma = 0.0; + double weighted_absent_to_allowed_ratio = 0.0; + double worst_absent_to_allowed_ratio = 0.0; + bool accepted = true; + std::vector bins; +}; + struct SpaceGroupCandidateScore { gemmi::SpaceGroup space_group; double mean_cc = 0.0; @@ -59,11 +81,16 @@ struct SearchSpaceGroupOptions { // Test systematic absences from centering / screws / glides. bool test_systematic_absences = true; - // Candidate is rejected if forbidden reflections are too strong on average. - double max_mean_absent_i_over_sigma = 1.0; + // Number of resolution bins used for absence-vs-allowed comparison. + int absence_resolution_bins = 10; - // Hard cap on number of observed forbidden reflections. - int max_absent_violations = 0; + // Minimum counts in a bin before it contributes to the decision. + int min_absent_reflections_per_bin = 5; + int min_allowed_reflections_per_bin = 10; + + // Acceptance thresholds for absent/allowed ratios. + double max_absent_to_allowed_i_over_sigma_ratio = 0.20; + double max_absent_to_allowed_i_over_sigma_ratio_in_any_bin = 0.50; }; struct SearchSpaceGroupResult { diff --git a/tests/SearchSpaceGroupTest.cpp b/tests/SearchSpaceGroupTest.cpp index 8e7290bd..6e19baf5 100644 --- a/tests/SearchSpaceGroupTest.cpp +++ b/tests/SearchSpaceGroupTest.cpp @@ -69,9 +69,10 @@ namespace { if (h == 0 && k == 0 && l == 0) continue; + bool absent = false; gemmi::Op::Miller hkl{{h, k, l}}; if (gops.is_systematically_absent(hkl)) - continue; + absent = true; const auto [asu, sign_plus] = rasu.to_asu_sign(hkl, gops); if (!sign_plus) @@ -86,7 +87,7 @@ namespace { .h = h, .k = k, .l = l, - .I = SyntheticIntensityFromAsu(asu), + .I = absent ? 0.0 : SyntheticIntensityFromAsu(asu), .sigma = 1.0, .d = CalcSyntheticD(h, k, l) }); @@ -110,10 +111,13 @@ TEST_CASE("SearchSpaceGroup detects synthetic space groups") { {"P 3 2 1", "P321"}, {"P 4 2 2", "P422"}, {"P 4 3 2", "P432"}, + {"P 43 21 2", "P43212"}, {"P 6 2 2", "P622"}, {"C 1 2 1", "C2"}, {"C 2 2 2", "C222"}, {"I 4 3 2", "I432"}, + {"I 21 21 21", "I212121"}, + {"I 2 1 3", "I213"}, }; for (const auto& tc : cases) { @@ -130,8 +134,11 @@ TEST_CASE("SearchSpaceGroup detects synthetic space groups") { opt.min_pairs_per_operator = 20; opt.min_total_compared = 80; opt.test_systematic_absences = true; - opt.max_mean_absent_i_over_sigma = 0.1; - opt.max_absent_violations = 0; + opt.absence_resolution_bins = 10; + opt.min_absent_reflections_per_bin = 5; + opt.min_allowed_reflections_per_bin = 10; + opt.max_absent_to_allowed_i_over_sigma_ratio = 0.05; + opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin = 0.10; const auto result = SearchSpaceGroup(merged, opt);