SpaceGroupTest: Fix systematic absences test
Build Packages / build:rpm (rocky8_nocuda) (pull_request) Failing after 8m51s
Build Packages / build:rpm (rocky9_nocuda) (pull_request) Failing after 9m16s
Build Packages / build:rpm (ubuntu2404_nocuda) (pull_request) Failing after 8m35s
Build Packages / build:rpm (ubuntu2204_nocuda) (pull_request) Failing after 9m28s
Build Packages / build:rpm (rocky8_sls9) (pull_request) Failing after 10m15s
Build Packages / build:rpm (rocky9_sls9) (pull_request) Failing after 9m23s
Build Packages / Generate python client (pull_request) Successful in 21s
Build Packages / Build documentation (pull_request) Successful in 1m5s
Build Packages / build:rpm (rocky8) (pull_request) Failing after 9m0s
Build Packages / Create release (pull_request) Has been skipped
Build Packages / build:rpm (rocky9) (pull_request) Failing after 9m32s
Build Packages / build:rpm (ubuntu2204) (pull_request) Failing after 9m50s
Build Packages / build:rpm (ubuntu2404) (pull_request) Failing after 9m35s
Build Packages / build:rpm (rocky8_nocuda) (push) Failing after 8m46s
Build Packages / build:rpm (rocky9_nocuda) (push) Failing after 8m24s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Failing after 7m45s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Failing after 9m16s
Build Packages / build:rpm (rocky8_sls9) (push) Failing after 8m38s
Build Packages / build:rpm (rocky9_sls9) (push) Failing after 8m52s
Build Packages / build:rpm (rocky8) (push) Failing after 9m13s
Build Packages / build:rpm (rocky9) (push) Failing after 9m10s
Build Packages / Generate python client (push) Successful in 25s
Build Packages / Build documentation (push) Successful in 49s
Build Packages / Create release (push) Has been skipped
Build Packages / build:rpm (ubuntu2204) (push) Failing after 8m31s
Build Packages / build:rpm (ubuntu2404) (push) Failing after 8m1s
Build Packages / Unit tests (push) Successful in 55m41s
Build Packages / Unit tests (pull_request) Failing after 3h5m24s

This commit is contained in:
2026-03-06 22:27:55 +01:00
parent f1b172a34c
commit 89e5c3e096
3 changed files with 219 additions and 34 deletions
+174 -23
View File
@@ -1,6 +1,10 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "SearchSpaceGroup.h"
#include <algorithm>
#include <array>
#include <cmath>
#include <cctype>
#include <iomanip>
@@ -51,7 +55,7 @@ namespace {
MergedHKLKey CanonicalizeMergedHKL(int h, int k, int l, bool merge_friedel) {
MergedHKLKey key{h, k, l, true};
if (merge_friedel)
if (!merge_friedel)
return key;
const std::tuple<int, int, int> pos{h, k, l};
@@ -177,6 +181,15 @@ namespace {
return out;
}
struct ResolutionBinAccumulator {
double d_min_A = std::numeric_limits<double>::infinity();
double d_max_A = 0.0;
double absent_sum = 0.0;
int absent_count = 0;
double allowed_sum = 0.0;
int allowed_count = 0;
};
SpaceGroupAbsenceScore ScoreSystematicAbsences(
const gemmi::GroupOps& gops,
const std::vector<MergedReflection>& merged,
@@ -187,27 +200,144 @@ namespace {
if (!opt.test_systematic_absences)
return out;
double sum_i_over_sigma = 0.0;
int n_bins = opt.absence_resolution_bins;
if (n_bins <= 0)
n_bins = 1;
double min_inv_d2 = std::numeric_limits<double>::infinity();
double max_inv_d2 = -std::numeric_limits<double>::infinity();
for (const auto& r : merged) {
if (!ReflectionPassesFilters(r, opt))
continue;
const gemmi::Op::Miller hkl{{r.h, r.k, r.l}};
if (!gops.is_systematically_absent(hkl))
continue;
out.violating_reflections += 1;
sum_i_over_sigma += std::max(0.0, r.I / r.sigma);
const double inv_d2 = 1.0 / (r.d * r.d);
min_inv_d2 = std::min(min_inv_d2, inv_d2);
max_inv_d2 = std::max(max_inv_d2, inv_d2);
}
if (out.violating_reflections > 0)
out.mean_i_over_sigma = sum_i_over_sigma / out.violating_reflections;
else
out.mean_i_over_sigma = 0.0;
if (!std::isfinite(min_inv_d2) || !std::isfinite(max_inv_d2))
return out;
if (max_inv_d2 < min_inv_d2)
std::swap(max_inv_d2, min_inv_d2);
std::vector<ResolutionBinAccumulator> bins(static_cast<size_t>(n_bins));
auto bin_index = [&](double d) {
if (n_bins == 1 || max_inv_d2 <= min_inv_d2)
return 0;
const double inv_d2 = 1.0 / (d * d);
const double t = (inv_d2 - min_inv_d2) / (max_inv_d2 - min_inv_d2);
int idx = static_cast<int>(t * n_bins);
if (idx < 0)
idx = 0;
if (idx >= n_bins)
idx = n_bins - 1;
return idx;
};
for (const auto& r : merged) {
if (!ReflectionPassesFilters(r, opt))
continue;
const int idx = bin_index(r.d);
auto& bin = bins[static_cast<size_t>(idx)];
bin.d_min_A = std::min(bin.d_min_A, r.d);
bin.d_max_A = std::max(bin.d_max_A, r.d);
const double i_over_sigma = std::max(0.0, r.I / r.sigma);
const gemmi::Op::Miller hkl{{r.h, r.k, r.l}};
if (gops.is_systematically_absent(hkl)) {
bin.absent_sum += i_over_sigma;
bin.absent_count += 1;
out.absent_reflections += 1;
} else {
bin.allowed_sum += i_over_sigma;
bin.allowed_count += 1;
out.allowed_reflections += 1;
}
}
double global_absent_sum = 0.0;
double global_allowed_sum = 0.0;
double weighted_ratio_sum = 0.0;
int weighted_ratio_weight = 0;
double worst_ratio = 0.0;
bool any_decision_bin_failed = false;
out.bins.reserve(bins.size());
for (const auto& bin : bins) {
SpaceGroupAbsenceBinScore bin_score;
if (std::isfinite(bin.d_min_A))
bin_score.d_min_A = bin.d_min_A;
if (bin.d_max_A > 0.0)
bin_score.d_max_A = bin.d_max_A;
bin_score.absent_reflections = bin.absent_count;
bin_score.allowed_reflections = bin.allowed_count;
if (bin.absent_count > 0)
bin_score.mean_absent_i_over_sigma = bin.absent_sum / static_cast<double>(bin.absent_count);
if (bin.allowed_count > 0)
bin_score.mean_allowed_i_over_sigma = bin.allowed_sum / static_cast<double>(bin.allowed_count);
global_absent_sum += bin.absent_sum;
global_allowed_sum += bin.allowed_sum;
if (bin.absent_count >= opt.min_absent_reflections_per_bin &&
bin.allowed_count >= opt.min_allowed_reflections_per_bin &&
bin_score.mean_allowed_i_over_sigma > 0.0) {
bin_score.used_for_decision = true;
bin_score.absent_to_allowed_ratio =
bin_score.mean_absent_i_over_sigma / bin_score.mean_allowed_i_over_sigma;
bin_score.accepted =
bin_score.absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin;
out.compared_bins += 1;
if (bin_score.accepted)
out.accepted_bins += 1;
else
any_decision_bin_failed = true;
weighted_ratio_sum += bin_score.absent_to_allowed_ratio * bin.absent_count;
weighted_ratio_weight += bin.absent_count;
worst_ratio = std::max(worst_ratio, bin_score.absent_to_allowed_ratio);
}
out.bins.push_back(bin_score);
}
if (out.absent_reflections > 0)
out.mean_absent_i_over_sigma = global_absent_sum / static_cast<double>(out.absent_reflections);
if (out.allowed_reflections > 0)
out.mean_allowed_i_over_sigma = global_allowed_sum / static_cast<double>(out.allowed_reflections);
if (weighted_ratio_weight > 0) {
out.weighted_absent_to_allowed_ratio = weighted_ratio_sum / static_cast<double>(weighted_ratio_weight);
out.worst_absent_to_allowed_ratio = worst_ratio;
out.accepted =
!any_decision_bin_failed &&
out.weighted_absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio;
} else if (out.absent_reflections == 0) {
out.weighted_absent_to_allowed_ratio = 0.0;
out.worst_absent_to_allowed_ratio = 0.0;
out.accepted = true;
} else if (out.mean_allowed_i_over_sigma > 0.0) {
const double global_ratio = out.mean_absent_i_over_sigma / out.mean_allowed_i_over_sigma;
out.weighted_absent_to_allowed_ratio = global_ratio;
out.worst_absent_to_allowed_ratio = global_ratio;
out.accepted = global_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin;
} else {
out.weighted_absent_to_allowed_ratio = std::numeric_limits<double>::infinity();
out.worst_absent_to_allowed_ratio = std::numeric_limits<double>::infinity();
out.accepted = false;
}
out.accepted = (out.violating_reflections <= opt.max_absent_violations) &&
(out.mean_i_over_sigma <= opt.max_mean_absent_i_over_sigma);
return out;
}
@@ -323,17 +453,24 @@ SearchSpaceGroupResult SearchSpaceGroup(
if (a.accepted != b.accepted)
return a.accepted > b.accepted;
if (a.absence_score.mean_i_over_sigma != b.absence_score.mean_i_over_sigma)
return a.absence_score.mean_i_over_sigma < b.absence_score.mean_i_over_sigma;
if (a.absence_score.weighted_absent_to_allowed_ratio !=
b.absence_score.weighted_absent_to_allowed_ratio)
return a.absence_score.weighted_absent_to_allowed_ratio <
b.absence_score.weighted_absent_to_allowed_ratio;
if (a.absence_score.violating_reflections != b.absence_score.violating_reflections)
return a.absence_score.violating_reflections < b.absence_score.violating_reflections;
if (a.absence_score.worst_absent_to_allowed_ratio !=
b.absence_score.worst_absent_to_allowed_ratio)
return a.absence_score.worst_absent_to_allowed_ratio <
b.absence_score.worst_absent_to_allowed_ratio;
const int order_a = a.space_group.operations().derive_symmorphic().order();
const int order_b = b.space_group.operations().derive_symmorphic().order();
if (order_a != order_b)
return order_a > order_b;
if (a.absence_score.absent_reflections != b.absence_score.absent_reflections)
return a.absence_score.absent_reflections > b.absence_score.absent_reflections;
if (a.accepted_operators != b.accepted_operators)
return a.accepted_operators > b.accepted_operators;
if (a.min_cc != b.min_cc)
@@ -341,7 +478,7 @@ SearchSpaceGroupResult SearchSpaceGroup(
if (a.mean_cc != b.mean_cc)
return a.mean_cc > b.mean_cc;
return a.space_group.number < b.space_group.number;
return a.space_group.number > b.space_group.number;
});
for (const auto& cand : result.candidates) {
@@ -376,7 +513,11 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
<< " "
<< std::setw(8) << "Nabs"
<< " "
<< std::setw(10) << "<I/sig>abs"
<< std::setw(8) << "Nallow"
<< " "
<< std::setw(8) << "Abs/All"
<< " "
<< std::setw(8) << "worst"
<< "\n";
os << " "
@@ -396,7 +537,11 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
<< " "
<< std::setw(8) << "--------"
<< " "
<< std::setw(10) << "----------"
<< std::setw(8) << "--------"
<< " "
<< std::setw(8) << "--------"
<< " "
<< std::setw(8) << "--------"
<< "\n";
const size_t n = std::min(max_candidates_to_print, result.candidates.size());
@@ -417,9 +562,15 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
<< " "
<< std::setw(5) << (c.absence_score.accepted ? "yes" : "no")
<< " "
<< std::setw(8) << c.absence_score.violating_reflections
<< std::setw(8) << c.absence_score.absent_reflections
<< " "
<< std::setw(10) << std::fixed << std::setprecision(2) << c.absence_score.mean_i_over_sigma
<< std::setw(8) << c.absence_score.allowed_reflections
<< " "
<< std::setw(8) << std::fixed << std::setprecision(3)
<< c.absence_score.weighted_absent_to_allowed_ratio
<< " "
<< std::setw(8) << std::fixed << std::setprecision(3)
<< c.absence_score.worst_absent_to_allowed_ratio
<< "\n";
}
+34 -7
View File
@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#pragma once
#include <optional>
@@ -14,12 +17,31 @@ struct SpaceGroupOperatorScore {
bool accepted = false;
};
struct SpaceGroupAbsenceScore {
int violating_reflections = 0;
double mean_i_over_sigma = 0.0;
struct SpaceGroupAbsenceBinScore {
double d_min_A = 0.0;
double d_max_A = 0.0;
int absent_reflections = 0;
int allowed_reflections = 0;
double mean_absent_i_over_sigma = 0.0;
double mean_allowed_i_over_sigma = 0.0;
double absent_to_allowed_ratio = 0.0;
bool used_for_decision = false;
bool accepted = true;
};
struct SpaceGroupAbsenceScore {
int absent_reflections = 0;
int allowed_reflections = 0;
int compared_bins = 0;
int accepted_bins = 0;
double mean_absent_i_over_sigma = 0.0;
double mean_allowed_i_over_sigma = 0.0;
double weighted_absent_to_allowed_ratio = 0.0;
double worst_absent_to_allowed_ratio = 0.0;
bool accepted = true;
std::vector<SpaceGroupAbsenceBinScore> bins;
};
struct SpaceGroupCandidateScore {
gemmi::SpaceGroup space_group;
double mean_cc = 0.0;
@@ -59,11 +81,16 @@ struct SearchSpaceGroupOptions {
// Test systematic absences from centering / screws / glides.
bool test_systematic_absences = true;
// Candidate is rejected if forbidden reflections are too strong on average.
double max_mean_absent_i_over_sigma = 1.0;
// Number of resolution bins used for absence-vs-allowed comparison.
int absence_resolution_bins = 10;
// Hard cap on number of observed forbidden reflections.
int max_absent_violations = 0;
// Minimum counts in a bin before it contributes to the decision.
int min_absent_reflections_per_bin = 5;
int min_allowed_reflections_per_bin = 10;
// Acceptance thresholds for absent/allowed <I/sig> ratios.
double max_absent_to_allowed_i_over_sigma_ratio = 0.20;
double max_absent_to_allowed_i_over_sigma_ratio_in_any_bin = 0.50;
};
struct SearchSpaceGroupResult {
+11 -4
View File
@@ -69,9 +69,10 @@ namespace {
if (h == 0 && k == 0 && l == 0)
continue;
bool absent = false;
gemmi::Op::Miller hkl{{h, k, l}};
if (gops.is_systematically_absent(hkl))
continue;
absent = true;
const auto [asu, sign_plus] = rasu.to_asu_sign(hkl, gops);
if (!sign_plus)
@@ -86,7 +87,7 @@ namespace {
.h = h,
.k = k,
.l = l,
.I = SyntheticIntensityFromAsu(asu),
.I = absent ? 0.0 : SyntheticIntensityFromAsu(asu),
.sigma = 1.0,
.d = CalcSyntheticD(h, k, l)
});
@@ -110,10 +111,13 @@ TEST_CASE("SearchSpaceGroup detects synthetic space groups") {
{"P 3 2 1", "P321"},
{"P 4 2 2", "P422"},
{"P 4 3 2", "P432"},
{"P 43 21 2", "P43212"},
{"P 6 2 2", "P622"},
{"C 1 2 1", "C2"},
{"C 2 2 2", "C222"},
{"I 4 3 2", "I432"},
{"I 21 21 21", "I212121"},
{"I 2 1 3", "I213"},
};
for (const auto& tc : cases) {
@@ -130,8 +134,11 @@ TEST_CASE("SearchSpaceGroup detects synthetic space groups") {
opt.min_pairs_per_operator = 20;
opt.min_total_compared = 80;
opt.test_systematic_absences = true;
opt.max_mean_absent_i_over_sigma = 0.1;
opt.max_absent_violations = 0;
opt.absence_resolution_bins = 10;
opt.min_absent_reflections_per_bin = 5;
opt.min_allowed_reflections_per_bin = 10;
opt.max_absent_to_allowed_i_over_sigma_ratio = 0.05;
opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin = 0.10;
const auto result = SearchSpaceGroup(merged, opt);