SpaceGroupTest: Fix systematic absences test
Build Packages / build:rpm (rocky8_nocuda) (pull_request) Failing after 8m51s
Build Packages / build:rpm (rocky9_nocuda) (pull_request) Failing after 9m16s
Build Packages / build:rpm (ubuntu2404_nocuda) (pull_request) Failing after 8m35s
Build Packages / build:rpm (ubuntu2204_nocuda) (pull_request) Failing after 9m28s
Build Packages / build:rpm (rocky8_sls9) (pull_request) Failing after 10m15s
Build Packages / build:rpm (rocky9_sls9) (pull_request) Failing after 9m23s
Build Packages / Generate python client (pull_request) Successful in 21s
Build Packages / Build documentation (pull_request) Successful in 1m5s
Build Packages / build:rpm (rocky8) (pull_request) Failing after 9m0s
Build Packages / Create release (pull_request) Has been skipped
Build Packages / build:rpm (rocky9) (pull_request) Failing after 9m32s
Build Packages / build:rpm (ubuntu2204) (pull_request) Failing after 9m50s
Build Packages / build:rpm (ubuntu2404) (pull_request) Failing after 9m35s
Build Packages / build:rpm (rocky8_nocuda) (push) Failing after 8m46s
Build Packages / build:rpm (rocky9_nocuda) (push) Failing after 8m24s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Failing after 7m45s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Failing after 9m16s
Build Packages / build:rpm (rocky8_sls9) (push) Failing after 8m38s
Build Packages / build:rpm (rocky9_sls9) (push) Failing after 8m52s
Build Packages / build:rpm (rocky8) (push) Failing after 9m13s
Build Packages / build:rpm (rocky9) (push) Failing after 9m10s
Build Packages / Generate python client (push) Successful in 25s
Build Packages / Build documentation (push) Successful in 49s
Build Packages / Create release (push) Has been skipped
Build Packages / build:rpm (ubuntu2204) (push) Failing after 8m31s
Build Packages / build:rpm (ubuntu2404) (push) Failing after 8m1s
Build Packages / Unit tests (push) Successful in 55m41s
Build Packages / Unit tests (pull_request) Failing after 3h5m24s
Build Packages / build:rpm (rocky8_nocuda) (pull_request) Failing after 8m51s
Build Packages / build:rpm (rocky9_nocuda) (pull_request) Failing after 9m16s
Build Packages / build:rpm (ubuntu2404_nocuda) (pull_request) Failing after 8m35s
Build Packages / build:rpm (ubuntu2204_nocuda) (pull_request) Failing after 9m28s
Build Packages / build:rpm (rocky8_sls9) (pull_request) Failing after 10m15s
Build Packages / build:rpm (rocky9_sls9) (pull_request) Failing after 9m23s
Build Packages / Generate python client (pull_request) Successful in 21s
Build Packages / Build documentation (pull_request) Successful in 1m5s
Build Packages / build:rpm (rocky8) (pull_request) Failing after 9m0s
Build Packages / Create release (pull_request) Has been skipped
Build Packages / build:rpm (rocky9) (pull_request) Failing after 9m32s
Build Packages / build:rpm (ubuntu2204) (pull_request) Failing after 9m50s
Build Packages / build:rpm (ubuntu2404) (pull_request) Failing after 9m35s
Build Packages / build:rpm (rocky8_nocuda) (push) Failing after 8m46s
Build Packages / build:rpm (rocky9_nocuda) (push) Failing after 8m24s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Failing after 7m45s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Failing after 9m16s
Build Packages / build:rpm (rocky8_sls9) (push) Failing after 8m38s
Build Packages / build:rpm (rocky9_sls9) (push) Failing after 8m52s
Build Packages / build:rpm (rocky8) (push) Failing after 9m13s
Build Packages / build:rpm (rocky9) (push) Failing after 9m10s
Build Packages / Generate python client (push) Successful in 25s
Build Packages / Build documentation (push) Successful in 49s
Build Packages / Create release (push) Has been skipped
Build Packages / build:rpm (ubuntu2204) (push) Failing after 8m31s
Build Packages / build:rpm (ubuntu2404) (push) Failing after 8m1s
Build Packages / Unit tests (push) Successful in 55m41s
Build Packages / Unit tests (pull_request) Failing after 3h5m24s
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
||||
// SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
#include "SearchSpaceGroup.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cctype>
|
||||
#include <iomanip>
|
||||
@@ -51,7 +55,7 @@ namespace {
|
||||
|
||||
MergedHKLKey CanonicalizeMergedHKL(int h, int k, int l, bool merge_friedel) {
|
||||
MergedHKLKey key{h, k, l, true};
|
||||
if (merge_friedel)
|
||||
if (!merge_friedel)
|
||||
return key;
|
||||
|
||||
const std::tuple<int, int, int> pos{h, k, l};
|
||||
@@ -177,6 +181,15 @@ namespace {
|
||||
return out;
|
||||
}
|
||||
|
||||
struct ResolutionBinAccumulator {
|
||||
double d_min_A = std::numeric_limits<double>::infinity();
|
||||
double d_max_A = 0.0;
|
||||
double absent_sum = 0.0;
|
||||
int absent_count = 0;
|
||||
double allowed_sum = 0.0;
|
||||
int allowed_count = 0;
|
||||
};
|
||||
|
||||
SpaceGroupAbsenceScore ScoreSystematicAbsences(
|
||||
const gemmi::GroupOps& gops,
|
||||
const std::vector<MergedReflection>& merged,
|
||||
@@ -187,27 +200,144 @@ namespace {
|
||||
if (!opt.test_systematic_absences)
|
||||
return out;
|
||||
|
||||
double sum_i_over_sigma = 0.0;
|
||||
int n_bins = opt.absence_resolution_bins;
|
||||
if (n_bins <= 0)
|
||||
n_bins = 1;
|
||||
|
||||
double min_inv_d2 = std::numeric_limits<double>::infinity();
|
||||
double max_inv_d2 = -std::numeric_limits<double>::infinity();
|
||||
|
||||
for (const auto& r : merged) {
|
||||
if (!ReflectionPassesFilters(r, opt))
|
||||
continue;
|
||||
|
||||
const gemmi::Op::Miller hkl{{r.h, r.k, r.l}};
|
||||
if (!gops.is_systematically_absent(hkl))
|
||||
continue;
|
||||
|
||||
out.violating_reflections += 1;
|
||||
sum_i_over_sigma += std::max(0.0, r.I / r.sigma);
|
||||
const double inv_d2 = 1.0 / (r.d * r.d);
|
||||
min_inv_d2 = std::min(min_inv_d2, inv_d2);
|
||||
max_inv_d2 = std::max(max_inv_d2, inv_d2);
|
||||
}
|
||||
|
||||
if (out.violating_reflections > 0)
|
||||
out.mean_i_over_sigma = sum_i_over_sigma / out.violating_reflections;
|
||||
else
|
||||
out.mean_i_over_sigma = 0.0;
|
||||
if (!std::isfinite(min_inv_d2) || !std::isfinite(max_inv_d2))
|
||||
return out;
|
||||
|
||||
if (max_inv_d2 < min_inv_d2)
|
||||
std::swap(max_inv_d2, min_inv_d2);
|
||||
|
||||
std::vector<ResolutionBinAccumulator> bins(static_cast<size_t>(n_bins));
|
||||
|
||||
auto bin_index = [&](double d) {
|
||||
if (n_bins == 1 || max_inv_d2 <= min_inv_d2)
|
||||
return 0;
|
||||
|
||||
const double inv_d2 = 1.0 / (d * d);
|
||||
const double t = (inv_d2 - min_inv_d2) / (max_inv_d2 - min_inv_d2);
|
||||
int idx = static_cast<int>(t * n_bins);
|
||||
if (idx < 0)
|
||||
idx = 0;
|
||||
if (idx >= n_bins)
|
||||
idx = n_bins - 1;
|
||||
return idx;
|
||||
};
|
||||
|
||||
for (const auto& r : merged) {
|
||||
if (!ReflectionPassesFilters(r, opt))
|
||||
continue;
|
||||
|
||||
const int idx = bin_index(r.d);
|
||||
auto& bin = bins[static_cast<size_t>(idx)];
|
||||
bin.d_min_A = std::min(bin.d_min_A, r.d);
|
||||
bin.d_max_A = std::max(bin.d_max_A, r.d);
|
||||
|
||||
const double i_over_sigma = std::max(0.0, r.I / r.sigma);
|
||||
const gemmi::Op::Miller hkl{{r.h, r.k, r.l}};
|
||||
|
||||
if (gops.is_systematically_absent(hkl)) {
|
||||
bin.absent_sum += i_over_sigma;
|
||||
bin.absent_count += 1;
|
||||
out.absent_reflections += 1;
|
||||
} else {
|
||||
bin.allowed_sum += i_over_sigma;
|
||||
bin.allowed_count += 1;
|
||||
out.allowed_reflections += 1;
|
||||
}
|
||||
}
|
||||
|
||||
double global_absent_sum = 0.0;
|
||||
double global_allowed_sum = 0.0;
|
||||
double weighted_ratio_sum = 0.0;
|
||||
int weighted_ratio_weight = 0;
|
||||
double worst_ratio = 0.0;
|
||||
bool any_decision_bin_failed = false;
|
||||
|
||||
out.bins.reserve(bins.size());
|
||||
|
||||
for (const auto& bin : bins) {
|
||||
SpaceGroupAbsenceBinScore bin_score;
|
||||
if (std::isfinite(bin.d_min_A))
|
||||
bin_score.d_min_A = bin.d_min_A;
|
||||
if (bin.d_max_A > 0.0)
|
||||
bin_score.d_max_A = bin.d_max_A;
|
||||
|
||||
bin_score.absent_reflections = bin.absent_count;
|
||||
bin_score.allowed_reflections = bin.allowed_count;
|
||||
|
||||
if (bin.absent_count > 0)
|
||||
bin_score.mean_absent_i_over_sigma = bin.absent_sum / static_cast<double>(bin.absent_count);
|
||||
if (bin.allowed_count > 0)
|
||||
bin_score.mean_allowed_i_over_sigma = bin.allowed_sum / static_cast<double>(bin.allowed_count);
|
||||
|
||||
global_absent_sum += bin.absent_sum;
|
||||
global_allowed_sum += bin.allowed_sum;
|
||||
|
||||
if (bin.absent_count >= opt.min_absent_reflections_per_bin &&
|
||||
bin.allowed_count >= opt.min_allowed_reflections_per_bin &&
|
||||
bin_score.mean_allowed_i_over_sigma > 0.0) {
|
||||
|
||||
bin_score.used_for_decision = true;
|
||||
bin_score.absent_to_allowed_ratio =
|
||||
bin_score.mean_absent_i_over_sigma / bin_score.mean_allowed_i_over_sigma;
|
||||
bin_score.accepted =
|
||||
bin_score.absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin;
|
||||
|
||||
out.compared_bins += 1;
|
||||
if (bin_score.accepted)
|
||||
out.accepted_bins += 1;
|
||||
else
|
||||
any_decision_bin_failed = true;
|
||||
|
||||
weighted_ratio_sum += bin_score.absent_to_allowed_ratio * bin.absent_count;
|
||||
weighted_ratio_weight += bin.absent_count;
|
||||
worst_ratio = std::max(worst_ratio, bin_score.absent_to_allowed_ratio);
|
||||
}
|
||||
|
||||
out.bins.push_back(bin_score);
|
||||
}
|
||||
|
||||
if (out.absent_reflections > 0)
|
||||
out.mean_absent_i_over_sigma = global_absent_sum / static_cast<double>(out.absent_reflections);
|
||||
if (out.allowed_reflections > 0)
|
||||
out.mean_allowed_i_over_sigma = global_allowed_sum / static_cast<double>(out.allowed_reflections);
|
||||
|
||||
if (weighted_ratio_weight > 0) {
|
||||
out.weighted_absent_to_allowed_ratio = weighted_ratio_sum / static_cast<double>(weighted_ratio_weight);
|
||||
out.worst_absent_to_allowed_ratio = worst_ratio;
|
||||
out.accepted =
|
||||
!any_decision_bin_failed &&
|
||||
out.weighted_absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio;
|
||||
} else if (out.absent_reflections == 0) {
|
||||
out.weighted_absent_to_allowed_ratio = 0.0;
|
||||
out.worst_absent_to_allowed_ratio = 0.0;
|
||||
out.accepted = true;
|
||||
} else if (out.mean_allowed_i_over_sigma > 0.0) {
|
||||
const double global_ratio = out.mean_absent_i_over_sigma / out.mean_allowed_i_over_sigma;
|
||||
out.weighted_absent_to_allowed_ratio = global_ratio;
|
||||
out.worst_absent_to_allowed_ratio = global_ratio;
|
||||
out.accepted = global_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin;
|
||||
} else {
|
||||
out.weighted_absent_to_allowed_ratio = std::numeric_limits<double>::infinity();
|
||||
out.worst_absent_to_allowed_ratio = std::numeric_limits<double>::infinity();
|
||||
out.accepted = false;
|
||||
}
|
||||
|
||||
out.accepted = (out.violating_reflections <= opt.max_absent_violations) &&
|
||||
(out.mean_i_over_sigma <= opt.max_mean_absent_i_over_sigma);
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -323,17 +453,24 @@ SearchSpaceGroupResult SearchSpaceGroup(
|
||||
if (a.accepted != b.accepted)
|
||||
return a.accepted > b.accepted;
|
||||
|
||||
if (a.absence_score.mean_i_over_sigma != b.absence_score.mean_i_over_sigma)
|
||||
return a.absence_score.mean_i_over_sigma < b.absence_score.mean_i_over_sigma;
|
||||
if (a.absence_score.weighted_absent_to_allowed_ratio !=
|
||||
b.absence_score.weighted_absent_to_allowed_ratio)
|
||||
return a.absence_score.weighted_absent_to_allowed_ratio <
|
||||
b.absence_score.weighted_absent_to_allowed_ratio;
|
||||
|
||||
if (a.absence_score.violating_reflections != b.absence_score.violating_reflections)
|
||||
return a.absence_score.violating_reflections < b.absence_score.violating_reflections;
|
||||
if (a.absence_score.worst_absent_to_allowed_ratio !=
|
||||
b.absence_score.worst_absent_to_allowed_ratio)
|
||||
return a.absence_score.worst_absent_to_allowed_ratio <
|
||||
b.absence_score.worst_absent_to_allowed_ratio;
|
||||
|
||||
const int order_a = a.space_group.operations().derive_symmorphic().order();
|
||||
const int order_b = b.space_group.operations().derive_symmorphic().order();
|
||||
if (order_a != order_b)
|
||||
return order_a > order_b;
|
||||
|
||||
if (a.absence_score.absent_reflections != b.absence_score.absent_reflections)
|
||||
return a.absence_score.absent_reflections > b.absence_score.absent_reflections;
|
||||
|
||||
if (a.accepted_operators != b.accepted_operators)
|
||||
return a.accepted_operators > b.accepted_operators;
|
||||
if (a.min_cc != b.min_cc)
|
||||
@@ -341,7 +478,7 @@ SearchSpaceGroupResult SearchSpaceGroup(
|
||||
if (a.mean_cc != b.mean_cc)
|
||||
return a.mean_cc > b.mean_cc;
|
||||
|
||||
return a.space_group.number < b.space_group.number;
|
||||
return a.space_group.number > b.space_group.number;
|
||||
});
|
||||
|
||||
for (const auto& cand : result.candidates) {
|
||||
@@ -376,7 +513,11 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
|
||||
<< " "
|
||||
<< std::setw(8) << "Nabs"
|
||||
<< " "
|
||||
<< std::setw(10) << "<I/sig>abs"
|
||||
<< std::setw(8) << "Nallow"
|
||||
<< " "
|
||||
<< std::setw(8) << "Abs/All"
|
||||
<< " "
|
||||
<< std::setw(8) << "worst"
|
||||
<< "\n";
|
||||
|
||||
os << " "
|
||||
@@ -396,7 +537,11 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
|
||||
<< " "
|
||||
<< std::setw(8) << "--------"
|
||||
<< " "
|
||||
<< std::setw(10) << "----------"
|
||||
<< std::setw(8) << "--------"
|
||||
<< " "
|
||||
<< std::setw(8) << "--------"
|
||||
<< " "
|
||||
<< std::setw(8) << "--------"
|
||||
<< "\n";
|
||||
|
||||
const size_t n = std::min(max_candidates_to_print, result.candidates.size());
|
||||
@@ -417,9 +562,15 @@ std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
|
||||
<< " "
|
||||
<< std::setw(5) << (c.absence_score.accepted ? "yes" : "no")
|
||||
<< " "
|
||||
<< std::setw(8) << c.absence_score.violating_reflections
|
||||
<< std::setw(8) << c.absence_score.absent_reflections
|
||||
<< " "
|
||||
<< std::setw(10) << std::fixed << std::setprecision(2) << c.absence_score.mean_i_over_sigma
|
||||
<< std::setw(8) << c.absence_score.allowed_reflections
|
||||
<< " "
|
||||
<< std::setw(8) << std::fixed << std::setprecision(3)
|
||||
<< c.absence_score.weighted_absent_to_allowed_ratio
|
||||
<< " "
|
||||
<< std::setw(8) << std::fixed << std::setprecision(3)
|
||||
<< c.absence_score.worst_absent_to_allowed_ratio
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
||||
// SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
@@ -14,12 +17,31 @@ struct SpaceGroupOperatorScore {
|
||||
bool accepted = false;
|
||||
};
|
||||
|
||||
struct SpaceGroupAbsenceScore {
|
||||
int violating_reflections = 0;
|
||||
double mean_i_over_sigma = 0.0;
|
||||
struct SpaceGroupAbsenceBinScore {
|
||||
double d_min_A = 0.0;
|
||||
double d_max_A = 0.0;
|
||||
int absent_reflections = 0;
|
||||
int allowed_reflections = 0;
|
||||
double mean_absent_i_over_sigma = 0.0;
|
||||
double mean_allowed_i_over_sigma = 0.0;
|
||||
double absent_to_allowed_ratio = 0.0;
|
||||
bool used_for_decision = false;
|
||||
bool accepted = true;
|
||||
};
|
||||
|
||||
struct SpaceGroupAbsenceScore {
|
||||
int absent_reflections = 0;
|
||||
int allowed_reflections = 0;
|
||||
int compared_bins = 0;
|
||||
int accepted_bins = 0;
|
||||
double mean_absent_i_over_sigma = 0.0;
|
||||
double mean_allowed_i_over_sigma = 0.0;
|
||||
double weighted_absent_to_allowed_ratio = 0.0;
|
||||
double worst_absent_to_allowed_ratio = 0.0;
|
||||
bool accepted = true;
|
||||
std::vector<SpaceGroupAbsenceBinScore> bins;
|
||||
};
|
||||
|
||||
struct SpaceGroupCandidateScore {
|
||||
gemmi::SpaceGroup space_group;
|
||||
double mean_cc = 0.0;
|
||||
@@ -59,11 +81,16 @@ struct SearchSpaceGroupOptions {
|
||||
// Test systematic absences from centering / screws / glides.
|
||||
bool test_systematic_absences = true;
|
||||
|
||||
// Candidate is rejected if forbidden reflections are too strong on average.
|
||||
double max_mean_absent_i_over_sigma = 1.0;
|
||||
// Number of resolution bins used for absence-vs-allowed comparison.
|
||||
int absence_resolution_bins = 10;
|
||||
|
||||
// Hard cap on number of observed forbidden reflections.
|
||||
int max_absent_violations = 0;
|
||||
// Minimum counts in a bin before it contributes to the decision.
|
||||
int min_absent_reflections_per_bin = 5;
|
||||
int min_allowed_reflections_per_bin = 10;
|
||||
|
||||
// Acceptance thresholds for absent/allowed <I/sig> ratios.
|
||||
double max_absent_to_allowed_i_over_sigma_ratio = 0.20;
|
||||
double max_absent_to_allowed_i_over_sigma_ratio_in_any_bin = 0.50;
|
||||
};
|
||||
|
||||
struct SearchSpaceGroupResult {
|
||||
|
||||
@@ -69,9 +69,10 @@ namespace {
|
||||
if (h == 0 && k == 0 && l == 0)
|
||||
continue;
|
||||
|
||||
bool absent = false;
|
||||
gemmi::Op::Miller hkl{{h, k, l}};
|
||||
if (gops.is_systematically_absent(hkl))
|
||||
continue;
|
||||
absent = true;
|
||||
|
||||
const auto [asu, sign_plus] = rasu.to_asu_sign(hkl, gops);
|
||||
if (!sign_plus)
|
||||
@@ -86,7 +87,7 @@ namespace {
|
||||
.h = h,
|
||||
.k = k,
|
||||
.l = l,
|
||||
.I = SyntheticIntensityFromAsu(asu),
|
||||
.I = absent ? 0.0 : SyntheticIntensityFromAsu(asu),
|
||||
.sigma = 1.0,
|
||||
.d = CalcSyntheticD(h, k, l)
|
||||
});
|
||||
@@ -110,10 +111,13 @@ TEST_CASE("SearchSpaceGroup detects synthetic space groups") {
|
||||
{"P 3 2 1", "P321"},
|
||||
{"P 4 2 2", "P422"},
|
||||
{"P 4 3 2", "P432"},
|
||||
{"P 43 21 2", "P43212"},
|
||||
{"P 6 2 2", "P622"},
|
||||
{"C 1 2 1", "C2"},
|
||||
{"C 2 2 2", "C222"},
|
||||
{"I 4 3 2", "I432"},
|
||||
{"I 21 21 21", "I212121"},
|
||||
{"I 2 1 3", "I213"},
|
||||
};
|
||||
|
||||
for (const auto& tc : cases) {
|
||||
@@ -130,8 +134,11 @@ TEST_CASE("SearchSpaceGroup detects synthetic space groups") {
|
||||
opt.min_pairs_per_operator = 20;
|
||||
opt.min_total_compared = 80;
|
||||
opt.test_systematic_absences = true;
|
||||
opt.max_mean_absent_i_over_sigma = 0.1;
|
||||
opt.max_absent_violations = 0;
|
||||
opt.absence_resolution_bins = 10;
|
||||
opt.min_absent_reflections_per_bin = 5;
|
||||
opt.min_allowed_reflections_per_bin = 10;
|
||||
opt.max_absent_to_allowed_i_over_sigma_ratio = 0.05;
|
||||
opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin = 0.10;
|
||||
|
||||
const auto result = SearchSpaceGroup(merged, opt);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user