space group: reject over-symmetrised point groups by merge R-meas consistency

Per-operator CC alone cannot separate a real weak symmetry operator from a false
moderate one: a noisy crystal's genuine cubic 2-folds (InsI2 I23, CC ~0.51) score
BELOW a pseudo-symmetric crystal's false 2-fold (InsH3, CC 0.64), so no CC threshold
works. Add a self-consistency test: a candidate point group is accepted only if
merging the intensities under it does not inflate R-meas past max_rmeas_ratio (1.5x)
the most-consistent candidate. A false operator forces non-equivalent reflections
together and blows R-meas up; a real one leaves it flat.

Fixes InsH3: was over-called R32 (ISa collapsed to 2.2 from the forced merge), now
correctly R3/H3 (ISa 10.2, matching XDS). InsI2 stays I23, and lysoC/InsI3/MyoB/
EP0210/cytC/lyso_ref are unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-30 20:32:03 +02:00
co-authored by Claude Opus 4.8
parent 23d58b6202
commit 7ade6d9906
2 changed files with 69 additions and 9 deletions
@@ -250,11 +250,48 @@ SearchSpaceGroupResult SearchSpaceGroup(
holohedry = HolohedryRotationSet(opt.lattice_system.value());
const auto point_groups = EnumeratePointGroups(holohedry);
// Choose the largest point group all of whose operators are confirmed (ties -> higher min CC).
const PointGroupInfo* best_pg = nullptr;
int best_pg_order = 0;
double best_pg_min_cc = -2.0;
// R-meas of the intensities merged under a point group's rotations - how well its symmetry
// equivalents agree. A real point group keeps this low; a false operator forces non-equivalent
// reflections together and inflates it. Computed over the present (pass_cc) reflections.
auto rmeas_under = [&](const std::vector<gemmi::Op>& rotations) -> double {
std::unordered_map<HKLKey, std::pair<int, double>, HKLKeyHash> grp; // orbit rep -> (n, sum I)
std::vector<HKLKey> rep(n);
for (size_t i = 0; i < n; ++i) {
if (!pass_cc[i])
continue;
HKLKey best = key[i];
for (const auto& op : rotations) {
const auto m = op.apply_to_hkl(gemmi::Op::Miller{{H[i], K[i], L[i]}});
const HKLKey k2 = Canonicalize(m[0], m[1], m[2], opt.merge_friedel);
if (std::make_tuple(k2.h, k2.k, k2.l) < std::make_tuple(best.h, best.k, best.l))
best = k2;
}
rep[i] = best;
auto& g = grp[best];
g.first += 1; g.second += I[i];
}
std::unordered_map<HKLKey, double, HKLKeyHash> absdev;
for (size_t i = 0; i < n; ++i) {
if (!pass_cc[i])
continue;
const auto& g = grp[rep[i]];
if (g.first >= 2)
absdev[rep[i]] += std::fabs(I[i] - g.second / g.first);
}
double num = 0, den = 0;
for (const auto& [k, g] : grp) {
if (g.first < 2)
continue;
num += std::sqrt(static_cast<double>(g.first) / (g.first - 1)) * absdev[k];
den += g.second;
}
return den > 0 ? num / den : std::numeric_limits<double>::quiet_NaN();
};
// Operator-CC-confirmed candidates, each with its merge R-meas; rmeas_ref = the most consistent.
struct PGCand { const PointGroupInfo* pg; int order; double min_cc; double rmeas; };
std::vector<PGCand> pg_cands;
double rmeas_ref = std::numeric_limits<double>::infinity();
for (const auto& pg : point_groups) {
bool all_present = true;
double min_cc = pg.rotations.empty() ? 1.0 : std::numeric_limits<double>::infinity();
@@ -265,12 +302,28 @@ SearchSpaceGroupResult SearchSpaceGroup(
}
if (!all_present)
continue;
const double rm = pg.rotations.empty() ? std::numeric_limits<double>::quiet_NaN()
: rmeas_under(pg.rotations);
pg_cands.push_back({&pg, static_cast<int>(pg.rotations.size()) + 1, min_cc, rm});
if (!pg.rotations.empty() && std::isfinite(rm))
rmeas_ref = std::min(rmeas_ref, rm);
}
const int order = static_cast<int>(pg.rotations.size()) + 1;
if (order > best_pg_order || (order == best_pg_order && min_cc > best_pg_min_cc)) {
best_pg = &pg;
best_pg_order = order;
best_pg_min_cc = min_cc;
// Choose the largest point group that is both operator-confirmed AND self-consistent (its merge
// R-meas is not inflated past max_rmeas_ratio x the most-consistent candidate; ties -> higher min
// CC). Identity (no operators) is always consistent, so it stays the P1 fallback.
const PointGroupInfo* best_pg = nullptr;
int best_pg_order = 0;
double best_pg_min_cc = -2.0;
for (const auto& c : pg_cands) {
const bool consistent = c.pg->rotations.empty() || !std::isfinite(c.rmeas) ||
!std::isfinite(rmeas_ref) || c.rmeas <= rmeas_ref * opt.max_rmeas_ratio;
if (!consistent)
continue;
if (c.order > best_pg_order || (c.order == best_pg_order && c.min_cc > best_pg_min_cc)) {
best_pg = c.pg;
best_pg_order = c.order;
best_pg_min_cc = c.min_cc;
}
}
@@ -71,6 +71,13 @@ struct SearchSpaceGroupOptions {
double min_operator_cc = 0.5;
int min_pairs_per_operator = 20;
// Per-operator CC alone cannot tell a real weak operator from a false moderate one (a noisy
// crystal's genuine 2-fold can score below a pseudo-symmetric crystal's false one). So a point
// group is also required to be SELF-CONSISTENT: merging the intensities under it must not inflate
// R-meas beyond this factor times the most-consistent (lowest-R-meas) candidate. A false operator
// forces non-equivalent reflections together and blows R-meas up; a real one leaves it ~flat.
double max_rmeas_ratio = 1.5;
// --- Stage B: space group (screw axes / centering) ---
bool determine_space_group = true; // false: stop at the symmorphic representative