// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "SearchSpaceGroup.h" #include #include #include #include #include #include #include #include #include #include #include namespace { struct MergedHKLKey { int h = 0; int k = 0; int l = 0; bool plus = true; bool operator==(const MergedHKLKey& o) const noexcept { return h == o.h && k == o.k && l == o.l && plus == o.plus; } }; struct MergedHKLKeyHash { size_t operator()(const MergedHKLKey& key) const noexcept { auto mix = [](uint64_t x) { x ^= x >> 33; x *= 0xff51afd7ed558ccdULL; x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL; x ^= x >> 33; return x; }; return static_cast( mix(static_cast(key.h)) ^ (mix(static_cast(key.k)) << 1) ^ (mix(static_cast(key.l)) << 2) ^ (mix(static_cast(key.plus ? 1 : 0)) << 3)); } }; bool IsIdentityOp(const gemmi::Op& op) { return op.rot == gemmi::Op::identity().rot; } bool IsParityChanging(const gemmi::Op& op) { return op.det_rot() < 0; } MergedHKLKey CanonicalizeMergedHKL(int h, int k, int l, bool merge_friedel) { MergedHKLKey key{h, k, l, true}; if (!merge_friedel) return key; const std::tuple pos{h, k, l}; const std::tuple neg{-h, -k, -l}; if (neg < pos) { key.h = -h; key.k = -k; key.l = -l; key.plus = false; } return key; } bool ReflectionPassesFilters(const MergedReflection& r, const SearchSpaceGroupOptions& opt) { if (!std::isfinite(r.I)) return false; if (!std::isfinite(r.sigma) || r.sigma <= 0.0) return false; if (!std::isfinite(r.d) || r.d <= 0.0) return false; if (opt.d_min_limit_A > 0.0 && r.d < opt.d_min_limit_A) return false; if (opt.min_i_over_sigma > 0.0 && r.I / r.sigma < opt.min_i_over_sigma) return false; return true; } std::unordered_map BuildMergedLookup(const std::vector& merged, const SearchSpaceGroupOptions& opt) { std::unordered_map out; out.reserve(merged.size()); for (const auto& r : merged) { if (!ReflectionPassesFilters(r, opt)) continue; const auto key = CanonicalizeMergedHKL(r.h, r.k, r.l, opt.merge_friedel); out.emplace(key, &r); } return out; } double PearsonCC(const std::vector& x, const std::vector& y) { if (x.size() != y.size() || x.size() < 2) return std::numeric_limits::quiet_NaN(); double sx = 0.0, sy = 0.0; double sxx = 0.0, syy = 0.0, sxy = 0.0; for (size_t i = 0; i < x.size(); ++i) { sx += x[i]; sy += y[i]; sxx += x[i] * x[i]; syy += y[i] * y[i]; sxy += x[i] * y[i]; } const double n = static_cast(x.size()); const double cov = sxy - sx * sy / n; const double vx = sxx - sx * sx / n; const double vy = syy - sy * sy / n; if (vx <= 0.0 || vy <= 0.0) return std::numeric_limits::quiet_NaN(); return cov / std::sqrt(vx * vy); } std::optional ScoreOperator( const gemmi::Op& op, const std::vector& merged, const std::unordered_map& lookup, const SearchSpaceGroupOptions& opt) { std::vector x; std::vector y; x.reserve(merged.size() / 2); y.reserve(merged.size() / 2); std::unordered_set used; used.reserve(merged.size() / 2); for (const auto& r : merged) { if (!ReflectionPassesFilters(r, opt)) continue; const auto key1 = CanonicalizeMergedHKL(r.h, r.k, r.l, opt.merge_friedel); if (used.find(key1) != used.end()) continue; const gemmi::Op::Miller hkl{{r.h, r.k, r.l}}; const auto hkl2 = op.apply_to_hkl(hkl); const auto key2 = CanonicalizeMergedHKL(hkl2[0], hkl2[1], hkl2[2], opt.merge_friedel); if (key1.h == key2.h && key1.k == key2.k && key1.l == key2.l && key1.plus == key2.plus) continue; auto it = lookup.find(key2); if (it == lookup.end()) continue; const auto* mate = it->second; if (mate == nullptr) continue; x.push_back(r.I); y.push_back(mate->I); used.insert(key1); used.insert(key2); } if (x.empty()) return std::nullopt; SpaceGroupOperatorScore out; out.op_triplet_hkl = op.as_hkl().triplet('h'); out.compared = static_cast(x.size()); out.cc = PearsonCC(x, y); out.accepted = (out.compared >= opt.min_pairs_per_operator && std::isfinite(out.cc) && out.cc >= opt.min_operator_cc); return out; } struct ResolutionBinAccumulator { double d_min_A = std::numeric_limits::infinity(); double d_max_A = 0.0; double absent_sum = 0.0; int absent_count = 0; double allowed_sum = 0.0; int allowed_count = 0; }; SpaceGroupAbsenceScore ScoreSystematicAbsences( const gemmi::GroupOps& gops, const std::vector& merged, const SearchSpaceGroupOptions& opt) { SpaceGroupAbsenceScore out; if (!opt.test_systematic_absences) return out; int n_bins = opt.absence_resolution_bins; if (n_bins <= 0) n_bins = 1; double min_inv_d2 = std::numeric_limits::infinity(); double max_inv_d2 = -std::numeric_limits::infinity(); for (const auto& r : merged) { if (!ReflectionPassesFilters(r, opt)) continue; const double inv_d2 = 1.0 / (r.d * r.d); min_inv_d2 = std::min(min_inv_d2, inv_d2); max_inv_d2 = std::max(max_inv_d2, inv_d2); } if (!std::isfinite(min_inv_d2) || !std::isfinite(max_inv_d2)) return out; if (max_inv_d2 < min_inv_d2) std::swap(max_inv_d2, min_inv_d2); std::vector bins(static_cast(n_bins)); auto bin_index = [&](double d) { if (n_bins == 1 || max_inv_d2 <= min_inv_d2) return 0; const double inv_d2 = 1.0 / (d * d); const double t = (inv_d2 - min_inv_d2) / (max_inv_d2 - min_inv_d2); int idx = static_cast(t * n_bins); if (idx < 0) idx = 0; if (idx >= n_bins) idx = n_bins - 1; return idx; }; for (const auto& r : merged) { if (!ReflectionPassesFilters(r, opt)) continue; const int idx = bin_index(r.d); auto& bin = bins[static_cast(idx)]; bin.d_min_A = std::min(bin.d_min_A, r.d); bin.d_max_A = std::max(bin.d_max_A, r.d); const double i_over_sigma = std::max(0.0, r.I / r.sigma); const gemmi::Op::Miller hkl{{r.h, r.k, r.l}}; if (gops.is_systematically_absent(hkl)) { bin.absent_sum += i_over_sigma; bin.absent_count += 1; out.absent_reflections += 1; } else { bin.allowed_sum += i_over_sigma; bin.allowed_count += 1; out.allowed_reflections += 1; } } double global_absent_sum = 0.0; double global_allowed_sum = 0.0; double weighted_ratio_sum = 0.0; int weighted_ratio_weight = 0; double worst_ratio = 0.0; bool any_decision_bin_failed = false; out.bins.reserve(bins.size()); for (const auto& bin : bins) { SpaceGroupAbsenceBinScore bin_score; if (std::isfinite(bin.d_min_A)) bin_score.d_min_A = bin.d_min_A; if (bin.d_max_A > 0.0) bin_score.d_max_A = bin.d_max_A; bin_score.absent_reflections = bin.absent_count; bin_score.allowed_reflections = bin.allowed_count; if (bin.absent_count > 0) bin_score.mean_absent_i_over_sigma = bin.absent_sum / static_cast(bin.absent_count); if (bin.allowed_count > 0) bin_score.mean_allowed_i_over_sigma = bin.allowed_sum / static_cast(bin.allowed_count); global_absent_sum += bin.absent_sum; global_allowed_sum += bin.allowed_sum; if (bin.absent_count >= opt.min_absent_reflections_per_bin && bin.allowed_count >= opt.min_allowed_reflections_per_bin && bin_score.mean_allowed_i_over_sigma > 0.0) { bin_score.used_for_decision = true; bin_score.absent_to_allowed_ratio = bin_score.mean_absent_i_over_sigma / bin_score.mean_allowed_i_over_sigma; bin_score.accepted = bin_score.absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin; out.compared_bins += 1; if (bin_score.accepted) out.accepted_bins += 1; else any_decision_bin_failed = true; weighted_ratio_sum += bin_score.absent_to_allowed_ratio * bin.absent_count; weighted_ratio_weight += bin.absent_count; worst_ratio = std::max(worst_ratio, bin_score.absent_to_allowed_ratio); } out.bins.push_back(bin_score); } if (out.absent_reflections > 0) out.mean_absent_i_over_sigma = global_absent_sum / static_cast(out.absent_reflections); if (out.allowed_reflections > 0) out.mean_allowed_i_over_sigma = global_allowed_sum / static_cast(out.allowed_reflections); if (weighted_ratio_weight > 0) { out.weighted_absent_to_allowed_ratio = weighted_ratio_sum / static_cast(weighted_ratio_weight); out.worst_absent_to_allowed_ratio = worst_ratio; out.accepted = !any_decision_bin_failed && out.weighted_absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio; } else if (out.absent_reflections == 0) { out.weighted_absent_to_allowed_ratio = 0.0; out.worst_absent_to_allowed_ratio = 0.0; out.accepted = true; } else if (out.mean_allowed_i_over_sigma > 0.0) { const double global_ratio = out.mean_absent_i_over_sigma / out.mean_allowed_i_over_sigma; out.weighted_absent_to_allowed_ratio = global_ratio; out.worst_absent_to_allowed_ratio = global_ratio; out.accepted = global_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin; } else { out.weighted_absent_to_allowed_ratio = std::numeric_limits::infinity(); out.worst_absent_to_allowed_ratio = std::numeric_limits::infinity(); out.accepted = false; } return out; } bool IsCenteringCompatible(char requested, char candidate) { if (requested == '\0') return true; return std::toupper(static_cast(requested)) == std::toupper(static_cast(candidate)); } bool IsCandidateSpaceGroup(const gemmi::SpaceGroup& sg, const std::optional& crystal_system, char centering) { if (!sg.is_reference_setting()) return false; if (!sg.is_sohncke()) return false; if (crystal_system.has_value() && sg.crystal_system() != crystal_system.value()) return false; if (!IsCenteringCompatible(centering, sg.centring_type())) return false; return true; } std::vector EnumerateCandidateSpaceGroups( const std::optional& crystal_system, char centering) { std::vector out; for (const auto& sg : gemmi::spacegroup_tables::main) { if (!IsCandidateSpaceGroup(sg, crystal_system, centering)) continue; out.push_back(sg); } std::sort(out.begin(), out.end(), [](const gemmi::SpaceGroup& a, const gemmi::SpaceGroup& b) { const int order_a = a.operations().derive_symmorphic().order(); const int order_b = b.operations().derive_symmorphic().order(); if (order_a != order_b) return order_a < order_b; return a.number < b.number; }); return out; } } SearchSpaceGroupResult SearchSpaceGroup( const std::vector& merged, const SearchSpaceGroupOptions& opt) { SearchSpaceGroupResult result; if (merged.empty()) return result; const auto lookup = BuildMergedLookup(merged, opt); const auto candidates = EnumerateCandidateSpaceGroups(opt.crystal_system, opt.centering); for (const auto& sg : candidates) { SpaceGroupCandidateScore score{.space_group = sg}; const gemmi::GroupOps gops_full = sg.operations(); const gemmi::GroupOps gops_rot = gops_full.derive_symmorphic(); double cc_sum = 0.0; int cc_count = 0; int compared_total = 0; double min_cc = std::numeric_limits::infinity(); for (const auto& op : gops_rot.sym_ops) { if (IsIdentityOp(op)) continue; if (IsParityChanging(op)) continue; auto op_score = ScoreOperator(op, merged, lookup, opt); if (!op_score.has_value()) continue; compared_total += op_score->compared; score.operator_scores.push_back(*op_score); if (op_score->compared >= opt.min_pairs_per_operator && std::isfinite(op_score->cc)) { cc_sum += op_score->cc; min_cc = std::min(min_cc, op_score->cc); cc_count += 1; if (op_score->accepted) score.accepted_operators += 1; } } score.absence_score = ScoreSystematicAbsences(gops_full, merged, opt); score.tested_operators = static_cast(score.operator_scores.size()); score.compared_total = compared_total; score.mean_cc = (cc_count > 0) ? (cc_sum / cc_count) : 0.0; score.min_cc = std::isfinite(min_cc) ? min_cc : 0.0; const bool trivial_group = (gops_rot.order() <= 1); const bool rotationally_accepted = trivial_group || ((score.tested_operators > 0) && (score.accepted_operators == score.tested_operators) && (score.compared_total >= opt.min_total_compared)); score.accepted = rotationally_accepted && score.absence_score.accepted; result.candidates.push_back(std::move(score)); } std::sort(result.candidates.begin(), result.candidates.end(), [](const SpaceGroupCandidateScore& a, const SpaceGroupCandidateScore& b) { if (a.accepted != b.accepted) return a.accepted > b.accepted; if (a.absence_score.weighted_absent_to_allowed_ratio != b.absence_score.weighted_absent_to_allowed_ratio) return a.absence_score.weighted_absent_to_allowed_ratio < b.absence_score.weighted_absent_to_allowed_ratio; if (a.absence_score.worst_absent_to_allowed_ratio != b.absence_score.worst_absent_to_allowed_ratio) return a.absence_score.worst_absent_to_allowed_ratio < b.absence_score.worst_absent_to_allowed_ratio; const int order_a = a.space_group.operations().derive_symmorphic().order(); const int order_b = b.space_group.operations().derive_symmorphic().order(); if (order_a != order_b) return order_a > order_b; if (a.absence_score.absent_reflections != b.absence_score.absent_reflections) return a.absence_score.absent_reflections > b.absence_score.absent_reflections; if (a.accepted_operators != b.accepted_operators) return a.accepted_operators > b.accepted_operators; if (a.min_cc != b.min_cc) return a.min_cc > b.min_cc; if (a.mean_cc != b.mean_cc) return a.mean_cc > b.mean_cc; return a.space_group.number > b.space_group.number; }); for (const auto& cand : result.candidates) { if (cand.accepted) { result.best_space_group = cand.space_group; break; } } return result; } std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result, size_t max_candidates_to_print) { std::ostringstream os; os << "Space-group candidates\n"; os << " " << std::setw(10) << "SG" << " " << std::setw(4) << "Acc" << " " << std::setw(8) << "" << " " << std::setw(8) << "minCC" << " " << std::setw(9) << "compared" << " " << std::setw(7) << "ops" << " " << std::setw(5) << "AbsOK" << " " << std::setw(8) << "Nabs" << " " << std::setw(8) << "Nallow" << " " << std::setw(8) << "Abs/All" << " " << std::setw(8) << "worst" << "\n"; os << " " << std::setw(10) << "----------" << " " << std::setw(4) << "----" << " " << std::setw(8) << "--------" << " " << std::setw(8) << "--------" << " " << std::setw(9) << "---------" << " " << std::setw(7) << "-------" << " " << std::setw(5) << "-----" << " " << std::setw(8) << "--------" << " " << std::setw(8) << "--------" << " " << std::setw(8) << "--------" << " " << std::setw(8) << "--------" << "\n"; const size_t n = std::min(max_candidates_to_print, result.candidates.size()); for (size_t i = 0; i < n; ++i) { const auto& c = result.candidates[i]; os << " " << std::setw(10) << c.space_group.short_name() << " " << std::setw(4) << (c.accepted ? "yes" : "no") << " " << std::setw(8) << std::fixed << std::setprecision(3) << c.mean_cc << " " << std::setw(8) << std::fixed << std::setprecision(3) << c.min_cc << " " << std::setw(9) << c.compared_total << " " << std::setw(3) << c.accepted_operators << "/" << std::setw(3) << c.tested_operators << " " << std::setw(5) << (c.absence_score.accepted ? "yes" : "no") << " " << std::setw(8) << c.absence_score.absent_reflections << " " << std::setw(8) << c.absence_score.allowed_reflections << " " << std::setw(8) << std::fixed << std::setprecision(3) << c.absence_score.weighted_absent_to_allowed_ratio << " " << std::setw(8) << std::fixed << std::setprecision(3) << c.absence_score.worst_absent_to_allowed_ratio << "\n"; } if (result.best_space_group.has_value()) os << "Best space group: " << result.best_space_group->short_name() << "\n"; else os << "Best space group: none accepted\n"; return os.str(); }