d043c98c3c
Build Packages / build:rpm (ubuntu2204) (push) Has been cancelled
Build Packages / build:rpm (ubuntu2404) (push) Has been cancelled
Build Packages / Generate python client (push) Has been cancelled
Build Packages / Build documentation (push) Has been cancelled
Build Packages / Unit tests (push) Has been cancelled
Build Packages / Create release (push) Has been cancelled
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Has been cancelled
Build Packages / build:rpm (rocky8) (push) Has been cancelled
Build Packages / build:rpm (rocky8_nocuda) (push) Has been cancelled
Build Packages / build:rpm (rocky9) (push) Has been cancelled
Build Packages / build:rpm (rocky9_sls9) (push) Has been cancelled
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Has been cancelled
Build Packages / build:rpm (rocky9_nocuda) (push) Has been cancelled
Build Packages / build:rpm (rocky8_sls9) (push) Has been cancelled
426 lines
14 KiB
C++
426 lines
14 KiB
C++
#include "SearchSpaceGroup.h"
|
|
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <cctype>
|
|
#include <iomanip>
|
|
#include <limits>
|
|
#include <sstream>
|
|
#include <tuple>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
namespace {
|
|
// Hashable identity of a merged reflection: three Miller indices plus a
// boolean flag that takes part in equality alongside the indices.
struct MergedHKLKey {
    int h = 0;
    int k = 0;
    int l = 0;
    bool plus = true;

    // Keys are equal only when all three indices and the flag agree.
    bool operator==(const MergedHKLKey& other) const noexcept {
        return std::tie(h, k, l, plus) == std::tie(other.h, other.k, other.l, other.plus);
    }
};
|
|
|
|
struct MergedHKLKeyHash {
|
|
size_t operator()(const MergedHKLKey& key) const noexcept {
|
|
auto mix = [](uint64_t x) {
|
|
x ^= x >> 33;
|
|
x *= 0xff51afd7ed558ccdULL;
|
|
x ^= x >> 33;
|
|
x *= 0xc4ceb9fe1a85ec53ULL;
|
|
x ^= x >> 33;
|
|
return x;
|
|
};
|
|
return static_cast<size_t>(
|
|
mix(static_cast<uint64_t>(key.h)) ^
|
|
(mix(static_cast<uint64_t>(key.k)) << 1) ^
|
|
(mix(static_cast<uint64_t>(key.l)) << 2) ^
|
|
(mix(static_cast<uint64_t>(key.plus ? 1 : 0)) << 3));
|
|
}
|
|
};
|
|
|
|
// True when the rotation matrix of `op` equals that of the identity operation.
// Only the rotation part is compared; the translation part is not examined.
bool IsIdentityOp(const gemmi::Op& op) {
    const gemmi::Op identity = gemmi::Op::identity();
    return identity.rot == op.rot;
}
|
|
|
|
// True when the rotation part of `op` has a negative determinant.
bool IsParityChanging(const gemmi::Op& op) {
    const auto determinant = op.det_rot();
    return determinant < 0;
}
|
|
|
|
// Builds the lookup key for the reflection (h, k, l).
//
// merge_friedel == true : the indices are used unchanged and plus is true.
// merge_friedel == false: of (h, k, l) and (-h, -k, -l), the lexicographically
//                         smaller triple is stored; plus is false when the
//                         inverted triple was chosen, true otherwise.
//
// NOTE(review): in both modes (h, k, l) and (-h, -k, -l) end up with different
// keys (different indices in the first mode, different `plus` in the second).
// Confirm this is the intended meaning of `merge_friedel`.
MergedHKLKey CanonicalizeMergedHKL(int h, int k, int l, bool merge_friedel) {
    if (merge_friedel)
        return MergedHKLKey{h, k, l, true};

    const auto original = std::make_tuple(h, k, l);
    const auto inverted = std::make_tuple(-h, -k, -l);
    if (inverted < original)
        return MergedHKLKey{-h, -k, -l, false};

    return MergedHKLKey{h, k, l, true};
}
|
|
|
|
bool ReflectionPassesFilters(const MergedReflection& r, const SearchSpaceGroupOptions& opt) {
|
|
if (!std::isfinite(r.I))
|
|
return false;
|
|
if (!std::isfinite(r.sigma) || r.sigma <= 0.0)
|
|
return false;
|
|
if (!std::isfinite(r.d) || r.d <= 0.0)
|
|
return false;
|
|
if (opt.d_min_limit_A > 0.0 && r.d < opt.d_min_limit_A)
|
|
return false;
|
|
if (opt.min_i_over_sigma > 0.0 && r.I / r.sigma < opt.min_i_over_sigma)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
std::unordered_map<MergedHKLKey, const MergedReflection*, MergedHKLKeyHash>
|
|
BuildMergedLookup(const std::vector<MergedReflection>& merged, const SearchSpaceGroupOptions& opt) {
|
|
std::unordered_map<MergedHKLKey, const MergedReflection*, MergedHKLKeyHash> out;
|
|
out.reserve(merged.size());
|
|
|
|
for (const auto& r : merged) {
|
|
if (!ReflectionPassesFilters(r, opt))
|
|
continue;
|
|
const auto key = CanonicalizeMergedHKL(r.h, r.k, r.l, opt.merge_friedel);
|
|
out.emplace(key, &r);
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
// Pearson correlation coefficient of two equally long samples.
//
// Returns quiet NaN when the sizes differ, when fewer than two points are
// given, or when either sample has no variance.
//
// The sums are accumulated around the sample means (two passes) instead of
// using sum(x*x) - sum(x)^2 / n. The latter subtracts two nearly equal large
// numbers when the values are large compared to their spread, which destroys
// the variance and covariance through cancellation.
double PearsonCC(const std::vector<double>& x, const std::vector<double>& y) {
    const size_t count = x.size();
    if (count != y.size() || count < 2)
        return std::numeric_limits<double>::quiet_NaN();

    const double n = static_cast<double>(count);

    // Pass 1: sample means.
    double sum_x = 0.0;
    double sum_y = 0.0;
    for (size_t i = 0; i < count; ++i) {
        sum_x += x[i];
        sum_y += y[i];
    }
    const double mean_x = sum_x / n;
    const double mean_y = sum_y / n;

    // Pass 2: centred second moments.
    double vx = 0.0;
    double vy = 0.0;
    double cov = 0.0;
    for (size_t i = 0; i < count; ++i) {
        const double dx = x[i] - mean_x;
        const double dy = y[i] - mean_y;
        vx += dx * dx;
        vy += dy * dy;
        cov += dx * dy;
    }

    if (vx <= 0.0 || vy <= 0.0)
        return std::numeric_limits<double>::quiet_NaN();

    // Separate square roots keep the denominator in range even when the
    // product vx * vy would overflow or underflow.
    return cov / (std::sqrt(vx) * std::sqrt(vy));
}
|
|
|
|
std::optional<SpaceGroupOperatorScore> ScoreOperator(
|
|
const gemmi::Op& op,
|
|
const std::vector<MergedReflection>& merged,
|
|
const std::unordered_map<MergedHKLKey, const MergedReflection*, MergedHKLKeyHash>& lookup,
|
|
const SearchSpaceGroupOptions& opt) {
|
|
|
|
std::vector<double> x;
|
|
std::vector<double> y;
|
|
x.reserve(merged.size() / 2);
|
|
y.reserve(merged.size() / 2);
|
|
|
|
std::unordered_set<MergedHKLKey, MergedHKLKeyHash> used;
|
|
used.reserve(merged.size() / 2);
|
|
|
|
for (const auto& r : merged) {
|
|
if (!ReflectionPassesFilters(r, opt))
|
|
continue;
|
|
|
|
const auto key1 = CanonicalizeMergedHKL(r.h, r.k, r.l, opt.merge_friedel);
|
|
if (used.find(key1) != used.end())
|
|
continue;
|
|
|
|
const gemmi::Op::Miller hkl{{r.h, r.k, r.l}};
|
|
const auto hkl2 = op.apply_to_hkl(hkl);
|
|
const auto key2 = CanonicalizeMergedHKL(hkl2[0], hkl2[1], hkl2[2], opt.merge_friedel);
|
|
|
|
if (key1.h == key2.h && key1.k == key2.k && key1.l == key2.l && key1.plus == key2.plus)
|
|
continue;
|
|
|
|
auto it = lookup.find(key2);
|
|
if (it == lookup.end())
|
|
continue;
|
|
|
|
const auto* mate = it->second;
|
|
if (mate == nullptr)
|
|
continue;
|
|
|
|
x.push_back(r.I);
|
|
y.push_back(mate->I);
|
|
|
|
used.insert(key1);
|
|
used.insert(key2);
|
|
}
|
|
|
|
if (x.empty())
|
|
return std::nullopt;
|
|
|
|
SpaceGroupOperatorScore out;
|
|
out.op_triplet_hkl = op.as_hkl().triplet('h');
|
|
out.compared = static_cast<int>(x.size());
|
|
out.cc = PearsonCC(x, y);
|
|
out.accepted = (out.compared >= opt.min_pairs_per_operator &&
|
|
std::isfinite(out.cc) &&
|
|
out.cc >= opt.min_operator_cc);
|
|
return out;
|
|
}
|
|
|
|
// Collects statistics on observed reflections that `gops` reports as
// systematically absent.
//
// When opt.test_systematic_absences is false, a default-constructed score is
// returned untouched. Otherwise every reflection that passes the filters and
// is systematically absent is counted in violating_reflections, and its
// I/sigma (negative values clamped to zero) contributes to mean_i_over_sigma.
// The score is accepted when the count does not exceed
// opt.max_absent_violations and the mean does not exceed
// opt.max_mean_absent_i_over_sigma.
//
// NOTE(review): ReflectionPassesFilters also applies opt.min_i_over_sigma, so
// weak reflections are excluded before this test - confirm that is intended.
SpaceGroupAbsenceScore ScoreSystematicAbsences(
    const gemmi::GroupOps& gops,
    const std::vector<MergedReflection>& merged,
    const SearchSpaceGroupOptions& opt) {

    SpaceGroupAbsenceScore out;
    if (!opt.test_systematic_absences)
        return out;

    double i_over_sigma_total = 0.0;
    for (const MergedReflection& refl : merged) {
        if (!ReflectionPassesFilters(refl, opt))
            continue;

        const gemmi::Op::Miller hkl{{refl.h, refl.k, refl.l}};
        if (gops.is_systematically_absent(hkl)) {
            out.violating_reflections += 1;
            i_over_sigma_total += std::max(0.0, refl.I / refl.sigma);
        }
    }

    out.mean_i_over_sigma = (out.violating_reflections > 0)
                                ? i_over_sigma_total / out.violating_reflections
                                : 0.0;

    const bool count_ok = out.violating_reflections <= opt.max_absent_violations;
    const bool mean_ok = out.mean_i_over_sigma <= opt.max_mean_absent_i_over_sigma;
    out.accepted = count_ok && mean_ok;
    return out;
}
|
|
|
|
// Case-insensitive comparison of two centering letters.
// A requested value of '\0' acts as a wildcard and matches every candidate.
bool IsCenteringCompatible(char requested, char candidate) {
    // toupper needs a value representable as unsigned char, hence the cast.
    const auto upper = [](char c) { return std::toupper(static_cast<unsigned char>(c)); };
    return requested == '\0' || upper(requested) == upper(candidate);
}
|
|
|
|
// A space group is a candidate when it is a reference setting, is a Sohncke
// group, belongs to the requested crystal system (if one is given) and has a
// centering letter compatible with `centering`.
bool IsCandidateSpaceGroup(const gemmi::SpaceGroup& sg,
                           const std::optional<gemmi::CrystalSystem>& crystal_system,
                           char centering) {
    return sg.is_reference_setting() &&
           sg.is_sohncke() &&
           (!crystal_system.has_value() || sg.crystal_system() == crystal_system.value()) &&
           IsCenteringCompatible(centering, sg.centring_type());
}
|
|
|
|
std::vector<gemmi::SpaceGroup> EnumerateCandidateSpaceGroups(
|
|
const std::optional<gemmi::CrystalSystem>& crystal_system,
|
|
char centering) {
|
|
|
|
std::vector<gemmi::SpaceGroup> out;
|
|
|
|
for (const auto& sg : gemmi::spacegroup_tables::main) {
|
|
if (!IsCandidateSpaceGroup(sg, crystal_system, centering))
|
|
continue;
|
|
out.push_back(sg);
|
|
}
|
|
|
|
std::sort(out.begin(), out.end(),
|
|
[](const gemmi::SpaceGroup& a, const gemmi::SpaceGroup& b) {
|
|
const int order_a = a.operations().derive_symmorphic().order();
|
|
const int order_b = b.operations().derive_symmorphic().order();
|
|
if (order_a != order_b)
|
|
return order_a < order_b;
|
|
return a.number < b.number;
|
|
});
|
|
return out;
|
|
}
|
|
}
|
|
|
|
// Scores every candidate space group against the merged reflections and picks
// the best accepted one.
//
// For each candidate, every operator of the derived symmorphic group that is
// neither the identity nor parity-changing is scored with ScoreOperator, and
// the full group is checked with ScoreSystematicAbsences. A candidate is
// accepted when its absence score is accepted and either the symmorphic group
// has order <= 1, or at least one operator was scored, all scored operators
// were accepted, and the total number of compared pairs reaches
// opt.min_total_compared.
//
// Candidates are returned sorted: accepted first, then lower mean I/sigma of
// absent reflections, fewer absent reflections, higher symmorphic order, more
// accepted operators, higher minimum CC, higher mean CC, lower number.
// best_space_group is the first accepted candidate in that order and stays
// empty when none is accepted. An empty `merged` yields an empty result.
//
// The symmorphic order needed for ranking is taken from the group already
// built in the scoring loop, so the sort comparator does not have to
// regenerate the group operations on every comparison.
SearchSpaceGroupResult SearchSpaceGroup(
    const std::vector<MergedReflection>& merged,
    const SearchSpaceGroupOptions& opt) {

    SearchSpaceGroupResult result;
    if (merged.empty())
        return result;

    const auto lookup = BuildMergedLookup(merged, opt);
    const auto candidates = EnumerateCandidateSpaceGroups(opt.crystal_system, opt.centering);

    // Candidate score together with its precomputed ranking key.
    struct RankedCandidate {
        SpaceGroupCandidateScore score;
        int rotation_order;
    };
    std::vector<RankedCandidate> ranked;
    ranked.reserve(candidates.size());

    for (const auto& sg : candidates) {
        SpaceGroupCandidateScore score{.space_group = sg};

        const gemmi::GroupOps gops_full = sg.operations();
        const gemmi::GroupOps gops_rot = gops_full.derive_symmorphic();
        const int rotation_order = gops_rot.order();

        double cc_sum = 0.0;
        int cc_count = 0;
        int compared_total = 0;
        double min_cc = std::numeric_limits<double>::infinity();

        for (const auto& op : gops_rot.sym_ops) {
            if (IsIdentityOp(op))
                continue;
            if (IsParityChanging(op))
                continue;

            auto op_score = ScoreOperator(op, merged, lookup, opt);
            if (!op_score.has_value())
                continue;

            compared_total += op_score->compared;
            score.operator_scores.push_back(*op_score);

            // Only operators with enough pairs and a finite CC enter the CC statistics.
            if (op_score->compared >= opt.min_pairs_per_operator && std::isfinite(op_score->cc)) {
                cc_sum += op_score->cc;
                min_cc = std::min(min_cc, op_score->cc);
                cc_count += 1;
                if (op_score->accepted)
                    score.accepted_operators += 1;
            }
        }

        score.absence_score = ScoreSystematicAbsences(gops_full, merged, opt);
        score.tested_operators = static_cast<int>(score.operator_scores.size());
        score.compared_total = compared_total;
        score.mean_cc = (cc_count > 0) ? (cc_sum / cc_count) : 0.0;
        // min_cc is still +infinity when no operator contributed; report 0 then.
        score.min_cc = std::isfinite(min_cc) ? min_cc : 0.0;

        const bool trivial_group = (rotation_order <= 1);
        const bool rotationally_accepted =
            trivial_group ||
            ((score.tested_operators > 0) &&
             (score.accepted_operators == score.tested_operators) &&
             (score.compared_total >= opt.min_total_compared));

        score.accepted = rotationally_accepted && score.absence_score.accepted;

        ranked.push_back(RankedCandidate{std::move(score), rotation_order});
    }

    std::sort(ranked.begin(), ranked.end(),
              [](const RankedCandidate& lhs, const RankedCandidate& rhs) {
                  const SpaceGroupCandidateScore& a = lhs.score;
                  const SpaceGroupCandidateScore& b = rhs.score;

                  if (a.accepted != b.accepted)
                      return a.accepted > b.accepted;

                  if (a.absence_score.mean_i_over_sigma != b.absence_score.mean_i_over_sigma)
                      return a.absence_score.mean_i_over_sigma < b.absence_score.mean_i_over_sigma;

                  if (a.absence_score.violating_reflections != b.absence_score.violating_reflections)
                      return a.absence_score.violating_reflections < b.absence_score.violating_reflections;

                  if (lhs.rotation_order != rhs.rotation_order)
                      return lhs.rotation_order > rhs.rotation_order;

                  if (a.accepted_operators != b.accepted_operators)
                      return a.accepted_operators > b.accepted_operators;
                  if (a.min_cc != b.min_cc)
                      return a.min_cc > b.min_cc;
                  if (a.mean_cc != b.mean_cc)
                      return a.mean_cc > b.mean_cc;

                  return a.space_group.number < b.space_group.number;
              });

    for (auto& entry : ranked)
        result.candidates.push_back(std::move(entry.score));

    for (const auto& cand : result.candidates) {
        if (cand.accepted) {
            result.best_space_group = cand.space_group;
            break;
        }
    }

    return result;
}
|
|
|
|
std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
|
|
size_t max_candidates_to_print) {
|
|
std::ostringstream os;
|
|
|
|
os << "Space-group candidates\n";
|
|
os << " "
|
|
<< std::setw(10) << "SG"
|
|
<< " "
|
|
<< std::setw(4) << "Acc"
|
|
<< " "
|
|
<< std::setw(8) << "<CC>"
|
|
<< " "
|
|
<< std::setw(8) << "minCC"
|
|
<< " "
|
|
<< std::setw(9) << "compared"
|
|
<< " "
|
|
<< std::setw(7) << "ops"
|
|
<< " "
|
|
<< std::setw(8) << "absent"
|
|
<< " "
|
|
<< std::setw(10) << "<I/sig>abs"
|
|
<< "\n";
|
|
|
|
os << " "
|
|
<< std::setw(10) << "----------"
|
|
<< " "
|
|
<< std::setw(4) << "----"
|
|
<< " "
|
|
<< std::setw(8) << "--------"
|
|
<< " "
|
|
<< std::setw(8) << "--------"
|
|
<< " "
|
|
<< std::setw(9) << "---------"
|
|
<< " "
|
|
<< std::setw(7) << "-------"
|
|
<< " "
|
|
<< std::setw(8) << "--------"
|
|
<< " "
|
|
<< std::setw(10) << "----------"
|
|
<< "\n";
|
|
|
|
const size_t n = std::min(max_candidates_to_print, result.candidates.size());
|
|
for (size_t i = 0; i < n; ++i) {
|
|
const auto& c = result.candidates[i];
|
|
os << " "
|
|
<< std::setw(10) << c.space_group.short_name()
|
|
<< " "
|
|
<< std::setw(4) << (c.accepted ? "yes" : "no")
|
|
<< " "
|
|
<< std::setw(8) << std::fixed << std::setprecision(3) << c.mean_cc
|
|
<< " "
|
|
<< std::setw(8) << std::fixed << std::setprecision(3) << c.min_cc
|
|
<< " "
|
|
<< std::setw(9) << c.compared_total
|
|
<< " "
|
|
<< std::setw(3) << c.accepted_operators << "/" << std::setw(3) << c.tested_operators
|
|
<< " "
|
|
<< std::setw(8) << c.absence_score.violating_reflections
|
|
<< " "
|
|
<< std::setw(10) << std::fixed << std::setprecision(2) << c.absence_score.mean_i_over_sigma
|
|
<< "\n";
|
|
}
|
|
|
|
if (result.best_space_group.has_value())
|
|
os << "Best space group: " << result.best_space_group->short_name() << "\n";
|
|
else
|
|
os << "Best space group: none accepted\n";
|
|
|
|
return os.str();
|
|
} |