Files
Jungfraujoch/image_analysis/scale_merge/SearchSpaceGroup.cpp
leonarski_f 166fcdb68f
All checks were successful
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 11m20s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 10m46s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 11m27s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 12m32s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 10m57s
Build Packages / build:rpm (rocky8) (push) Successful in 11m54s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 13m9s
Build Packages / build:rpm (rocky9) (push) Successful in 12m37s
Build Packages / Generate python client (push) Successful in 24s
Build Packages / Create release (push) Has been skipped
Build Packages / Build documentation (push) Successful in 57s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 9m12s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 8m4s
Build Packages / Unit tests (push) Successful in 1h17m43s
v1.0.0-rc.131 (#39)
This is an UNSTABLE release. It contains significant modifications and bug fixes; if things go wrong, it is better to revert to 1.0.0-rc.124.

* jfjoch_broker: Fix bug in saving JUNGFRAU calibration (pedestal/pedestalRMS)
* jfjoch_viewer: Fix calibration (pedestal) images being open flipped
* jfjoch_process: Add space group detection (EXPERIMENTAL)

Reviewed-on: #39
2026-03-07 11:34:04 +01:00

583 lines
20 KiB
C++

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "SearchSpaceGroup.h"
#include <algorithm>
#include <array>
#include <cmath>
#include <cctype>
#include <iomanip>
#include <limits>
#include <sstream>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace {
/// Key identifying one merged reflection in the hash containers of this file.
/// All four members take part in equality (and in MergedHKLKeyHash), so two
/// keys with the same (h, k, l) but a different `plus` flag are distinct.
struct MergedHKLKey {
    int h = 0;
    int k = 0;
    int l = 0;
    bool plus = true;

    bool operator==(const MergedHKLKey& other) const noexcept {
        return std::tie(h, k, l, plus) == std::tie(other.h, other.k, other.l, other.plus);
    }
};

/// Hash functor for MergedHKLKey, usable with std::unordered_map / unordered_set.
struct MergedHKLKeyHash {
    /// 64-bit bit-mixing step: three xor-shifts by 33 interleaved with two
    /// multiplications (the constants match the MurmurHash3 64-bit finalizer).
    static uint64_t Mix(uint64_t v) noexcept {
        v ^= v >> 33;
        v *= 0xff51afd7ed558ccdULL;
        v ^= v >> 33;
        v *= 0xc4ceb9fe1a85ec53ULL;
        v ^= v >> 33;
        return v;
    }

    /// Mixes every member separately, shifts each contribution by a different
    /// amount (0..3 bits) and XORs them together.
    size_t operator()(const MergedHKLKey& key) const noexcept {
        const uint64_t part_h = Mix(static_cast<uint64_t>(key.h));
        const uint64_t part_k = Mix(static_cast<uint64_t>(key.k)) << 1;
        const uint64_t part_l = Mix(static_cast<uint64_t>(key.l)) << 2;
        const uint64_t part_plus = Mix(static_cast<uint64_t>(key.plus ? 1 : 0)) << 3;
        return static_cast<size_t>(part_h ^ part_k ^ part_l ^ part_plus);
    }
};
/// Returns true when the rotation part of `op` equals the rotation part of
/// gemmi's identity operator. Only `rot` is compared; the translation part of
/// `op` is not looked at.
bool IsIdentityOp(const gemmi::Op& op) {
    const gemmi::Op identity = gemmi::Op::identity();
    return identity.rot == op.rot;
}
/// Returns true when `op.det_rot()` is negative, i.e. the operator is treated
/// as an improper (handedness-inverting) operation by the caller.
bool IsParityChanging(const gemmi::Op& op) {
    const auto determinant = op.det_rot();
    return determinant < 0;
}
/// Maps Miller indices to the key used to look up merged reflections.
///
/// @param h,k,l          Miller indices of the reflection.
/// @param merge_friedel  When false the indices are used verbatim. When true,
///                       both members of a Friedel pair, (h,k,l) and
///                       (-h,-k,-l), map to ONE key: the lexicographically
///                       smaller of the two triples.
/// @return Key with `plus == true` in every case.
///
/// `plus` is deliberately left at `true` for the negated triple as well.
/// MergedHKLKey compares and hashes `plus`, so flipping it here would give
/// (h,k,l) and (-h,-k,-l) different keys ((−h,−k,−l,false) versus
/// (−h,−k,−l,true)) and Friedel mates would never be found under the same key,
/// making `merge_friedel` a no-op.
MergedHKLKey CanonicalizeMergedHKL(int h, int k, int l, bool merge_friedel) {
    if (merge_friedel && std::make_tuple(-h, -k, -l) < std::make_tuple(h, k, l))
        return MergedHKLKey{-h, -k, -l, true};
    return MergedHKLKey{h, k, l, true};
}
/// Decides whether a merged reflection takes part in the analysis.
///
/// A reflection is kept only if
///  - I is finite,
///  - sigma and d are finite and strictly positive,
///  - d is not below `opt.d_min_limit_A` (cut applied only when the limit is > 0),
///  - I/sigma is not below `opt.min_i_over_sigma` (cut applied only when > 0).
bool ReflectionPassesFilters(const MergedReflection& r, const SearchSpaceGroupOptions& opt) {
    const bool values_usable =
        std::isfinite(r.I) &&
        std::isfinite(r.sigma) && !(r.sigma <= 0.0) &&
        std::isfinite(r.d) && !(r.d <= 0.0);
    if (!values_usable)
        return false;

    const bool beyond_resolution_limit =
        opt.d_min_limit_A > 0.0 && r.d < opt.d_min_limit_A;
    if (beyond_resolution_limit)
        return false;

    const bool too_weak =
        opt.min_i_over_sigma > 0.0 && r.I / r.sigma < opt.min_i_over_sigma;
    return !too_weak;
}
std::unordered_map<MergedHKLKey, const MergedReflection*, MergedHKLKeyHash>
BuildMergedLookup(const std::vector<MergedReflection>& merged, const SearchSpaceGroupOptions& opt) {
std::unordered_map<MergedHKLKey, const MergedReflection*, MergedHKLKeyHash> out;
out.reserve(merged.size());
for (const auto& r : merged) {
if (!ReflectionPassesFilters(r, opt))
continue;
const auto key = CanonicalizeMergedHKL(r.h, r.k, r.l, opt.merge_friedel);
out.emplace(key, &r);
}
return out;
}
/// Pearson correlation coefficient of two equally long samples.
///
/// @param x,y  Paired observations; element i of `x` belongs to element i of `y`.
/// @return Correlation in [-1, 1], or quiet NaN when the sizes differ, fewer
///         than two pairs are given, or either sample has no variance (this
///         also covers non-finite input, which makes the variance NaN).
///
/// The sums of squares are accumulated around the means (two passes) instead
/// of using sum(x*x) - sum(x)^2/n. The raw-moment form subtracts two nearly
/// equal large numbers when the values are large compared to their spread and
/// can then return garbage or a spurious "zero variance".
double PearsonCC(const std::vector<double>& x, const std::vector<double>& y) {
    constexpr double kNaN = std::numeric_limits<double>::quiet_NaN();

    const std::size_t count = x.size();
    if (count != y.size() || count < 2)
        return kNaN;

    // Pass 1: means.
    double sum_x = 0.0;
    double sum_y = 0.0;
    for (std::size_t i = 0; i < count; ++i) {
        sum_x += x[i];
        sum_y += y[i];
    }
    const double n = static_cast<double>(count);
    const double mean_x = sum_x / n;
    const double mean_y = sum_y / n;

    // Pass 2: centred second moments.
    double var_x = 0.0;
    double var_y = 0.0;
    double cov = 0.0;
    for (std::size_t i = 0; i < count; ++i) {
        const double dx = x[i] - mean_x;
        const double dy = y[i] - mean_y;
        var_x += dx * dx;
        var_y += dy * dy;
        cov += dx * dy;
    }

    // Negated comparison so that NaN variances are rejected as well.
    if (!(var_x > 0.0) || !(var_y > 0.0))
        return kNaN;

    // Separate square roots avoid overflow of var_x * var_y; the clamp removes
    // rounding excursions just outside [-1, 1] (a NaN passes through unchanged).
    const double cc = cov / (std::sqrt(var_x) * std::sqrt(var_y));
    return std::clamp(cc, -1.0, 1.0);
}
/// Scores one symmetry operator by correlating the intensities of reflections
/// with the intensities of their images under that operator.
///
/// For every reflection that passes the filters, the operator is applied to its
/// Miller indices and the image is looked up in `lookup`. Reflections that map
/// onto their own key are skipped, as are reflections whose key was already
/// consumed by an earlier pair.
///
/// @return std::nullopt when not a single pair could be formed; otherwise the
///         operator triplet, the number of pairs, their Pearson CC and the
///         acceptance flag (enough pairs, finite CC, CC >= opt.min_operator_cc).
std::optional<SpaceGroupOperatorScore> ScoreOperator(
        const gemmi::Op& op,
        const std::vector<MergedReflection>& merged,
        const std::unordered_map<MergedHKLKey, const MergedReflection*, MergedHKLKeyHash>& lookup,
        const SearchSpaceGroupOptions& opt) {
    const size_t expected_pairs = merged.size() / 2;

    std::vector<double> source_intensity;
    std::vector<double> image_intensity;
    source_intensity.reserve(expected_pairs);
    image_intensity.reserve(expected_pairs);

    // Keys that already belong to a recorded pair.
    std::unordered_set<MergedHKLKey, MergedHKLKeyHash> consumed;
    consumed.reserve(expected_pairs);

    for (const auto& reflection : merged) {
        if (!ReflectionPassesFilters(reflection, opt))
            continue;

        const auto source_key =
            CanonicalizeMergedHKL(reflection.h, reflection.k, reflection.l, opt.merge_friedel);
        if (consumed.count(source_key) != 0)
            continue;

        const gemmi::Op::Miller source_hkl{{reflection.h, reflection.k, reflection.l}};
        const auto image_hkl = op.apply_to_hkl(source_hkl);
        const auto image_key =
            CanonicalizeMergedHKL(image_hkl[0], image_hkl[1], image_hkl[2], opt.merge_friedel);

        // A reflection mapped onto itself carries no information about the operator.
        if (source_key == image_key)
            continue;

        const auto found = lookup.find(image_key);
        if (found == lookup.end() || found->second == nullptr)
            continue;

        source_intensity.push_back(reflection.I);
        image_intensity.push_back(found->second->I);
        consumed.insert(source_key);
        consumed.insert(image_key);
    }

    if (source_intensity.empty())
        return std::nullopt;

    SpaceGroupOperatorScore out;
    out.op_triplet_hkl = op.as_hkl().triplet('h');
    out.compared = static_cast<int>(source_intensity.size());
    out.cc = PearsonCC(source_intensity, image_intensity);

    const bool enough_pairs = out.compared >= opt.min_pairs_per_operator;
    out.accepted = enough_pairs && std::isfinite(out.cc) && out.cc >= opt.min_operator_cc;
    return out;
}
/// Running totals for one resolution bin of the systematic-absence test.
struct ResolutionBinAccumulator {
    // Smallest / largest d-spacing seen in the bin; the defaults mark "empty".
    double d_min_A{std::numeric_limits<double>::infinity()};
    double d_max_A{0.0};
    // Accumulated value and count for reflections flagged as systematically absent.
    double absent_sum{0.0};
    int absent_count{0};
    // Accumulated value and count for all other (allowed) reflections.
    double allowed_sum{0.0};
    int allowed_count{0};
};
// Tests whether reflections that `gops` flags as systematically absent are
// indeed weak compared with the allowed ones.
//
// Filtered reflections are sorted into `opt.absence_resolution_bins` bins that
// are equally wide in 1/d^2. Per bin, the mean I/sigma (negative values clamped
// to 0) of absent and of allowed reflections is compared; the per-bin results
// are then combined into a global verdict.
//
// Returns a default-constructed score when the test is disabled or when no
// reflection passes the filters.
SpaceGroupAbsenceScore ScoreSystematicAbsences(
const gemmi::GroupOps& gops,
const std::vector<MergedReflection>& merged,
const SearchSpaceGroupOptions& opt) {
SpaceGroupAbsenceScore out;
if (!opt.test_systematic_absences)
return out;
// A non-positive bin count falls back to a single bin.
int n_bins = opt.absence_resolution_bins;
if (n_bins <= 0)
n_bins = 1;
// First pass: range of 1/d^2 over the reflections that pass the filters.
double min_inv_d2 = std::numeric_limits<double>::infinity();
double max_inv_d2 = -std::numeric_limits<double>::infinity();
for (const auto& r : merged) {
if (!ReflectionPassesFilters(r, opt))
continue;
const double inv_d2 = 1.0 / (r.d * r.d);
min_inv_d2 = std::min(min_inv_d2, inv_d2);
max_inv_d2 = std::max(max_inv_d2, inv_d2);
}
// Still at the +/-infinity sentinels: nothing passed the filters.
if (!std::isfinite(min_inv_d2) || !std::isfinite(max_inv_d2))
return out;
// Defensive only; after the loop above max >= min is expected.
if (max_inv_d2 < min_inv_d2)
std::swap(max_inv_d2, min_inv_d2);
std::vector<ResolutionBinAccumulator> bins(static_cast<size_t>(n_bins));
// Maps a d-spacing to its bin; the result is clamped to [0, n_bins - 1] so
// that the reflection at max_inv_d2 (t == 1) lands in the last bin.
auto bin_index = [&](double d) {
if (n_bins == 1 || max_inv_d2 <= min_inv_d2)
return 0;
const double inv_d2 = 1.0 / (d * d);
const double t = (inv_d2 - min_inv_d2) / (max_inv_d2 - min_inv_d2);
int idx = static_cast<int>(t * n_bins);
if (idx < 0)
idx = 0;
if (idx >= n_bins)
idx = n_bins - 1;
return idx;
};
// Second pass: accumulate I/sigma per bin, split into absent and allowed.
for (const auto& r : merged) {
if (!ReflectionPassesFilters(r, opt))
continue;
const int idx = bin_index(r.d);
auto& bin = bins[static_cast<size_t>(idx)];
bin.d_min_A = std::min(bin.d_min_A, r.d);
bin.d_max_A = std::max(bin.d_max_A, r.d);
// Negative I/sigma counts as 0.
const double i_over_sigma = std::max(0.0, r.I / r.sigma);
const gemmi::Op::Miller hkl{{r.h, r.k, r.l}};
if (gops.is_systematically_absent(hkl)) {
bin.absent_sum += i_over_sigma;
bin.absent_count += 1;
out.absent_reflections += 1;
} else {
bin.allowed_sum += i_over_sigma;
bin.allowed_count += 1;
out.allowed_reflections += 1;
}
}
double global_absent_sum = 0.0;
double global_allowed_sum = 0.0;
// Ratio averaged over the decision bins, weighted by absent-reflection count.
double weighted_ratio_sum = 0.0;
int weighted_ratio_weight = 0;
double worst_ratio = 0.0;
bool any_decision_bin_failed = false;
out.bins.reserve(bins.size());
for (const auto& bin : bins) {
SpaceGroupAbsenceBinScore bin_score;
// Resolution limits are copied only for bins that received reflections;
// otherwise bin_score keeps its defaults.
if (std::isfinite(bin.d_min_A))
bin_score.d_min_A = bin.d_min_A;
if (bin.d_max_A > 0.0)
bin_score.d_max_A = bin.d_max_A;
bin_score.absent_reflections = bin.absent_count;
bin_score.allowed_reflections = bin.allowed_count;
if (bin.absent_count > 0)
bin_score.mean_absent_i_over_sigma = bin.absent_sum / static_cast<double>(bin.absent_count);
if (bin.allowed_count > 0)
bin_score.mean_allowed_i_over_sigma = bin.allowed_sum / static_cast<double>(bin.allowed_count);
global_absent_sum += bin.absent_sum;
global_allowed_sum += bin.allowed_sum;
// A bin takes part in the decision only with enough absent AND allowed
// reflections and a positive allowed mean (the ratio's denominator).
if (bin.absent_count >= opt.min_absent_reflections_per_bin &&
bin.allowed_count >= opt.min_allowed_reflections_per_bin &&
bin_score.mean_allowed_i_over_sigma > 0.0) {
bin_score.used_for_decision = true;
bin_score.absent_to_allowed_ratio =
bin_score.mean_absent_i_over_sigma / bin_score.mean_allowed_i_over_sigma;
bin_score.accepted =
bin_score.absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin;
out.compared_bins += 1;
if (bin_score.accepted)
out.accepted_bins += 1;
else
any_decision_bin_failed = true;
weighted_ratio_sum += bin_score.absent_to_allowed_ratio * bin.absent_count;
weighted_ratio_weight += bin.absent_count;
worst_ratio = std::max(worst_ratio, bin_score.absent_to_allowed_ratio);
}
out.bins.push_back(bin_score);
}
if (out.absent_reflections > 0)
out.mean_absent_i_over_sigma = global_absent_sum / static_cast<double>(out.absent_reflections);
if (out.allowed_reflections > 0)
out.mean_allowed_i_over_sigma = global_allowed_sum / static_cast<double>(out.allowed_reflections);
// Global verdict, in order of preference:
//  1. at least one decision bin: every such bin must pass and the weighted
//     ratio must stay within the global threshold;
//  2. no absent reflections at all: nothing contradicts the group, accept;
//  3. no decision bin but a usable allowed mean: fall back to the global ratio;
//  4. otherwise: no basis for a comparison, reject with infinite ratios.
if (weighted_ratio_weight > 0) {
out.weighted_absent_to_allowed_ratio = weighted_ratio_sum / static_cast<double>(weighted_ratio_weight);
out.worst_absent_to_allowed_ratio = worst_ratio;
out.accepted =
!any_decision_bin_failed &&
out.weighted_absent_to_allowed_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio;
} else if (out.absent_reflections == 0) {
out.weighted_absent_to_allowed_ratio = 0.0;
out.worst_absent_to_allowed_ratio = 0.0;
out.accepted = true;
} else if (out.mean_allowed_i_over_sigma > 0.0) {
const double global_ratio = out.mean_absent_i_over_sigma / out.mean_allowed_i_over_sigma;
out.weighted_absent_to_allowed_ratio = global_ratio;
out.worst_absent_to_allowed_ratio = global_ratio;
// NOTE(review): this global ratio is checked against the per-bin threshold
// (..._in_any_bin), whereas branch 1 checks the global value against
// max_absent_to_allowed_i_over_sigma_ratio - confirm this is intended.
out.accepted = global_ratio <= opt.max_absent_to_allowed_i_over_sigma_ratio_in_any_bin;
} else {
out.weighted_absent_to_allowed_ratio = std::numeric_limits<double>::infinity();
out.worst_absent_to_allowed_ratio = std::numeric_limits<double>::infinity();
out.accepted = false;
}
return out;
}
/// Case-insensitive comparison of two centering letters.
/// A `requested` value of '\0' means "no restriction" and matches anything.
bool IsCenteringCompatible(char requested, char candidate) {
    // Cast through unsigned char: std::toupper is undefined for negative values.
    const auto upper = [](char c) {
        return std::toupper(static_cast<unsigned char>(c));
    };
    return requested == '\0' || upper(requested) == upper(candidate);
}
/// Decides whether a table entry is considered by the search.
///
/// The entry must be a reference setting, must satisfy gemmi's `is_sohncke()`,
/// must belong to `crystal_system` when one is given, and must have a centering
/// compatible with `centering` ('\0' accepts any centering).
bool IsCandidateSpaceGroup(const gemmi::SpaceGroup& sg,
                           const std::optional<gemmi::CrystalSystem>& crystal_system,
                           char centering) {
    return sg.is_reference_setting()
        && sg.is_sohncke()
        && (!crystal_system.has_value() || sg.crystal_system() == crystal_system.value())
        && IsCenteringCompatible(centering, sg.centring_type());
}
/// Collects every entry of gemmi's main space-group table that qualifies as a
/// candidate and returns them sorted by ascending order of the derived
/// symmorphic group, ties broken by ascending space-group number.
///
/// @param crystal_system  Optional restriction to one crystal system.
/// @param centering       Required centering letter, or '\0' for any.
///
/// The group order is computed once per candidate and sorted alongside it.
/// Evaluating `operations().derive_symmorphic().order()` inside the comparator
/// would rebuild both operation lists on every single comparison.
std::vector<gemmi::SpaceGroup> EnumerateCandidateSpaceGroups(
        const std::optional<gemmi::CrystalSystem>& crystal_system,
        char centering) {
    struct RankedGroup {
        int rotation_order;
        const gemmi::SpaceGroup* group;  // points into gemmi's static table
    };

    std::vector<RankedGroup> ranked;
    for (const auto& sg : gemmi::spacegroup_tables::main) {
        if (!IsCandidateSpaceGroup(sg, crystal_system, centering))
            continue;
        ranked.push_back(RankedGroup{sg.operations().derive_symmorphic().order(), &sg});
    }

    std::sort(ranked.begin(), ranked.end(),
              [](const RankedGroup& a, const RankedGroup& b) {
                  if (a.rotation_order != b.rotation_order)
                      return a.rotation_order < b.rotation_order;
                  return a.group->number < b.group->number;
              });

    std::vector<gemmi::SpaceGroup> out;
    out.reserve(ranked.size());
    for (const auto& entry : ranked)
        out.push_back(*entry.group);
    return out;
}
}
/// Scores every candidate space group against the merged reflections and picks
/// the best accepted one.
///
/// For each candidate:
///  - every operator of the derived symmorphic group, except the identity and
///    operators with negative determinant, is scored with ScoreOperator();
///  - the systematic absences of the full group are scored;
///  - the candidate is accepted when the absence test passes and either the
///    rotational group is trivial (order <= 1) or all tested operators were
///    accepted with at least `opt.min_total_compared` pairs in total.
///
/// @param merged  Merged reflections; an empty list yields an empty result.
/// @param opt     Filters, thresholds and candidate restrictions.
/// @return All candidates, best first, plus `best_space_group` set to the first
///         accepted candidate (left unset when none is accepted).
///
/// The rotational group order needed as a sort criterion is taken from the
/// scoring loop, where it is already available, instead of regenerating both
/// groups' operations inside the sort comparator on every comparison.
SearchSpaceGroupResult SearchSpaceGroup(
        const std::vector<MergedReflection>& merged,
        const SearchSpaceGroupOptions& opt) {
    SearchSpaceGroupResult result;
    if (merged.empty())
        return result;

    // Candidate score together with the order of its rotational group.
    struct RankedCandidate {
        SpaceGroupCandidateScore score;
        int rotation_order;
    };

    const auto lookup = BuildMergedLookup(merged, opt);
    const auto candidates = EnumerateCandidateSpaceGroups(opt.crystal_system, opt.centering);

    std::vector<RankedCandidate> ranked;
    ranked.reserve(candidates.size());

    for (const auto& sg : candidates) {
        SpaceGroupCandidateScore score{.space_group = sg};
        const gemmi::GroupOps gops_full = sg.operations();
        const gemmi::GroupOps gops_rot = gops_full.derive_symmorphic();
        const int rotation_order = gops_rot.order();

        double cc_sum = 0.0;
        int cc_count = 0;
        int compared_total = 0;
        double min_cc = std::numeric_limits<double>::infinity();

        for (const auto& op : gops_rot.sym_ops) {
            if (IsIdentityOp(op) || IsParityChanging(op))
                continue;
            auto op_score = ScoreOperator(op, merged, lookup, opt);
            if (!op_score.has_value())
                continue;  // no pair could be formed for this operator

            compared_total += op_score->compared;
            score.operator_scores.push_back(*op_score);

            // CC statistics only include operators with enough pairs and a finite CC.
            if (op_score->compared >= opt.min_pairs_per_operator && std::isfinite(op_score->cc)) {
                cc_sum += op_score->cc;
                min_cc = std::min(min_cc, op_score->cc);
                cc_count += 1;
                if (op_score->accepted)
                    score.accepted_operators += 1;
            }
        }

        score.absence_score = ScoreSystematicAbsences(gops_full, merged, opt);
        score.tested_operators = static_cast<int>(score.operator_scores.size());
        score.compared_total = compared_total;
        score.mean_cc = (cc_count > 0) ? (cc_sum / cc_count) : 0.0;
        score.min_cc = std::isfinite(min_cc) ? min_cc : 0.0;

        const bool trivial_group = (rotation_order <= 1);
        const bool rotationally_accepted =
            trivial_group ||
            ((score.tested_operators > 0) &&
             (score.accepted_operators == score.tested_operators) &&
             (score.compared_total >= opt.min_total_compared));
        score.accepted = rotationally_accepted && score.absence_score.accepted;

        ranked.push_back(RankedCandidate{std::move(score), rotation_order});
    }

    // Ranking: accepted first, then lower absence ratios, then higher symmetry,
    // more absent reflections, more accepted operators, higher CCs, and finally
    // the higher space-group number.
    std::sort(ranked.begin(), ranked.end(),
              [](const RankedCandidate& lhs, const RankedCandidate& rhs) {
                  const SpaceGroupCandidateScore& a = lhs.score;
                  const SpaceGroupCandidateScore& b = rhs.score;
                  if (a.accepted != b.accepted)
                      return a.accepted > b.accepted;
                  if (a.absence_score.weighted_absent_to_allowed_ratio !=
                      b.absence_score.weighted_absent_to_allowed_ratio)
                      return a.absence_score.weighted_absent_to_allowed_ratio <
                             b.absence_score.weighted_absent_to_allowed_ratio;
                  if (a.absence_score.worst_absent_to_allowed_ratio !=
                      b.absence_score.worst_absent_to_allowed_ratio)
                      return a.absence_score.worst_absent_to_allowed_ratio <
                             b.absence_score.worst_absent_to_allowed_ratio;
                  if (lhs.rotation_order != rhs.rotation_order)
                      return lhs.rotation_order > rhs.rotation_order;
                  if (a.absence_score.absent_reflections != b.absence_score.absent_reflections)
                      return a.absence_score.absent_reflections > b.absence_score.absent_reflections;
                  if (a.accepted_operators != b.accepted_operators)
                      return a.accepted_operators > b.accepted_operators;
                  if (a.min_cc != b.min_cc)
                      return a.min_cc > b.min_cc;
                  if (a.mean_cc != b.mean_cc)
                      return a.mean_cc > b.mean_cc;
                  return a.space_group.number > b.space_group.number;
              });

    for (auto& entry : ranked)
        result.candidates.push_back(std::move(entry.score));

    for (const auto& cand : result.candidates) {
        if (cand.accepted) {
            result.best_space_group = cand.space_group;
            break;
        }
    }
    return result;
}
/// Renders the search result as a fixed-width text table.
///
/// @param result                   Result of SearchSpaceGroup().
/// @param max_candidates_to_print  Upper bound on the number of table rows;
///                                 candidates are printed in stored order.
/// @return Title line, column header, divider, one row per printed candidate
///         and a final line naming the best space group (or stating that none
///         was accepted).
std::string SearchSpaceGroupResultToText(const SearchSpaceGroupResult& result,
                                         size_t max_candidates_to_print) {
    // Width and title of every column; each cell is preceded by one separator.
    struct Column {
        int width;
        const char* title;
    };
    static constexpr std::array<Column, 11> kColumns{{
        {10, "SG"},
        {4, "Acc"},
        {8, "<CC>"},
        {8, "minCC"},
        {9, "compared"},
        {7, "ops"},
        {5, "AbsOK"},
        {8, "Nabs"},
        {8, "Nallow"},
        {8, "Abs/All"},
        {8, "worst"},
    }};
    const auto yes_no = [](bool flag) { return flag ? "yes" : "no"; };

    std::ostringstream os;
    os << "Space-group candidates\n";

    for (const auto& column : kColumns)
        os << " " << std::setw(column.width) << column.title;
    os << "\n";

    // Divider: a run of dashes exactly as wide as each column.
    for (const auto& column : kColumns)
        os << " " << std::setw(column.width)
           << std::string(static_cast<size_t>(column.width), '-');
    os << "\n";

    // Sticky flags: all floating-point cells use fixed notation, 3 decimals.
    os << std::fixed << std::setprecision(3);

    const size_t rows = std::min(max_candidates_to_print, result.candidates.size());
    for (size_t row = 0; row < rows; ++row) {
        const auto& cand = result.candidates[row];
        const auto& absence = cand.absence_score;
        os << " " << std::setw(10) << cand.space_group.short_name();
        os << " " << std::setw(4) << yes_no(cand.accepted);
        os << " " << std::setw(8) << cand.mean_cc;
        os << " " << std::setw(8) << cand.min_cc;
        os << " " << std::setw(9) << cand.compared_total;
        // "ops" cell: accepted/tested, 3 + 1 + 3 characters.
        os << " " << std::setw(3) << cand.accepted_operators
           << "/" << std::setw(3) << cand.tested_operators;
        os << " " << std::setw(5) << yes_no(absence.accepted);
        os << " " << std::setw(8) << absence.absent_reflections;
        os << " " << std::setw(8) << absence.allowed_reflections;
        os << " " << std::setw(8) << absence.weighted_absent_to_allowed_ratio;
        os << " " << std::setw(8) << absence.worst_absent_to_allowed_ratio;
        os << "\n";
    }

    if (result.best_space_group.has_value())
        os << "Best space group: " << result.best_space_group->short_name() << "\n";
    else
        os << "Best space group: none accepted\n";
    return os.str();
}