Merge: Simplify code for merging, make it more efficient
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 13m55s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 14m4s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 14m34s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 15m45s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 16m22s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 17m11s
Build Packages / build:rpm (rocky8) (push) Successful in 9m49s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 11m39s
Build Packages / Generate python client (push) Successful in 35s
Build Packages / build:rpm (rocky9) (push) Successful in 13m9s
Build Packages / Create release (push) Skipped
Build Packages / Build documentation (push) Successful in 50s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 13m19s
Build Packages / XDS test (durin plugin) (push) Successful in 11m24s
Build Packages / XDS test (neggia plugin) (push) Successful in 10m32s
Build Packages / XDS test (JFJoch plugin) (push) Successful in 11m2s
Build Packages / DIALS test (push) Successful in 14m40s
Build Packages / Unit tests (push) Successful in 56m52s

This commit is contained in:
2026-05-13 12:27:43 +02:00
parent 02327bf610
commit d5b928fa73
7 changed files with 228 additions and 251 deletions
+9 -5
View File
@@ -6,6 +6,8 @@
#include <cstdint>
#include <optional>
#include <cmath>
#include "SpotToSave.h"
struct Reflection {
@@ -29,11 +31,13 @@ struct Reflection {
};
struct MergedReflection {
int32_t h;
int32_t k;
int32_t l;
float I;
float sigma;
int32_t h = 0;
int32_t k = 0;
int32_t l = 0;
float I = NAN;
float sigma = NAN;
float I_anom[2] = {NAN, NAN};
float sigma_anom[2] = {NAN, NAN};
float d = 0.0;
};
+7 -4
View File
@@ -281,11 +281,14 @@ void IndexAndRefine::ScaleImage(size_t n, ScaleOnTheFly &scaling, ScalingResult
}
ScalingResult IndexAndRefine::ScaleAllImages(size_t nthreads) {
auto merge_result = MergeReflections(reflections, experiment);
ScaleOnTheFly scaling(merge_result.merged, experiment);
auto merge_result = MergeAll(experiment, reflections);
ScaleOnTheFly scaling(merge_result, experiment);
return scaling.Scale(reflections, mosaicity, nthreads);
}
MergeResult IndexAndRefine::Merge() {
return MergeReflections(reflections, experiment);
MergeResult IndexAndRefine::Merge() const {
MergeResult out;
out.merged = MergeAll(experiment, reflections);
out.statistics = MergeStats(experiment, out.merged, reflections);
return out;
}
+1 -1
View File
@@ -65,7 +65,7 @@ public:
void ProcessImage(DataMessage &msg, const SpotFindingSettings &settings, const CompressedImage &image, BraggPrediction &prediction);
ScalingResult ScaleAllImages(size_t nthreads = 0);
MergeResult Merge();
MergeResult Merge() const;
std::optional<RotationIndexerResult> Finalize();
};
+25
View File
@@ -6,6 +6,26 @@
#include "HKLKey.h"
#include "gemmi/symmetry.hpp"
uint64_t HKLKey::pack_no_anom() const {
constexpr int bits = 21;
constexpr int bias = 1 << (bits - 1); // 1,048,576
constexpr int max_value = bias - 1;
constexpr int min_value = -bias;
constexpr std::uint64_t mask = (1ULL << bits) - 1ULL;
if (h < min_value || h > max_value ||
k < min_value || k > max_value ||
l < min_value || l > max_value) {
throw std::out_of_range("HKL index outside packable range");
}
const std::uint64_t hh = static_cast<std::uint64_t>(h + bias) & mask;
const std::uint64_t kk = static_cast<std::uint64_t>(k + bias) & mask;
const std::uint64_t ll = static_cast<std::uint64_t>(l + bias) & mask;
return hh | (kk << bits) | (ll << (2 * bits));
}
HKLKeyGenerator::HKLKeyGenerator(bool merge_friedel, int32_t space_group_number)
: HKLKeyGenerator(merge_friedel, *gemmi::find_spacegroup_by_number(space_group_number)) {
}
@@ -57,6 +77,8 @@ bool AcceptReflection(const Reflection &r, std::optional<double> d_min_limit) {
return false;
if (!std::isfinite(r.rlp) || r.rlp == 0.0f)
return false;
if (!std::isfinite(r.sigma) || r.sigma <= 0.0)
return false;
return true;
}
@@ -69,5 +91,8 @@ bool AcceptReflection(const Reflection &r, double d_min_limit) {
return false;
if (!std::isfinite(r.rlp) || r.rlp == 0.0f)
return false;
if (!std::isfinite(r.sigma) || r.sigma <= 0.0)
return false;
return true;
}
+6
View File
@@ -3,6 +3,10 @@
#pragma once
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <tuple>
#include "../../common/Reflection.h"
#include "gemmi/symmetry.hpp"
@@ -19,6 +23,8 @@ struct HKLKey {
bool operator==(const HKLKey &o) const {
return h == o.h && k == o.k && l == o.l && plus == o.plus;
}
[[nodiscard]] uint64_t pack_no_anom() const;
};
class HKLKeyGenerator {
+174 -238
View File
@@ -6,267 +6,203 @@
#include <algorithm>
#include <cmath>
#include <limits>
#include <map>
#include <stdexcept>
#include <tuple>
#include <unordered_set>
#include <unordered_map>
#include "../../common/ResolutionShells.h"
#include "HKLKey.h"
namespace {
struct Obs {
const Reflection *r = nullptr;
int hkl = -1;
double sigma = 1.0;
std::vector<MergedReflection> MergeAll(const DiffractionExperiment &x, const std::vector<std::vector<Reflection> > &reflections) {
auto scaling_settings = x.GetScalingSettings();
HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1));
const std::optional<double> high_resolution_limit = scaling_settings.GetHighResolutionLimit_A();
struct Accum {
// Keep anomalous + / - together, but separate
int32_t h = 0;
int32_t k = 0;
int32_t l = 0;
float d = NAN;
double sum_wI = 0.0;
double sum_w = 0.0;
double sum_wI_anom[2] = {0.0, 0.0};
double sum_w_anom[2] = {0.0, 0.0};
bool present[2] = {false, false};
};
double SafeSigma(double sigma) {
// TODO: Think about safe sigma...
if (!std::isfinite(sigma) || sigma <= 1e-3)
return 1e-3;
return sigma;
}
std::unordered_map<uint64_t, Accum> acc;
std::vector<Obs> BuildObservations(const std::vector<std::vector<Reflection>> &observations,
const DiffractionExperiment &x,
std::vector<HKLKey> &slot_to_hkl) {
std::map<HKLKey, int> hkl_to_slot;
std::vector<Obs> out;
size_t nrefl = 0;
for (const auto &image: observations)
nrefl += image.size();
out.reserve(nrefl);
auto scaling_settings = x.GetScalingSettings();
HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1) );
for (const auto &image: observations) {
for (const auto &r: image) {
if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction))
continue;
if (!AcceptReflection(r, scaling_settings.GetHighResolutionLimit_A()))
continue;
HKLKey key = key_generator(r);
auto it = hkl_to_slot.find(key);
if (it == hkl_to_slot.end()) {
const int slot = static_cast<int>(slot_to_hkl.size());
it = hkl_to_slot.emplace(key, slot).first;
slot_to_hkl.push_back(key);
}
out.push_back({
.r = &r,
.hkl = it->second,
.sigma = SafeSigma(r.sigma)
});
}
}
return out;
}
MergeResult Merge(const std::vector<HKLKey> &slot_to_hkl, const std::vector<Obs> &obs) {
struct Accum {
double sum_wI = 0.0;
double sum_w = 0.0;
double sum_wsigma2 = 0.0;
std::vector<double> d_values;
};
std::vector<Accum> acc(slot_to_hkl.size());
for (const auto &o: obs) {
const double I_corr = static_cast<double>(o.r->I) * o.r->scaling_correction;
const double sigma_corr = o.sigma * o.r->scaling_correction;
for (const auto &image: reflections) {
for (const auto &r: image) {
if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction))
continue;
if (!AcceptReflection(r, high_resolution_limit))
continue;
const float I_corr = r.I * r.scaling_correction;
const float sigma_corr = r.sigma * r.scaling_correction;
if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0)
continue;
// TODO: Figure out right way to handle this
const double w = 1.0 / (sigma_corr * sigma_corr);
auto hkl = key_generator(r);
auto hkl_key = hkl.pack_no_anom();
auto &a = acc[o.hkl];
a.sum_wI += w * I_corr;
a.sum_w += w;
a.sum_wsigma2 += w * w * sigma_corr * sigma_corr;
if (std::isfinite(o.r->d) && o.r->d > 0.0f)
a.d_values.push_back(o.r->d);
auto it = acc.find(hkl_key);
if (it == acc.end())
it = acc.emplace(hkl_key, Accum{
.h = hkl.h,
.k = hkl.k,
.l = hkl.l
}).first;
int solution = hkl.plus ? 0 : 1;
const float w = 1.0f / (sigma_corr * sigma_corr);
it->second.sum_wI += w * I_corr;
it->second.sum_w += w;
it->second.sum_wI_anom[solution] += w * I_corr;
it->second.sum_w_anom[solution] += w;
it->second.present[solution] = true;
if (!std::isfinite(it->second.d) && std::isfinite(r.d) && r.d > 0.0f)
it->second.d = r.d;
}
MergeResult out;
out.merged.resize(slot_to_hkl.size());
for (int h = 0; h < static_cast<int>(slot_to_hkl.size()); ++h) {
auto &a = acc[h];
if (a.sum_w <= 0.0)
continue;
out.merged[h].h = slot_to_hkl[h].h;
out.merged[h].k = slot_to_hkl[h].k;
out.merged[h].l = slot_to_hkl[h].l;
out.merged[h].I = a.sum_wI / a.sum_w;
out.merged[h].sigma = std::sqrt(a.sum_wsigma2) / a.sum_w;
if (a.d_values.empty())
continue;
std::ranges::nth_element(a.d_values,
a.d_values.begin() + static_cast<long>(a.d_values.size() / 2));
out.merged[h].d = a.d_values[a.d_values.size() / 2];
}
return out;
}
void Stats(const DiffractionExperiment &x, MergeResult &out, const std::vector<Obs> &obs) {
constexpr int n_shells = 10;
std::vector<MergedReflection> out;
out.reserve(acc.size());
for (const auto &[key, accum]: acc) {
if (accum.sum_w <= 0.0)
continue;
float d_min = std::numeric_limits<float>::max();
float d_max = 0.0f;
float I_anom[2] = {NAN, NAN};
float sigma_anom[2] = {NAN, NAN};
auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A();
for (const auto &m: out.merged) {
const auto d = static_cast<float>(m.d);
if (!std::isfinite(d) || d <= 0.0f)
continue;
if (d_min_limit_A && d < d_min_limit_A)
continue;
d_min = std::min(d_min, d);
d_max = std::max(d_max, d);
}
if (!(d_min < d_max && d_min > 0.0f))
return;
const float d_min_pad = d_min * 0.999f;
const float d_max_pad = d_max * 1.001f;
ResolutionShells shells(d_min_pad, d_max_pad, n_shells);
const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq();
const auto shell_min_res = shells.GetShellMinRes();
std::vector<int> hkl_shell(out.merged.size(), -1);
for (int h = 0; h < static_cast<int>(out.merged.size()); ++h) {
auto s = shells.GetShell(out.merged[h].d);
if (s)
hkl_shell[h] = *s;
}
struct PerHKL {
double sum_I = 0.0;
std::vector<double> I;
};
std::vector<PerHKL> per_hkl(out.merged.size());
for (const auto &o: obs) {
if (o.hkl < 0 || o.hkl >= static_cast<int>(per_hkl.size()))
continue;
if (hkl_shell[o.hkl] < 0)
continue;
const double I_corr = static_cast<double>(o.r->I) * o.r->scaling_correction;
if (!std::isfinite(I_corr))
continue;
per_hkl[o.hkl].sum_I += I_corr;
per_hkl[o.hkl].I.push_back(I_corr);
}
struct ShellAccum {
int total_obs = 0;
std::unordered_set<int> unique;
double rmeas_num = 0.0;
double rmeas_den = 0.0;
double sum_i_over_sigma = 0.0;
int n_i_over_sigma = 0;
};
std::vector<ShellAccum> acc(n_shells);
for (int h = 0; h < static_cast<int>(per_hkl.size()); ++h) {
const int s = hkl_shell[h];
if (s < 0 || per_hkl[h].I.empty())
continue;
auto &sa = acc[s];
const auto &ph = per_hkl[h];
const int n = static_cast<int>(ph.I.size());
const double mean_I = ph.sum_I / n;
sa.unique.insert(h);
sa.total_obs += n;
if (n >= 2) {
double sum_abs_dev = 0.0;
for (double I: ph.I)
sum_abs_dev += std::abs(I - mean_I);
sa.rmeas_num += std::sqrt(static_cast<double>(n) / (n - 1.0)) * sum_abs_dev;
}
for (double I: ph.I)
sa.rmeas_den += std::abs(I);
if (out.merged[h].sigma > 0.0) {
sa.sum_i_over_sigma += out.merged[h].I / out.merged[h].sigma;
++sa.n_i_over_sigma;
for (int i = 0; i < 2; ++i) {
if (accum.present[i] && accum.sum_w_anom[i] > 0.0) {
I_anom[i] = static_cast<float>(accum.sum_wI_anom[i] / accum.sum_w_anom[i]);
sigma_anom[i] = 1.0f / std::sqrt(static_cast<float>(accum.sum_w_anom[i]));
}
}
out.statistics.shells.resize(n_shells);
for (int s = 0; s < n_shells; ++s) {
const auto &sa = acc[s];
auto &ss = out.statistics.shells[s];
ss.mean_one_over_d2 = shell_mean_1_d2[s];
ss.d_min = shell_min_res[s];
ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1];
ss.total_observations = sa.total_obs;
ss.unique_reflections = static_cast<int>(sa.unique.size());
ss.rmeas = sa.rmeas_den > 0.0 ? sa.rmeas_num / sa.rmeas_den : 0.0;
ss.mean_i_over_sigma = sa.n_i_over_sigma > 0
? sa.sum_i_over_sigma / sa.n_i_over_sigma
: 0.0;
}
auto &overall = out.statistics.overall;
overall.d_min = d_min;
overall.d_max = d_max;
std::unordered_set<int> all_unique;
double rmeas_num = 0.0;
double rmeas_den = 0.0;
double sum_i_over_sigma = 0.0;
int n_i_over_sigma = 0;
for (const auto &sa: acc) {
overall.total_observations += sa.total_obs;
all_unique.insert(sa.unique.begin(), sa.unique.end());
rmeas_num += sa.rmeas_num;
rmeas_den += sa.rmeas_den;
sum_i_over_sigma += sa.sum_i_over_sigma;
n_i_over_sigma += sa.n_i_over_sigma;
}
overall.unique_reflections = static_cast<int>(all_unique.size());
overall.rmeas = rmeas_den > 0.0 ? rmeas_num / rmeas_den : 0.0;
overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0;
out.emplace_back(MergedReflection{
.h = accum.h,
.k = accum.k,
.l = accum.l,
.I = static_cast<float>(accum.sum_wI / accum.sum_w),
.sigma = 1.0f / std::sqrt(static_cast<float>(accum.sum_w)),
.I_anom = {I_anom[0], I_anom[1]},
.sigma_anom = {sigma_anom[0], sigma_anom[1]},
.d = accum.d
});
}
return out;
}
MergeResult MergeReflections(const std::vector<std::vector<Reflection> > &observations,
const DiffractionExperiment &x) {
std::vector<HKLKey> slot_to_hkl;
auto obs = BuildObservations(observations, x, slot_to_hkl);
auto out = Merge(slot_to_hkl, obs);
Stats(x, out, obs);
MergeStatistics MergeStats(const DiffractionExperiment &x,
const std::vector<MergedReflection> &merged,
const std::vector<std::vector<Reflection> > &reflections) {
constexpr int n_shells = 10;
float d_min = std::numeric_limits<float>::max();
float d_max = 0.0f;
auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A();
for (const auto &m: merged) {
if (!std::isfinite(m.d) || m.d <= 0.0f)
continue;
if (d_min_limit_A && m.d < d_min_limit_A)
continue;
d_min = std::min(d_min, m.d);
d_max = std::max(d_max, m.d);
}
if (!(d_min < d_max && d_min > 0.0f))
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Error in resolution calculation");
const float d_min_pad = d_min * 0.999f;
const float d_max_pad = d_max * 1.001f;
ResolutionShells shells(d_min_pad, d_max_pad, n_shells);
const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq();
const auto shell_min_res = shells.GetShellMinRes();
struct ShellAccum {
int total_obs = 0;
int unique = 0;
double sum_i_over_sigma = 0.0;
int n_i_over_sigma = 0;
};
std::vector<ShellAccum> acc(n_shells);
for (const auto &m: merged) {
const auto shell = shells.GetShell(m.d);
if (!shell.has_value())
continue;
const int s = *shell;
if (s >= 0 && s < n_shells) {
if (std::isfinite(m.I) && std::isfinite(m.sigma) && m.sigma > 0.0) {
acc[s].unique++;
acc[s].sum_i_over_sigma += m.I / m.sigma;
++acc[s].n_i_over_sigma;
}
}
}
for (const auto &image: reflections) {
for (const auto &r: image) {
if (r.scaling_correction <= 0.0 || !std::isfinite(r.scaling_correction))
continue;
if (!AcceptReflection(r, d_min_limit_A))
continue;
const auto shell = shells.GetShell(r.d);
if (!shell.has_value())
continue;
const int s = *shell;
if (s >= 0 && s < n_shells)
acc[s].total_obs++;
}
}
MergeStatistics out;
out.shells.resize(n_shells);
for (int s = 0; s < n_shells; ++s) {
const auto &sa = acc[s];
auto &ss = out.shells[s];
ss.mean_one_over_d2 = shell_mean_1_d2[s];
ss.d_min = shell_min_res[s];
ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1];
ss.total_observations = sa.total_obs;
ss.unique_reflections = sa.unique;
ss.mean_i_over_sigma = sa.n_i_over_sigma > 0
? sa.sum_i_over_sigma / sa.n_i_over_sigma
: 0.0;
}
auto &overall = out.overall;
overall.d_min = d_min;
overall.d_max = d_max;
int all_unique = 0;
double sum_i_over_sigma = 0.0;
int n_i_over_sigma = 0;
for (const auto &sa: acc) {
overall.total_observations += sa.total_obs;
all_unique += sa.unique;
sum_i_over_sigma += sa.sum_i_over_sigma;
n_i_over_sigma += sa.n_i_over_sigma;
}
overall.unique_reflections = all_unique;
overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0;
return out;
}
+6 -3
View File
@@ -18,7 +18,6 @@ struct MergeStatisticsShell {
float mean_one_over_d2 = 0;
int total_observations = 0;
int unique_reflections = 0;
double rmeas = 0.0;
double mean_i_over_sigma = 0.0;
};
@@ -33,5 +32,9 @@ struct MergeResult {
MergeStatistics statistics;
};
MergeResult MergeReflections(const std::vector<std::vector<Reflection> > &observations,
const DiffractionExperiment &x);
std::vector<MergedReflection> MergeAll(const DiffractionExperiment &x,
const std::vector<std::vector<Reflection> > &reflections);
MergeStatistics MergeStats(const DiffractionExperiment &x,
const std::vector<MergedReflection> &merged,
const std::vector<std::vector<Reflection> > &reflections);