423 lines
15 KiB
C++
423 lines
15 KiB
C++
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include "Merge.h"
|
|
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <limits>
|
|
#include <random>
|
|
#include <unordered_map>
|
|
|
|
#include <gemmi/reciproc.hpp>
|
|
|
|
#include "../../common/CorrelationCoefficient.h"
|
|
#include "../../common/ResolutionShells.h"
|
|
#include "HKLKey.h"
|
|
|
|
size_t CalcMergeMaskCC(const DiffractionExperiment &x,
|
|
const std::vector<float> &scale_cc,
|
|
std::vector<uint8_t> &result_mask) {
|
|
|
|
if (scale_cc.size() != result_mask.size())
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Scale CC size mismatch");
|
|
|
|
size_t n_rejected = 0;
|
|
auto min_cc = x.GetScalingSettings().GetMinCCForImage();
|
|
if (min_cc > 0.0) {
|
|
for (int i = 0; i < result_mask.size(); ++i) {
|
|
if (result_mask[i] && (scale_cc[i] < min_cc)) {
|
|
result_mask[i] = 0;
|
|
++n_rejected;
|
|
}
|
|
}
|
|
}
|
|
return n_rejected;
|
|
}
|
|
|
|
size_t CalcMergeMaskUnitCell(const DiffractionExperiment &x,
|
|
const UnitCell &ref_cell,
|
|
const std::vector<std::optional<UnitCell> > &unit_cells,
|
|
std::vector<uint8_t> &mask) {
|
|
if (mask.size() != unit_cells.size())
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Mismatch in vector size");
|
|
|
|
size_t n_rejected = 0;
|
|
|
|
const auto dtol = x.GetIndexingSettings().GetUnitCellDistTolerance();
|
|
const auto atol = x.GetIndexingSettings().GetUnitCellAngleTolerance_deg();
|
|
for (size_t i = 0; i < unit_cells.size(); ++i) {
|
|
if (mask[i]) {
|
|
// Counter should not include trivial case (non-indexed image or weird things)
|
|
// It should only count where indexing did happen, but cell was too far from reference
|
|
if (!unit_cells[i] || !unit_cells[i]->is_finite())
|
|
mask[i] = 0;
|
|
else if (!unit_cells[i]->is_close(ref_cell, dtol, atol)) {
|
|
mask[i] = 0;
|
|
++n_rejected;
|
|
}
|
|
}
|
|
}
|
|
|
|
return n_rejected;
|
|
}
|
|
|
|
std::vector<MergedReflection> MergeAll(const DiffractionExperiment &x,
|
|
const std::vector<std::vector<Reflection> > &reflections,
|
|
const std::vector<uint8_t> &merge_mask) {
|
|
|
|
// To select images for half-datasets to calculate CC1/2, I use a random number generator with a fixed seed.
|
|
// This makes sure that images are selected randomly, but in a fully reproducible manner (at least for the same binary)
|
|
std::mt19937 rng(123456789u);
|
|
std::bernoulli_distribution half_dist(0.5);
|
|
|
|
if (!merge_mask.empty() && merge_mask.size() != reflections.size())
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Merge mask size mismatch");
|
|
|
|
auto scaling_settings = x.GetScalingSettings();
|
|
HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1));
|
|
const std::optional<double> high_resolution_limit = scaling_settings.GetHighResolutionLimit_A();
|
|
auto min_partiality = scaling_settings.GetMinPartiality();
|
|
|
|
struct Accum {
|
|
int32_t h = 0;
|
|
int32_t k = 0;
|
|
int32_t l = 0;
|
|
float d = NAN;
|
|
double sum_wI = 0.0;
|
|
double sum_w = 0.0;
|
|
double sum_wI_half[2] = {0.0, 0.0};
|
|
double sum_w_half[2] = {0.0, 0.0};
|
|
size_t n_half[2] = {0, 0};
|
|
};
|
|
|
|
std::unordered_map<uint64_t, Accum> acc;
|
|
|
|
for (int i = 0; i < reflections.size(); ++i) {
|
|
const int half = half_dist(rng);
|
|
if (!merge_mask.empty() && merge_mask[i] == 0)
|
|
continue;
|
|
if (reflections[i].empty())
|
|
continue;
|
|
for (const auto &r: reflections[i]) {
|
|
if (key_generator.IsSystematicallyAbsent(r))
|
|
continue;
|
|
|
|
if (r.image_scale_corr <= 0.0 || !std::isfinite(r.image_scale_corr))
|
|
continue;
|
|
if (!AcceptReflection(r, high_resolution_limit))
|
|
continue;
|
|
if (r.partiality < min_partiality)
|
|
continue;
|
|
|
|
const float I_corr = r.I * r.image_scale_corr;
|
|
const float sigma_corr = r.sigma * r.image_scale_corr;
|
|
if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0)
|
|
continue;
|
|
|
|
auto hkl = key_generator(r);
|
|
auto hkl_key = hkl.pack();
|
|
|
|
auto it = acc.find(hkl_key);
|
|
if (it == acc.end())
|
|
it = acc.emplace(hkl_key, Accum{
|
|
.h = hkl.plus ? hkl.h : -hkl.h,
|
|
.k = hkl.plus ? hkl.k : -hkl.k,
|
|
.l = hkl.plus ? hkl.l : -hkl.l,
|
|
}).first;
|
|
|
|
const float w = 1.0f / (sigma_corr * sigma_corr);
|
|
const float wI = w * I_corr;
|
|
|
|
it->second.sum_wI += wI;
|
|
it->second.sum_w += w;
|
|
|
|
it->second.sum_wI_half[half] += wI;
|
|
it->second.sum_w_half[half] += w;
|
|
it->second.n_half[half]++;
|
|
|
|
if (!std::isfinite(it->second.d) && std::isfinite(r.d) && r.d > 0.0f)
|
|
it->second.d = r.d;
|
|
}
|
|
}
|
|
|
|
std::vector<MergedReflection> out;
|
|
out.reserve(acc.size());
|
|
for (const auto &[key, accum]: acc) {
|
|
if (accum.sum_w <= 0.0)
|
|
continue;
|
|
|
|
MergedReflection mr{
|
|
.h = accum.h,
|
|
.k = accum.k,
|
|
.l = accum.l,
|
|
.I = static_cast<float>(accum.sum_wI / accum.sum_w),
|
|
.sigma = 1.0f / std::sqrt(static_cast<float>(accum.sum_w)),
|
|
.I_half = {NAN, NAN},
|
|
.sigma_half = {NAN, NAN},
|
|
.d = accum.d
|
|
};
|
|
|
|
if (accum.n_half[0] + accum.n_half[1] > 0
|
|
&& accum.sum_w_half[0] > 0.0
|
|
&& accum.sum_w_half[1] > 0.0) {
|
|
for (int i = 0; i < 2; ++i) {
|
|
mr.I_half[i] = static_cast<float>(accum.sum_wI_half[i] / accum.sum_w_half[i]);
|
|
mr.sigma_half[i] = 1.0f / std::sqrt(static_cast<float>(accum.sum_w_half[i]));
|
|
}
|
|
}
|
|
out.emplace_back(mr);
|
|
}
|
|
return out;
|
|
}
|
|
|
|
|
|
struct ShellAccum {
|
|
int total_obs = 0;
|
|
int unique = 0;
|
|
int possible = 0;
|
|
|
|
double sum_i_over_sigma = 0.0;
|
|
int n_i_over_sigma = 0;
|
|
|
|
CorrelationCoefficient cc_half;
|
|
CorrelationCoefficient cc_ref;
|
|
};
|
|
|
|
void CalcPossibleReflections(const DiffractionExperiment &x,
|
|
const UnitCell &cell,
|
|
double d_min,
|
|
double d_max,
|
|
const ResolutionShells &shells,
|
|
std::vector<ShellAccum> &acc) {
|
|
gemmi::UnitCell gemmi_cell = cell;
|
|
const gemmi::SpaceGroup *sg = gemmi::find_spacegroup_by_number(x.GetSpaceGroupNumber().value_or(1));
|
|
|
|
// Generate unique reflections
|
|
std::vector<gemmi::Miller> possible_hkls = gemmi::make_miller_vector(gemmi_cell, sg, d_min, d_max, true);
|
|
CrystalLattice lattice(cell);
|
|
const auto astar = lattice.Astar();
|
|
const auto bstar = lattice.Bstar();
|
|
const auto cstar = lattice.Cstar();
|
|
|
|
for (const auto& hkl: possible_hkls) {
|
|
const auto q = hkl[0] * astar + hkl[1] * bstar + hkl[2] * cstar;
|
|
const auto qlen = q.Length();
|
|
if (qlen < 1e-6)
|
|
continue;
|
|
const auto d = 1.0 / qlen;
|
|
const auto shell = shells.GetShell(d);
|
|
if (!shell.has_value())
|
|
continue;
|
|
const int s = *shell;
|
|
if (s >= 0 && s < acc.size())
|
|
acc[s].possible++;
|
|
}
|
|
}
|
|
|
|
MergeStatistics MergeStats(const DiffractionExperiment &x,
|
|
const std::vector<MergedReflection> &merged,
|
|
const std::vector<std::vector<Reflection> > &reflections,
|
|
const UnitCell &cell,
|
|
const std::vector<uint8_t> &merge_mask,
|
|
const std::vector<MergedReflection> &reference) {
|
|
if (!merge_mask.empty() && merge_mask.size() != reflections.size())
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Merge mask size mismatch");
|
|
|
|
constexpr int n_shells = 10;
|
|
|
|
float d_min = std::numeric_limits<float>::max();
|
|
float d_max = 0.0f;
|
|
|
|
auto min_partiality = x.GetScalingSettings().GetMinPartiality();
|
|
auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A();
|
|
auto scaling_settings = x.GetScalingSettings();
|
|
HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1));
|
|
|
|
std::unordered_map<uint64_t, float> reference_intensities;
|
|
if (!reference.empty()) {
|
|
reference_intensities.reserve(reference.size());
|
|
for (const auto &r: reference) {
|
|
if (!std::isfinite(r.I))
|
|
continue;
|
|
|
|
const auto hkl = key_generator(r);
|
|
reference_intensities[hkl.pack()] = r.I;
|
|
}
|
|
}
|
|
|
|
for (const auto &m: merged) {
|
|
if (!std::isfinite(m.d) || m.d <= 0.0f)
|
|
continue;
|
|
if (d_min_limit_A && m.d < d_min_limit_A)
|
|
continue;
|
|
|
|
d_min = std::min(d_min, m.d);
|
|
d_max = std::max(d_max, m.d);
|
|
}
|
|
|
|
if (!(d_min < d_max && d_min > 0.0f))
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
|
|
"MergeStats: Error in resolution calculation");
|
|
|
|
const float d_min_pad = d_min * 0.999f;
|
|
const float d_max_pad = d_max * 1.001f;
|
|
|
|
ResolutionShells shells(d_min_pad, d_max_pad, n_shells);
|
|
const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq();
|
|
const auto shell_min_res = shells.GetShellMinRes();
|
|
|
|
std::vector<ShellAccum> acc(n_shells);
|
|
|
|
CalcPossibleReflections(x, cell, d_min_pad, d_max_pad, shells, acc);
|
|
|
|
CorrelationCoefficient cc_half_overall;
|
|
CorrelationCoefficient cc_ref_overall;
|
|
|
|
for (const auto &m: merged) {
|
|
const auto shell = shells.GetShell(m.d);
|
|
if (!shell.has_value())
|
|
continue;
|
|
|
|
const int s = *shell;
|
|
if (s >= 0 && s < n_shells) {
|
|
if (std::isfinite(m.I) && std::isfinite(m.sigma) && m.sigma > 0.0) {
|
|
acc[s].unique++;
|
|
acc[s].sum_i_over_sigma += m.I / m.sigma;
|
|
++acc[s].n_i_over_sigma;
|
|
|
|
if (!reference_intensities.empty()) {
|
|
const auto hkl = key_generator(m);
|
|
const auto ref_it = reference_intensities.find(hkl.pack());
|
|
if (ref_it != reference_intensities.end() && std::isfinite(ref_it->second)) {
|
|
acc[s].cc_ref.Add(m.I, ref_it->second);
|
|
cc_ref_overall.Add(m.I, ref_it->second);
|
|
}
|
|
}
|
|
|
|
if (std::isfinite(m.I_half[0]) && std::isfinite(m.I_half[1])) {
|
|
acc[s].cc_half.Add(m.I_half[0], m.I_half[1]);
|
|
cc_half_overall.Add(m.I_half[0], m.I_half[1]);
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < reflections.size(); ++i) {
|
|
if (!merge_mask.empty() && merge_mask[i] == 0)
|
|
continue;
|
|
|
|
for (const auto &r: reflections[i]) {
|
|
if (key_generator.IsSystematicallyAbsent(r))
|
|
continue;
|
|
if (r.image_scale_corr <= 0.0 || !std::isfinite(r.image_scale_corr))
|
|
continue;
|
|
if (!AcceptReflection(r, d_min_limit_A))
|
|
continue;
|
|
if (r.partiality < min_partiality)
|
|
continue;
|
|
|
|
const float I_corr = r.I * r.image_scale_corr;
|
|
const float sigma_corr = r.sigma * r.image_scale_corr;
|
|
if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0f)
|
|
continue;
|
|
|
|
const auto shell = shells.GetShell(r.d);
|
|
if (!shell.has_value())
|
|
continue;
|
|
const int s = *shell;
|
|
if (s >= 0 && s < n_shells)
|
|
acc[s].total_obs++;
|
|
}
|
|
}
|
|
|
|
MergeStatistics out;
|
|
out.shells.resize(n_shells);
|
|
|
|
for (int s = 0; s < n_shells; ++s) {
|
|
const auto &sa = acc[s];
|
|
auto &ss = out.shells[s];
|
|
|
|
ss.mean_one_over_d2 = shell_mean_1_d2[s];
|
|
ss.d_min = shell_min_res[s];
|
|
ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1];
|
|
ss.total_observations = sa.total_obs;
|
|
ss.unique_reflections = sa.unique;
|
|
ss.possible_unique_reflections = sa.possible;
|
|
ss.mean_i_over_sigma = sa.n_i_over_sigma > 0
|
|
? sa.sum_i_over_sigma / sa.n_i_over_sigma
|
|
: 0.0;
|
|
|
|
ss.cc_half = sa.cc_half.GetCC();
|
|
ss.cc_ref = sa.cc_ref.GetCC();
|
|
}
|
|
|
|
auto &overall = out.overall;
|
|
overall.d_min = d_min;
|
|
overall.d_max = d_max;
|
|
|
|
int all_possible = 0;
|
|
int all_unique = 0;
|
|
double sum_i_over_sigma = 0.0;
|
|
int n_i_over_sigma = 0;
|
|
|
|
|
|
for (const auto &sa: acc) {
|
|
overall.total_observations += sa.total_obs;
|
|
all_unique += sa.unique;
|
|
all_possible += sa.possible;
|
|
sum_i_over_sigma += sa.sum_i_over_sigma;
|
|
n_i_over_sigma += sa.n_i_over_sigma;
|
|
|
|
}
|
|
|
|
overall.possible_unique_reflections = all_possible;
|
|
overall.unique_reflections = all_unique;
|
|
overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0;
|
|
overall.cc_half = cc_half_overall.GetCC();
|
|
overall.cc_ref = cc_ref_overall.GetCC();
|
|
|
|
return out;
|
|
}
|
|
|
|
void MergeStatistics::Print(Logger &logger) const {
|
|
logger.Info("");
|
|
logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>8s} {:>8s}", "d_min", "N_obs", "N_uniq", "N_possib", "Compl","<I/sig>", "CC1/2", "CCref");
|
|
logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", "", "", "", "", "");
|
|
for (const auto &sh: shells) {
|
|
if (sh.unique_reflections == 0)
|
|
continue;
|
|
double completeness = sh.possible_unique_reflections > 0
|
|
? static_cast<double>(sh.unique_reflections) / sh.possible_unique_reflections * 100.0 : 0.0;
|
|
|
|
logger.Info(" {:8.2f} {:8d} {:8d} {:8d} {:7.1f}% {:8.1f} {:7.1f}% {:7.1f}%",
|
|
sh.d_min,
|
|
sh.total_observations,
|
|
sh.unique_reflections,
|
|
sh.possible_unique_reflections,
|
|
completeness,
|
|
sh.mean_i_over_sigma,
|
|
sh.cc_half*100.0,
|
|
sh.cc_ref*100.0);
|
|
}
|
|
{
|
|
const auto &ov = overall;
|
|
double completeness = ov.possible_unique_reflections > 0
|
|
? static_cast<double>(ov.unique_reflections) / ov.possible_unique_reflections * 100.0 : 0.0;
|
|
|
|
logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", "", "", "", "", "");
|
|
logger.Info(" {:>8s} {:8d} {:8d} {:8d} {:7.1f}% {:8.1f} {:7.1f}% {:7.1f}%",
|
|
"Overall",
|
|
ov.total_observations,
|
|
ov.unique_reflections,
|
|
ov.possible_unique_reflections,
|
|
completeness,
|
|
ov.mean_i_over_sigma,
|
|
ov.cc_half*100.0,
|
|
ov.cc_ref*100.0);
|
|
}
|
|
logger.Info("");
|
|
}
|
|
|