// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "Merge.h" #include #include #include #include #include #include "../../common/ResolutionShells.h" #include "HKLKey.h" std::vector CalcMergeMask(const DiffractionExperiment &x, const ScalingResult &result) { std::vector ret(result.image_cc.size(), 0); auto min_cc = x.GetScalingSettings().GetMinCCForImage(); for (int i = 0 ; i < result.image_cc.size(); ++i) ret[i] = (result.image_cc[i] >= min_cc) ? 1 : 0; return ret; } std::vector MergeAll(const DiffractionExperiment &x, const std::vector > &reflections, const std::vector &merge_mask) { // To select images for half-datasets to calculate CC1/2, I use a random number generator with a fixed seed. // This makes sure that images are selected randomly, but in a fully reproducible manner (at least for the same binary) std::mt19937 rng(123456789u); std::bernoulli_distribution half_dist(0.5); if (!merge_mask.empty() && merge_mask.size() != reflections.size()) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Merge mask size mismatch"); auto scaling_settings = x.GetScalingSettings(); HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1)); const std::optional high_resolution_limit = scaling_settings.GetHighResolutionLimit_A(); auto min_partiality = scaling_settings.GetMinPartiality(); struct Accum { int32_t h = 0; int32_t k = 0; int32_t l = 0; float d = NAN; double sum_wI = 0.0; double sum_w = 0.0; double sum_wI_half[2] = {0.0, 0.0}; double sum_w_half[2] = {0.0, 0.0}; size_t n_half[2] = {0, 0}; }; std::unordered_map acc; for (int i = 0; i < reflections.size(); ++i) { const int half = half_dist(rng); if (!merge_mask.empty() && merge_mask[i] == 0) continue; if (reflections[i].empty()) continue; for (const auto &r: reflections[i]) { if (r.image_scale_corr <= 0.0 || !std::isfinite(r.image_scale_corr)) continue; if (!AcceptReflection(r, high_resolution_limit)) continue; if (r.partiality < min_partiality) continue; const float I_corr = r.I * r.image_scale_corr; const float sigma_corr = r.sigma * r.image_scale_corr; if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0) continue; auto hkl = key_generator(r); auto hkl_key = hkl.pack(); auto it = acc.find(hkl_key); if (it == acc.end()) it = acc.emplace(hkl_key, Accum{ .h = hkl.plus ? hkl.h : -hkl.h, .k = hkl.plus ? hkl.k : -hkl.k, .l = hkl.plus ? hkl.l : -hkl.l, }).first; const float w = 1.0f / (sigma_corr * sigma_corr); const float wI = w * I_corr; it->second.sum_wI += wI; it->second.sum_w += w; it->second.sum_wI_half[half] += wI; it->second.sum_w_half[half] += w; it->second.n_half[half]++; if (!std::isfinite(it->second.d) && std::isfinite(r.d) && r.d > 0.0f) it->second.d = r.d; } } std::vector out; out.reserve(acc.size()); for (const auto &[key, accum]: acc) { if (accum.sum_w <= 0.0) continue; MergedReflection mr{ .h = accum.h, .k = accum.k, .l = accum.l, .I = static_cast(accum.sum_wI / accum.sum_w), .sigma = 1.0f / std::sqrt(static_cast(accum.sum_w)), .I_half = {NAN, NAN}, .sigma_half = {NAN, NAN}, .d = accum.d }; if (accum.n_half[0] + accum.n_half[1] > 0 && accum.sum_w_half[0] > 0.0 && accum.sum_w_half[1] > 0.0) { for (int i = 0; i < 2; ++i) { mr.I_half[i] = static_cast(accum.sum_wI_half[i] / accum.sum_w_half[i]); mr.sigma_half[i] = 1.0f / std::sqrt(static_cast(accum.sum_w_half[i])); } } out.emplace_back(mr); } return out; } MergeStatistics MergeStats(const DiffractionExperiment &x, const std::vector &merged, const std::vector > &reflections, const std::vector &merge_mask) { if (!merge_mask.empty() && merge_mask.size() != reflections.size()) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Merge mask size mismatch"); constexpr int n_shells = 10; float d_min = std::numeric_limits::max(); float d_max = 0.0f; auto min_partiality = x.GetScalingSettings().GetMinPartiality(); auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A(); auto scaling_settings = x.GetScalingSettings(); HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1)); for (const auto &m: merged) { if (!std::isfinite(m.d) || m.d <= 0.0f) continue; if (d_min_limit_A && m.d < d_min_limit_A) continue; d_min = std::min(d_min, m.d); d_max = std::max(d_max, m.d); } if (!(d_min < d_max && d_min > 0.0f)) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Error in resolution calculation"); const float d_min_pad = d_min * 0.999f; const float d_max_pad = d_max * 1.001f; ResolutionShells shells(d_min_pad, d_max_pad, n_shells); const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq(); const auto shell_min_res = shells.GetShellMinRes(); struct ShellAccum { int total_obs = 0; int unique = 0; double sum_i_over_sigma = 0.0; int n_i_over_sigma = 0; double sum_x = 0.0; double sum_y = 0.0; double sum_x2 = 0.0; double sum_y2 = 0.0; double sum_xy = 0.0; int n_cc_half = 0; }; std::vector acc(n_shells); for (const auto &m: merged) { const auto shell = shells.GetShell(m.d); if (!shell.has_value()) continue; const int s = *shell; if (s >= 0 && s < n_shells) { if (std::isfinite(m.I) && std::isfinite(m.sigma) && m.sigma > 0.0) { acc[s].unique++; acc[s].sum_i_over_sigma += m.I / m.sigma; ++acc[s].n_i_over_sigma; if (std::isfinite(m.I_half[0]) && std::isfinite(m.I_half[1])) { acc[s].n_cc_half += 1; acc[s].sum_x += m.I_half[0]; acc[s].sum_y += m.I_half[1]; acc[s].sum_x2 += m.I_half[0] * m.I_half[0]; acc[s].sum_y2 += m.I_half[1] * m.I_half[1]; acc[s].sum_xy += m.I_half[0] * m.I_half[1]; } } } } for (int i = 0; i < reflections.size(); ++i) { if (!merge_mask.empty() && merge_mask[i] == 0) continue; for (const auto &r: reflections[i]) { if (r.image_scale_corr <= 0.0 || !std::isfinite(r.image_scale_corr)) continue; if (!AcceptReflection(r, d_min_limit_A)) continue; if (r.partiality < min_partiality) continue; const float I_corr = r.I * r.image_scale_corr; const float sigma_corr = r.sigma * r.image_scale_corr; if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0f) continue; const auto shell = shells.GetShell(r.d); if (!shell.has_value()) continue; const int s = *shell; if (s >= 0 && s < n_shells) acc[s].total_obs++; } } MergeStatistics out; out.shells.resize(n_shells); for (int s = 0; s < n_shells; ++s) { const auto &sa = acc[s]; auto &ss = out.shells[s]; ss.mean_one_over_d2 = shell_mean_1_d2[s]; ss.d_min = shell_min_res[s]; ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1]; ss.total_observations = sa.total_obs; ss.unique_reflections = sa.unique; ss.mean_i_over_sigma = sa.n_i_over_sigma > 0 ? sa.sum_i_over_sigma / sa.n_i_over_sigma : 0.0; if (sa.n_cc_half >= 2) { const double n = static_cast(sa.n_cc_half); const double cov = sa.sum_xy - sa.sum_x * sa.sum_y / n; const double var_x = sa.sum_x2 - sa.sum_x * sa.sum_x / n; const double var_y = sa.sum_y2 - sa.sum_y * sa.sum_y / n; ss.cc_half = cov / std::sqrt(var_x * var_y); if (!std::isfinite(ss.cc_half)) ss.cc_half = 0.0; } else { ss.cc_half = 0.0; } } auto &overall = out.overall; overall.d_min = d_min; overall.d_max = d_max; int all_unique = 0; double sum_i_over_sigma = 0.0; int n_i_over_sigma = 0; double all_sum_x = 0.0; double all_sum_y = 0.0; double all_sum_x2 = 0.0; double all_sum_y2 = 0.0; double all_sum_xy = 0.0; int all_n_cc_half = 0; for (const auto &sa: acc) { overall.total_observations += sa.total_obs; all_unique += sa.unique; sum_i_over_sigma += sa.sum_i_over_sigma; n_i_over_sigma += sa.n_i_over_sigma; all_sum_x += sa.sum_x; all_sum_y += sa.sum_y; all_sum_x2 += sa.sum_x2; all_sum_y2 += sa.sum_y2; all_sum_xy += sa.sum_xy; all_n_cc_half += sa.n_cc_half; } overall.unique_reflections = all_unique; overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0; if (all_n_cc_half >= 2) { const double n = static_cast(all_n_cc_half); const double cov = all_sum_xy - all_sum_x * all_sum_y / n; const double var_x = all_sum_x2 - all_sum_x * all_sum_x / n; const double var_y = all_sum_y2 - all_sum_y * all_sum_y / n; overall.cc_half = cov / std::sqrt(var_x * var_y); if (!std::isfinite(overall.cc_half)) overall.cc_half = 0.0; } else { overall.cc_half = 0.0; } return out; } void MergeStatistics::Print(Logger &logger) const { logger.Info(""); logger.Info(" {:>8s} {:>8s} {:>8s} {:>8s} {:>8s}", "d_min", "N_obs", "N_uniq", "", "CC1/2"); logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s} {:>8s}", "", "", "", "", ""); for (const auto &sh: shells) { if (sh.unique_reflections == 0) continue; logger.Info(" {:8.2f} {:8d} {:8d} {:8.1f} {:8.3f}", sh.d_min, sh.total_observations, sh.unique_reflections, sh.mean_i_over_sigma, sh.cc_half*100.0); } { const auto &ov = overall; logger.Info(" {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", ""); logger.Info(" {:>8s} {:8d} {:8d} {:8.1f} {:8.3f}", "Overall", ov.total_observations, ov.unique_reflections, ov.mean_i_over_sigma, ov.cc_half*100.0); } logger.Info(""); }