Jungfraujoch/image_analysis/scale_merge/Merge.cpp

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only

#include "Merge.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <random>
#include <unordered_map>

#include "../../common/ResolutionShells.h"
#include "HKLKey.h"

// TODO: Unit cell logic is very messy, given each reflection can have its own d value
// Need to use consistent lattice to calc resolution (which would also allow to calculate completeness)
// But this is not possible for current still workflow (while fine for rotation workflow)

std::vector<uint8_t> CalcMergeMask(const DiffractionExperiment &x,
                                   const ScalingResult &result) {
    std::vector<uint8_t> ret(result.image_cc.size(), 0);

    auto min_cc = x.GetScalingSettings().GetMinCCForImage();
    for (int i = 0 ; i < result.image_cc.size(); ++i)
        ret[i] = (result.image_cc[i] >= min_cc) ? 1 : 0;

    return ret;
}

std::vector<MergedReflection> MergeAll(const DiffractionExperiment &x,
                                       const std::vector<std::vector<Reflection> > &reflections,
                                       const std::vector<uint8_t> &merge_mask) {

    // To select images for half-datasets to calculate CC1/2, I use a random number generator with a fixed seed.
    // This makes sure that images are selected randomly, but in a fully reproducible manner (at least for the same binary)
    std::mt19937 rng(123456789u);
    std::bernoulli_distribution half_dist(0.5);

    if (!merge_mask.empty() && merge_mask.size() != reflections.size())
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Merge mask size mismatch");

    auto scaling_settings = x.GetScalingSettings();
    HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1));
    const std::optional<double> high_resolution_limit = scaling_settings.GetHighResolutionLimit_A();
    auto min_partiality = scaling_settings.GetMinPartiality();

    struct Accum {
        int32_t h = 0;
        int32_t k = 0;
        int32_t l = 0;
        float d = NAN;
        double sum_wI = 0.0;
        double sum_w = 0.0;
        double sum_wI_half[2] = {0.0, 0.0};
        double sum_w_half[2] = {0.0, 0.0};
        size_t n_half[2] = {0, 0};
    };

    std::unordered_map<uint64_t, Accum> acc;

    for (int i = 0; i < reflections.size(); ++i) {
        const int half = half_dist(rng);
        if (!merge_mask.empty() && merge_mask[i] == 0)
            continue;
        if (reflections[i].empty())
            continue;
        for (const auto &r: reflections[i]) {
            if (r.image_scale_corr <= 0.0 || !std::isfinite(r.image_scale_corr))
                continue;
            if (!AcceptReflection(r, high_resolution_limit))
                continue;
            if (r.partiality < min_partiality)
                continue;

            const float I_corr = r.I * r.image_scale_corr;
            const float sigma_corr = r.sigma * r.image_scale_corr;
            if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0)
                continue;

            auto hkl = key_generator(r);
            auto hkl_key = hkl.pack();

            auto it = acc.find(hkl_key);
            if (it == acc.end())
                it = acc.emplace(hkl_key, Accum{
                                     .h = hkl.plus ? hkl.h : -hkl.h,
                                     .k = hkl.plus ? hkl.k : -hkl.k,
                                     .l = hkl.plus ? hkl.l : -hkl.l,
                                 }).first;

            const float w = 1.0f / (sigma_corr * sigma_corr);
            const float wI = w * I_corr;

            it->second.sum_wI += wI;
            it->second.sum_w += w;

            it->second.sum_wI_half[half] += wI;
            it->second.sum_w_half[half] += w;
            it->second.n_half[half]++;

            if (!std::isfinite(it->second.d) && std::isfinite(r.d) && r.d > 0.0f)
                it->second.d = r.d;
        }
    }

    std::vector<MergedReflection> out;
    out.reserve(acc.size());
    for (const auto &[key, accum]: acc) {
        if (accum.sum_w <= 0.0)
            continue;

        MergedReflection mr{
            .h = accum.h,
            .k = accum.k,
            .l = accum.l,
            .I = static_cast<float>(accum.sum_wI / accum.sum_w),
            .sigma = 1.0f / std::sqrt(static_cast<float>(accum.sum_w)),
            .I_half = {NAN, NAN},
            .sigma_half = {NAN, NAN},
            .d = accum.d
        };

        if (accum.n_half[0] + accum.n_half[1] > 0
            && accum.sum_w_half[0] > 0.0
            && accum.sum_w_half[1] > 0.0) {
            for (int i = 0; i < 2; ++i) {
                mr.I_half[i] = static_cast<float>(accum.sum_wI_half[i] / accum.sum_w_half[i]);
                mr.sigma_half[i] = 1.0f / std::sqrt(static_cast<float>(accum.sum_w_half[i]));
            }
        }
        out.emplace_back(mr);
    }
    return out;
}

MergeStatistics MergeStats(const DiffractionExperiment &x,
                           const std::vector<MergedReflection> &merged,
                           const std::vector<std::vector<Reflection> > &reflections,
                           const std::vector<uint8_t> &merge_mask) {

    if (!merge_mask.empty() && merge_mask.size() != reflections.size())
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Merge mask size mismatch");

    constexpr int n_shells = 10;

    float d_min = std::numeric_limits<float>::max();
    float d_max = 0.0f;

    auto min_partiality = x.GetScalingSettings().GetMinPartiality();
    auto d_min_limit_A = x.GetScalingSettings().GetHighResolutionLimit_A();
    auto scaling_settings = x.GetScalingSettings();
    HKLKeyGenerator key_generator(scaling_settings.GetMergeFriedel(), x.GetSpaceGroupNumber().value_or(1));

    for (const auto &m: merged) {
        if (!std::isfinite(m.d) || m.d <= 0.0f)
            continue;
        if (d_min_limit_A && m.d < d_min_limit_A)
            continue;

        d_min = std::min(d_min, m.d);
        d_max = std::max(d_max, m.d);
    }

    if (!(d_min < d_max && d_min > 0.0f))
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
            "Error in resolution calculation");

    const float d_min_pad = d_min * 0.999f;
    const float d_max_pad = d_max * 1.001f;

    ResolutionShells shells(d_min_pad, d_max_pad, n_shells);
    const auto shell_mean_1_d2 = shells.GetShellMeanOneOverResSq();
    const auto shell_min_res = shells.GetShellMinRes();

    struct ShellAccum {
        int total_obs = 0;
        int unique = 0;
        double sum_i_over_sigma = 0.0;
        int n_i_over_sigma = 0;

        double sum_x = 0.0;
        double sum_y = 0.0;
        double sum_x2 = 0.0;
        double sum_y2 = 0.0;
        double sum_xy = 0.0;
        int n_cc_half = 0;
    };

    std::vector<ShellAccum> acc(n_shells);

    for (const auto &m: merged) {
        const auto shell = shells.GetShell(m.d);
        if (!shell.has_value())
            continue;

        const int s = *shell;
        if (s >= 0 && s < n_shells) {
            if (std::isfinite(m.I) && std::isfinite(m.sigma) && m.sigma > 0.0) {
                acc[s].unique++;
                acc[s].sum_i_over_sigma += m.I / m.sigma;
                ++acc[s].n_i_over_sigma;

                if (std::isfinite(m.I_half[0]) && std::isfinite(m.I_half[1])) {
                    acc[s].n_cc_half += 1;
                    acc[s].sum_x += m.I_half[0];
                    acc[s].sum_y += m.I_half[1];
                    acc[s].sum_x2 += m.I_half[0] * m.I_half[0];
                    acc[s].sum_y2 += m.I_half[1] * m.I_half[1];
                    acc[s].sum_xy += m.I_half[0] * m.I_half[1];
                }
            }
        }
    }

    for (int i = 0; i < reflections.size(); ++i) {
        if (!merge_mask.empty() && merge_mask[i] == 0)
            continue;

        for (const auto &r: reflections[i]) {
            if (r.image_scale_corr <= 0.0 || !std::isfinite(r.image_scale_corr))
                continue;

            if (!AcceptReflection(r, d_min_limit_A))
                continue;
            if (r.partiality < min_partiality)
                continue;

            const float I_corr = r.I * r.image_scale_corr;
            const float sigma_corr = r.sigma * r.image_scale_corr;
            if (!std::isfinite(I_corr) || !std::isfinite(sigma_corr) || sigma_corr <= 0.0f)
                continue;

            const auto shell = shells.GetShell(r.d);
            if (!shell.has_value())
                continue;
            const int s = *shell;
            if (s >= 0 && s < n_shells)
                acc[s].total_obs++;
        }
    }

    MergeStatistics out;
    out.shells.resize(n_shells);

    for (int s = 0; s < n_shells; ++s) {
        const auto &sa = acc[s];
        auto &ss = out.shells[s];

        ss.mean_one_over_d2 = shell_mean_1_d2[s];
        ss.d_min = shell_min_res[s];
        ss.d_max = s == 0 ? d_max_pad : shell_min_res[s - 1];
        ss.total_observations = sa.total_obs;
        ss.unique_reflections = sa.unique;
        ss.mean_i_over_sigma = sa.n_i_over_sigma > 0
                                   ? sa.sum_i_over_sigma / sa.n_i_over_sigma
                                   : 0.0;

        if (sa.n_cc_half >= 2) {
            const double n = static_cast<double>(sa.n_cc_half);
            const double cov = sa.sum_xy - sa.sum_x * sa.sum_y / n;
            const double var_x = sa.sum_x2 - sa.sum_x * sa.sum_x / n;
            const double var_y = sa.sum_y2 - sa.sum_y * sa.sum_y / n;

            ss.cc_half = cov / std::sqrt(var_x * var_y);
            if (!std::isfinite(ss.cc_half))
                ss.cc_half = 0.0;
        } else {
            ss.cc_half = 0.0;
        }
    }

    auto &overall = out.overall;
    overall.d_min = d_min;
    overall.d_max = d_max;

    int all_unique = 0;
    double sum_i_over_sigma = 0.0;
    int n_i_over_sigma = 0;

    double all_sum_x = 0.0;
    double all_sum_y = 0.0;
    double all_sum_x2 = 0.0;
    double all_sum_y2 = 0.0;
    double all_sum_xy = 0.0;
    int all_n_cc_half = 0;

    for (const auto &sa: acc) {
        overall.total_observations += sa.total_obs;
        all_unique += sa.unique;
        sum_i_over_sigma += sa.sum_i_over_sigma;
        n_i_over_sigma += sa.n_i_over_sigma;

        all_sum_x += sa.sum_x;
        all_sum_y += sa.sum_y;
        all_sum_x2 += sa.sum_x2;
        all_sum_y2 += sa.sum_y2;
        all_sum_xy += sa.sum_xy;
        all_n_cc_half += sa.n_cc_half;
    }

    overall.unique_reflections = all_unique;
    overall.mean_i_over_sigma = n_i_over_sigma > 0 ? sum_i_over_sigma / n_i_over_sigma : 0.0;

    if (all_n_cc_half >= 2) {
        const double n = static_cast<double>(all_n_cc_half);
        const double cov = all_sum_xy - all_sum_x * all_sum_y / n;
        const double var_x = all_sum_x2 - all_sum_x * all_sum_x / n;
        const double var_y = all_sum_y2 - all_sum_y * all_sum_y / n;

        overall.cc_half = cov / std::sqrt(var_x * var_y);
        if (!std::isfinite(overall.cc_half))
            overall.cc_half = 0.0;
    } else {
        overall.cc_half = 0.0;
    }

    return out;
}

void MergeStatistics::Print(Logger &logger) const {
    logger.Info("");
    logger.Info("  {:>8s} {:>8s} {:>8s} {:>8s} {:>8s}", "d_min", "N_obs", "N_uniq", "<I/sig>", "CC1/2");
    logger.Info("  {:->8s} {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", "", "");
    for (const auto &sh: shells) {
        if (sh.unique_reflections == 0)
            continue;
        logger.Info("  {:8.2f} {:8d} {:8d} {:8.1f} {:8.3f}",
                    sh.d_min, sh.total_observations, sh.unique_reflections,
                    sh.mean_i_over_sigma, sh.cc_half*100.0);
    }
    {
        const auto &ov = overall;
        logger.Info("  {:->8s} {:->8s} {:->8s} {:->8s} {:->8s}", "", "", "", "", "");
        logger.Info("  {:>8s} {:8d} {:8d} {:8.1f} {:8.3f}",
                    "Overall", ov.total_observations, ov.unique_reflections,
                    ov.mean_i_over_sigma, ov.cc_half*100.0);
    }
    logger.Info("");
}