Jungfraujoch/image_analysis/bragg_integration/ProfileIntegrate2D.cpp

// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only

#include "ProfileIntegrate2D.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>

#include "../../common/JFJochException.h"

namespace {

constexpr int N_SHELL = 6;
constexpr double STRONG_I_OVER_SIGMA = 5.0;
constexpr int MIN_STRONG_PER_SHELL = 30;

// Radial parallax broadening as the coefficient of tan^2(2theta), i.e. Var(z)/pixel^2 [px^2]. A photon
// converts at a random depth z in the sensor (exponential with attenuation length L, truncated at the
// sensor thickness t), shifting the recorded spot RADIALLY by z*tan(2theta) -> radial variance
// Var(z)*tan^2(2theta). L is photoelectric-dominated (~lambda^3 between edges), so a per-material
// reference (NIST XCOM at 0.953 A / 13 keV) is scaled by lambda^3; Si and CdTe are the sensors in use.
double parallax_var_px2(const std::string &material, double thickness_um, double lambda_A, double pixel_um) {
    if (!(thickness_um > 0.0) || !(pixel_um > 0.0) || !(lambda_A > 0.0))
        return 0.0;
    const double L_ref = material == "CdTe" ? 42.6 : 273.0;   // attenuation length [um] at 0.953 A
    const double s = lambda_A / 0.953;
    const double L = L_ref / (s * s * s);
    const double a = thickness_um / L, e = std::exp(-a);
    if (1.0 - e <= 0.0)
        return 0.0;
    const double mean = L * (1.0 - (1.0 + a) * e) / (1.0 - e);
    const double ez2 = L * L * (2.0 - (a * a + 2.0 * a + 2.0) * e) / (1.0 - e);
    const double var = std::max(0.0, ez2 - mean * mean);     // um^2
    return var / (pixel_um * pixel_um);
}

// Mark the r2 signal disk of every predicted reflection. A neighbour whose disk falls inside this
// reflection's r2..r3 background ring would otherwise bias the background mean high and over-subtract;
// excluding the marked pixels from the ring removes that contamination (same as BraggIntegrate2D).
std::vector<uint8_t> BuildReflectionMask(const std::vector<Reflection> &predicted, size_t npredicted,
                                         size_t xpixel, size_t ypixel, float r2) {
    std::vector<uint8_t> mask(xpixel * ypixel, 0);
    const float r2_sq = r2 * r2;
    for (size_t i = 0; i < npredicted; ++i) {
        const auto &r = predicted[i];
        const int64_t x0 = std::max<int64_t>(0, std::floor(r.predicted_x - r2 - 1.0f));
        const int64_t x1 = std::min<int64_t>(xpixel - 1, std::ceil(r.predicted_x + r2 + 1.0f));
        const int64_t y0 = std::max<int64_t>(0, std::floor(r.predicted_y - r2 - 1.0f));
        const int64_t y1 = std::min<int64_t>(ypixel - 1, std::ceil(r.predicted_y + r2 + 1.0f));
        for (int64_t y = y0; y <= y1; ++y)
            for (int64_t x = x0; x <= x1; ++x) {
                const double d2 = (x - r.predicted_x) * (x - r.predicted_x) + (y - r.predicted_y) * (y - r.predicted_y);
                if (d2 < r2_sq) mask[y * xpixel + x] = 1;
            }
    }
    return mask;
}

// Rough box-sum of one reflection: total intensity over the inner disk (r1) minus the mean of the
// background ring (r2..r3), like BraggIntegrate2D. Used to seed the fit, pick strong spots for
// profile learning, and provide the per-reflection background. ok=false when the inner disk is
// clipped by a gap/edge/bad pixel (same rejection as the box-sum integrator).
struct Rough {
    double I = 0.0, sigma = NAN, bkg = 0.0;
    int cx = 0, cy = 0, shell = -1;
    bool ok = false, strong = false;
};

template<class T>
Rough BoxSum(const T *image, size_t xpixel, size_t ypixel, int64_t special, int64_t saturation,
             const Reflection &r, const std::vector<uint8_t> &refl_mask,
             float r1_sq, float r2_sq, float r3_sq, float r3, float min_sigma_ratio,
             bool apply_bkg_clip) {
    Rough out;
    const int64_t x0 = std::max<int64_t>(0, std::floor(r.predicted_x - r3 - 1.0));
    const int64_t x1 = std::min<int64_t>(xpixel - 1, std::ceil(r.predicted_x + r3 + 1.0));
    const int64_t y0 = std::max<int64_t>(0, std::floor(r.predicted_y - r3 - 1.0));
    const int64_t y1 = std::min<int64_t>(ypixel - 1, std::ceil(r.predicted_y + r3 + 1.0));

    int64_t I_sum = 0, n_inner = 0, n_inner_valid = 0;
    double bkg_sum = 0.0;
    int n_bkg = 0;
    std::vector<double> bkg_vals;
    bkg_vals.reserve(128);
    for (int64_t y = y0; y <= y1; ++y)
        for (int64_t x = x0; x <= x1; ++x) {
            const double d2 = (x - r.predicted_x) * (x - r.predicted_x) + (y - r.predicted_y) * (y - r.predicted_y);
            const auto px = image[y * xpixel + x];
            if (d2 < r1_sq) {
                ++n_inner;
                if (px == special || px == special + 1 || px == saturation || px == saturation - 1) continue;
                I_sum += px;
                ++n_inner_valid;
            } else if (d2 >= r2_sq && d2 < r3_sq) {
                if (refl_mask[y * xpixel + x]) continue;
                if (px == special || px == special + 1 || px == saturation || px == saturation - 1) continue;
                bkg_sum += static_cast<double>(px);
                ++n_bkg;
                bkg_vals.push_back(static_cast<double>(px));
            }
        }
    if (n_inner_valid != n_inner || n_bkg <= 5)
        return out;
    out.bkg = bkg_sum / n_bkg;
    // One high-outlier sigma-clip pass on the background ring. A bandwidth-streaked high-resolution spot
    // (or a neighbour) leaks into the annulus and biases the mean high, which over-subtracts and drives
    // the merged high-resolution intensities negative; rejecting pixels above mean + 3*sqrt(mean) removes
    // that contamination (and barely touches a clean Poisson background, where ~0.1% exceed the cut).
    // Stills-only: on rotation (no bandwidth streak) it clips legitimate high background pixels and biases
    // the mean low, slightly hurting weak (anomalous) intensities, so it is gated off there.
    if (apply_bkg_clip) {
        const double thr = out.bkg + 3.0 * std::sqrt(std::max(out.bkg, 1.0));
        double s = 0.0;
        int n = 0;
        for (const double v : bkg_vals) if (v <= thr) { s += v; ++n; }
        if (n > 5) out.bkg = s / n;
    }
    out.I = static_cast<double>(I_sum) - static_cast<double>(n_inner) * out.bkg;
    out.sigma = std::max(1.0, out.I * min_sigma_ratio);
    if (I_sum > 0)
        out.sigma = std::max(out.sigma, std::sqrt(static_cast<double>(I_sum)));
    out.cx = static_cast<int>(std::lround(r.predicted_x));
    out.cy = static_cast<int>(std::lround(r.predicted_y));
    out.ok = true;
    out.strong = out.sigma > 0.0 && out.I / out.sigma >= STRONG_I_OVER_SIGMA;
    return out;
}

template<class T>
std::vector<Reflection> ProfileIntegrateInternal(const DiffractionExperiment &experiment,
                                                 const CompressedImage &image,
                                                 const std::vector<Reflection> &predicted, size_t npredicted,
                                                 int64_t special, int64_t saturation, int64_t image_number) {
    const auto settings = experiment.GetBraggIntegrationSettings();
    const auto geom = experiment.GetDiffractionGeometry();
    const bool empirical = settings.GetIntegrator() == IntegratorMode::ProfileEmpirical;

    const float r1_sq = settings.GetR1() * settings.GetR1();
    const float r2 = settings.GetR2(), r2_sq = r2 * r2;
    const float r3 = settings.GetR3(), r3_sq = r3 * r3;
    const float min_sigma_ratio = settings.GetMinimumSigmaInRegardsToI();
    // A set bandwidth (broadband / stills) vs monochromatic (rotation) splits the profile treatment:
    //  - background-ring sigma-clip de-biases the bandwidth streak but would clip legitimate rotation
    //    background, so it is stills-only;
    //  - the profile-width 2nd-moment is measured over the signal disk (r1) on the monochromatic path,
    //    where sharp crowded spots make the learning grid neighbour-contaminated, but over the full grid
    //    on the broadband path, where spots are sparse and want the generous width (centroid floor);
    //  - the radial profile gets an extra weak-spot capture term on the monochromatic path only.
    const double bw_sigma = experiment.GetBandwidthFWHM().value_or(0.0f) / 2.3548f;
    const bool broadband = bw_sigma > 0.0;
    const bool apply_bkg_clip = broadband;
    const int R = static_cast<int>(std::ceil(r2));   // profile grid half-size
    const int G = 2 * R + 1, GG = G * G;
    auto idx = [G, R](int dx, int dy) { return (dy + R) * G + (dx + R); };

    std::vector<uint8_t> buffer;
    const auto *ptr = reinterpret_cast<const T *>(image.GetUncompressedPtr(buffer));
    const size_t xpixel = image.GetWidth(), ypixel = image.GetHeight();

    // --- Pass A: box-sum every reflection (rough I, background, centre, strong flag). ---
    const auto refl_mask = BuildReflectionMask(predicted, npredicted, xpixel, ypixel, r2);
    std::vector<Rough> rough(npredicted);
    double inv_d2_min = std::numeric_limits<double>::max(), inv_d2_max = 0.0;
    for (size_t i = 0; i < npredicted; ++i) {
        rough[i] = BoxSum<T>(ptr, xpixel, ypixel, special, saturation, predicted[i], refl_mask,
                             r1_sq, r2_sq, r3_sq, r3, min_sigma_ratio, apply_bkg_clip);
        if (rough[i].ok && predicted[i].d > 0.0f) {
            const double inv_d2 = 1.0 / (predicted[i].d * predicted[i].d);
            inv_d2_min = std::min(inv_d2_min, inv_d2);
            inv_d2_max = std::max(inv_d2_max, inv_d2);
        }
    }
    auto shell_of = [&](float d) {
        if (!(d > 0.0f) || inv_d2_max <= inv_d2_min) return 0;
        const double t = (1.0 / (d * d) - inv_d2_min) / (inv_d2_max - inv_d2_min);
        return std::clamp(static_cast<int>(t * N_SHELL), 0, N_SHELL - 1);
    };
    for (size_t i = 0; i < npredicted; ++i)
        if (rough[i].ok)
            rough[i].shell = shell_of(predicted[i].d);

    // --- Learn the profile per shell from the strong spots (+ a global fallback). Each strong
    //     spot contributes its background-subtracted, intensity-normalised, centroid-aligned grid. ---
    std::vector<std::vector<double>> shell_grid(N_SHELL, std::vector<double>(GG, 0.0));
    std::vector<int> shell_n(N_SHELL, 0);
    std::vector<double> global_grid(GG, 0.0);
    int global_n = 0;
    for (size_t i = 0; i < npredicted; ++i) {
        const auto &rh = rough[i];
        if (!rh.ok || !rh.strong || rh.I <= 0.0) continue;
        for (int dy = -R; dy <= R; ++dy)
            for (int dx = -R; dx <= R; ++dx) {
                const int64_t x = rh.cx + dx, y = rh.cy + dy;
                if (x < 0 || y < 0 || x >= static_cast<int64_t>(xpixel) || y >= static_cast<int64_t>(ypixel)) continue;
                const auto px = ptr[y * xpixel + x];
                if (px == special || px == special + 1 || px == saturation || px == saturation - 1) continue;
                const double v = (static_cast<double>(px) - rh.bkg) / rh.I;
                shell_grid[rh.shell][idx(dx, dy)] += v;
                global_grid[idx(dx, dy)] += v;
            }
        ++shell_n[rh.shell];
        ++global_n;
    }

    // Build a normalised profile (sum = 1): the empirical average, or an isotropic Gaussian of the
    // same (measured) second moment - the spots are ~round in the detector plane (the real radial
    // anisotropy is handled by the parallax ellipse below), so a single width is kept here.
    // The 2nd-moment is measured over the signal disk r1 on the monochromatic path (the wide grid
    // corners only add neighbour leakage and rectified noise, lever arm dx^2+dy^2 up to ~72, inflating
    // the learned width several-fold) and over the full grid on the broadband path (sparse spots, the
    // generous width that the stills centroid-undersampling floor wants).
    auto build_profile = [&](const std::vector<double> &grid, int n) {
        std::vector<double> P(GG, 0.0);
        if (n <= 0) return P;
        double sum = 0.0, m2 = 0.0, m2w = 0.0;
        for (int dy = -R; dy <= R; ++dy)
            for (int dx = -R; dx <= R; ++dx) {
                const double g = std::max(0.0, grid[idx(dx, dy)]);
                if (broadband || dx * dx + dy * dy < r1_sq) { m2 += g * (dx * dx + dy * dy); m2w += g; }
                sum += g;
                if (empirical) P[idx(dx, dy)] = g;
            }
        if (sum <= 0.0) return P;
        if (empirical) {
            for (double &p : P) p /= sum;
        } else {
            const double sigma2 = std::max(0.25, (m2w > 0.0 ? m2 / m2w : 1.0) / 2.0);   // <r^2> = 2 sigma^2 (2D)
            double gsum = 0.0;
            for (int dy = -R; dy <= R; ++dy)
                for (int dx = -R; dx <= R; ++dx) {
                    const double g = std::exp(-(dx * dx + dy * dy) / (2.0 * sigma2));
                    P[idx(dx, dy)] = g;
                    gsum += g;
                }
            for (double &p : P) p /= gsum;
        }
        return P;
    };
    const std::vector<double> global_P = build_profile(global_grid, global_n);
    std::vector<std::vector<double>> shell_P(N_SHELL);
    for (int s = 0; s < N_SHELL; ++s)
        shell_P[s] = shell_n[s] >= MIN_STRONG_PER_SHELL ? build_profile(shell_grid[s], shell_n[s]) : global_P;

    // Isotropic (intrinsic) width per shell, used to seed the radially-elongated profile below.
    auto measure_sigma2 = [&](const std::vector<double> &grid) {
        double m2 = 0.0, m2w = 0.0;
        for (int dy = -R; dy <= R; ++dy)
            for (int dx = -R; dx <= R; ++dx) {
                if (!broadband && dx * dx + dy * dy >= r1_sq) continue;
                const double g = std::max(0.0, grid[idx(dx, dy)]);
                m2 += g * (dx * dx + dy * dy);
                m2w += g;
            }
        return m2w > 0.0 ? std::max(0.25, (m2 / m2w) / 2.0) : 1.0;
    };
    const double global_sigma2 = global_n > 0 ? measure_sigma2(global_grid) : 1.0;
    std::vector<double> shell_sigma2(N_SHELL, global_sigma2);
    for (int s = 0; s < N_SHELL; ++s)
        if (shell_n[s] >= MIN_STRONG_PER_SHELL)
            shell_sigma2[s] = measure_sigma2(shell_grid[s]);

    // Fit each reflection with a per-reflection Gaussian elongated only along the RADIAL direction
    // (sigma^2_radial = sigma^2_intrinsic + radial_extra, sigma^2_tangential = sigma^2_intrinsic), on a
    // grid grown to hold the streak - no extra tangential background, unlike an isotropic widening.
    // Two radial terms, both growing as tan^2(2theta) = (Rpx/F)^2 (Rpx = distance from the beam centre):
    //  - parallax (always): the sensor converts photons at a spread of depths -> radial shift
    //    z*tan(2theta), variance c_par * tan^2(2theta), c_par = Var(z)/pixel^2 from the sensor;
    //  - the energy-bandwidth streak (stills): sigma_bw = bandwidth_sigma * Rpx;
    //  - a weak-spot capture term (monochromatic path only): the radial profile must also absorb the
    //    per-frame radial position scatter of weak high-res spots (residual cell/orientation error,
    //    growing ~ tan^2), which the strong learned profile underestimates. A fixed coefficient recovers
    //    the weak high-res reflections; the metric is a broad plateau so it needs no tuning. On the
    //    broadband path the bandwidth streak already provides this generosity, so it is omitted there.
    const float beam_x = geom.GetBeamX_pxl(), beam_y = geom.GetBeamY_pxl();
    const auto &det = experiment.GetDetectorSetup();
    const double c_par = parallax_var_px2(det.GetSensorMaterial(), det.GetSensorThickness_um(),
                                          geom.GetWavelength_A(), geom.GetPixelSize_mm() * 1000.0);
    constexpr double C_CAPTURE = 2.5;
    const double c_radial = c_par + (broadband ? 0.0 : C_CAPTURE);
    const double F_px = geom.GetDetectorDistance_mm() / std::max(1e-6f, geom.GetPixelSize_mm());
    const bool use_ellipse = !empirical && (bw_sigma > 0.0 || c_radial > 0.0);

    // Reciprocal-space global profile width (--reciprocal-profile): one global model
    // sigma2_q,tan = A + B*|q| + C*|q|^2 over all strong spots replaces the per-shell pixel width. The
    // Jacobian g_tan = cos(2theta) maps the pixel tangential moment into reciprocal space, removing the
    // geometric projection that makes the pixel width grow ~4x with resolution. The C*|q|^2 term is the
    // crystal MOSAICITY (relrod variance ~ (eta*|q|)^2): ~0 for a sharp crystal (the fit collapses to
    // A + B*|q|, tying the per-shell model) but dominant for a mosaic one, where per-shell starves on the
    // wide, weak high-res spots. Applied per reflection as sigma2_tan,px = (A + B*|q| + C*|q|^2)/g_tan^2.
    const bool recip_on = settings.GetReciprocalProfile();
    double recip_A = 0.0, recip_B = 0.0, recip_C = 0.0;
    bool use_recip = false;
    if (recip_on && !empirical) {
        double s1 = 0, sq = 0, sq2 = 0, sq3 = 0, sq4 = 0, sy = 0, sqy = 0, sq2y = 0;
        for (size_t i = 0; i < npredicted; ++i) {
            const auto &rh = rough[i];
            if (!rh.ok || !rh.strong || rh.I <= 0.0 || !(predicted[i].d > 0.0f)) continue;
            const double rx = predicted[i].predicted_x - beam_x, ry = predicted[i].predicted_y - beam_y;
            const double Rpx = std::hypot(rx, ry);
            if (Rpx < 1e-6) continue;
            const double ux = rx / Rpx, uy = ry / Rpx;
            double m2 = 0.0, m2w = 0.0;
            for (int dy = -R; dy <= R; ++dy)
                for (int dx = -R; dx <= R; ++dx) {
                    if (dx * dx + dy * dy >= r1_sq) continue;
                    const int64_t x = rh.cx + dx, y = rh.cy + dy;
                    if (x < 0 || y < 0 || x >= static_cast<int64_t>(xpixel) || y >= static_cast<int64_t>(ypixel)) continue;
                    const auto px = ptr[y * xpixel + x];
                    if (px == special || px == special + 1 || px == saturation || px == saturation - 1) continue;
                    const double w = std::max(0.0, (static_cast<double>(px) - rh.bkg) / rh.I);
                    const double tn = -dx * uy + dy * ux;
                    m2 += w * tn * tn; m2w += w;
                }
            if (m2w <= 0.0) continue;
            const double tan2t = Rpx / F_px, cos2t = 1.0 / std::sqrt(1.0 + tan2t * tan2t);
            const double q = 1.0 / predicted[i].d, yv = cos2t * cos2t * (m2 / m2w);
            s1 += 1; sq += q; sq2 += q * q; sq3 += q * q * q; sq4 += q * q * q * q;
            sy += yv; sqy += q * yv; sq2y += q * q * yv;
        }
        // Fit quadratic, falling back to linear then constant so B and C stay >= 0 (a relrod cannot shrink
        // with resolution; a sharp crystal's C comes out as noise/<0 and drops to the linear branch).
        auto det3 = [](double a, double b, double c, double d, double e, double f, double g, double h, double i) {
            return a * (e * i - f * h) - b * (d * i - f * g) + c * (d * h - e * g);
        };
        if (s1 >= 30) {
            const double D3 = det3(s1, sq, sq2, sq, sq2, sq3, sq2, sq3, sq4), D2 = s1 * sq2 - sq * sq;
            const double C3 = std::fabs(D3) > 1e-9 ? det3(s1, sq, sy, sq, sq2, sqy, sq2, sq3, sq2y) / D3 : -1.0;
            const double B3 = std::fabs(D3) > 1e-9 ? det3(s1, sy, sq2, sq, sqy, sq3, sq2, sq2y, sq4) / D3 : 0.0;
            const double B2 = std::fabs(D2) > 1e-9 ? (s1 * sqy - sq * sy) / D2 : -1.0;
            if (std::fabs(D3) > 1e-9 && C3 > 0.0 && B3 >= 0.0) {
                recip_A = det3(sy, sq, sq2, sqy, sq2, sq3, sq2y, sq3, sq4) / D3; recip_B = B3; recip_C = C3;
            } else if (std::fabs(D2) > 1e-9 && B2 >= 0.0) {
                recip_A = (sy * sq2 - sqy * sq) / D2; recip_B = B2;
            } else {
                recip_A = sy / s1;   // constant width fallback
            }
            use_recip = recip_A > 0.0;
        }
    }

    // --- Pass B: profile-fit each reflection (Kabsch, de-biased variance v = B + I*P; iterate). ---
    std::vector<Reflection> out;
    out.reserve(npredicted);
    const auto pol = experiment.GetPolarizationFactor();
    std::vector<double> Pbuf;
    for (size_t i = 0; i < npredicted; ++i) {
        const auto &rh = rough[i];
        if (!rh.ok) continue;
        const int sh = rh.shell < 0 ? 0 : rh.shell;

        int Rf = R;
        const std::vector<double> *Pvec = &shell_P[sh];   // ProfileEmpirical uses the shared learned grid
        const double rx = predicted[i].predicted_x - beam_x, ry = predicted[i].predicted_y - beam_y;
        const double Rpx = std::hypot(rx, ry);
        const double tan2t = Rpx / F_px;
        double s2t = shell_sigma2[sh];
        if (use_recip) {   // global reciprocal width instead of the per-shell pixel width
            const double q = 1.0 / std::max(predicted[i].d, 1e-6f), cos2t = 1.0 / std::sqrt(1.0 + tan2t * tan2t);
            s2t = std::max(0.25, (recip_A + recip_B * q + recip_C * q * q) / (cos2t * cos2t));
        }
        double s2r = s2t;
        double ux = 1.0, uy = 0.0;
        bool elong = false;
        if (use_ellipse) {
            const double sbw = bw_sigma * Rpx;
            const double radial_extra = sbw * sbw + c_radial * tan2t * tan2t;   // bandwidth + parallax + capture
            // Only elongate where the radial streak adds a genuine fraction of a pixel of variance; at
            // low/mid resolution the smear is sub-pixel and elongating just adds noise.
            if (Rpx > 1e-6 && radial_extra > 0.25) {
                ux = rx / Rpx; uy = ry / Rpx;
                s2r = s2t + radial_extra;
                elong = true;
            }
        }
        // For the Gaussian integrator, build the profile per reflection so it is centred on the PREDICTED
        // sub-pixel position and (when needed) radially elongated. The learning/fit grid is otherwise
        // binned about the rounded centre round(predicted), which mis-places a single shared profile by up
        // to 0.5 px; the predicted position is noise-free geometry (unlike the observed centroid, which
        // hurt). ProfileEmpirical keeps its shared averaged grid (no sub-pixel shift without interpolation).
        if (!empirical) {
            const double fx = predicted[i].predicted_x - rh.cx, fy = predicted[i].predicted_y - rh.cy;
            Rf = elong ? std::min(3 * R, static_cast<int>(std::ceil(r2 + 2.0 * std::sqrt(s2r)))) : R;
            const int Gf = 2 * Rf + 1;
            Pbuf.assign(static_cast<size_t>(Gf) * Gf, 0.0);
            double gs = 0.0;
            for (int dy = -Rf; dy <= Rf; ++dy)
                for (int dx = -Rf; dx <= Rf; ++dx) {
                    const double ex = dx - fx, ey = dy - fy;
                    const double rad = ex * ux + ey * uy, tn = -ex * uy + ey * ux;
                    const double g = std::exp(-rad * rad / (2.0 * s2r) - tn * tn / (2.0 * s2t));
                    Pbuf[(dy + Rf) * Gf + (dx + Rf)] = g;
                    gs += g;
                }
            for (double &p : Pbuf) p /= gs;
            Pvec = &Pbuf;
        }

        const int Gf = 2 * Rf + 1;
        const double B = std::max(rh.bkg, 1.0);
        double I = rh.I, den = 0.0;
        for (int iter = 0; iter < 4; ++iter) {
            double num = 0.0;
            den = 0.0;
            for (int dy = -Rf; dy <= Rf; ++dy)
                for (int dx = -Rf; dx <= Rf; ++dx) {
                    const double Pp = (*Pvec)[(dy + Rf) * Gf + (dx + Rf)];
                    if (Pp <= 0.0) continue;
                    const int64_t x = rh.cx + dx, y = rh.cy + dy;
                    if (x < 0 || y < 0 || x >= static_cast<int64_t>(xpixel) || y >= static_cast<int64_t>(ypixel)) continue;
                    const auto px = ptr[y * xpixel + x];
                    if (px == special || px == special + 1 || px == saturation || px == saturation - 1) continue;
                    const double v = B + std::max(0.0, I) * Pp;
                    num += Pp * (static_cast<double>(px) - rh.bkg) / v;
                    den += Pp * Pp / v;
                }
            if (den > 0.0) I = num / den; else break;
        }
        if (!(den > 0.0)) continue;

        Reflection refl = predicted[i];
        refl.I = static_cast<float>(I);
        refl.sigma = static_cast<float>(std::sqrt(1.0 / den));
        refl.bkg = static_cast<float>(rh.bkg);
        refl.observed = true;
        if (pol)
            refl.rlp /= geom.CalcAzIntPolarizationCorr(refl.predicted_x, refl.predicted_y, pol.value());
        refl.image_scale_corr = refl.rlp / refl.partiality;
        refl.image_number = static_cast<float>(image_number);
        out.push_back(refl);
    }
    return out;
}

}   // namespace

std::vector<Reflection> ProfileIntegrate2D(const DiffractionExperiment &experiment,
                                           const CompressedImage &image,
                                           const std::vector<Reflection> &predicted, size_t npredicted,
                                           int64_t image_number) {
    if (image.GetCompressedSize() == 0 || predicted.empty())
        return {};
    switch (image.GetMode()) {
        case CompressedImageMode::Int8:
            return ProfileIntegrateInternal<int8_t>(experiment, image, predicted, npredicted, INT8_MIN, INT8_MAX, image_number);
        case CompressedImageMode::Int16:
            return ProfileIntegrateInternal<int16_t>(experiment, image, predicted, npredicted, INT16_MIN, INT16_MAX, image_number);
        case CompressedImageMode::Int32:
            return ProfileIntegrateInternal<int32_t>(experiment, image, predicted, npredicted, INT32_MIN, INT32_MAX, image_number);
        case CompressedImageMode::Uint8:
            return ProfileIntegrateInternal<uint8_t>(experiment, image, predicted, npredicted, UINT8_MAX, UINT8_MAX, image_number);
        case CompressedImageMode::Uint16:
            return ProfileIntegrateInternal<uint16_t>(experiment, image, predicted, npredicted, UINT16_MAX, UINT16_MAX, image_number);
        case CompressedImageMode::Uint32:
            return ProfileIntegrateInternal<uint32_t>(experiment, image, predicted, npredicted, UINT32_MAX, UINT32_MAX, image_number);
        default:
            throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Image mode not supported");
    }
}