// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #include "BraggIntegrationEngineCPU.h" #include #include #include using namespace bragg_engine; namespace { // The preprocessed buffer stores masked/bad pixels as INT32_MIN and saturated as INT32_MAX. inline bool valid(int32_t v) { return v != INT32_MIN && v != INT32_MAX; } } // namespace BraggIntegrationEngineCPU::BraggIntegrationEngineCPU(const DiffractionExperiment &experiment) : BraggIntegrationEngine(experiment) {} std::vector BraggIntegrationEngineCPU::Run(const ImagePreprocessorBuffer &image, const std::vector &predicted, size_t npredicted, int64_t image_number) { std::vector results(npredicted); if (image.size() != npixel || npredicted == 0) return Finalize(predicted, npredicted, results, image_number); const int32_t *img = image.data(); const int W = static_cast(xpixel), H = static_cast(ypixel); const bool do_clip = apply_bkg_clip && mode != IntegratorMode::BoxSum; auto grid_idx = [this](int dx, int dy) { return (dy + R) * G + (dx + R); }; // --- Reflection mask: mark the r2 signal disk of every predicted reflection so a neighbour's // disk is excluded from this reflection's r2..r3 background ring. --- std::vector refl_mask(npixel, 0); for (size_t i = 0; i < npredicted; ++i) { const auto &r = predicted[i]; const int x0 = std::max(0, static_cast(std::floor(r.predicted_x - r2 - 1.0f))); const int x1 = std::min(W - 1, static_cast(std::ceil(r.predicted_x + r2 + 1.0f))); const int y0 = std::max(0, static_cast(std::floor(r.predicted_y - r2 - 1.0f))); const int y1 = std::min(H - 1, static_cast(std::ceil(r.predicted_y + r2 + 1.0f))); for (int y = y0; y <= y1; ++y) for (int x = x0; x <= x1; ++x) { const double d2 = (x - r.predicted_x) * (x - r.predicted_x) + (y - r.predicted_y) * (y - r.predicted_y); if (d2 < r2_sq) refl_mask[y * W + x] = 1; } } // --- Pass A: box-sum every reflection (rough I, background, centroid, strong flag). --- struct Rough { double I = 0.0, sigma = NAN, bkg = 0.0, obs_x = 0.0, obs_y = 0.0; int cx = 0, cy = 0, shell = -1; bool ok = false, strong = false, has_obs = false; }; std::vector rough(npredicted); double inv_d2_min = std::numeric_limits::max(), inv_d2_max = 0.0; for (size_t i = 0; i < npredicted; ++i) { const auto &r = predicted[i]; Rough out; const int x0 = std::max(0, static_cast(std::floor(r.predicted_x - r3 - 1.0))); const int x1 = std::min(W - 1, static_cast(std::ceil(r.predicted_x + r3 + 1.0))); const int y0 = std::max(0, static_cast(std::floor(r.predicted_y - r3 - 1.0))); const int y1 = std::min(H - 1, static_cast(std::ceil(r.predicted_y + r3 + 1.0))); int64_t I_sum = 0, I_sum_x = 0, I_sum_y = 0, n_inner = 0, n_inner_valid = 0; double bkg_sum = 0.0; int n_bkg = 0; for (int y = y0; y <= y1; ++y) for (int x = x0; x <= x1; ++x) { const double d2 = (x - r.predicted_x) * (x - r.predicted_x) + (y - r.predicted_y) * (y - r.predicted_y); const int32_t px = img[y * W + x]; if (d2 < r1_sq) { ++n_inner; if (!valid(px)) continue; I_sum += px; I_sum_x += static_cast(x) * px; I_sum_y += static_cast(y) * px; ++n_inner_valid; } else if (d2 >= r2_sq && d2 < r3_sq) { if (refl_mask[y * W + x]) continue; if (!valid(px)) continue; bkg_sum += static_cast(px); ++n_bkg; } } if (n_inner_valid == n_inner && n_bkg > 5) { out.bkg = bkg_sum / n_bkg; // One high-outlier sigma-clip pass on the background ring (stills-only): reject pixels // above mean + 3*sqrt(mean) to strip a bandwidth-streaked neighbour that biases the mean. if (do_clip) { const double thr = out.bkg + 3.0 * std::sqrt(std::max(out.bkg, 1.0)); double s = 0.0; int n = 0; for (int y = y0; y <= y1; ++y) for (int x = x0; x <= x1; ++x) { const double d2 = (x - r.predicted_x) * (x - r.predicted_x) + (y - r.predicted_y) * (y - r.predicted_y); if (!(d2 >= r2_sq && d2 < r3_sq)) continue; if (refl_mask[y * W + x]) continue; const int32_t px = img[y * W + x]; if (!valid(px)) continue; if (static_cast(px) <= thr) { s += px; ++n; } } if (n > 5) out.bkg = s / n; } out.I = static_cast(I_sum) - static_cast(n_inner) * out.bkg; out.sigma = std::max(1.0, out.I * min_sigma_ratio); if (I_sum > 0) { out.sigma = std::max(out.sigma, std::sqrt(static_cast(I_sum))); out.obs_x = static_cast(I_sum_x) / static_cast(I_sum); out.obs_y = static_cast(I_sum_y) / static_cast(I_sum); out.has_obs = true; } out.cx = static_cast(std::lround(r.predicted_x)); out.cy = static_cast(std::lround(r.predicted_y)); out.ok = true; out.strong = out.sigma > 0.0 && out.I / out.sigma >= STRONG_I_OVER_SIGMA; if (r.d > 0.0f) { const double inv_d2 = 1.0 / (static_cast(r.d) * r.d); inv_d2_min = std::min(inv_d2_min, inv_d2); inv_d2_max = std::max(inv_d2_max, inv_d2); } } rough[i] = out; } // --- BoxSum mode is BraggIntegrate2D: emit the rough result directly. --- if (mode == IntegratorMode::BoxSum) { for (size_t i = 0; i < npredicted; ++i) { const auto &rh = rough[i]; if (!rh.ok) continue; results[i] = {static_cast(rh.I), static_cast(rh.sigma), static_cast(rh.bkg), static_cast(rh.obs_x), static_cast(rh.obs_y), true, rh.has_obs}; } return Finalize(predicted, npredicted, results, image_number); } auto shell_of = [&](float d) { if (!(d > 0.0f) || inv_d2_max <= inv_d2_min) return 0; const double t = (1.0 / (static_cast(d) * d) - inv_d2_min) / (inv_d2_max - inv_d2_min); return std::clamp(static_cast(t * N_SHELL), 0, N_SHELL - 1); }; for (size_t i = 0; i < npredicted; ++i) if (rough[i].ok) rough[i].shell = shell_of(predicted[i].d); // --- Learn the profile per shell (+ global) from the strong spots. --- std::vector> shell_grid(N_SHELL, std::vector(GG, 0.0)); std::vector shell_n(N_SHELL, 0); std::vector global_grid(GG, 0.0); int global_n = 0; for (size_t i = 0; i < npredicted; ++i) { const auto &rh = rough[i]; if (!rh.ok || !rh.strong || rh.I <= 0.0) continue; for (int dy = -R; dy <= R; ++dy) for (int dx = -R; dx <= R; ++dx) { const int x = rh.cx + dx, y = rh.cy + dy; if (x < 0 || y < 0 || x >= W || y >= H) continue; const int32_t px = img[y * W + x]; if (!valid(px)) continue; const double v = (static_cast(px) - rh.bkg) / rh.I; shell_grid[rh.shell][grid_idx(dx, dy)] += v; global_grid[grid_idx(dx, dy)] += v; } ++shell_n[rh.shell]; ++global_n; } // Isotropic width (2nd moment) of a learned grid: over the r1 disk (monochromatic) or the full // grid (broadband); = 2 sigma^2 in 2D. auto measure_sigma2 = [&](const std::vector &grid) { double m2 = 0.0, m2w = 0.0; for (int dy = -R; dy <= R; ++dy) for (int dx = -R; dx <= R; ++dx) { if (!broadband && dx * dx + dy * dy >= r1_sq) continue; const double g = std::max(0.0, grid[grid_idx(dx, dy)]); m2 += g * (dx * dx + dy * dy); m2w += g; } return m2w > 0.0 ? std::max(0.25, (m2 / m2w) / 2.0) : 1.0; }; // Normalised profile (sum = 1): empirical average grid, or an isotropic Gaussian of the measured // 2nd moment (only used by ProfileEmpirical; ProfileGaussian rebuilds per reflection in Pass B). auto build_profile = [&](const std::vector &grid, int n) { std::vector P(GG, 0.0); if (n <= 0) return P; double sum = 0.0; for (int k = 0; k < GG; ++k) { const double g = std::max(0.0, grid[k]); sum += g; if (empirical) P[k] = g; } if (sum <= 0.0) return P; if (empirical) { for (double &p : P) p /= sum; } else { const double sigma2 = measure_sigma2(grid); double gsum = 0.0; for (int dy = -R; dy <= R; ++dy) for (int dx = -R; dx <= R; ++dx) { const double g = std::exp(-(dx * dx + dy * dy) / (2.0 * sigma2)); P[grid_idx(dx, dy)] = g; gsum += g; } for (double &p : P) p /= gsum; } return P; }; const std::vector global_P = build_profile(global_grid, global_n); const double global_sigma2 = global_n > 0 ? measure_sigma2(global_grid) : 1.0; std::vector> shell_P(N_SHELL); std::vector shell_sigma2(N_SHELL, global_sigma2); for (int s = 0; s < N_SHELL; ++s) { if (shell_n[s] >= MIN_STRONG_PER_SHELL) { shell_P[s] = build_profile(shell_grid[s], shell_n[s]); shell_sigma2[s] = measure_sigma2(shell_grid[s]); } else { shell_P[s] = global_P; } } // --- Pass B: profile-fit each reflection (Kabsch, de-biased variance v = B + I*P; iterate). --- std::vector Pbuf; for (size_t i = 0; i < npredicted; ++i) { const auto &rh = rough[i]; if (!rh.ok) continue; const int sh = rh.shell < 0 ? 0 : rh.shell; int Rf = R; const std::vector *Pvec = &shell_P[sh]; if (!empirical) { const double rx = predicted[i].predicted_x - beam_x, ry = predicted[i].predicted_y - beam_y; const double Rpx = std::hypot(rx, ry); const double tan2t = Rpx / F_px; const double s2t = shell_sigma2[sh]; double s2r = s2t, ux = 1.0, uy = 0.0; bool elong = false; if (use_ellipse) { const double sbw = bw_sigma * Rpx; const double radial_extra = sbw * sbw + c_radial * tan2t * tan2t; if (Rpx > 1e-6 && radial_extra > 0.25) { ux = rx / Rpx; uy = ry / Rpx; s2r = s2t + radial_extra; elong = true; } } // Build the Gaussian per reflection, centred on the sub-pixel predicted position and (when // needed) radially elongated, on a grid grown to hold the streak. const double fx = predicted[i].predicted_x - rh.cx, fy = predicted[i].predicted_y - rh.cy; Rf = elong ? std::min(3 * R, static_cast(std::ceil(r2 + 2.0 * std::sqrt(s2r)))) : R; const int Gf = 2 * Rf + 1; Pbuf.assign(static_cast(Gf) * Gf, 0.0); double gs = 0.0; for (int dy = -Rf; dy <= Rf; ++dy) for (int dx = -Rf; dx <= Rf; ++dx) { const double ex = dx - fx, ey = dy - fy; const double rad = ex * ux + ey * uy, tn = -ex * uy + ey * ux; const double g = std::exp(-rad * rad / (2.0 * s2r) - tn * tn / (2.0 * s2t)); Pbuf[(dy + Rf) * Gf + (dx + Rf)] = g; gs += g; } for (double &p : Pbuf) p /= gs; Pvec = &Pbuf; } const int Gf = 2 * Rf + 1; const double B = std::max(rh.bkg, 1.0); double I = rh.I, den = 0.0; for (int iter = 0; iter < 4; ++iter) { double num = 0.0; den = 0.0; for (int dy = -Rf; dy <= Rf; ++dy) for (int dx = -Rf; dx <= Rf; ++dx) { const double Pp = (*Pvec)[(dy + Rf) * Gf + (dx + Rf)]; if (Pp <= 0.0) continue; const int x = rh.cx + dx, y = rh.cy + dy; if (x < 0 || y < 0 || x >= W || y >= H) continue; const int32_t px = img[y * W + x]; if (!valid(px)) continue; const double v = B + std::max(0.0, I) * Pp; num += Pp * (static_cast(px) - rh.bkg) / v; den += Pp * Pp / v; } if (den > 0.0) I = num / den; else break; } if (!(den > 0.0)) continue; results[i] = {static_cast(I), static_cast(std::sqrt(1.0 / den)), static_cast(rh.bkg), 0.0f, 0.0f, true, false}; } return Finalize(predicted, npredicted, results, image_number); }