Build Packages / build:windows:nocuda (pull_request) Successful in 14m41s
Build Packages / build:windows:cuda (pull_request) Successful in 16m48s
Build Packages / build:rpm (ubuntu2404_nocuda) (pull_request) Successful in 11m15s
Build Packages / build:rpm (rocky8_nocuda) (pull_request) Successful in 12m46s
Build Packages / build:rpm (ubuntu2204_nocuda) (pull_request) Successful in 12m38s
Build Packages / build:rpm (rocky9_nocuda) (pull_request) Successful in 13m11s
Build Packages / build:rpm (rocky8_sls9) (pull_request) Successful in 12m20s
Build Packages / build:rpm (rocky9_sls9) (pull_request) Successful in 12m22s
Build Packages / build:rpm (ubuntu2404) (pull_request) Successful in 11m7s
Build Packages / build:rpm (ubuntu2204) (pull_request) Successful in 11m55s
Build Packages / build:rpm (rocky8) (pull_request) Successful in 12m56s
Build Packages / Generate python client (pull_request) Successful in 14s
Build Packages / build:rpm (rocky9) (pull_request) Successful in 13m15s
Build Packages / Create release (pull_request) Skipped
Build Packages / Build documentation (pull_request) Successful in 41s
Build Packages / XDS test (durin plugin) (pull_request) Successful in 10m3s
Build Packages / DIALS test (pull_request) Successful in 13m6s
Build Packages / XDS test (neggia plugin) (pull_request) Successful in 6m58s
Build Packages / XDS test (JFJoch plugin) (pull_request) Successful in 7m30s
Build Packages / Unit tests (pull_request) Successful in 58m5s
Build Packages / Unit tests (push) Successful in 1h12m36s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 14m52s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 15m35s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 15m29s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 13m35s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 15m25s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 16m5s
Build Packages / build:rpm (rocky8) (push) Successful in 15m11s
Build Packages / build:rpm (rocky9) (push) Successful in 13m35s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 11m59s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 12m14s
Build Packages / DIALS test (push) Successful in 14m29s
Build Packages / XDS test (durin plugin) (push) Successful in 9m56s
Build Packages / XDS test (JFJoch plugin) (push) Successful in 10m23s
Build Packages / XDS test (neggia plugin) (push) Successful in 9m3s
Build Packages / Generate python client (push) Successful in 20s
Build Packages / Build documentation (push) Successful in 1m10s
Build Packages / Create release (push) Skipped
Build Packages / build:windows:nocuda (push) Successful in 16m39s
Build Packages / build:windows:cuda (push) Successful in 18m40s
Reimplement BraggIntegrate2D (box sum) and ProfileIntegrate2D (Kabsch profile fit) under one roof as a base + CPU + GPU engine, mirroring the AzIntEngine / ROIIntegration pattern. Reads the preprocessed int32 ImagePreprocessorBuffer (masked=INT32_MIN, saturated=INT32_MAX), the same buffer AzIntEngineGPU/ROIIntegrationGPU consume. The CUDA engine runs one block per reflection with shared-memory reductions across six kernels (reset, mask, box-sum, profile learning, profile build, Kabsch fit); the resolution shell is computed inline. The learning/fit hot path is single precision (FP64 is throttled on consumer GPUs; reproduces the double CPU path to ~1e-4). Collapsing the per-frame CUDA API calls into one reset kernel keeps launch-latency overhead low. Standalone for now: NOT wired into IndexAndRefine. See BRAGG_INTEGRATION_ENGINE.md for the design and the binding steps. BraggIntegrationEngineGPUTest checks GPU == CPU across all three modes (box/gaussian/empirical) within numeric tolerance, plus a [bragg_bench] perf sweep. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
101 lines
4.3 KiB
C++
101 lines
4.3 KiB
C++
// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include "BraggIntegrationEngine.h"
|
|
|
|
#include <algorithm>
|
|
#include <string>
|
|
|
|
namespace {
|
|
|
|
// Radial parallax broadening as the coefficient of tan^2(2theta), i.e. Var(z)/pixel^2 [px^2].
// Copied verbatim from ProfileIntegrate2D: a photon converts at a random depth z (exponential,
// attenuation length L, truncated at the sensor thickness), shifting the recorded spot radially by
// z*tan(2theta). L is photoelectric-dominated (~lambda^3), so a per-material reference (13 keV,
// i.e. 0.953 A) is scaled by lambda^3. Si and CdTe are the sensors in use; any material string
// other than "CdTe" falls back to the Si reference. Returns 0 when the thickness, pixel size or
// wavelength is not strictly positive.
|
|
// Variance of the photon conversion depth in the sensor, divided by pixel_um^2.
// Yields 0.0 whenever the inputs do not allow a meaningful value.
double parallax_var_px2(const std::string &material, double thickness_um, double lambda_A, double pixel_um) {
    // Positive-form tests: a NaN argument fails them just like zero or a negative value does.
    const bool inputs_usable = thickness_um > 0.0 && pixel_um > 0.0 && lambda_A > 0.0;
    if (!inputs_usable)
        return 0.0;

    // Reference attenuation length [um] at 0.953 A; everything except "CdTe" takes the 273 um value.
    double ref_length_um = 273.0;
    if (material == "CdTe")
        ref_length_um = 42.6;

    // Scale the reference length by the cube of the wavelength ratio.
    const double ratio = lambda_A / 0.953;
    const double atten_um = ref_length_um / (ratio * ratio * ratio);

    // Sensor thickness in units of the attenuation length, and the surviving fraction behind it.
    const double depth = thickness_um / atten_um;
    const double decay = std::exp(-depth);

    // Normalisation of the exponential truncated at the sensor thickness.
    const double norm = 1.0 - decay;
    if (norm <= 0.0)
        return 0.0;

    // First and second moments of the truncated exponential depth distribution.
    const double first_moment = atten_um * (1.0 - (1.0 + depth) * decay) / norm;
    const double second_moment =
        atten_um * atten_um * (2.0 - (depth * depth + 2.0 * depth + 2.0) * decay) / norm;

    // Clamp at zero so rounding can never produce a negative variance [um^2].
    const double variance_um2 = std::max(0.0, second_moment - first_moment * first_moment);
    return variance_um2 / (pixel_um * pixel_um);
}
|
|
|
|
} // namespace
|
|
|
|
// Snapshots everything the integrator needs from the experiment into members: integrator mode,
// frame dimensions, integration radii, bandwidth / parallax broadening terms, beam geometry and
// the optional polarization factor.
BraggIntegrationEngine::BraggIntegrationEngine(const DiffractionExperiment &experiment)
    : geom(experiment.GetDiffractionGeometry()) {
    const auto settings = experiment.GetBraggIntegrationSettings();
    const auto &det = experiment.GetDetectorSetup();

    // --- Integrator selection ---
    mode = settings.GetIntegrator();
    empirical = (mode == IntegratorMode::ProfileEmpirical);

    // Optional polarization factor; Finalize() only applies the correction when it is set.
    polarization = experiment.GetPolarizationFactor();

    // --- Frame size ---
    // Must be the frame in which the reflections' predicted_x/predicted_y are expressed, and the
    // frame of the ImagePreprocessorBuffer feeding this engine (MXAnalysisWithoutFPGA sizes that
    // buffer to GetPixelsNum()).
    xpixel = experiment.GetXPixelsNum();
    ypixel = experiment.GetYPixelsNum();
    npixel = experiment.GetPixelsNum();

    // --- Integration radii and their squares ---
    const auto r1 = settings.GetR1();
    r1_sq = r1 * r1;
    r2 = settings.GetR2();
    r3 = settings.GetR3();
    r2_sq = r2 * r2;
    r3_sq = r3 * r3;
    min_sigma_ratio = settings.GetMinimumSigmaInRegardsToI();

    // R = ceil(r2), G = 2R+1, GG = G*G.
    // NOTE(review): these look like the half-width, side and area of a per-reflection pixel
    // window - confirm against the code that consumes them.
    R = static_cast<int>(std::ceil(r2));
    G = 2 * R + 1;
    GG = G * G;

    // --- Bandwidth ---
    // FWHM -> sigma (divide by 2.3548); a missing bandwidth counts as zero. A set bandwidth
    // (broadband / stills) and the monochromatic case (rotation) are treated differently: both
    // the background sigma-clip and the radial-elongation term depend on which path is taken
    // (see ProfileIntegrate2D).
    bw_sigma = experiment.GetBandwidthFWHM().value_or(0.0f) / 2.3548f;
    broadband = bw_sigma > 0.0;
    apply_bkg_clip = broadband;

    // --- Beam geometry ---
    beam_x = geom.GetBeamX_pxl();
    beam_y = geom.GetBeamY_pxl();
    // Detector distance in pixel units; the pixel size is floored at 1e-6 to avoid dividing by zero.
    F_px = geom.GetDetectorDistance_mm() / std::max(1e-6f, geom.GetPixelSize_mm());

    // --- Radial broadening ---
    // Parallax term from the sensor, plus the capture constant on the monochromatic path only.
    const double pixel_um = geom.GetPixelSize_mm() * 1000.0;
    const double c_par = parallax_var_px2(det.GetSensorMaterial(), det.GetSensorThickness_um(),
                                          geom.GetWavelength_A(), pixel_um);
    const auto capture_term = broadband ? 0.0 : bragg_engine::C_CAPTURE;
    c_radial = c_par + capture_term;

    // The elliptical treatment is only used outside empirical mode, and only when some
    // broadening source is actually present.
    const bool has_broadening = bw_sigma > 0.0 || c_radial > 0.0;
    use_ellipse = has_broadening && !empirical;
}
|
|
|
|
std::vector<Reflection> BraggIntegrationEngine::Finalize(const std::vector<Reflection> &predicted,
|
|
size_t npredicted,
|
|
const std::vector<BraggFitResult> &results,
|
|
int64_t image_number) const {
|
|
std::vector<Reflection> out;
|
|
out.reserve(npredicted);
|
|
for (size_t i = 0; i < npredicted; ++i) {
|
|
const auto &fr = results[i];
|
|
if (!fr.ok)
|
|
continue;
|
|
Reflection refl = predicted[i];
|
|
refl.I = fr.I;
|
|
refl.sigma = fr.sigma;
|
|
refl.bkg = fr.bkg;
|
|
if (fr.has_observed) {
|
|
refl.observed_x = fr.observed_x;
|
|
refl.observed_y = fr.observed_y;
|
|
}
|
|
refl.observed = true;
|
|
if (polarization)
|
|
refl.rlp /= geom.CalcAzIntPolarizationCorr(refl.predicted_x, refl.predicted_y, polarization.value());
|
|
refl.image_scale_corr = refl.rlp / refl.partiality;
|
|
refl.image_number = static_cast<float>(image_number);
|
|
out.push_back(refl);
|
|
}
|
|
return out;
|
|
}
|