Files
Jungfraujoch/tests/ROIIntegrationGPUTest.cpp
T
leonarski_f 188cbb659d
Build Packages / Unit tests (push) Successful in 55m23s
Build Packages / DIALS test (push) Successful in 11m2s
Build Packages / XDS test (durin plugin) (push) Successful in 6m22s
Build Packages / XDS test (JFJoch plugin) (push) Successful in 6m27s
Build Packages / XDS test (neggia plugin) (push) Successful in 5m50s
Build Packages / Generate python client (push) Successful in 12s
Build Packages / Build documentation (push) Successful in 41s
Build Packages / Create release (push) Skipped
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 9m44s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 11m48s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 11m56s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 13m7s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 13m8s
Build Packages / build:rpm (rocky8) (push) Successful in 13m8s
Build Packages / build:rpm (rocky9) (push) Successful in 14m10s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 14m31s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 9m1s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 8m56s
tests: add GPU/CPU equivalence test for ROI integration
Runs ROIIntegrationGPU and ROIIntegrationCPU on identical input and asserts
every per-ROI field (sum, sum_square, max, pixels, weighted centre, masked
count) matches bit-for-bit. Uses overlapping ROI boxes (multi-bit masks),
negative pixel values (signed weighted-sum path), and an injected saturated
and masked pixel per ROI to cover the "max only" and "fully excluded" branches.

Guarded by JFJOCH_USE_CUDA and skips with a warning when no CUDA GPU is present,
mirroring ImageSpotFinderGPUTest.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 22:28:42 +02:00

115 lines
4.3 KiB
C++

// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include <catch2/catch_all.hpp>
#include "../common/CUDAWrapper.h"
#ifdef JFJOCH_USE_CUDA
#include <map>
#include <string>
#include <vector>
#include "../image_analysis/roi/ROIIntegrationCPU.h"
#include "../image_analysis/roi/ROIIntegrationGPU.h"
#include "../image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.h"
#include "../common/DiffractionExperiment.h"
namespace {
/// Builds the experiment shared by the CPU and GPU runs: a DetJF(1) detector
/// with four named box ROIs.
///
/// The boxes deliberately overlap, so a single pixel can belong to several ROIs
/// (several bits set in the exported ROI map). ROIBox arguments are
/// (name, x_min, x_max, y_min, y_max); all coordinates are kept within the
/// JF module bounds.
DiffractionExperiment make_roi_experiment() {
    DiffractionExperiment jf_experiment(DetJF(1));

    auto &&roi_settings = jf_experiment.ROI();
    roi_settings.SetROI(ROIDefinition{.boxes = {
        ROIBox("roiA", 10, 210, 20, 220),
        ROIBox("roiB", 100, 300, 100, 300),
        ROIBox("roiC", 0, 150, 0, 150),
        ROIBox("roiD", 50, 250, 50, 250),
    }});

    return jf_experiment;
}
void compare_results(const std::map<std::string, ROIMessage> &cpu,
const std::map<std::string, ROIMessage> &gpu) {
REQUIRE(cpu.size() == gpu.size());
for (const auto &[name, c] : cpu) {
INFO("ROI " << name);
REQUIRE(gpu.contains(name));
const auto &g = gpu.at(name);
CHECK(g.sum == c.sum);
CHECK(g.sum_square == c.sum_square);
CHECK(g.max_count == c.max_count);
CHECK(g.pixels == c.pixels);
CHECK(g.x_weighted == c.x_weighted);
CHECK(g.y_weighted == c.y_weighted);
CHECK(g.pixels_masked == c.pixels_masked);
}
}
} // namespace
// The GPU kernel reduces with atomics and two's-complement unsigned accumulators,
// while the CPU path is a plain serial loop. On identical input every per-ROI
// statistic must be bit-for-bit identical, so we run both and compare.
// Runs ROIIntegrationCPU and ROIIntegrationGPU on the same synthetic image and
// requires every per-ROI field to be equal (see compare_results).
// Skipped with a warning when no CUDA device is available.
TEST_CASE("ROIIntegrationGPU_MatchesCPU") {
    if (get_gpu_count() == 0) {
        WARN("No CUDA GPU present. Skipping ROIIntegrationGPU_MatchesCPU");
        return;
    }

    const DiffractionExperiment experiment = make_roi_experiment();
    const auto roi_map = experiment.ExportROIMap();
    const size_t npixel = roi_map.size();
    const uint16_t roi_count = experiment.ROI().size();
    REQUIRE(roi_count == 4);

    // Deterministic image with values in [-500, 499]; the negative half exercises
    // the signed weighted-sum path (val * x can be negative).
    std::vector<int32_t> values(npixel);
    for (size_t i = 0; i < npixel; i++)
        values[i] = static_cast<int32_t>((i * 2654435761u) % 1000) - 500;

    // Inject one saturated (INT32_MAX) and one masked (INT32_MIN) pixel into every ROI
    // so both the "max only, not summed" and "fully excluded" branches are covered.
    // Bit r of roi_map[i] tells whether pixel i belongs to ROI r.
    for (uint16_t r = 0; r < roi_count; r++) {
        bool injected_sat = false;
        bool injected_mask = false;
        for (size_t i = 0; i < npixel && !(injected_sat && injected_mask); i++) {
            if (!(roi_map[i] & (1u << r)))
                continue;
            // The ROIs overlap, so this pixel may already carry a sentinel injected for
            // an earlier ROI. Leave it alone: overwriting could turn that ROI's masked
            // pixel into a saturated one. Generated values never equal a sentinel.
            if (values[i] == INT32_MAX || values[i] == INT32_MIN)
                continue;
            if (!injected_sat) {
                values[i] = INT32_MAX;
                injected_sat = true;
            } else {
                values[i] = INT32_MIN;
                injected_mask = true;
            }
        }
        REQUIRE(injected_sat);
        REQUIRE(injected_mask);
    }

    // CPU reference
    ImagePreprocessorBuffer cpu_image(npixel);
    for (size_t i = 0; i < npixel; i++)
        cpu_image[i] = values[i];
    ROIIntegrationCPU cpu(experiment);
    std::map<std::string, ROIMessage> out_cpu;
    cpu.Run(cpu_image, out_cpu);
    // compare_results only checks that both maps agree; two empty maps would agree
    // too. Make sure the reference actually produced something to compare against.
    REQUIRE_FALSE(out_cpu.empty());

    // GPU under test — identical input, filled on the host side and then copied to
    // the device buffer on the same stream that is handed to ROIIntegrationGPU.
    // NOTE(review): relies on Run() executing on that stream so the copy is ordered
    // before the kernel — confirm against ROIIntegrationGPU.
    auto stream = std::make_shared<CudaStream>();
    ImagePreprocessorBufferGPU gpu_image(npixel);
    for (size_t i = 0; i < npixel; i++)
        gpu_image[i] = values[i];
    REQUIRE(cudaMemcpyAsync(gpu_image.getGPUBuffer(),
                            gpu_image.getBuffer().data(),
                            npixel * sizeof(int32_t),
                            cudaMemcpyHostToDevice,
                            *stream) == cudaSuccess);
    ROIIntegrationGPU gpu(experiment, stream);
    std::map<std::string, ROIMessage> out_gpu;
    gpu.Run(gpu_image, out_gpu);

    compare_results(out_cpu, out_gpu);
}
#endif