188cbb659d
Build Packages / Unit tests (push) Successful in 55m23s
Build Packages / DIALS test (push) Successful in 11m2s
Build Packages / XDS test (durin plugin) (push) Successful in 6m22s
Build Packages / XDS test (JFJoch plugin) (push) Successful in 6m27s
Build Packages / XDS test (neggia plugin) (push) Successful in 5m50s
Build Packages / Generate python client (push) Successful in 12s
Build Packages / Build documentation (push) Successful in 41s
Build Packages / Create release (push) Skipped
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 9m44s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 11m48s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 11m56s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 13m7s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 13m8s
Build Packages / build:rpm (rocky8) (push) Successful in 13m8s
Build Packages / build:rpm (rocky9) (push) Successful in 14m10s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 14m31s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 9m1s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 8m56s
Runs ROIIntegrationGPU and ROIIntegrationCPU on identical input and asserts every per-ROI field (sum, sum_square, max, pixels, weighted centre, masked count) matches bit-for-bit. Uses overlapping ROI boxes (multi-bit masks), negative pixel values (signed weighted-sum path), and an injected saturated and masked pixel per ROI to cover the "max only" and "fully excluded" branches. Guarded by JFJOCH_USE_CUDA and skips with a warning when no CUDA GPU is present, mirroring ImageSpotFinderGPUTest. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
115 lines
4.3 KiB
C++
115 lines
4.3 KiB
C++
// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include <catch2/catch_all.hpp>
|
|
#include "../common/CUDAWrapper.h"
|
|
|
|
#ifdef JFJOCH_USE_CUDA
|
|
|
|
#include <map>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "../image_analysis/roi/ROIIntegrationCPU.h"
|
|
#include "../image_analysis/roi/ROIIntegrationGPU.h"
|
|
#include "../image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.h"
|
|
#include "../common/DiffractionExperiment.h"
|
|
|
|
namespace {
|
|
// Builds the DetJF(1) experiment shared by the CPU and GPU runs, configured
// with four named box ROIs.
DiffractionExperiment make_roi_experiment() {
    // The boxes deliberately overlap: a pixel lying inside several of them
    // carries a multi-bit ROI mask, which is the per-bit accumulation that the
    // CPU and GPU implementations have to agree on.
    // ROIBox arguments are (name, x_min, x_max, y_min, y_max); every box is
    // kept within the JF module bounds.
    const ROIDefinition overlapping_rois{.boxes = {
        ROIBox("roiA", 10, 210, 20, 220),
        ROIBox("roiB", 100, 300, 100, 300),
        ROIBox("roiC", 0, 150, 0, 150),
        ROIBox("roiD", 50, 250, 50, 250),
    }};

    DiffractionExperiment result(DetJF(1));
    result.ROI().SetROI(overlapping_rois);
    return result;
}
|
|
|
|
void compare_results(const std::map<std::string, ROIMessage> &cpu,
|
|
const std::map<std::string, ROIMessage> &gpu) {
|
|
REQUIRE(cpu.size() == gpu.size());
|
|
for (const auto &[name, c] : cpu) {
|
|
INFO("ROI " << name);
|
|
REQUIRE(gpu.contains(name));
|
|
const auto &g = gpu.at(name);
|
|
CHECK(g.sum == c.sum);
|
|
CHECK(g.sum_square == c.sum_square);
|
|
CHECK(g.max_count == c.max_count);
|
|
CHECK(g.pixels == c.pixels);
|
|
CHECK(g.x_weighted == c.x_weighted);
|
|
CHECK(g.y_weighted == c.y_weighted);
|
|
CHECK(g.pixels_masked == c.pixels_masked);
|
|
}
|
|
}
|
|
} // namespace
|
|
|
|
// The GPU kernel reduces with atomics and two's-complement unsigned accumulators,
|
|
// while the CPU path is a plain serial loop. On identical input every per-ROI
|
|
// statistic must be bit-for-bit identical, so we run both and compare.
|
|
TEST_CASE("ROIIntegrationGPU_MatchesCPU") {
    // Without a CUDA device there is nothing to compare against; warn and leave.
    if (get_gpu_count() == 0) {
        WARN("No CUDA GPU present. Skipping ROIIntegrationGPU_MatchesCPU");
        return;
    }

    const DiffractionExperiment experiment = make_roi_experiment();
    const auto roi_map = experiment.ExportROIMap();
    const size_t npixel = roi_map.size();
    // Explicit cast: the ROI count is used as a bit index below, so it is kept
    // in a small unsigned type rather than narrowed implicitly.
    const auto roi_count = static_cast<uint16_t>(experiment.ROI().size());
    REQUIRE(roi_count == 4);

    // Deterministic image with both positive and negative values; negatives exercise
    // the signed weighted-sum path (val * x can be negative).
    // Every base value lies in [-500, 499], so INT32_MAX / INT32_MIN can only
    // appear through the injection step below.
    std::vector<int32_t> values(npixel);
    for (size_t i = 0; i < npixel; i++)
        values[i] = static_cast<int32_t>((i * 2654435761u) % 1000) - 500;

    // Inject one saturated (INT32_MAX) and one masked (INT32_MIN) pixel into every ROI
    // so both the "max only, not summed" and "fully excluded" branches are covered.
    const auto is_sentinel = [](int32_t v) { return v == INT32_MAX || v == INT32_MIN; };
    for (uint16_t r = 0; r < roi_count; r++) {
        bool injected_sat = false;
        bool injected_mask = false;
        for (size_t i = 0; i < npixel && !(injected_sat && injected_mask); i++) {
            if (!(roi_map[i] & (1u << r)))
                continue;
            // The ROIs overlap, so this pixel may already hold a sentinel placed
            // for an earlier ROI. Overwriting it could turn that ROI's saturated
            // pixel into a masked one (or vice versa) and silently drop the
            // coverage its REQUIREs already confirmed, so pick a fresh pixel.
            if (is_sentinel(values[i]))
                continue;
            if (!injected_sat) {
                values[i] = INT32_MAX;
                injected_sat = true;
            } else {
                values[i] = INT32_MIN;
                injected_mask = true;
            }
        }
        REQUIRE(injected_sat);
        REQUIRE(injected_mask);
    }

    // CPU reference
    ImagePreprocessorBuffer cpu_image(npixel);
    for (size_t i = 0; i < npixel; i++)
        cpu_image[i] = values[i];

    ROIIntegrationCPU cpu(experiment);
    std::map<std::string, ROIMessage> out_cpu;
    cpu.Run(cpu_image, out_cpu);

    // GPU under test — identical input uploaded to the device
    auto stream = std::make_shared<CudaStream>();
    ImagePreprocessorBufferGPU gpu_image(npixel);
    for (size_t i = 0; i < npixel; i++)
        gpu_image[i] = values[i];

    // The upload is enqueued on the same stream object that is handed to
    // ROIIntegrationGPU below.
    REQUIRE(cudaMemcpyAsync(gpu_image.getGPUBuffer(),
                            gpu_image.getBuffer().data(),
                            npixel * sizeof(int32_t),
                            cudaMemcpyHostToDevice,
                            *stream) == cudaSuccess);

    ROIIntegrationGPU gpu(experiment, stream);
    std::map<std::string, ROIMessage> out_gpu;
    gpu.Run(gpu_image, out_gpu);

    compare_results(out_cpu, out_gpu);
}
|
|
|
|
#endif
|