Files
Jungfraujoch/tests/ROIIntegrationGPUTest.cpp
T
leonarski_f 75e401f0e5
Build Packages / Unit tests (push) Successful in 1h31m59s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 8m43s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 10m5s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 9m27s
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 8m56s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 9m24s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 10m27s
Build Packages / build:rpm (rocky8) (push) Successful in 9m20s
Build Packages / build:rpm (rocky9) (push) Successful in 10m50s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 9m54s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 8m38s
Build Packages / DIALS test (push) Successful in 12m13s
Build Packages / XDS test (durin plugin) (push) Successful in 7m8s
Build Packages / XDS test (JFJoch plugin) (push) Successful in 7m8s
Build Packages / XDS test (neggia plugin) (push) Successful in 7m50s
Build Packages / Generate python client (push) Successful in 16s
Build Packages / Build documentation (push) Successful in 50s
Build Packages / Create release (push) Skipped
v1.0.0-rc.153 (#63)
This is an UNSTABLE release. It includes many experimental features as well as many AI-generated fixes. We recommend using rc.152 in production.

* jfjoch_broker: Add EXPERIMENTAL pixelrefine mode for image processing
* jfjoch_broker: Allow loading a user mask from 8-bit and 16-bit TIFF files
* jfjoch_broker: Add ROI calculation in non-FPGA workflow
* jfjoch_broker: Fixes to TCP image pusher
* jfjoch_broker: Remove NUMA bindings
* jfjoch_broker: Improvements to indexing
* jfjoch_broker: For PSI EIGER, trimming energies are taken from the detector configuration (now compulsory) instead of hardcoded values
* jfjoch_writer: Save ROI definitions and the per-pixel ROI bitmap in the master file; azimuthal ROIs support phi (angular) sectors
* jfjoch_viewer: Major redesign with dockable panels and saved layouts, plus on-canvas creation/move/resize of box, circle and azimuthal ROIs
* jfjoch_viewer: Run jfjoch_process reprocessing jobs from inside the GUI and overlay per-run results

Reviewed-on: #63
2026-06-23 20:29:49 +02:00

115 lines
4.3 KiB
C++

// SPDX-FileCopyrightText: 2026 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include <catch2/catch_all.hpp>
#include "../common/CUDAWrapper.h"
#ifdef JFJOCH_USE_CUDA
#include <map>
#include <string>
#include <vector>
#include "../image_analysis/roi/ROIIntegrationCPU.h"
#include "../image_analysis/roi/ROIIntegrationGPU.h"
#include "../image_analysis/image_preprocessing/ImagePreprocessorBufferGPU.h"
#include "../common/DiffractionExperiment.h"
namespace {
// Builds the experiment used as common input for the CPU and GPU ROI integrators:
// a detector created with DetJF(1) carrying four named box ROIs.
DiffractionExperiment make_roi_experiment() {
    DiffractionExperiment roi_experiment(DetJF(1));

    // ROIBox arguments are (name, x_min, x_max, y_min, y_max), chosen to stay within
    // the JF module bounds. The boxes overlap on purpose: pixels that belong to
    // several ROIs at once carry a multi-bit mask, which exercises the per-bit
    // accumulation that has to agree between the CPU and GPU implementations.
    auto &&roi_settings = roi_experiment.ROI();
    roi_settings.SetROI(ROIDefinition{.boxes = {
        ROIBox("roiA", 10, 210, 20, 220),
        ROIBox("roiB", 100, 300, 100, 300),
        ROIBox("roiC", 0, 150, 0, 150),
        ROIBox("roiD", 50, 250, 50, 250),
    }});

    return roi_experiment;
}
void compare_results(const std::map<std::string, ROIMessage> &cpu,
const std::map<std::string, ROIMessage> &gpu) {
REQUIRE(cpu.size() == gpu.size());
for (const auto &[name, c] : cpu) {
INFO("ROI " << name);
REQUIRE(gpu.contains(name));
const auto &g = gpu.at(name);
CHECK(g.sum == c.sum);
CHECK(g.sum_square == c.sum_square);
CHECK(g.max_count == c.max_count);
CHECK(g.pixels == c.pixels);
CHECK(g.x_weighted == c.x_weighted);
CHECK(g.y_weighted == c.y_weighted);
CHECK(g.pixels_masked == c.pixels_masked);
}
}
} // namespace
// Cross-checks ROIIntegrationGPU against ROIIntegrationCPU.
//
// The GPU kernel reduces with atomics and two's-complement unsigned accumulators,
// while the CPU path is a plain serial loop. On identical input every per-ROI
// statistic must be bit-for-bit identical, so we run both and compare.
// The test is a no-op (with a warning) on machines without a CUDA GPU.
TEST_CASE("ROIIntegrationGPU_MatchesCPU") {
    if (get_gpu_count() == 0) {
        WARN("No CUDA GPU present. Skipping ROIIntegrationGPU_MatchesCPU");
        return;
    }

    const DiffractionExperiment experiment = make_roi_experiment();
    const auto roi_map = experiment.ExportROIMap();
    const size_t npixel = roi_map.size();
    const uint16_t roi_count = experiment.ROI().size();
    REQUIRE(roi_count == 4);

    // Sentinel pixel values: a saturated pixel contributes to the maximum only and
    // is not summed; a masked pixel is excluded entirely.
    constexpr int32_t saturated_value = INT32_MAX;
    constexpr int32_t masked_value = INT32_MIN;

    // Deterministic image with both positive and negative values; negatives exercise
    // the signed weighted-sum path (val * x can be negative). The generated values
    // lie in [-500, 499], so they can never collide with the sentinels above.
    std::vector<int32_t> values(npixel);
    for (size_t i = 0; i < npixel; i++)
        values[i] = static_cast<int32_t>((i * 2654435761u) % 1000) - 500;

    // Inject one saturated and one masked pixel into every ROI so both the
    // "max only, not summed" and "fully excluded" branches are covered.
    for (uint16_t r = 0; r < roi_count; r++) {
        bool injected_sat = false, injected_mask = false;
        for (size_t i = 0; i < npixel && !(injected_sat && injected_mask); i++) {
            if (!(roi_map[i] & (1u << r)))
                continue;
            // The ROIs overlap, so this pixel may already hold a sentinel placed for
            // another ROI. Overwriting it could silently remove that ROI's saturated
            // or masked pixel, so only untouched pixels are used.
            if (values[i] == saturated_value || values[i] == masked_value)
                continue;
            if (!injected_sat) {
                values[i] = saturated_value;
                injected_sat = true;
            } else {
                values[i] = masked_value;
                injected_mask = true;
            }
        }
        REQUIRE(injected_sat);
        REQUIRE(injected_mask);
    }

    // CPU reference
    ImagePreprocessorBuffer cpu_image(npixel);
    for (size_t i = 0; i < npixel; i++)
        cpu_image[i] = values[i];
    ROIIntegrationCPU cpu(experiment);
    std::map<std::string, ROIMessage> out_cpu;
    cpu.Run(cpu_image, out_cpu);
    // Guard against a vacuous pass: two empty result maps would compare equal.
    REQUIRE(!out_cpu.empty());

    // GPU under test — identical input uploaded to the device. The copy is queued
    // on the same stream object that is handed to ROIIntegrationGPU below.
    auto stream = std::make_shared<CudaStream>();
    ImagePreprocessorBufferGPU gpu_image(npixel);
    for (size_t i = 0; i < npixel; i++)
        gpu_image[i] = values[i];
    REQUIRE(cudaMemcpyAsync(gpu_image.getGPUBuffer(),
                            gpu_image.getBuffer().data(),
                            npixel * sizeof(int32_t),
                            cudaMemcpyHostToDevice,
                            *stream) == cudaSuccess);
    ROIIntegrationGPU gpu(experiment, stream);
    std::map<std::string, ROIMessage> out_gpu;
    gpu.Run(gpu_image, out_gpu);

    compare_results(out_cpu, out_gpu);
}
#endif