ImageAnalysisGPU: Add test and minor fixes

This commit is contained in:
2025-10-04 19:38:08 +02:00
parent 12addb0ff4
commit bb4e5a7f8d
4 changed files with 126 additions and 7 deletions

View File

@@ -18,8 +18,8 @@ struct spot_parameters {
uint32_t nby;
uint32_t lines;
uint32_t cols;
int16_t min_viable_number;
int16_t max_viable_number;
int32_t min_viable_number;
int32_t max_viable_number;
};
// input X x Y pixels array
@@ -108,8 +108,10 @@ __device__ __forceinline__ uint8_t pixel_result(const spot_parameters& params, c
const int64_t tmp1 = in_minus_mean * in_minus_mean * (count-1);
const float tmp2 = (var * count) * params.strong_pixel_threshold2;
const bool strong_pixel = (val > params.max_viable_number)
|| ((val >= params.count_threshold) & (in_minus_mean > 0) & (tmp1 > tmp2));
bool snr_criterion = (params.strong_pixel_threshold2 <= 0) || (in_minus_mean > 0) && (tmp1 > tmp2);
bool count_criterion = (val >= params.count_threshold);
const bool strong_pixel = (val > params.max_viable_number) || (snr_criterion && count_criterion);
return strong_pixel ? 1 : 0;
}
@@ -311,7 +313,7 @@ void GPUImageAnalysis::RunSpotFinder(const SpotFindingSettings &settings) {
spot_params.cols = xpixels;
spot_params.count_threshold = settings.photon_count_threshold;
spot_params.min_viable_number = -100;
spot_params.max_viable_number = (1<22) - 1; // 22-bit
spot_params.max_viable_number = (1<<22) - 1; // 22-bit
if (2 * spot_params.nbx + 1 > windowSizeLimit)
throw JFJochException(JFJochExceptionCategory::SpotFinderError, "nbx exceeds window size limit");
@@ -356,7 +358,7 @@ void GPUImageAnalysis::GetSpotFinderResults(StrongPixelSet &pixel_set) {
if (out_ptr[i]) {
for (int j = 0; j < 8 * sizeof(out_ptr[0]); j++) {
if (out_ptr[i] & (1 << j)) {
size_t npixel = i * 8 * sizeof(out_ptr[0])| j;
size_t npixel = i * 8 * sizeof(out_ptr[0]) + j;
size_t line = npixel / xpixels;
size_t col = npixel % xpixels;
pixel_set.AddStrongPixel(col, line, host_in[npixel]);

View File

@@ -28,7 +28,7 @@ class GPUImageAnalysis {
int numberOfSMs;
const int numberOfCudaThreads = 128; // #threads per block that works well for Nvidia T4
const int numberOfWaves = 40; // #waves that works well for Nvidia T4
const int windowSizeLimit = 21; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems
const int windowSizeLimit = 32; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems
constexpr static int NBX = 15;
public:

View File

@@ -57,6 +57,7 @@ ADD_EXECUTABLE(jfjoch_test
CrystalLatticeTest.cpp
FPGAPTPTest.cpp
ResolutionShellsTest.cpp
ImageSpotFinderGPUTest.cpp
)
target_link_libraries(jfjoch_test Catch2WithMain JFJochBroker JFJochReceiver JFJochReader JFJochWriter JFJochImageAnalysis JFJochCommon JFJochHLSSimulation JFJochPreview)

View File

@@ -0,0 +1,116 @@
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute
// SPDX-License-Identifier: GPL-3.0-only
#include <catch2/catch_all.hpp>
#ifdef JFJOCH_USE_CUDA
#include "../image_analysis/spot_finding//ImageAnalysisGPU.h"
#include "../image_analysis/spot_finding/ImageSpotFinder.h"
// Fills `input` with a synthetic frame of width * height pixels.
// Background alternates by linear index (even index -> 5, odd index -> 10); three brighter
// pixels (20, 16 and 12 counts) are then planted at fixed positions.
// The planted positions are fixed, so the frame must be large enough to contain
// linear index width * 75 + 25.
static void fill_test_image(std::vector<int32_t>& input, size_t width, size_t height) {
    const size_t pixel_count = width * height;
    input.resize(pixel_count);

    // Background pattern, overwriting whatever the vector held before.
    size_t linear_index = 0;
    for (auto &pixel : input) {
        pixel = (linear_index % 2 == 0) ? 5 : 10;
        ++linear_index;
    }

    // Bright pixels, addressed as (column, line) in a line-major layout.
    const auto plant = [&input, width](size_t col, size_t line, int32_t counts) {
        input[width * line + col] = counts;
    };
    plant(50, 50, 20);
    plant(26, 25, 16);
    plant(25, 75, 12);
}
// Helper to run GPU and get DiffractionSpot list via StrongPixelSet -> FindSpotsImage
static std::vector<DiffractionSpot> run_gpu_and_collect_spots(const std::vector<int32_t>& input,
size_t width, size_t height,
const SpotFindingSettings& settings)
{
GPUImageAnalysis gpu(static_cast<int32_t>(width), static_cast<int32_t>(height));
REQUIRE(GPUImageAnalysis::GPUPresent());
// Set input buffer pointer to our CPU data, register and upload
// Note: RegisterBuffer/UnregisterBuffer are optional for plain memcpy path,
// but we use them to mirror intended flow.
const_cast<GPUImageAnalysis&>(gpu).SetInputBuffer((void*)input.data());
gpu.RegisterBuffer();
gpu.LoadDataToGPU();
// Run kernel
gpu.RunSpotFinder(settings);
// Collect strong pixels and convert to spots like CPU does
StrongPixelSet strong;
gpu.GetSpotFinderResults(strong);
std::vector<DiffractionSpot> spots;
strong.FindSpotsImage(settings, spots);
gpu.UnregisterBuffer();
return spots;
}
// Mirror of ImageSpotFinder_SignalToNoise
// Runs the GPU spot finder with a signal-to-noise threshold only (photon count threshold 0)
// and expects two spots, on lines 25 and 50. The third bright pixel planted by
// fill_test_image (12 counts, line 75) is expected not to be reported.
TEST_CASE("GPUImageAnalysis_SignalToNoise") {
    if (!GPUImageAnalysis::GPUPresent()) {
        WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise");
        return;
    }

    const size_t width = 100, height = 100;

    std::vector<int32_t> input;
    fill_test_image(input, width, height);

    SpotFindingSettings settings{
        .signal_to_noise_threshold = 3.0,
        .photon_count_threshold = 0,
        .min_pix_per_spot = 1,
        .max_pix_per_spot = 20,
        .high_resolution_limit = 0.5,
        .low_resolution_limit = 3.0,
    };

    // No resolution map or mask is handed to the GPU path in this test: spots are built
    // from the GPU strong pixels through StrongPixelSet::FindSpotsImage(settings, vec).
    auto spots = run_gpu_and_collect_spots(input, width, height, settings);

    REQUIRE(spots.size() == 2);
    REQUIRE(spots[0].RawCoord().y == 25);
    REQUIRE(spots[1].RawCoord().y == 50);
}
// Runs the GPU spot finder with the signal-to-noise threshold set to 0 and a photon count
// threshold of 11, and expects all three bright pixels planted by fill_test_image
// (20, 16 and 12 counts) to be reported as spots, on lines 25, 50 and 75.
TEST_CASE("GPUImageAnalysis_CountThreshold") {
    if (!GPUImageAnalysis::GPUPresent()) {
        // Message names this test case (it previously named the SignalToNoise test).
        WARN("No CUDA GPU present. Skipping GPUImageAnalysis_CountThreshold");
        return;
    }

    const size_t width = 100, height = 100;

    std::vector<int32_t> input;
    fill_test_image(input, width, height);

    SpotFindingSettings settings{
        .signal_to_noise_threshold = 0.0,
        .photon_count_threshold = 11,
        .min_pix_per_spot = 1,
        .max_pix_per_spot = 20,
        .high_resolution_limit = 0.5,
        .low_resolution_limit = 3.0,
    };

    // No resolution map or mask is handed to the GPU path in this test: spots are built
    // from the GPU strong pixels through StrongPixelSet::FindSpotsImage(settings, vec).
    auto spots = run_gpu_and_collect_spots(input, width, height, settings);

    REQUIRE(spots.size() == 3);
    REQUIRE(spots[0].RawCoord().y == 25);
    REQUIRE(spots[1].RawCoord().y == 50);
    REQUIRE(spots[2].RawCoord().y == 75);
}
#endif