ImageAnalysisGPU: Add test and minor fixes
This commit is contained in:
@@ -18,8 +18,8 @@ struct spot_parameters {
|
||||
uint32_t nby;
|
||||
uint32_t lines;
|
||||
uint32_t cols;
|
||||
int16_t min_viable_number;
|
||||
int16_t max_viable_number;
|
||||
int32_t min_viable_number;
|
||||
int32_t max_viable_number;
|
||||
};
|
||||
|
||||
// input X x Y pixels array
|
||||
@@ -108,8 +108,10 @@ __device__ __forceinline__ uint8_t pixel_result(const spot_parameters& params, c
|
||||
|
||||
const int64_t tmp1 = in_minus_mean * in_minus_mean * (count-1);
|
||||
const float tmp2 = (var * count) * params.strong_pixel_threshold2;
|
||||
const bool strong_pixel = (val > params.max_viable_number)
|
||||
|| ((val >= params.count_threshold) & (in_minus_mean > 0) & (tmp1 > tmp2));
|
||||
bool snr_criterion = (params.strong_pixel_threshold2 <= 0) || (in_minus_mean > 0) && (tmp1 > tmp2);
|
||||
bool count_criterion = (val >= params.count_threshold);
|
||||
|
||||
const bool strong_pixel = (val > params.max_viable_number) || (snr_criterion && count_criterion);
|
||||
|
||||
return strong_pixel ? 1 : 0;
|
||||
}
|
||||
@@ -311,7 +313,7 @@ void GPUImageAnalysis::RunSpotFinder(const SpotFindingSettings &settings) {
|
||||
spot_params.cols = xpixels;
|
||||
spot_params.count_threshold = settings.photon_count_threshold;
|
||||
spot_params.min_viable_number = -100;
|
||||
spot_params.max_viable_number = (1<22) - 1; // 22-bit
|
||||
spot_params.max_viable_number = (1<<22) - 1; // 22-bit
|
||||
|
||||
if (2 * spot_params.nbx + 1 > windowSizeLimit)
|
||||
throw JFJochException(JFJochExceptionCategory::SpotFinderError, "nbx exceeds window size limit");
|
||||
@@ -356,7 +358,7 @@ void GPUImageAnalysis::GetSpotFinderResults(StrongPixelSet &pixel_set) {
|
||||
if (out_ptr[i]) {
|
||||
for (int j = 0; j < 8 * sizeof(out_ptr[0]); j++) {
|
||||
if (out_ptr[i] & (1 << j)) {
|
||||
size_t npixel = i * 8 * sizeof(out_ptr[0])| j;
|
||||
size_t npixel = i * 8 * sizeof(out_ptr[0]) + j;
|
||||
size_t line = npixel / xpixels;
|
||||
size_t col = npixel % xpixels;
|
||||
pixel_set.AddStrongPixel(col, line, host_in[npixel]);
|
||||
|
||||
@@ -28,7 +28,7 @@ class GPUImageAnalysis {
|
||||
int numberOfSMs;
|
||||
const int numberOfCudaThreads = 128; // #threads per block that works well for Nvidia T4
|
||||
const int numberOfWaves = 40; // #waves that works well for Nvidia T4
|
||||
const int windowSizeLimit = 21; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems
|
||||
const int windowSizeLimit = 32; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems
|
||||
constexpr static int NBX = 15;
|
||||
public:
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ ADD_EXECUTABLE(jfjoch_test
|
||||
CrystalLatticeTest.cpp
|
||||
FPGAPTPTest.cpp
|
||||
ResolutionShellsTest.cpp
|
||||
ImageSpotFinderGPUTest.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(jfjoch_test Catch2WithMain JFJochBroker JFJochReceiver JFJochReader JFJochWriter JFJochImageAnalysis JFJochCommon JFJochHLSSimulation JFJochPreview)
|
||||
|
||||
116
tests/ImageSpotFinderGPUTest.cpp
Normal file
116
tests/ImageSpotFinderGPUTest.cpp
Normal file
@@ -0,0 +1,116 @@
|
||||
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute
|
||||
// SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
#include <catch2/catch_all.hpp>
|
||||
|
||||
#ifdef JFJOCH_USE_CUDA
|
||||
|
||||
#include "../image_analysis/spot_finding//ImageAnalysisGPU.h"
|
||||
#include "../image_analysis/spot_finding/ImageSpotFinder.h"
|
||||
static void fill_test_image(std::vector<int32_t>& input, size_t width, size_t height) {
|
||||
input.resize(width * height);
|
||||
for (size_t i = 0; i < width * height; i++)
|
||||
input[i] = (i % 2) * 5 + 5;
|
||||
input[width * 50 + 50] = 20;
|
||||
input[width * 25 + 26] = 16;
|
||||
input[width * 75 + 25] = 12;
|
||||
}
|
||||
|
||||
// Helper to run GPU and get DiffractionSpot list via StrongPixelSet -> FindSpotsImage
|
||||
static std::vector<DiffractionSpot> run_gpu_and_collect_spots(const std::vector<int32_t>& input,
|
||||
size_t width, size_t height,
|
||||
const SpotFindingSettings& settings)
|
||||
{
|
||||
GPUImageAnalysis gpu(static_cast<int32_t>(width), static_cast<int32_t>(height));
|
||||
REQUIRE(GPUImageAnalysis::GPUPresent());
|
||||
|
||||
// Set input buffer pointer to our CPU data, register and upload
|
||||
// Note: RegisterBuffer/UnregisterBuffer are optional for plain memcpy path,
|
||||
// but we use them to mirror intended flow.
|
||||
const_cast<GPUImageAnalysis&>(gpu).SetInputBuffer((void*)input.data());
|
||||
gpu.RegisterBuffer();
|
||||
gpu.LoadDataToGPU();
|
||||
|
||||
// Run kernel
|
||||
gpu.RunSpotFinder(settings);
|
||||
|
||||
// Collect strong pixels and convert to spots like CPU does
|
||||
StrongPixelSet strong;
|
||||
gpu.GetSpotFinderResults(strong);
|
||||
|
||||
std::vector<DiffractionSpot> spots;
|
||||
strong.FindSpotsImage(settings, spots);
|
||||
|
||||
gpu.UnregisterBuffer();
|
||||
return spots;
|
||||
}
|
||||
|
||||
// Mirror of ImageSpotFinder_SignalToNoise
|
||||
TEST_CASE("GPUImageAnalysis_SignalToNoise") {
|
||||
if (!GPUImageAnalysis::GPUPresent()) {
|
||||
WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise");
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t width = 100, height = 100;
|
||||
std::vector<float> resolution(width * height, 2.0f);
|
||||
std::vector<bool> mask(width * height, false);
|
||||
resolution[width * 50 + 50] = 1.0f;
|
||||
|
||||
std::vector<int32_t> input;
|
||||
fill_test_image(input, width, height);
|
||||
|
||||
SpotFindingSettings settings{
|
||||
.signal_to_noise_threshold = 3.0,
|
||||
.photon_count_threshold = 0,
|
||||
.min_pix_per_spot = 1,
|
||||
.max_pix_per_spot = 20,
|
||||
.high_resolution_limit = 0.5,
|
||||
.low_resolution_limit = 3.0,
|
||||
};
|
||||
|
||||
// GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet.
|
||||
// StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec)
|
||||
// matches CPU ImageSpotFinder test behavior for these synthetic inputs.
|
||||
auto spots = run_gpu_and_collect_spots(input, width, height, settings);
|
||||
|
||||
REQUIRE(spots.size() == 2);
|
||||
REQUIRE(spots[0].RawCoord().y == 25);
|
||||
REQUIRE(spots[1].RawCoord().y == 50);
|
||||
}
|
||||
|
||||
TEST_CASE("GPUImageAnalysis_CountThreshold") {
|
||||
if (!GPUImageAnalysis::GPUPresent()) {
|
||||
WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise");
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t width = 100, height = 100;
|
||||
std::vector<float> resolution(width * height, 2.0f);
|
||||
std::vector<bool> mask(width * height, false);
|
||||
resolution[width * 50 + 50] = 1.0f;
|
||||
|
||||
std::vector<int32_t> input;
|
||||
fill_test_image(input, width, height);
|
||||
|
||||
SpotFindingSettings settings{
|
||||
.signal_to_noise_threshold = 0.0,
|
||||
.photon_count_threshold = 11,
|
||||
.min_pix_per_spot = 1,
|
||||
.max_pix_per_spot = 20,
|
||||
.high_resolution_limit = 0.5,
|
||||
.low_resolution_limit = 3.0,
|
||||
};
|
||||
|
||||
// GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet.
|
||||
// StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec)
|
||||
// matches CPU ImageSpotFinder test behavior for these synthetic inputs.
|
||||
auto spots = run_gpu_and_collect_spots(input, width, height, settings);
|
||||
|
||||
REQUIRE(spots.size() == 3);
|
||||
REQUIRE(spots[0].RawCoord().y == 25);
|
||||
REQUIRE(spots[1].RawCoord().y == 50);
|
||||
REQUIRE(spots[2].RawCoord().y == 75);
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user