diff --git a/image_analysis/spot_finding/ImageAnalysisGPU.cu b/image_analysis/spot_finding/ImageAnalysisGPU.cu index f7171c2d..9d21345f 100644 --- a/image_analysis/spot_finding/ImageAnalysisGPU.cu +++ b/image_analysis/spot_finding/ImageAnalysisGPU.cu @@ -18,8 +18,8 @@ struct spot_parameters { uint32_t nby; uint32_t lines; uint32_t cols; - int16_t min_viable_number; - int16_t max_viable_number; + int32_t min_viable_number; + int32_t max_viable_number; }; // input X x Y pixels array @@ -108,8 +108,10 @@ __device__ __forceinline__ uint8_t pixel_result(const spot_parameters& params, c const int64_t tmp1 = in_minus_mean * in_minus_mean * (count-1); const float tmp2 = (var * count) * params.strong_pixel_threshold2; - const bool strong_pixel = (val > params.max_viable_number) - || ((val >= params.count_threshold) & (in_minus_mean > 0) & (tmp1 > tmp2)); + bool snr_criterion = (params.strong_pixel_threshold2 <= 0) || (in_minus_mean > 0) && (tmp1 > tmp2); + bool count_criterion = (val >= params.count_threshold); + + const bool strong_pixel = (val > params.max_viable_number) || (snr_criterion && count_criterion); return strong_pixel ? 1 : 0; } @@ -311,7 +313,7 @@ void GPUImageAnalysis::RunSpotFinder(const SpotFindingSettings &settings) { spot_params.cols = xpixels; spot_params.count_threshold = settings.photon_count_threshold; spot_params.min_viable_number = -100; - spot_params.max_viable_number = (1<22) - 1; // 22-bit + spot_params.max_viable_number = (1<<22) - 1; // 22-bit if (2 * spot_params.nbx + 1 > windowSizeLimit) throw JFJochException(JFJochExceptionCategory::SpotFinderError, "nbx exceeds window size limit"); @@ -356,7 +358,7 @@ void GPUImageAnalysis::GetSpotFinderResults(StrongPixelSet &pixel_set) { if (out_ptr[i]) { for (int j = 0; j < 8 * sizeof(out_ptr[0]); j++) { if (out_ptr[i] & (1 << j)) { - size_t npixel = i * 8 * sizeof(out_ptr[0])| j; + size_t npixel = i * 8 * sizeof(out_ptr[0]) + j; size_t line = npixel / xpixels; size_t col = npixel % xpixels; pixel_set.AddStrongPixel(col, line, host_in[npixel]); diff --git a/image_analysis/spot_finding/ImageAnalysisGPU.h b/image_analysis/spot_finding/ImageAnalysisGPU.h index 187fafbc..4ff34044 100644 --- a/image_analysis/spot_finding/ImageAnalysisGPU.h +++ b/image_analysis/spot_finding/ImageAnalysisGPU.h @@ -28,7 +28,7 @@ class GPUImageAnalysis { int numberOfSMs; const int numberOfCudaThreads = 128; // #threads per block that works well for Nvidia T4 const int numberOfWaves = 40; // #waves that works well for Nvidia T4 - const int windowSizeLimit = 21; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems + const int windowSizeLimit = 32; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems constexpr static int NBX = 15; public: diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5b91a04b..a1367f37 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -57,6 +57,7 @@ ADD_EXECUTABLE(jfjoch_test CrystalLatticeTest.cpp FPGAPTPTest.cpp ResolutionShellsTest.cpp + ImageSpotFinderGPUTest.cpp ) target_link_libraries(jfjoch_test Catch2WithMain JFJochBroker JFJochReceiver JFJochReader JFJochWriter JFJochImageAnalysis JFJochCommon JFJochHLSSimulation JFJochPreview) diff --git a/tests/ImageSpotFinderGPUTest.cpp b/tests/ImageSpotFinderGPUTest.cpp new file mode 100644 index 00000000..a5782bac --- /dev/null +++ b/tests/ImageSpotFinderGPUTest.cpp @@ -0,0 +1,116 @@ +// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-only + +#include + +#ifdef JFJOCH_USE_CUDA + +#include "../image_analysis/spot_finding//ImageAnalysisGPU.h" +#include "../image_analysis/spot_finding/ImageSpotFinder.h" +static void fill_test_image(std::vector& input, size_t width, size_t height) { + input.resize(width * height); + for (size_t i = 0; i < width * height; i++) + input[i] = (i % 2) * 5 + 5; + input[width * 50 + 50] = 20; + input[width * 25 + 26] = 16; + input[width * 75 + 25] = 12; +} + +// Helper to run GPU and get DiffractionSpot list via StrongPixelSet -> FindSpotsImage +static std::vector run_gpu_and_collect_spots(const std::vector& input, + size_t width, size_t height, + const SpotFindingSettings& settings) +{ + GPUImageAnalysis gpu(static_cast(width), static_cast(height)); + REQUIRE(GPUImageAnalysis::GPUPresent()); + + // Set input buffer pointer to our CPU data, register and upload + // Note: RegisterBuffer/UnregisterBuffer are optional for plain memcpy path, + // but we use them to mirror intended flow. + const_cast(gpu).SetInputBuffer((void*)input.data()); + gpu.RegisterBuffer(); + gpu.LoadDataToGPU(); + + // Run kernel + gpu.RunSpotFinder(settings); + + // Collect strong pixels and convert to spots like CPU does + StrongPixelSet strong; + gpu.GetSpotFinderResults(strong); + + std::vector spots; + strong.FindSpotsImage(settings, spots); + + gpu.UnregisterBuffer(); + return spots; +} + +// Mirror of ImageSpotFinder_SignalToNoise +TEST_CASE("GPUImageAnalysis_SignalToNoise") { + if (!GPUImageAnalysis::GPUPresent()) { + WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise"); + return; + } + + const size_t width = 100, height = 100; + std::vector resolution(width * height, 2.0f); + std::vector mask(width * height, false); + resolution[width * 50 + 50] = 1.0f; + + std::vector input; + fill_test_image(input, width, height); + + SpotFindingSettings settings{ + .signal_to_noise_threshold = 3.0, + .photon_count_threshold = 0, + .min_pix_per_spot = 1, + .max_pix_per_spot = 20, + .high_resolution_limit = 0.5, + .low_resolution_limit = 3.0, + }; + + // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet. + // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec) + // matches CPU ImageSpotFinder test behavior for these synthetic inputs. + auto spots = run_gpu_and_collect_spots(input, width, height, settings); + + REQUIRE(spots.size() == 2); + REQUIRE(spots[0].RawCoord().y == 25); + REQUIRE(spots[1].RawCoord().y == 50); +} + +TEST_CASE("GPUImageAnalysis_CountThreshold") { + if (!GPUImageAnalysis::GPUPresent()) { + WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise"); + return; + } + + const size_t width = 100, height = 100; + std::vector resolution(width * height, 2.0f); + std::vector mask(width * height, false); + resolution[width * 50 + 50] = 1.0f; + + std::vector input; + fill_test_image(input, width, height); + + SpotFindingSettings settings{ + .signal_to_noise_threshold = 0.0, + .photon_count_threshold = 11, + .min_pix_per_spot = 1, + .max_pix_per_spot = 20, + .high_resolution_limit = 0.5, + .low_resolution_limit = 3.0, + }; + + // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet. + // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec) + // matches CPU ImageSpotFinder test behavior for these synthetic inputs. + auto spots = run_gpu_and_collect_spots(input, width, height, settings); + + REQUIRE(spots.size() == 3); + REQUIRE(spots[0].RawCoord().y == 25); + REQUIRE(spots[1].RawCoord().y == 50); + REQUIRE(spots[2].RawCoord().y == 75); +} + +#endif