ImageAnalysisGPU: Add test and minor fixes

2025-10-04 19:38:08 +02:00
parent 12addb0ff4
commit bb4e5a7f8d
4 changed files with 126 additions and 7 deletions
--- a/image_analysis/spot_finding/ImageAnalysisGPU.cu
+++ b/image_analysis/spot_finding/ImageAnalysisGPU.cu
@@ -18,8 +18,8 @@ struct spot_parameters {
    uint32_t nby;
    uint32_t lines;
    uint32_t cols;
-    int16_t min_viable_number;
-    int16_t max_viable_number;
+    int32_t min_viable_number;
+    int32_t max_viable_number;
 };

 // input      X x Y pixels array
@@ -108,8 +108,10 @@ __device__ __forceinline__ uint8_t pixel_result(const spot_parameters& params, c

    const int64_t tmp1 = in_minus_mean * in_minus_mean * (count-1);
    const float tmp2 = (var * count) * params.strong_pixel_threshold2;
-    const bool strong_pixel = (val > params.max_viable_number)
-               || ((val >= params.count_threshold) & (in_minus_mean > 0) & (tmp1 > tmp2));
+    bool snr_criterion = (params.strong_pixel_threshold2 <= 0) || (in_minus_mean > 0) && (tmp1 > tmp2);
+    bool count_criterion = (val >= params.count_threshold);
+
+    const bool strong_pixel = (val > params.max_viable_number) || (snr_criterion && count_criterion);

    return strong_pixel ? 1 : 0;
 }
@@ -311,7 +313,7 @@ void GPUImageAnalysis::RunSpotFinder(const SpotFindingSettings &settings) {
    spot_params.cols = xpixels;
    spot_params.count_threshold = settings.photon_count_threshold;
    spot_params.min_viable_number = -100;
-    spot_params.max_viable_number = (1<22) - 1; // 22-bit
+    spot_params.max_viable_number = (1<<22) - 1; // 22-bit

    if (2 * spot_params.nbx + 1 > windowSizeLimit)
        throw JFJochException(JFJochExceptionCategory::SpotFinderError, "nbx exceeds window size limit");
@@ -356,7 +358,7 @@ void GPUImageAnalysis::GetSpotFinderResults(StrongPixelSet &pixel_set) {
        if (out_ptr[i]) {
            for (int j = 0; j < 8 * sizeof(out_ptr[0]); j++) {
                if (out_ptr[i] & (1 << j)) {
-                    size_t npixel = i * 8 * sizeof(out_ptr[0])| j;
+                    size_t npixel = i * 8 * sizeof(out_ptr[0]) + j;
                    size_t line = npixel / xpixels;
                    size_t col = npixel % xpixels;
                    pixel_set.AddStrongPixel(col, line, host_in[npixel]);
--- a/image_analysis/spot_finding/ImageAnalysisGPU.h
+++ b/image_analysis/spot_finding/ImageAnalysisGPU.h
@@ -28,7 +28,7 @@ class GPUImageAnalysis {
    int numberOfSMs;
    const int numberOfCudaThreads = 128;    // #threads per block that works well for Nvidia T4
    const int numberOfWaves = 40;           // #waves that works well for Nvidia T4
-    const int windowSizeLimit = 21;         // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems
+    const int windowSizeLimit = 32;         // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems
    constexpr static int NBX = 15;
 public:

--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -57,6 +57,7 @@ ADD_EXECUTABLE(jfjoch_test
        CrystalLatticeTest.cpp
        FPGAPTPTest.cpp
        ResolutionShellsTest.cpp
+        ImageSpotFinderGPUTest.cpp
 )

 target_link_libraries(jfjoch_test Catch2WithMain JFJochBroker JFJochReceiver JFJochReader JFJochWriter JFJochImageAnalysis JFJochCommon JFJochHLSSimulation JFJochPreview)
--- a/tests/ImageSpotFinderGPUTest.cpp
+++ b/tests/ImageSpotFinderGPUTest.cpp
@@ -0,0 +1,116 @@
+// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute
+// SPDX-License-Identifier: GPL-3.0-only
+
+#include <catch2/catch_all.hpp>
+
+#ifdef JFJOCH_USE_CUDA
+
+#include "../image_analysis/spot_finding//ImageAnalysisGPU.h"
+#include "../image_analysis/spot_finding/ImageSpotFinder.h"
+static void fill_test_image(std::vector<int32_t>& input, size_t width, size_t height) {
+    input.resize(width * height);
+    for (size_t i = 0; i < width * height; i++)
+        input[i] = (i % 2) * 5 + 5;
+    input[width * 50 + 50] = 20;
+    input[width * 25 + 26] = 16;
+    input[width * 75 + 25] = 12;
+}
+
+// Helper to run GPU and get DiffractionSpot list via StrongPixelSet -> FindSpotsImage
+static std::vector<DiffractionSpot> run_gpu_and_collect_spots(const std::vector<int32_t>& input,
+                                                              size_t width, size_t height,
+                                                              const SpotFindingSettings& settings)
+{
+    GPUImageAnalysis gpu(static_cast<int32_t>(width), static_cast<int32_t>(height));
+    REQUIRE(GPUImageAnalysis::GPUPresent());
+
+    // Set input buffer pointer to our CPU data, register and upload
+    // Note: RegisterBuffer/UnregisterBuffer are optional for plain memcpy path,
+    // but we use them to mirror intended flow.
+    const_cast<GPUImageAnalysis&>(gpu).SetInputBuffer((void*)input.data());
+    gpu.RegisterBuffer();
+    gpu.LoadDataToGPU();
+
+    // Run kernel
+    gpu.RunSpotFinder(settings);
+
+    // Collect strong pixels and convert to spots like CPU does
+    StrongPixelSet strong;
+    gpu.GetSpotFinderResults(strong);
+
+    std::vector<DiffractionSpot> spots;
+    strong.FindSpotsImage(settings, spots);
+
+    gpu.UnregisterBuffer();
+    return spots;
+}
+
+// Mirror of ImageSpotFinder_SignalToNoise
+TEST_CASE("GPUImageAnalysis_SignalToNoise") {
+    if (!GPUImageAnalysis::GPUPresent()) {
+        WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise");
+        return;
+    }
+
+    const size_t width = 100, height = 100;
+    std::vector<float> resolution(width * height, 2.0f);
+    std::vector<bool> mask(width * height, false);
+    resolution[width * 50 + 50] = 1.0f;
+
+    std::vector<int32_t> input;
+    fill_test_image(input, width, height);
+
+    SpotFindingSettings settings{
+        .signal_to_noise_threshold = 3.0,
+        .photon_count_threshold = 0,
+        .min_pix_per_spot = 1,
+        .max_pix_per_spot = 20,
+        .high_resolution_limit = 0.5,
+        .low_resolution_limit = 3.0,
+    };
+
+    // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet.
+    // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec)
+    // matches CPU ImageSpotFinder test behavior for these synthetic inputs.
+    auto spots = run_gpu_and_collect_spots(input, width, height, settings);
+
+    REQUIRE(spots.size() == 2);
+    REQUIRE(spots[0].RawCoord().y == 25);
+    REQUIRE(spots[1].RawCoord().y == 50);
+}
+
+TEST_CASE("GPUImageAnalysis_CountThreshold") {
+    if (!GPUImageAnalysis::GPUPresent()) {
+        WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise");
+        return;
+    }
+
+    const size_t width = 100, height = 100;
+    std::vector<float> resolution(width * height, 2.0f);
+    std::vector<bool> mask(width * height, false);
+    resolution[width * 50 + 50] = 1.0f;
+
+    std::vector<int32_t> input;
+    fill_test_image(input, width, height);
+
+    SpotFindingSettings settings{
+        .signal_to_noise_threshold = 0.0,
+        .photon_count_threshold = 11,
+        .min_pix_per_spot = 1,
+        .max_pix_per_spot = 20,
+        .high_resolution_limit = 0.5,
+        .low_resolution_limit = 3.0,
+    };
+
+    // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet.
+    // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec)
+    // matches CPU ImageSpotFinder test behavior for these synthetic inputs.
+    auto spots = run_gpu_and_collect_spots(input, width, height, settings);
+
+    REQUIRE(spots.size() == 3);
+    REQUIRE(spots[0].RawCoord().y == 25);
+    REQUIRE(spots[1].RawCoord().y == 50);
+    REQUIRE(spots[2].RawCoord().y == 75);
+}
+
+#endif