ImageSpotFinder: Work in progress to combine ImageSpotFinders

2025-10-04 21:15:15 +02:00
parent 57f2d05b32
commit e643513bfe
10 changed files with 177 additions and 193 deletions
@@ -19,11 +19,13 @@ ImageAnalysisCPU::ImageAnalysisCPU(const DiffractionExperiment &in_experiment,
      npixels(experiment.GetPixelsNum()),
      xpixels(experiment.GetXPixelsNum()),
      mask_1bit(npixels, false),
-      spotFinder(mask_1bit, in_integration),
      indexer(in_indexer),
      saturation_limit(experiment.GetSaturationLimit()),
      mask(in_mask),
-      azint_bins(in_integration.GetBinNumber()) {
+      azint_bins(in_integration.GetBinNumber()),
+      mask_resolution(experiment.GetPixelsNum(), false),
+      mask_high_res(-1),
+      mask_low_res(-1) {

    roi_map = experiment.ExportROIMap();
    roi_count = experiment.ROI().size();
@@ -31,6 +33,8 @@ ImageAnalysisCPU::ImageAnalysisCPU(const DiffractionExperiment &in_experiment,

    for (int i = 0; i < npixels; i++)
        mask_1bit[i] = (in_mask.GetMask().at(i) != 0);
+
+    spotFinder = std::make_unique<ImageSpotFinderCPU>(mask_1bit, experiment.GetXPixelsNum(), experiment.GetYPixelsNum());
 }

 void ImageAnalysisCPU::Analyze(DataMessage &output, std::vector<uint8_t> &image, AzimuthalIntegrationProfile &profile,
@@ -67,13 +71,21 @@ void ImageAnalysisCPU::Analyze(DataMessage &output, std::vector<uint8_t> &image,
    }
 }

+void ImageAnalysisCPU::UpdateMaskResolution(const SpotFindingSettings &settings) {
+    mask_low_res = settings.low_resolution_limit;
+    mask_high_res = settings.high_resolution_limit;
+    auto const &resolution_map = integration.Resolution();
+    for (int i = 0; i < mask_resolution.size(); i++)
+        mask_resolution[i] = (resolution_map[i] > mask_low_res) || (resolution_map[i] < mask_high_res);
+}
+
 template<class T>
 void ImageAnalysisCPU::Analyze(DataMessage &output,
                               const uint8_t *in_image,
                               T err_pixel_val,
                               T sat_pixel_val,
                               AzimuthalIntegrationProfile &profile,
-                               const SpotFindingSettings &spot_finding_settings) {
+                               const SpotFindingSettings &settings) {
    auto image = reinterpret_cast<const T *>(in_image);

    std::vector<ROIMessage> roi(roi_count);
@@ -141,10 +153,15 @@ void ImageAnalysisCPU::Analyze(DataMessage &output,
        }
    }

-    if (spot_finding_settings.enable) {
-        std::vector<DiffractionSpot> spots = spotFinder.Run(image, spot_finding_settings);
+    if (settings.enable) {
+        // Update resolution mask
+        if (mask_high_res != settings.high_resolution_limit
+            || mask_low_res != settings.low_resolution_limit)
+            UpdateMaskResolution(settings);

-        SpotAnalyze(experiment, spot_finding_settings, spots,
+        const std::vector<DiffractionSpot> spots = spotFinder->Run(image, settings, mask_resolution);
+
+        SpotAnalyze(experiment, settings, spots,
                    CompressedImage(updated_image, experiment.GetXPixelsNum(), experiment.GetYPixelsNum()),
                    indexer, output);
    }
@@ -11,7 +11,7 @@
 #include "../common/AzimuthalIntegration.h"
 #include "../common/PixelMask.h"
 #include "../common/AzimuthalIntegrationProfile.h"
-#include "spot_finding/ImageSpotFinder.h"
+#include "spot_finding/ImageSpotFinderCPU.h"
 #include "indexing/IndexerThreadPool.h"

 class ImageAnalysisCPU {
@@ -28,7 +28,7 @@ class ImageAnalysisCPU {
    size_t xpixels;
    std::vector<bool> mask_1bit;

-    ImageSpotFinder spotFinder;
+    std::unique_ptr<ImageSpotFinderCPU> spotFinder;
    IndexerThreadPool *indexer;

    uint16_t azint_bins;
@@ -37,8 +37,13 @@ class ImageAnalysisCPU {

    const PixelMask &mask;

+    std::vector<bool> mask_resolution;
+    float mask_high_res;
+    float mask_low_res;
+    void UpdateMaskResolution(const SpotFindingSettings& settings);
+
    template <class T>
-    void Analyze(DataMessage &output, const uint8_t *image, T err_pixel_val, T sat_pixel_val, AzimuthalIntegrationProfile &profile, const SpotFindingSettings &spot_finding_settings);
+    void Analyze(DataMessage &output, const uint8_t *image, T err_pixel_val, T sat_pixel_val, AzimuthalIntegrationProfile &profile, const SpotFindingSettings &settings);
 public:
    ImageAnalysisCPU(const DiffractionExperiment &experiment, const AzimuthalIntegration &integration,
                     const PixelMask &mask,
@@ -1,6 +1,6 @@
 ADD_LIBRARY(JFJochSpotFinding STATIC
-        ImageSpotFinder.cpp
-        ImageSpotFinder.h
+        ImageSpotFinderCPU.cpp
+        ImageSpotFinderCPU.h
        SpotUtils.cpp
        SpotUtils.h
        SpotFindingSettings.h
@@ -12,5 +12,5 @@ ADD_LIBRARY(JFJochSpotFinding STATIC
 TARGET_LINK_LIBRARIES(JFJochSpotFinding JFJochCommon)

 IF (JFJOCH_CUDA_AVAILABLE)
-    TARGET_SOURCES(JFJochSpotFinding PRIVATE ImageAnalysisGPU.cu ImageAnalysisGPU.h)
+    TARGET_SOURCES(JFJochSpotFinding PRIVATE ImageSpotFinderGPU.cu ImageSpotFinderGPU.h)
 ENDIF()
@@ -1,76 +1,55 @@
 // SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
 // SPDX-License-Identifier: GPL-3.0-only

-#include "ImageSpotFinder.h"
+#include "ImageSpotFinderCPU.h"
 #include "StrongPixelSet.h"

-ImageSpotFinder::ImageSpotFinder(const std::vector<bool> &mask, const std::vector<float> &in_resolution_map, size_t in_width, size_t in_height)
-    : width(in_width), height(in_height), resolution_map(in_resolution_map),
-      resolution_mask(width * height, 0),
-      mask(mask) {
-    if (mask.size() != width * height)
-        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
-                              "Coordinate mismatch of mask size");
-    if (resolution_map.size() != width * height)
-        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
-                              "Coordinate mismatch");
-}
-
-ImageSpotFinder::ImageSpotFinder(const std::vector<bool> &mask, const AzimuthalIntegration &integration)
-    : width(integration.GetWidth()),
-      height(integration.GetHeight()),
-      resolution_map(integration.Resolution()),
-      resolution_mask(width * height, 0),
-      mask(mask) {
+ImageSpotFinderCPU::ImageSpotFinderCPU(const std::vector<bool> &mask, size_t in_width, size_t in_height)
+    : width(in_width), height(in_height), pxl_mask(mask) {
    if (mask.size() != width * height)
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Coordinate mismatch of mask size");
 }

-void ImageSpotFinder::nbx(int input) {
+void ImageSpotFinderCPU::nbx(int input) {
    NBX = input;
 }

-void ImageSpotFinder::CalcResolutionMap(const SpotFindingSettings &settings) {
-    low_resolution = settings.low_resolution_limit;
-    high_resolution = settings.high_resolution_limit;
-
-    for (int i = 0; i < width * height; i++) {
-        resolution_mask[i] = mask[i] || (resolution_map[i] > low_resolution) || (resolution_map[i] < high_resolution);
-    }
+std::vector<DiffractionSpot> ImageSpotFinderCPU::Run(const int32_t *input, const SpotFindingSettings &settings,
+                                     const std::vector<bool> &res_mask) {
+    return Run<int32_t>(input, INT32_MIN, settings, res_mask);
 }

-std::vector<DiffractionSpot> ImageSpotFinder::Run(const int32_t *input, const SpotFindingSettings &settings) {
-    return Run<int32_t>(input, INT32_MIN, settings);
+std::vector<DiffractionSpot> ImageSpotFinderCPU::Run(const int16_t *input, const SpotFindingSettings &settings,
+                                     const std::vector<bool> &res_mask) {
+    return Run<int16_t>(input, INT16_MIN, settings, res_mask);
 }

-std::vector<DiffractionSpot> ImageSpotFinder::Run(const int16_t *input, const SpotFindingSettings &settings) {
-    return Run<int16_t>(input, INT16_MIN, settings);
+std::vector<DiffractionSpot> ImageSpotFinderCPU::Run(const int8_t *input, const SpotFindingSettings &settings,
+                                     const std::vector<bool> &res_mask) {
+    return Run<int8_t>(input, INT8_MIN, settings, res_mask);
 }

-std::vector<DiffractionSpot> ImageSpotFinder::Run(const int8_t *input, const SpotFindingSettings &settings) {
-    return Run<int8_t>(input, INT8_MIN, settings);
+std::vector<DiffractionSpot> ImageSpotFinderCPU::Run(const uint32_t *input, const SpotFindingSettings &settings,
+                                     const std::vector<bool> &res_mask) {
+    return Run<uint32_t>(input, UINT32_MAX, settings, res_mask);
 }

-std::vector<DiffractionSpot> ImageSpotFinder::Run(const uint32_t *input, const SpotFindingSettings &settings) {
-    return Run<uint32_t>(input, UINT32_MAX, settings);
+std::vector<DiffractionSpot> ImageSpotFinderCPU::Run(const uint16_t *input, const SpotFindingSettings &settings,
+                                     const std::vector<bool> &res_mask) {
+    return Run<uint16_t>(input, UINT16_MAX, settings, res_mask);
 }

-std::vector<DiffractionSpot> ImageSpotFinder::Run(const uint16_t *input, const SpotFindingSettings &settings) {
-    return Run<uint16_t>(input, UINT16_MAX, settings);
-}
-
-std::vector<DiffractionSpot> ImageSpotFinder::Run(const uint8_t *input, const SpotFindingSettings &settings) {
-    return Run<uint8_t>(input, UINT8_MAX, settings);
+std::vector<DiffractionSpot> ImageSpotFinderCPU::Run(const uint8_t *input, const SpotFindingSettings &settings,
+                                     const std::vector<bool> &res_mask) {
+    return Run<uint8_t>(input, UINT8_MAX, settings, res_mask);
 }

 template<class T>
-std::vector<DiffractionSpot> ImageSpotFinder::Run(const T *input,
+std::vector<DiffractionSpot> ImageSpotFinderCPU::Run(const T *input,
                                                  const T special_value,
-                                                  const SpotFindingSettings &settings) {
-    if (settings.low_resolution_limit != low_resolution || settings.high_resolution_limit != high_resolution)
-        CalcResolutionMap(settings);
-
+                                                  const SpotFindingSettings &settings,
+                                                  const std::vector<bool> &res_mask) {
    std::vector<DiffractionSpot> output;
    StrongPixelSet strong_pixel_set;

@@ -79,7 +58,7 @@ std::vector<DiffractionSpot> ImageSpotFinder::Run(const T *input,
            for (int line = 0; line < height; line++) {
                for (int col = 0; col < width; col++) {
                    int32_t pxl_val = input[line * width + col];
-                    if (pxl_val > settings.photon_count_threshold && !resolution_mask[line * width + col])
+                    if (pxl_val > settings.photon_count_threshold && !pxl_mask[line * width + col] && !res_mask[line * width + col])
                        strong_pixel_set.AddStrongPixel(col, line, pxl_val);
                }
            }
@@ -102,7 +81,7 @@ std::vector<DiffractionSpot> ImageSpotFinder::Run(const T *input,
    for (int line = 0; line < NBX; line++) {
        for (int col = 0; col < width; col++) {
            auto pxl = line * width + col;
-            mask[pxl] = (input[pxl] == special_value) || resolution_mask[pxl];
+            mask[pxl] = (input[pxl] == special_value) || pxl_mask[pxl];

            if (mask[pxl] == 0) {
                int64_t tmp = input[pxl];
@@ -117,7 +96,7 @@ std::vector<DiffractionSpot> ImageSpotFinder::Run(const T *input,
        for (int col = 0; col < width; col++) {
            if (line < height - NBX) {
                auto pxl = (line + NBX) * width + col;
-                mask[pxl] = (input[pxl] == special_value) || resolution_mask[pxl];
+                mask[pxl] = (input[pxl] == special_value) || pxl_mask[pxl];

                if (mask[pxl] == 0) {
                    int64_t tmp = input[pxl];
@@ -168,7 +147,8 @@ std::vector<DiffractionSpot> ImageSpotFinder::Run(const T *input,
            if ((pxl_val > settings.photon_count_threshold) && // pixel is above count threshold
                (in_minus_mean > 0) && // pixel value is larger than mean
                (in_minus_mean * in_minus_mean > std::ceil(var * strong2)) && // pixel is above SNR threshold
-                (mask[line * width + col] == 0)) // pixel is not bad pixel
+                (mask[line * width + col] == 0) && // pixel is not bad pixel
+                (res_mask[line * width + col] == 0)) // pixel is within a valid resolution range
                strong_pixel_set.AddStrongPixel(col, line, pxl_val);
        }
    }
@@ -17,31 +17,27 @@
 // This one is expected to be used in cases, where images are already assembled
 // and it aims for 100 ms execution

-class ImageSpotFinder {
+class ImageSpotFinderCPU {
    size_t width;
    size_t height;
-    const std::vector<float> &resolution_map;
-    mutable std::vector<uint8_t> resolution_mask;
-    float high_resolution = -1, low_resolution = -1;

-    const std::vector<bool> &mask;
-    void CalcResolutionMap(const SpotFindingSettings& settings);
+    const std::vector<bool> &pxl_mask;

    int NBX = 15;
    template <class T>
    std::vector<DiffractionSpot> Run(const T *input,
                                     const T special_value,
-                                     const SpotFindingSettings &settings);
+                                     const SpotFindingSettings &settings,
+                                     const std::vector<bool> &res_mask);
 public:
-    explicit ImageSpotFinder(const std::vector<bool> &mask, const AzimuthalIntegration &integration);
-    ImageSpotFinder(const std::vector<bool> &mask, const std::vector<float> &resolution_map, size_t width, size_t height);
+    ImageSpotFinderCPU(const std::vector<bool> &mask, size_t width, size_t height);
    void nbx(int input);
-    std::vector<DiffractionSpot> Run(const int32_t *input, const SpotFindingSettings &settings);
-    std::vector<DiffractionSpot> Run(const int16_t *input, const SpotFindingSettings &settings);
-    std::vector<DiffractionSpot> Run(const int8_t *input, const SpotFindingSettings &settings);
-    std::vector<DiffractionSpot> Run(const uint32_t *input, const SpotFindingSettings &settings);
-    std::vector<DiffractionSpot> Run(const uint16_t *input, const SpotFindingSettings &settings);
-    std::vector<DiffractionSpot> Run(const uint8_t *input, const SpotFindingSettings &settings);
+    std::vector<DiffractionSpot> Run(const int32_t *input, const SpotFindingSettings &settings, const std::vector<bool> &res_mask);
+    std::vector<DiffractionSpot> Run(const int16_t *input, const SpotFindingSettings &settings, const std::vector<bool> &res_mask);
+    std::vector<DiffractionSpot> Run(const int8_t *input, const SpotFindingSettings &settings, const std::vector<bool> &res_mask);
+    std::vector<DiffractionSpot> Run(const uint32_t *input, const SpotFindingSettings &settings, const std::vector<bool> &res_mask);
+    std::vector<DiffractionSpot> Run(const uint16_t *input, const SpotFindingSettings &settings, const std::vector<bool> &res_mask);
+    std::vector<DiffractionSpot> Run(const uint8_t *input, const SpotFindingSettings &settings, const std::vector<bool> &res_mask);
 };

 #endif //JFJOCH_IMAGESPOTFINDER_H
@@ -7,7 +7,7 @@
 // GPU Spot finding developed by Hans-Christian Stadler (PSI)
 // Copyright (2019-2023) Paul Scherrer Institute

-#include "ImageAnalysisGPU.h"
+#include "ImageSpotFinderGPU.h"

 #include "../../common/JFJochException.h"

@@ -257,7 +257,7 @@ __global__ void analyze_pixel(const int32_t *in, uint32_t *out, const spot_param
 }

 ImageSpotFinderGPU::ImageSpotFinderGPU(int32_t in_xpixels, int32_t in_ypixels) :
-        xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr) {
+        xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr), numberOfSMs(0) {

    int device_count;
    cuda_err(cudaGetDeviceCount(&device_count));
@@ -278,6 +278,7 @@ ImageSpotFinderGPU::ImageSpotFinderGPU(int32_t in_xpixels, int32_t in_ypixels) :
    cuda_err(cudaStreamCreate(&cudastream->v));
    cuda_err(cudaMalloc(&gpu_in, xpixels * ypixels * sizeof(int32_t)));
    cuda_err(cudaMalloc(&gpu_out, output_byte_size(gpu_out, xpixels, ypixels)));
+    cuda_err(cudaHostAlloc(&host_in, xpixels * ypixels * sizeof(int32_t), cudaHostAllocPortable));
    cuda_err(cudaHostAlloc(&host_out, output_byte_size(host_out, xpixels, ypixels), cudaHostAllocPortable));
 }

@@ -285,13 +286,14 @@ ImageSpotFinderGPU::~ImageSpotFinderGPU() {
    cudaStreamDestroy(cudastream->v);
    delete(cudastream);

+    cudaFreeHost(host_in);
    cudaFreeHost(host_out);
    cudaFree(gpu_in);
    cudaFree(gpu_out);
 }

-void ImageSpotFinderGPU::SetInputBuffer(void *ptr) {
-    host_in = (int32_t *) ptr;
+int32_t *ImageSpotFinderGPU::GetHostBuffer() {
+    return host_in;
 }

 bool ImageSpotFinderGPU::GPUPresent() {
@@ -301,7 +303,7 @@ bool ImageSpotFinderGPU::GPUPresent() {
    return (device_count > 0);
 }

-void ImageSpotFinderGPU::RunSpotFinder(const SpotFindingSettings &settings) {
+std::vector<DiffractionSpot> ImageSpotFinderGPU::Run(const SpotFindingSettings &settings, const std::vector<bool> &res_mask) {
    // data_in is CUDA registered memory

    // Run COLSPOT (GPU version)
@@ -326,31 +328,28 @@ void ImageSpotFinderGPU::RunSpotFinder(const SpotFindingSettings &settings) {
    if (windowSizeLimit > spot_params.lines)
        throw JFJochException(JFJochExceptionCategory::SpotFinderError, "window size limit exceeds number of lines");

-    { // call cuda kernel
-        const auto nWriters = numberOfCudaThreads - 2 * spot_params.nby;
-        const auto nBlocks = (spot_params.cols + nWriters - 1) / nWriters;
-        const auto window = 2 * spot_params.nby + 1;
-        const auto sharedSize = (2 * sizeof(int64_t) + // sum, sum2
-                                 window * sizeof(int32_t) + // val
-                                 1 * sizeof(int16_t) // count
-                                ) * numberOfCudaThreads;
-        const dim3 blocks(nBlocks, numberOfWaves);
-
-        cuda_err(cudaMemsetAsync(gpu_out, 0, output_byte_size(gpu_out, xpixels, ypixels), cudastream->v));
-        analyze_pixel<<<blocks, numberOfCudaThreads, sharedSize, cudastream->v>>>
-                (gpu_in, gpu_out, spot_params);
-    }
+    const auto nWriters = numberOfCudaThreads - 2 * spot_params.nby;
+    const auto nBlocks = (spot_params.cols + nWriters - 1) / nWriters;
+    const auto window = 2 * spot_params.nby + 1;
+    const auto sharedSize = (2 * sizeof(int64_t) + // sum, sum2
+                             window * sizeof(int32_t) + // val
+                             1 * sizeof(int16_t) // count
+                            ) * numberOfCudaThreads;
+    const dim3 blocks(nBlocks, numberOfWaves);

+    cuda_err(cudaMemcpyAsync(gpu_in, host_in, xpixels * ypixels * sizeof(int32_t), cudaMemcpyHostToDevice,
+                             cudastream->v));
+    cuda_err(cudaMemsetAsync(gpu_out, 0, output_byte_size(gpu_out, xpixels, ypixels),
+                             cudastream->v));
+    analyze_pixel<<<blocks, numberOfCudaThreads, sharedSize, cudastream->v>>>
+            (gpu_in, gpu_out, spot_params);
    cuda_err(cudaMemcpyAsync(host_out, gpu_out, output_byte_size(gpu_out, xpixels, ypixels),
-                             cudaMemcpyDeviceToHost,cudastream->v));
-}
-
-void ImageSpotFinderGPU::GetSpotFinderResults(StrongPixelSet &pixel_set) {
-    if (host_in == nullptr)
-        throw JFJochException(JFJochExceptionCategory::SpotFinderError, "Host/GPU buffer not defined");
+                             cudaMemcpyDeviceToHost, cudastream->v));

    cuda_err(cudaStreamSynchronize(cudastream->v));

+    StrongPixelSet pixel_set;
+
    auto out_size = output_byte_size(host_out, xpixels, ypixels);
    auto out_ptr = (uint32_t *) host_out;

@@ -361,32 +360,14 @@ void ImageSpotFinderGPU::GetSpotFinderResults(StrongPixelSet &pixel_set) {
                    size_t npixel = i * 8 * sizeof(out_ptr[0]) + j;
                    size_t line = npixel / xpixels;
                    size_t col = npixel % xpixels;
-                    pixel_set.AddStrongPixel(col, line, host_in[npixel]);
+                    if (res_mask[npixel] == 0)
+                        pixel_set.AddStrongPixel(col, line, host_in[npixel]);
                }
            }
        }
    }
-}

-void ImageSpotFinderGPU::GetSpotFinderResults(const DiffractionExperiment &experiment,
-                                            const SpotFindingSettings &settings,
-                                            std::vector<DiffractionSpot> &vec) {
-    StrongPixelSet pixel_set;
-    GetSpotFinderResults(pixel_set);
+    std::vector<DiffractionSpot> vec;
    pixel_set.FindSpotsImage(settings, vec);
-}
-
-void ImageSpotFinderGPU::RegisterBuffer() {
-    cudaHostRegister(host_in, xpixels * ypixels * sizeof(int32_t), cudaHostRegisterDefault);
-}
-
-void ImageSpotFinderGPU::UnregisterBuffer() {
-    cudaHostUnregister(host_in);
-}
-
-void ImageSpotFinderGPU::LoadDataToGPU() {
-    if (host_in == nullptr)
-        throw JFJochException(JFJochExceptionCategory::SpotFinderError, "Host/GPU buffer not defined");
-
-    cuda_err(cudaMemcpy(gpu_in, host_in, xpixels * ypixels * sizeof(int32_t), cudaMemcpyHostToDevice));
+    return vec;
 }
@@ -35,17 +35,11 @@ public:
    ImageSpotFinderGPU(int32_t xpixels, int32_t ypixels);
    ~ImageSpotFinderGPU();

-    void SetInputBuffer(void *host_in);
-    void LoadDataToGPU();
+    int32_t *GetHostBuffer();

-    void RunSpotFinder(const SpotFindingSettings &settings);
-    void GetSpotFinderResults(StrongPixelSet &pixel_set);
-    void GetSpotFinderResults(const DiffractionExperiment &experiment, const SpotFindingSettings &settings,
-                    std::vector<DiffractionSpot> &vec);
+    std::vector<DiffractionSpot> Run(const SpotFindingSettings &settings, const std::vector<bool> &res_mask);

    static bool GPUPresent();
-    void RegisterBuffer();
-    void UnregisterBuffer();
 };

 #endif //JFJOCH_IMAGEANALYSISGPU_H
@@ -45,7 +45,6 @@ ADD_EXECUTABLE(jfjoch_test
        ZMQMetadataSocketTest.cpp
        JFJochReaderTest.cpp
        MovingAverageTest.cpp
-        ImageSpotFinderTest.cpp
        ImageMetadataTest.cpp
        JFJochReceiverLiteTest.cpp
        GridScanSettingsTest.cpp
@@ -57,6 +56,7 @@ ADD_EXECUTABLE(jfjoch_test
        CrystalLatticeTest.cpp
        FPGAPTPTest.cpp
        ResolutionShellsTest.cpp
+        ImageSpotFinderCPUTest.cpp
        ImageSpotFinderGPUTest.cpp
 )

@@ -2,18 +2,15 @@
 // SPDX-License-Identifier: GPL-3.0-only

 #include <catch2/catch_all.hpp>
-#include "../image_analysis/spot_finding/ImageSpotFinder.h"
+#include "../image_analysis/spot_finding/ImageSpotFinderCPU.h"

-TEST_CASE("ImageSpotFinder_SignalToNoise") {
+TEST_CASE("ImageSpotFinderCPU_SignalToNoise") {

    size_t width = 100;
    size_t height = 100;
-    std::vector<float> resolution(width * height, 2.0);
    std::vector<bool> mask(width * height, false);

-    resolution[width * 50 + 50] = 1.0;
-
-    ImageSpotFinder s(mask, resolution, width, height);
+    ImageSpotFinderCPU s(mask, width, height);

    std::vector<int32_t> input (width * height);
    for (int i = 0; i < width * height; i++)
@@ -33,22 +30,20 @@ TEST_CASE("ImageSpotFinder_SignalToNoise") {
            .low_resolution_limit = 3.0,
    };

-    auto spots = s.Run(input.data(), settings);
+    std::vector<bool> mask_resolution(width * height, false);
+    auto spots = s.Run(input.data(), settings, mask_resolution);

    REQUIRE(spots.size() == 2);
    REQUIRE(spots[0].RawCoord().y == 25);
    REQUIRE(spots[1].RawCoord().y == 50);
 }

-TEST_CASE("ImageSpotFinder_SignalToNoise_Resolution") {
+TEST_CASE("ImageSpotFinderCPU_SignalToNoise_Resolution") {
    size_t width = 100;
    size_t height = 100;
    std::vector<bool> mask(width * height, false);
-    std::vector<float> resolution(width * height, 2.0);

-    resolution[width * 50 + 50] = 1.0;
-
-    ImageSpotFinder s(mask, resolution, width, height);
+    ImageSpotFinderCPU s(mask, width, height);

    std::vector<int32_t> input (width * height);
    for (int i = 0; i < width * height; i++)
@@ -68,22 +63,21 @@ TEST_CASE("ImageSpotFinder_SignalToNoise_Resolution") {
            .low_resolution_limit = 3.0,
    };

-    auto spots = s.Run(input.data(), settings);
+    std::vector<bool> mask_resolution(width * height, false);
+    mask_resolution[width * 50 + 50] = true;
+    auto spots = s.Run(input.data(), settings, mask_resolution);

    REQUIRE(spots.size() == 1);
    REQUIRE(spots[0].RawCoord().x == 26);
    REQUIRE(spots[0].RawCoord().y == 25);
 }

-TEST_CASE("ImageSpotFinder_CountThreshold_Resolution") {
+TEST_CASE("ImageSpotFinderCPU_CountThreshold_Resolution") {
    size_t width = 100;
    size_t height = 100;
-    std::vector<float> resolution(width * height, 2.0);
    std::vector<bool> mask(width * height, false);

-    resolution[width * 50 + 50] = 1.0;
-
-    ImageSpotFinder s(mask, resolution, width, height);
+    ImageSpotFinderCPU s(mask, width, height);

    std::vector<int32_t> input (width * height);
    for (int i = 0; i < width * height; i++)
@@ -103,22 +97,23 @@ TEST_CASE("ImageSpotFinder_CountThreshold_Resolution") {
            .low_resolution_limit = 3.0,
    };

-    auto spots = s.Run(input.data(), settings);
+    std::vector<bool> mask_resolution(width * height, false);
+    mask_resolution[width * 50 + 50] = true;
+    auto spots = s.Run(input.data(), settings, mask_resolution);

    REQUIRE(spots.size() == 2);
    REQUIRE(spots[0].RawCoord().y == 25);
    REQUIRE(spots[1].RawCoord().y == 75);
 }

-TEST_CASE("ImageSpotFinder_CountThreshold_Mask") {
+TEST_CASE("ImageSpotFinderCPU_CountThreshold_Mask") {
    size_t width = 100;
    size_t height = 100;
-    std::vector<float> resolution(width * height, 2.0);
-    std::vector<bool> mask(width * height, false);

+    std::vector<bool> mask(width * height, false);
    mask[width * 50 + 50] = true;

-    ImageSpotFinder s(mask, resolution, width, height);
+    ImageSpotFinderCPU s(mask, width, height);

    std::vector<int32_t> input (width * height);
    for (int i = 0; i < width * height; i++)
@@ -138,22 +133,22 @@ TEST_CASE("ImageSpotFinder_CountThreshold_Mask") {
        .low_resolution_limit = 3.0,
    };

-    auto spots = s.Run(input.data(), settings);
+    std::vector<bool> mask_resolution(width * height, false);
+    auto spots = s.Run(input.data(), settings, mask_resolution);

    REQUIRE(spots.size() == 2);
    REQUIRE(spots[0].RawCoord().y == 25);
    REQUIRE(spots[1].RawCoord().y == 75);
 }

-TEST_CASE("ImageSpotFinder_SignalToNoise_Mask") {
+TEST_CASE("ImageSpotFinderCPU_SignalToNoise_Mask") {
    size_t width = 100;
    size_t height = 100;
    std::vector<bool> mask(width * height, false);
-    std::vector<float> resolution(width * height, 2.0);

    mask[width * 50 + 50] = true;

-    ImageSpotFinder s(mask, resolution, width, height);
+    ImageSpotFinderCPU s(mask, width, height);

    std::vector<int32_t> input (width * height);
    for (int i = 0; i < width * height; i++)
@@ -173,7 +168,8 @@ TEST_CASE("ImageSpotFinder_SignalToNoise_Mask") {
        .low_resolution_limit = 3.0,
 };

-    auto spots = s.Run(input.data(), settings);
+    std::vector<bool> mask_resolution(width * height, false);
+    auto spots = s.Run(input.data(), settings, mask_resolution);

    REQUIRE(spots.size() == 1);
    REQUIRE(spots[0].RawCoord().x == 26);
@@ -5,8 +5,8 @@

 #ifdef JFJOCH_USE_CUDA

-#include "../image_analysis/spot_finding//ImageAnalysisGPU.h"
-#include "../image_analysis/spot_finding/ImageSpotFinder.h"
+#include "../image_analysis/spot_finding/ImageSpotFinderGPU.h"
+
 static void fill_test_image(std::vector<int32_t>& input, size_t width, size_t height) {
    input.resize(width * height);
    for (size_t i = 0; i < width * height; i++)
@@ -19,43 +19,26 @@ static void fill_test_image(std::vector<int32_t>& input, size_t width, size_t he
 // Helper to run GPU and get DiffractionSpot list via StrongPixelSet -> FindSpotsImage
 static std::vector<DiffractionSpot> run_gpu_and_collect_spots(const std::vector<int32_t>& input,
                                                              size_t width, size_t height,
-                                                              const SpotFindingSettings& settings)
+                                                              const SpotFindingSettings& settings,
+                                                              const std::vector<bool>& res_mask)
 {
    ImageSpotFinderGPU gpu(static_cast<int32_t>(width), static_cast<int32_t>(height));
    REQUIRE(ImageSpotFinderGPU::GPUPresent());

-    // Set input buffer pointer to our CPU data, register and upload
-    // Note: RegisterBuffer/UnregisterBuffer are optional for plain memcpy path,
-    // but we use them to mirror intended flow.
-    const_cast<ImageSpotFinderGPU&>(gpu).SetInputBuffer((void*)input.data());
-    gpu.RegisterBuffer();
-    gpu.LoadDataToGPU();
-
-    // Run kernel
-    gpu.RunSpotFinder(settings);
-
-    // Collect strong pixels and convert to spots like CPU does
-    StrongPixelSet strong;
-    gpu.GetSpotFinderResults(strong);
-
-    std::vector<DiffractionSpot> spots;
-    strong.FindSpotsImage(settings, spots);
-
-    gpu.UnregisterBuffer();
-    return spots;
+    memcpy(gpu.GetHostBuffer(), input.data(), width * height * sizeof(int32_t));
+    return gpu.Run(settings, res_mask);
 }

 // Mirror of ImageSpotFinder_SignalToNoise
-TEST_CASE("GPUImageAnalysis_SignalToNoise") {
+TEST_CASE("ImageSpotFinderGPU_SignalToNoise") {
    if (!ImageSpotFinderGPU::GPUPresent()) {
-        WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise");
+        WARN("No CUDA GPU present. Skipping ImageSpotFinderGPU_SignalToNoise");
        return;
    }

    const size_t width = 100, height = 100;
-    std::vector<float> resolution(width * height, 2.0f);
+    std::vector<bool> res_mask(width * height, false);
    std::vector<bool> mask(width * height, false);
-    resolution[width * 50 + 50] = 1.0f;

    std::vector<int32_t> input;
    fill_test_image(input, width, height);
@@ -72,23 +55,22 @@ TEST_CASE("GPUImageAnalysis_SignalToNoise") {
    // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet.
    // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec)
    // matches CPU ImageSpotFinder test behavior for these synthetic inputs.
-    auto spots = run_gpu_and_collect_spots(input, width, height, settings);
+    auto spots = run_gpu_and_collect_spots(input, width, height, settings, res_mask);

    REQUIRE(spots.size() == 2);
    REQUIRE(spots[0].RawCoord().y == 25);
    REQUIRE(spots[1].RawCoord().y == 50);
 }

-TEST_CASE("GPUImageAnalysis_CountThreshold") {
+TEST_CASE("ImageSpotFinderGPU_CountThreshold") {
    if (!ImageSpotFinderGPU::GPUPresent()) {
-        WARN("No CUDA GPU present. Skipping GPUImageAnalysis_SignalToNoise");
+        WARN("No CUDA GPU present. Skipping ImageSpotFinderGPU_SignalToNoise");
        return;
    }

    const size_t width = 100, height = 100;
-    std::vector<float> resolution(width * height, 2.0f);
+    std::vector<bool> res_mask(width * height, false);
    std::vector<bool> mask(width * height, false);
-    resolution[width * 50 + 50] = 1.0f;

    std::vector<int32_t> input;
    fill_test_image(input, width, height);
@@ -105,7 +87,7 @@ TEST_CASE("GPUImageAnalysis_CountThreshold") {
    // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet.
    // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec)
    // matches CPU ImageSpotFinder test behavior for these synthetic inputs.
-    auto spots = run_gpu_and_collect_spots(input, width, height, settings);
+    auto spots = run_gpu_and_collect_spots(input, width, height, settings, res_mask);

    REQUIRE(spots.size() == 3);
    REQUIRE(spots[0].RawCoord().y == 25);
@@ -113,4 +95,37 @@ TEST_CASE("GPUImageAnalysis_CountThreshold") {
    REQUIRE(spots[2].RawCoord().y == 75);
 }

+TEST_CASE("ImageSpotFinderGPU_20M") {
+    if (!ImageSpotFinderGPU::GPUPresent()) {
+        WARN("No CUDA GPU present. Skipping ImageSpotFinderGPU_SignalToNoise");
+        return;
+    }
+
+    const size_t width = 4500, height = 4500;
+    std::vector<bool> res_mask(width * height, false);
+    std::vector<bool> mask(width * height, false);
+
+
+    std::vector<int32_t> input;
+    fill_test_image(input, width, height);
+
+    SpotFindingSettings settings{
+        .signal_to_noise_threshold = 3.0,
+        .photon_count_threshold = 0,
+        .min_pix_per_spot = 1,
+        .max_pix_per_spot = 20,
+        .high_resolution_limit = 0.5,
+        .low_resolution_limit = 3.0,
+    };
+
+    // GPU produces strong pixels; FindSpotsImage uses mask/resolution implicit in StrongPixelSet.
+    // StrongPixelSet doesn't carry resolution/mask by itself, but FindSpotsImage(settings, vec)
+    // matches CPU ImageSpotFinder test behavior for these synthetic inputs.
+    auto spots = run_gpu_and_collect_spots(input, width, height, settings, res_mask);
+
+    REQUIRE(spots.size() == 2);
+    REQUIRE(spots[0].RawCoord().y == 25);
+    REQUIRE(spots[1].RawCoord().y == 50);
+}
+
 #endif