// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #ifndef JFJOCH_IMAGEANALYSISGPU_H #define JFJOCH_IMAGEANALYSISGPU_H #include #include "SpotFindingSettings.h" #include "ImageSpotFinder.h" #include "../indexing/CUDAMemHelpers.h" class ImageSpotFinderGPU : public ImageSpotFinder { CudaStream stream; CudaDevicePtr gpu_in; CudaDevicePtr gpu_out_0; CudaDevicePtr gpu_out_1; CudaRegisteredVector input_buffer_reg; CudaRegisteredVector output_buffer_reg; const int numberOfCudaThreads = 128; // #threads per block that should work well for Nvidia L4 const int numberOfWaves = 32; // #waves that should work well for Nvidia L4 const int windowSizeLimit = 32; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems public: ImageSpotFinderGPU(int32_t width, int32_t height); ~ImageSpotFinderGPU() override = default; std::vector Run(const SpotFindingSettings &settings, const std::vector &res_mask) override; }; #endif //JFJOCH_IMAGEANALYSISGPU_H