// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only // SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #ifndef JFJOCH_IMAGEANALYSISGPU_H #define JFJOCH_IMAGEANALYSISGPU_H #include #include "SpotFindingSettings.h" #include "StrongPixelSet.h" struct CudaStreamWrapper; class GPUImageAnalysis { std::mutex m; CudaStreamWrapper *cudastream; const int32_t xpixels; const int32_t ypixels; int32_t *host_in = nullptr; int32_t *gpu_in = nullptr; uint32_t *gpu_out = nullptr; uint32_t *host_out = nullptr; int numberOfSMs; const int numberOfCudaThreads = 128; // #threads per block that works well for Nvidia T4 const int numberOfWaves = 40; // #waves that works well for Nvidia T4 const int windowSizeLimit = 21; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems constexpr static int NBX = 15; public: GPUImageAnalysis(int32_t xpixels, int32_t ypixels); ~GPUImageAnalysis(); void SetInputBuffer(void *host_in); void LoadDataToGPU(); void RunSpotFinder(const SpotFindingSettings &settings); void GetSpotFinderResults(StrongPixelSet &pixel_set); void GetSpotFinderResults(const DiffractionExperiment &experiment, const SpotFindingSettings &settings, std::vector &vec); static bool GPUPresent(); void RegisterBuffer(); void UnregisterBuffer(); }; #endif //JFJOCH_IMAGEANALYSISGPU_H