// Copyright (2019-2023) Paul Scherrer Institute #ifndef JUNGFRAUJOCH_GPUIMAGEANALYSIS_H #define JUNGFRAUJOCH_GPUIMAGEANALYSIS_H #include #include "StrongPixelSet.h" struct spot_parameters { float strong_pixel_threshold2; uint32_t count_threshold; uint32_t nbx; uint32_t nby; uint32_t lines; uint32_t cols; int16_t min_viable_number; }; struct CudaStreamWrapper; class GPUImageAnalysis { std::mutex m; CudaStreamWrapper *cudastream; const int32_t xpixels; const int32_t ypixels; int16_t *host_in = nullptr; int16_t *gpu_in = nullptr; uint32_t *gpu_out = nullptr; uint32_t *host_out = nullptr; int numberOfSMs; const int numberOfCudaThreads = 128; // #threads per block that works well for Nvidia T4 const int numberOfWaves = 40; // #waves that works well for Nvidia T4 const int windowSizeLimit = 21; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems public: GPUImageAnalysis(int32_t xpixels, int32_t ypixels); ~GPUImageAnalysis(); void SetInputBuffer(void *host_in); void LoadDataToGPU(); void RunSpotFinder(const JFJochProtoBuf::DataProcessingSettings &settings); void GetSpotFinderResults(StrongPixelSet &pixel_set); void GetSpotFinderResults(const DiffractionExperiment &experiment, const JFJochProtoBuf::DataProcessingSettings &settings, std::vector &vec); static bool GPUPresent(); void RegisterBuffer(); void UnregisterBuffer(); }; #endif //JUNGFRAUJOCH_GPUIMAGEANALYSIS_H