// Copyright (2019-2022) Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-or-later #ifndef JUNGFRAUJOCH_GPUIMAGEANALYSIS_H #define JUNGFRAUJOCH_GPUIMAGEANALYSIS_H #include #include "StrongPixelSet.h" #include "RadialIntegrationMapping.h" struct spot_parameters { float strong_pixel_threshold2; uint32_t count_threshold; uint32_t nbx; uint32_t nby; uint32_t lines; uint32_t cols; int16_t min_viable_number; }; struct CudaStreamWrapper; class GPUImageAnalysis { std::mutex m; CudaStreamWrapper *cudastream; static std::atomic threadid; const int32_t xpixels; const int32_t ypixels; int16_t *host_in = nullptr; uint8_t *gpu_mask = nullptr; int16_t *gpu_in = nullptr; uint8_t *gpu_out = nullptr; uint32_t *gpu_out_reduced = nullptr; uint32_t *gpu_out_reduced_counter = nullptr; uint32_t *host_out_reduced = nullptr; uint32_t *host_out_reduced_counter = nullptr; uint16_t rad_integration_nbins; uint16_t *gpu_rad_integration_bin_map = nullptr; int32_t *gpu_rad_integration_sum = nullptr; int32_t *gpu_rad_integration_count = nullptr; int32_t *host_rad_integration_sum = nullptr, *host_rad_integration_count = nullptr; int numberOfSMs; const int numberOfCudaThreads = 128; // #threads per block that works well for Nvidia T4 const int numberOfWaves = 40; // #waves that works well for Nvidia T4 const int windowSizeLimit = 21; // limit on the window size (2nby+1, 2nbx+1) to prevent shared memory problems const int maxStrongPixel = 65536; public: GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, int32_t gpu_device = -1); GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, const std::vector &rad_int_mapping, uint16_t rad_int_nbins, int32_t gpu_device = -1); GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, const RadialIntegrationMapping& mapping,int32_t gpu_device = -1); ~GPUImageAnalysis(); void SetInputBuffer(void *host_in); void LoadDataToGPU(bool apply_pixel_mask_on_gpu = true); void RunSpotFinder(const JFJochProtoBuf::DataProcessingSettings &settings); void GetSpotFinderResults(StrongPixelSet &pixel_set); void GetSpotFinderResults(const DiffractionExperiment &experiment, const JFJochProtoBuf::DataProcessingSettings &settings, std::vector &vec); void RunRadialIntegration(); void GetRadialIntegrationProfile(std::vector &result); [[nodiscard]] std::vector GetRadialIntegrationSum() const; [[nodiscard]] std::vector GetRadialIntegrationCount() const; [[nodiscard]] float GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_t max_bin); static bool GPUPresent(); void RegisterBuffer(); void UnregisterBuffer(); }; #endif //JUNGFRAUJOCH_GPUIMAGEANALYSIS_H