// Files
// Jungfraujoch/image_analysis/spot_finding/ImageSpotFinderGPU.h
// 2025-10-20 20:43:44 +02:00
//
// 34 lines
// 1.2 KiB
// C++

// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#ifndef JFJOCH_IMAGEANALYSISGPU_H
#define JFJOCH_IMAGEANALYSISGPU_H
#include <vector>
#include "SpotFindingSettings.h"
#include "ImageSpotFinder.h"
#include "../indexing/CUDAMemHelpers.h"
// Spot finder derived from ImageSpotFinder that overrides Run().
// The member types (CudaStream, CudaDevicePtr, CudaRegisteredVector) come from
// ../indexing/CUDAMemHelpers.h; how they are used is defined in the
// implementation file, not in this header.
class ImageSpotFinderGPU : public ImageSpotFinder {
private:
    // NOTE: declaration order below fixes construction/destruction order of the
    // members; keep it unchanged unless the implementation is reviewed as well.

    // CUDA stream object owned by this instance.
    CudaStream stream;

    // Device-side buffers. Presumably gpu_in holds the input image and
    // gpu_out_0 / gpu_out_1 hold kernel results -- TODO confirm in the .cu file.
    CudaDevicePtr<int32_t> gpu_in;
    CudaDevicePtr<uint32_t> gpu_out_0;
    CudaDevicePtr<uint32_t> gpu_out_1;

    // Registered host-side vectors for input and output
    // (presumably page-locked staging buffers -- verify against CUDAMemHelpers.h).
    CudaRegisteredVector<int32_t> input_buffer_reg;
    CudaRegisteredVector<uint32_t> output_buffer_reg;

    // Threads per block; value expected to work well on an Nvidia L4.
    const int numberOfCudaThreads{128};

    // Number of waves; value expected to work well on an Nvidia L4.
    const int numberOfWaves{32};

    // Upper bound on the window size (2*nby+1, 2*nbx+1), kept small to
    // prevent shared memory problems.
    const int windowSizeLimit{32};

public:
    // Builds a spot finder for the given width and height
    // (presumably image dimensions in pixels -- confirm in the implementation).
    ImageSpotFinderGPU(int32_t width, int32_t height);

    ~ImageSpotFinderGPU() override = default;

    // Runs spot finding with the supplied settings and resolution mask and
    // returns the spots that were found. Overrides ImageSpotFinder::Run.
    std::vector<DiffractionSpot> Run(const SpotFindingSettings &settings,
                                     const std::vector<bool> &res_mask) override;
};
#endif //JFJOCH_IMAGEANALYSISGPU_H