From 669b2d9358d69c060b52d17d7b2acee8a8f6f345 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Thu, 27 Jul 2023 21:30:10 +0200 Subject: [PATCH] CUDAWrapper: Move select device to dedicated wrapper --- common/CMakeLists.txt | 9 ++++++++- common/CUDAWrapper.cpp | 14 ++++++++++++++ common/CUDAWrapper.cu | 25 +++++++++++++++++++++++++ common/CUDAWrapper.h | 12 ++++++++++++ image_analysis/GPUImageAnalysis.cpp | 2 -- image_analysis/GPUImageAnalysis.cu | 27 +++++++-------------------- image_analysis/GPUImageAnalysis.h | 9 +++------ 7 files changed, 69 insertions(+), 29 deletions(-) create mode 100644 common/CUDAWrapper.cpp create mode 100644 common/CUDAWrapper.cu create mode 100644 common/CUDAWrapper.h diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index c7272c3c..b2e7e530 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -44,10 +44,17 @@ ADD_LIBRARY( CommonFunctions STATIC DetectorGeometry.cpp DetectorGeometry.h DetectorModuleGeometry.cpp DetectorModuleGeometry.h DetectorSetup.h DetectorSetup.cpp ZeroCopyReturnValue.h Histogram.h DiffractionGeometry.h - ROIFilter.h) + ROIFilter.h + CUDAWrapper.cpp + CUDAWrapper.h) TARGET_LINK_LIBRARIES(CommonFunctions Compression FrameSerialize libzmq JFCalibration JFJochProtoBuf -lrt) +IF (CMAKE_CUDA_COMPILER) + TARGET_SOURCES(CommonFunctions PRIVATE CUDAWrapper.cu ) + TARGET_LINK_LIBRARIES(CommonFunctions ${CUDART_LIBRARY} ${CMAKE_DL_LIBS} rt) +ENDIF() + IF(HAS_NUMAIF AND NUMA_LIBRARY) TARGET_COMPILE_DEFINITIONS(CommonFunctions PRIVATE -DJFJOCH_USE_NUMA) TARGET_LINK_LIBRARIES(CommonFunctions ${NUMA_LIBRARY}) diff --git a/common/CUDAWrapper.cpp b/common/CUDAWrapper.cpp new file mode 100644 index 00000000..ead14de6 --- /dev/null +++ b/common/CUDAWrapper.cpp @@ -0,0 +1,14 @@ +// Copyright (2019-2023) Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef JFJOCH_USE_CUDA + +#include "CUDAWrapper.h" + +int32_t get_gpu_count() { + return 0; +} + +void set_gpu_count(int32_t dev_id) {} + +#endif diff --git a/common/CUDAWrapper.cu b/common/CUDAWrapper.cu new file mode 100644 index 00000000..df4108fb --- /dev/null +++ b/common/CUDAWrapper.cu @@ -0,0 +1,25 @@ +// Copyright (2019-2023) Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "CUDAWrapper.h" +#include "JFJochException.h" + +inline void cuda_err(cudaError_t val) { + if (val != cudaSuccess) + throw JFJochException(JFJochExceptionCategory::GPUCUDAError, cudaGetErrorString(val)); +} + +int32_t get_gpu_count() { + int device_count; + cuda_err(cudaGetDeviceCount(&device_count)); + return device_count; +} + +void set_gpu_count(int32_t dev_id) { + auto dev_count = get_gpu_count(); + + if ((dev_id < 0) || (dev_id >= dev_count)) + throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Device ID cannot be negative"); + + cuda_err(cudaSetDevice(dev_id)); +} \ No newline at end of file diff --git a/common/CUDAWrapper.h b/common/CUDAWrapper.h new file mode 100644 index 00000000..61865da5 --- /dev/null +++ b/common/CUDAWrapper.h @@ -0,0 +1,12 @@ +// Copyright (2019-2023) Paul Scherrer Institute +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef JUNGFRAUJOCH_CUDAWRAPPER_H +#define JUNGFRAUJOCH_CUDAWRAPPER_H + +#include + +int32_t get_gpu_count(); +void set_gpu_count(int32_t dev_id); + +#endif //JUNGFRAUJOCH_CUDAWRAPPER_H diff --git a/image_analysis/GPUImageAnalysis.cpp b/image_analysis/GPUImageAnalysis.cpp index 63e6e9c5..f3f48921 100644 --- a/image_analysis/GPUImageAnalysis.cpp +++ b/image_analysis/GPUImageAnalysis.cpp @@ -62,8 +62,6 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_ return 0; } -std::atomic GPUImageAnalysis::threadid{0}; - void GPUImageAnalysis::LoadRadialIntegrationCorr(const std::vector& v) {} #endif diff --git a/image_analysis/GPUImageAnalysis.cu b/image_analysis/GPUImageAnalysis.cu index 943c5f90..ba054a5d 100644 --- a/image_analysis/GPUImageAnalysis.cu +++ b/image_analysis/GPUImageAnalysis.cu @@ -6,6 +6,7 @@ #include "../common/DiffractionGeometry.h" #include +#include "../common/CUDAWrapper.h" // input X x Y pixels array // output X x Y byte array @@ -228,22 +229,12 @@ __global__ void apply_pixel_mask(int16_t *image, const uint8_t *mask, uint32_t n } } -GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector &mask, - int32_t gpu_device) : +GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector &mask) : xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr), rad_integration_nbins(0), numberOfSMs(1) { - int device_count; - cuda_err(cudaGetDeviceCount(&device_count)); - - if (device_count == 0) + if (get_gpu_count() == 0) throw JFJochException(JFJochExceptionCategory::GPUCUDAError, "No CUDA devices found"); - if (gpu_device < 0) - gpu_device = threadid++; - - if (device_count > 1) - cuda_err(cudaSetDevice(gpu_device % device_count)); - int deviceId; cuda_err(cudaGetDevice(&deviceId)); cudaDeviceGetAttribute(&numberOfSMs, cudaDevAttrMultiProcessorCount, deviceId); @@ -271,8 +262,8 @@ GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const } GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, - const std::vector &rad_int_mapping, uint16_t rad_int_nbins, - int32_t gpu_device) : GPUImageAnalysis(xpixels, ypixels, mask, gpu_device) { + const std::vector &rad_int_mapping, uint16_t rad_int_nbins) + : GPUImageAnalysis(xpixels, ypixels, mask) { rad_integration_nbins = rad_int_nbins; if (rad_int_mapping.size() != xpixels * ypixels) @@ -297,10 +288,8 @@ GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std:: } GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, - const RadialIntegrationMapping& mapping, - int32_t gpu_device) - : GPUImageAnalysis(xpixels, ypixels, mask, mapping.GetPixelToBinMapping(), - mapping.GetBinNumber(), gpu_device) {} + const RadialIntegrationMapping& mapping) + : GPUImageAnalysis(xpixels, ypixels, mask, mapping.GetPixelToBinMapping(),mapping.GetBinNumber()) {} GPUImageAnalysis::~GPUImageAnalysis() { cudaStreamDestroy(cudastream->v); @@ -520,5 +509,3 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_ else return ret_sum / ret_count; } - -std::atomic GPUImageAnalysis::threadid{0}; diff --git a/image_analysis/GPUImageAnalysis.h b/image_analysis/GPUImageAnalysis.h index 963a64fe..a0086fff 100644 --- a/image_analysis/GPUImageAnalysis.h +++ b/image_analysis/GPUImageAnalysis.h @@ -25,8 +25,6 @@ class GPUImageAnalysis { std::mutex m; CudaStreamWrapper *cudastream; - static std::atomic threadid; - const int32_t xpixels; const int32_t ypixels; @@ -54,12 +52,11 @@ class GPUImageAnalysis { const int maxStrongPixel = 65536; public: - GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, int32_t gpu_device = -1); + GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask); GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, - const std::vector &rad_int_mapping, uint16_t rad_int_nbins, - int32_t gpu_device = -1); + const std::vector &rad_int_mapping, uint16_t rad_int_nbins); GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector &mask, - const RadialIntegrationMapping& mapping,int32_t gpu_device = -1); + const RadialIntegrationMapping& mapping); ~GPUImageAnalysis();