CUDAWrapper: Move select device to dedicated wrapper

This commit is contained in:
2023-07-27 21:30:10 +02:00
parent 5ca23ff64f
commit 669b2d9358
7 changed files with 69 additions and 29 deletions

View File

@@ -44,10 +44,17 @@ ADD_LIBRARY( CommonFunctions STATIC
DetectorGeometry.cpp DetectorGeometry.h
DetectorModuleGeometry.cpp DetectorModuleGeometry.h
DetectorSetup.h DetectorSetup.cpp ZeroCopyReturnValue.h Histogram.h DiffractionGeometry.h
ROIFilter.h)
ROIFilter.h
CUDAWrapper.cpp
CUDAWrapper.h)
TARGET_LINK_LIBRARIES(CommonFunctions Compression FrameSerialize libzmq JFCalibration JFJochProtoBuf -lrt)
IF (CMAKE_CUDA_COMPILER)
TARGET_SOURCES(CommonFunctions PRIVATE CUDAWrapper.cu )
TARGET_LINK_LIBRARIES(CommonFunctions ${CUDART_LIBRARY} ${CMAKE_DL_LIBS} rt)
ENDIF()
IF(HAS_NUMAIF AND NUMA_LIBRARY)
TARGET_COMPILE_DEFINITIONS(CommonFunctions PRIVATE -DJFJOCH_USE_NUMA)
TARGET_LINK_LIBRARIES(CommonFunctions ${NUMA_LIBRARY})

14
common/CUDAWrapper.cpp Normal file
View File

@@ -0,0 +1,14 @@
// Copyright (2019-2023) Paul Scherrer Institute
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef JFJOCH_USE_CUDA
#include "CUDAWrapper.h"
int32_t get_gpu_count() {
return 0;
}
void set_gpu_count(int32_t dev_id) {}
#endif

25
common/CUDAWrapper.cu Normal file
View File

@@ -0,0 +1,25 @@
// Copyright (2019-2023) Paul Scherrer Institute
// SPDX-License-Identifier: GPL-3.0-or-later
#include "CUDAWrapper.h"
#include "JFJochException.h"
inline void cuda_err(cudaError_t val) {
if (val != cudaSuccess)
throw JFJochException(JFJochExceptionCategory::GPUCUDAError, cudaGetErrorString(val));
}
int32_t get_gpu_count() {
int device_count;
cuda_err(cudaGetDeviceCount(&device_count));
return device_count;
}
void set_gpu_count(int32_t dev_id) {
auto dev_count = get_gpu_count();
if ((dev_id < 0) || (dev_id >= dev_count))
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Device ID cannot be negative");
cuda_err(cudaSetDevice(dev_id));
}

12
common/CUDAWrapper.h Normal file
View File

@@ -0,0 +1,12 @@
// Copyright (2019-2023) Paul Scherrer Institute
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef JUNGFRAUJOCH_CUDAWRAPPER_H
#define JUNGFRAUJOCH_CUDAWRAPPER_H
#include <cstdint>
int32_t get_gpu_count();
void set_gpu_count(int32_t dev_id);
#endif //JUNGFRAUJOCH_CUDAWRAPPER_H

View File

@@ -62,8 +62,6 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_
return 0;
}
std::atomic<uint16_t> GPUImageAnalysis::threadid{0};
void GPUImageAnalysis::LoadRadialIntegrationCorr(const std::vector<float>& v) {}
#endif

View File

@@ -6,6 +6,7 @@
#include "../common/DiffractionGeometry.h"
#include <sstream>
#include "../common/CUDAWrapper.h"
// input X x Y pixels array
// output X x Y byte array
@@ -228,22 +229,12 @@ __global__ void apply_pixel_mask(int16_t *image, const uint8_t *mask, uint32_t n
}
}
GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector<uint8_t> &mask,
int32_t gpu_device) :
GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector<uint8_t> &mask) :
xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr), rad_integration_nbins(0), numberOfSMs(1) {
int device_count;
cuda_err(cudaGetDeviceCount(&device_count));
if (device_count == 0)
if (get_gpu_count() == 0)
throw JFJochException(JFJochExceptionCategory::GPUCUDAError, "No CUDA devices found");
if (gpu_device < 0)
gpu_device = threadid++;
if (device_count > 1)
cuda_err(cudaSetDevice(gpu_device % device_count));
int deviceId;
cuda_err(cudaGetDevice(&deviceId));
cudaDeviceGetAttribute(&numberOfSMs, cudaDevAttrMultiProcessorCount, deviceId);
@@ -271,8 +262,8 @@ GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const
}
GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins,
int32_t gpu_device) : GPUImageAnalysis(xpixels, ypixels, mask, gpu_device) {
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins)
: GPUImageAnalysis(xpixels, ypixels, mask) {
rad_integration_nbins = rad_int_nbins;
if (rad_int_mapping.size() != xpixels * ypixels)
@@ -297,10 +288,8 @@ GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::
}
GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
const RadialIntegrationMapping& mapping,
int32_t gpu_device)
: GPUImageAnalysis(xpixels, ypixels, mask, mapping.GetPixelToBinMapping(),
mapping.GetBinNumber(), gpu_device) {}
const RadialIntegrationMapping& mapping)
: GPUImageAnalysis(xpixels, ypixels, mask, mapping.GetPixelToBinMapping(),mapping.GetBinNumber()) {}
GPUImageAnalysis::~GPUImageAnalysis() {
cudaStreamDestroy(cudastream->v);
@@ -520,5 +509,3 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_
else
return ret_sum / ret_count;
}
std::atomic<uint16_t> GPUImageAnalysis::threadid{0};

View File

@@ -25,8 +25,6 @@ class GPUImageAnalysis {
std::mutex m;
CudaStreamWrapper *cudastream;
static std::atomic<uint16_t> threadid;
const int32_t xpixels;
const int32_t ypixels;
@@ -54,12 +52,11 @@ class GPUImageAnalysis {
const int maxStrongPixel = 65536;
public:
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask, int32_t gpu_device = -1);
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask);
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins,
int32_t gpu_device = -1);
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins);
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
const RadialIntegrationMapping& mapping,int32_t gpu_device = -1);
const RadialIntegrationMapping& mapping);
~GPUImageAnalysis();