CUDAWrapper: Move select device to dedicated wrapper
This commit is contained in:
@@ -44,10 +44,17 @@ ADD_LIBRARY( CommonFunctions STATIC
|
||||
DetectorGeometry.cpp DetectorGeometry.h
|
||||
DetectorModuleGeometry.cpp DetectorModuleGeometry.h
|
||||
DetectorSetup.h DetectorSetup.cpp ZeroCopyReturnValue.h Histogram.h DiffractionGeometry.h
|
||||
ROIFilter.h)
|
||||
ROIFilter.h
|
||||
CUDAWrapper.cpp
|
||||
CUDAWrapper.h)
|
||||
|
||||
TARGET_LINK_LIBRARIES(CommonFunctions Compression FrameSerialize libzmq JFCalibration JFJochProtoBuf -lrt)
|
||||
|
||||
IF (CMAKE_CUDA_COMPILER)
|
||||
TARGET_SOURCES(CommonFunctions PRIVATE CUDAWrapper.cu )
|
||||
TARGET_LINK_LIBRARIES(CommonFunctions ${CUDART_LIBRARY} ${CMAKE_DL_LIBS} rt)
|
||||
ENDIF()
|
||||
|
||||
IF(HAS_NUMAIF AND NUMA_LIBRARY)
|
||||
TARGET_COMPILE_DEFINITIONS(CommonFunctions PRIVATE -DJFJOCH_USE_NUMA)
|
||||
TARGET_LINK_LIBRARIES(CommonFunctions ${NUMA_LIBRARY})
|
||||
|
||||
14
common/CUDAWrapper.cpp
Normal file
14
common/CUDAWrapper.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef JFJOCH_USE_CUDA
|
||||
|
||||
#include "CUDAWrapper.h"
|
||||
|
||||
int32_t get_gpu_count() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void set_gpu_count(int32_t dev_id) {}
|
||||
|
||||
#endif
|
||||
25
common/CUDAWrapper.cu
Normal file
25
common/CUDAWrapper.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "CUDAWrapper.h"
|
||||
#include "JFJochException.h"
|
||||
|
||||
inline void cuda_err(cudaError_t val) {
|
||||
if (val != cudaSuccess)
|
||||
throw JFJochException(JFJochExceptionCategory::GPUCUDAError, cudaGetErrorString(val));
|
||||
}
|
||||
|
||||
int32_t get_gpu_count() {
|
||||
int device_count;
|
||||
cuda_err(cudaGetDeviceCount(&device_count));
|
||||
return device_count;
|
||||
}
|
||||
|
||||
void set_gpu_count(int32_t dev_id) {
|
||||
auto dev_count = get_gpu_count();
|
||||
|
||||
if ((dev_id < 0) || (dev_id >= dev_count))
|
||||
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Device ID cannot be negative");
|
||||
|
||||
cuda_err(cudaSetDevice(dev_id));
|
||||
}
|
||||
12
common/CUDAWrapper.h
Normal file
12
common/CUDAWrapper.h
Normal file
@@ -0,0 +1,12 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef JUNGFRAUJOCH_CUDAWRAPPER_H
|
||||
#define JUNGFRAUJOCH_CUDAWRAPPER_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
int32_t get_gpu_count();
|
||||
void set_gpu_count(int32_t dev_id);
|
||||
|
||||
#endif //JUNGFRAUJOCH_CUDAWRAPPER_H
|
||||
@@ -62,8 +62,6 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::atomic<uint16_t> GPUImageAnalysis::threadid{0};
|
||||
|
||||
void GPUImageAnalysis::LoadRadialIntegrationCorr(const std::vector<float>& v) {}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "../common/DiffractionGeometry.h"
|
||||
|
||||
#include <sstream>
|
||||
#include "../common/CUDAWrapper.h"
|
||||
|
||||
// input X x Y pixels array
|
||||
// output X x Y byte array
|
||||
@@ -228,22 +229,12 @@ __global__ void apply_pixel_mask(int16_t *image, const uint8_t *mask, uint32_t n
|
||||
}
|
||||
}
|
||||
|
||||
GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector<uint8_t> &mask,
|
||||
int32_t gpu_device) :
|
||||
GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const std::vector<uint8_t> &mask) :
|
||||
xpixels(in_xpixels), ypixels(in_ypixels), gpu_out(nullptr), rad_integration_nbins(0), numberOfSMs(1) {
|
||||
|
||||
int device_count;
|
||||
cuda_err(cudaGetDeviceCount(&device_count));
|
||||
|
||||
if (device_count == 0)
|
||||
if (get_gpu_count() == 0)
|
||||
throw JFJochException(JFJochExceptionCategory::GPUCUDAError, "No CUDA devices found");
|
||||
|
||||
if (gpu_device < 0)
|
||||
gpu_device = threadid++;
|
||||
|
||||
if (device_count > 1)
|
||||
cuda_err(cudaSetDevice(gpu_device % device_count));
|
||||
|
||||
int deviceId;
|
||||
cuda_err(cudaGetDevice(&deviceId));
|
||||
cudaDeviceGetAttribute(&numberOfSMs, cudaDevAttrMultiProcessorCount, deviceId);
|
||||
@@ -271,8 +262,8 @@ GPUImageAnalysis::GPUImageAnalysis(int32_t in_xpixels, int32_t in_ypixels, const
|
||||
}
|
||||
|
||||
GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
|
||||
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins,
|
||||
int32_t gpu_device) : GPUImageAnalysis(xpixels, ypixels, mask, gpu_device) {
|
||||
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins)
|
||||
: GPUImageAnalysis(xpixels, ypixels, mask) {
|
||||
rad_integration_nbins = rad_int_nbins;
|
||||
|
||||
if (rad_int_mapping.size() != xpixels * ypixels)
|
||||
@@ -297,10 +288,8 @@ GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::
|
||||
}
|
||||
|
||||
GPUImageAnalysis::GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
|
||||
const RadialIntegrationMapping& mapping,
|
||||
int32_t gpu_device)
|
||||
: GPUImageAnalysis(xpixels, ypixels, mask, mapping.GetPixelToBinMapping(),
|
||||
mapping.GetBinNumber(), gpu_device) {}
|
||||
const RadialIntegrationMapping& mapping)
|
||||
: GPUImageAnalysis(xpixels, ypixels, mask, mapping.GetPixelToBinMapping(),mapping.GetBinNumber()) {}
|
||||
|
||||
GPUImageAnalysis::~GPUImageAnalysis() {
|
||||
cudaStreamDestroy(cudastream->v);
|
||||
@@ -520,5 +509,3 @@ float GPUImageAnalysis::GetRadialIntegrationRangeValue(uint16_t min_bin, uint16_
|
||||
else
|
||||
return ret_sum / ret_count;
|
||||
}
|
||||
|
||||
std::atomic<uint16_t> GPUImageAnalysis::threadid{0};
|
||||
|
||||
@@ -25,8 +25,6 @@ class GPUImageAnalysis {
|
||||
std::mutex m;
|
||||
CudaStreamWrapper *cudastream;
|
||||
|
||||
static std::atomic<uint16_t> threadid;
|
||||
|
||||
const int32_t xpixels;
|
||||
const int32_t ypixels;
|
||||
|
||||
@@ -54,12 +52,11 @@ class GPUImageAnalysis {
|
||||
const int maxStrongPixel = 65536;
|
||||
public:
|
||||
|
||||
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask, int32_t gpu_device = -1);
|
||||
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask);
|
||||
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
|
||||
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins,
|
||||
int32_t gpu_device = -1);
|
||||
const std::vector<uint16_t> &rad_int_mapping, uint16_t rad_int_nbins);
|
||||
GPUImageAnalysis(int32_t xpixels, int32_t ypixels, const std::vector<uint8_t> &mask,
|
||||
const RadialIntegrationMapping& mapping,int32_t gpu_device = -1);
|
||||
const RadialIntegrationMapping& mapping);
|
||||
|
||||
~GPUImageAnalysis();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user