From b5c5da29b245e0d2b93cfee1497f2d6d77ee9ff8 Mon Sep 17 00:00:00 2001 From: Uldis Locans Date: Fri, 9 Dec 2016 13:43:09 +0100 Subject: [PATCH] add function to generate list of random numbers with cuda and opencl on the device --- CMakeLists.txt | 10 +++++ src/CUDA/CudaBase.cu | 16 ++++++++ src/CUDA/CudaBase.cuh | 7 ++++ src/DKSBase.cpp | 9 ++++ src/DKSBase.h | 5 +++ src/OpenCL/OpenCLBase.cpp | 41 ++++++++++++------- src/OpenCL/OpenCLBase.h | 20 +++++++-- .../OpenCLKernels/OpenCLCollimatorPhysics.cl | 22 +++++++--- test/CMakeLists.txt | 2 + 9 files changed, 110 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d9370c..d708a6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,16 @@ MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}") INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) LINK_DIRECTORIES (${CLFFT_LIBRARIES}) +#find clRNG +#SET (clRNG_USE_STATIC_LIBS OFF) +#FIND_PACKAGE(clRng REQUIRED HINTS &ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG}) +#MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}") +#MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}") +#INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) +#LINK_DIRECTORIES (${CLRNG_LIBRARIES}) +#find_package(PkgConfig) +#pkg_check_modules(clRng REQUIRED) + #enable UQTK OPTION (USE_UQTK "Use UQTK" OFF) diff --git a/src/CUDA/CudaBase.cu b/src/CUDA/CudaBase.cu index 03fa327..a4f3851 100644 --- a/src/CUDA/CudaBase.cu +++ b/src/CUDA/CudaBase.cu @@ -13,6 +13,13 @@ __global__ void initcuRandState(curandState *state, int size, int seed = 0) { } +__global__ void kernelCreateRandNumbers(curandState *state, double *data, int size) { + + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) + data[idx] = curand_uniform_double(&state[idx]); +} + //=====================================// //==========Private functions==========// @@ -68,6 +75,15 @@ int CudaBase::cuda_deleteCurandStates() { return DKS_SUCCESS; } +int CudaBase::cuda_createRandomNumbers(void *mem_ptr, int size) { + int threads = BLOCK_SIZE; + int blocks = size / threads + 1; + + kernelCreateRandNumbers<<>>(defaultRndState, (double *)mem_ptr, size); + + return DKS_SUCCESS; +} + curandState* CudaBase::cuda_getCurandStates() { return defaultRndState; } diff --git a/src/CUDA/CudaBase.cuh b/src/CUDA/CudaBase.cuh index 6aa502d..8d8c63e 100644 --- a/src/CUDA/CudaBase.cuh +++ b/src/CUDA/CudaBase.cuh @@ -15,6 +15,8 @@ #include #include +#define BLOCK_SIZE 128 + class CudaBase { private: @@ -50,6 +52,11 @@ public: */ int cuda_deleteCurandStates(); + /** Create 'size' random numbers on the device and save in mem_ptr array + * + */ + int cuda_createRandomNumbers(void *mem_ptr, int size); + /** Get a pointer to curand states * */ diff --git a/src/DKSBase.cpp b/src/DKSBase.cpp index 4de3700..c66a003 100644 --- a/src/DKSBase.cpp +++ b/src/DKSBase.cpp @@ -878,3 +878,12 @@ int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr, return DKS_ERROR; } + +int DKSBase::callCreateRandomNumbers(void *mem_ptr, int size) { + if (apiCuda()) + return CUDA_SAFECALL(cbase->cuda_createRandomNumbers(mem_ptr, size)); + if (apiOpenCL()) + return OPENCL_SAFECALL(oclbase->ocl_createRandomNumbers(mem_ptr, size)); + + return DKS_ERROR; +} diff --git a/src/DKSBase.h b/src/DKSBase.h index 5fa09e2..6c13e50 100644 --- a/src/DKSBase.h +++ b/src/DKSBase.h @@ -1090,6 +1090,11 @@ public: double dt, double c, bool usedt = false, int streamId = -1); + /** + * Create random numbers on the device and fille mem_data array + */ + int callCreateRandomNumbers(void *mem_ptr, int size); + /** * Print memory information on device (total, used, available) * TODO: opencl and mic imlementation diff --git a/src/OpenCL/OpenCLBase.cpp b/src/OpenCL/OpenCLBase.cpp index e3d3898..4dad528 100644 --- a/src/OpenCL/OpenCLBase.cpp +++ b/src/OpenCL/OpenCLBase.cpp @@ -7,21 +7,13 @@ cl_device_id OpenCLBase::m_device_id = NULL; cl_event OpenCLBase::m_last_event = NULL; OpenCLBase::OpenCLBase() { - //m_context = NULL; - //m_command_queue = NULL; m_program = NULL; m_kernel = NULL; - //m_device_id = NULL; - //m_platform_id = NULL; m_kernel_file = NULL; m_last_event = NULL; - //m_events = new cl_event[500]; - //m_num_events = 0; - defaultRndSet = 0; - } OpenCLBase::~OpenCLBase() { @@ -41,11 +33,11 @@ int OpenCLBase::ocl_createRndStates(int size) { strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl"); ocl_loadKernel(kernel_file); delete[] kernel_file; - + //allocate memory for rand states int ierr; defaultRndState = ocl_allocateMemory(sizeof(RNDState)*size, ierr); - + //exec kernel int seed = 0; ocl_createKernel("initRand"); @@ -55,13 +47,34 @@ int OpenCLBase::ocl_createRndStates(int size) { size_t work_items = size; size_t work_group_size = 1; - ocl_executeKernel(1, &work_items, &work_group_size); - defaultRndSet = 1; + + return DKS_SUCCESS; +} - return OCL_SUCCESS; +int OpenCLBase::ocl_createRandomNumbers(void *mem_ptr, int size) { + //load kernel + char * kernel_file = new char[500]; + kernel_file[0] = '\0'; + strcat(kernel_file, OPENCL_KERNELS); + strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl"); + ocl_loadKernel(kernel_file); + delete[] kernel_file; + //set kernel variables + cl_mem tmp_data = (cl_mem) mem_ptr; + + ocl_createKernel("createRandoms"); + ocl_setKernelArg(0, sizeof(cl_mem), &defaultRndState); + ocl_setKernelArg(1, sizeof(cl_mem), &tmp_data); + ocl_setKernelArg(2, sizeof(int), &size); + + size_t work_size = 128; + size_t work_items = (size % work_size + 1) * work_size; + ocl_executeKernel(1, &work_items, &work_size); + + return DKS_SUCCESS; } /* destroy rnd states */ @@ -70,7 +83,7 @@ int OpenCLBase::ocl_deleteRndStates() { ocl_freeMemory(defaultRndState); defaultRndSet = 0; - return OCL_SUCCESS; + return DKS_SUCCESS; } diff --git a/src/OpenCL/OpenCLBase.h b/src/OpenCL/OpenCLBase.h index af74dea..e8b9d16 100644 --- a/src/OpenCL/OpenCLBase.h +++ b/src/OpenCL/OpenCLBase.h @@ -30,11 +30,20 @@ #include #endif -#include "clRNG/clRNG.h" -#include "clRNG/mrg31k3p.h" - #include "../DKSDefinitions.h" +/* struct for random number state */ +typedef struct { + double s10; + double s11; + double s12; + double s20; + double s21; + double s22; + double z; + bool gen; +} RNDState; + class OpenCLBase { private: @@ -122,6 +131,11 @@ public: */ int ocl_createRndStates(int size); + /* Create an array of random numbers on the device + * + */ + int ocl_createRandomNumbers(void *mem_ptr, int size); + /* Destroy rnd states Return: success or error code diff --git a/src/OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl b/src/OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl index 34b08bd..de5a5ee 100644 --- a/src/OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl +++ b/src/OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl @@ -1,6 +1,4 @@ #pragma OPENCL EXTENSION cl_khr_fp64 : enable -#pragma OPENCL EXTENSION - /******Random numbers********/ @@ -89,13 +87,14 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) { if (id < N) { RNDState tmp; - int tmp_seed = id;// * 0x100000000ULL; + int tmp_seed = 2*id;// * 0x100000000ULL; tmp.s10 = 12345 + tmp_seed; tmp.s11 = 12345 + tmp_seed; - tmp.s12 = 123 + tmp_seed; + tmp.s12 = 12345 + tmp_seed; tmp.s20 = 12345 + tmp_seed; tmp.s21 = 12345 + tmp_seed; - tmp.s22 = 123 + tmp_seed; + tmp.s22 = 12345 + tmp_seed; + tmp.z = 0; tmp.gen = true; @@ -105,6 +104,19 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) { } +/* create random numbers and fill an array */ +__kernel void createRandoms(__global RNDState *states, __global double *data, int size) { + + int idx = get_global_id(0); + + if (idx < size) { + RNDState s = states[idx]; + data[idx] = rand_uniform(&s); + states[idx] = s; + } + +} + /**********Degrader**********/ enum PARAMS { POSITION, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index db9facd..47b1b69 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -22,6 +22,7 @@ ADD_EXECUTABLE(testFFT3DRC testFFT3DRC.cpp) #ADD_EXECUTABLE(testGather testGather.cpp) #ADD_EXECUTABLE(testGatherAsync testGatherAsync.cpp) #ADD_EXECUTABLE(testTranspose testTranspose.cpp) +ADD_EXECUTABLE(testRandom testRandom.cpp) ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp) ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp) #ADD_EXECUTABLE(testPush testPush.cpp) @@ -53,6 +54,7 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) #TARGET_LINK_LIBRARIES(testGather dks) #TARGET_LINK_LIBRARIES(testGatherAsync dks) #TARGET_LINK_LIBRARIES(testTranspose dks) +TARGET_LINK_LIBRARIES(testRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) #TARGET_LINK_LIBRARIES(testPush dks)