add function to generate list of random numbers with cuda and opencl on the device

This commit is contained in:
Uldis Locans
2016-12-09 13:43:09 +01:00
parent 3a74d6cdee
commit b5c5da29b2
9 changed files with 110 additions and 22 deletions

View File

@ -46,6 +46,16 @@ MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}")
INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
LINK_DIRECTORIES (${CLFFT_LIBRARIES}) LINK_DIRECTORIES (${CLFFT_LIBRARIES})
#find clRNG
#SET (clRNG_USE_STATIC_LIBS OFF)
#FIND_PACKAGE(clRng REQUIRED HINTS $ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG})
#MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}")
#MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}")
#INCLUDE_DIRECTORIES (${CLRNG_INCLUDE_DIRS})
#LINK_DIRECTORIES (${CLRNG_LIBRARIES})
#find_package(PkgConfig)
#pkg_check_modules(clRng REQUIRED)
#enable UQTK #enable UQTK
OPTION (USE_UQTK "Use UQTK" OFF) OPTION (USE_UQTK "Use UQTK" OFF)

View File

@ -13,6 +13,13 @@ __global__ void initcuRandState(curandState *state, int size, int seed = 0) {
} }
// Writes one random double per thread into data.
// Each thread computes its flat 1D global index, draws a value with
// curand_uniform_double() from the generator state stored at that index and
// stores the value at the same index of data. Threads whose index is >= size
// return without touching memory, so the grid may be larger than size.
__global__ void kernelCreateRandNumbers(curandState *state, double *data, int size) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid >= size)
    return;

  data[tid] = curand_uniform_double(state + tid);
}
//=====================================// //=====================================//
//==========Private functions==========// //==========Private functions==========//
@ -68,6 +75,15 @@ int CudaBase::cuda_deleteCurandStates() {
return DKS_SUCCESS; return DKS_SUCCESS;
} }
/** Fill a device array with 'size' random doubles.
 *
 * Launches kernelCreateRandNumbers with BLOCK_SIZE threads per block and just
 * enough blocks to cover 'size' elements. Element i is drawn from
 * defaultRndState[i].
 * NOTE(review): this assumes defaultRndState holds at least 'size' initialised
 * states - that cannot be verified here, confirm at the call site.
 *
 * mem_ptr - device pointer, interpreted as double*, receiving the numbers
 * size    - number of random numbers to generate
 *
 * Returns DKS_SUCCESS when the kernel was launched (or size is 0, in which
 * case nothing is launched), DKS_ERROR for a negative size or when the launch
 * itself is rejected. The launch is asynchronous: errors raised while the
 * kernel executes are not detected here.
 */
int CudaBase::cuda_createRandomNumbers(void *mem_ptr, int size) {

  if (size < 0)
    return DKS_ERROR;

  //nothing to generate, and a grid with zero blocks is an invalid launch
  if (size == 0)
    return DKS_SUCCESS;

  int threads = BLOCK_SIZE;
  //ceiling division written so that it cannot overflow for large sizes
  int blocks = (size - 1) / threads + 1;

  kernelCreateRandNumbers<<<blocks, threads>>>(defaultRndState, (double *)mem_ptr, size);

  //kernel launches do not return a status, query the launch error explicitly
  if (cudaGetLastError() != cudaSuccess)
    return DKS_ERROR;

  return DKS_SUCCESS;
}
curandState* CudaBase::cuda_getCurandStates() { curandState* CudaBase::cuda_getCurandStates() {
return defaultRndState; return defaultRndState;
} }

View File

@ -15,6 +15,8 @@
#include <nvToolsExt.h> #include <nvToolsExt.h>
#include <time.h> #include <time.h>
#define BLOCK_SIZE 128
class CudaBase { class CudaBase {
private: private:
@ -50,6 +52,11 @@ public:
*/ */
int cuda_deleteCurandStates(); int cuda_deleteCurandStates();
/** Create 'size' random numbers on the device and save in mem_ptr array
*
*/
int cuda_createRandomNumbers(void *mem_ptr, int size);
/** Get a pointer to curand states /** Get a pointer to curand states
* *
*/ */

View File

@ -878,3 +878,12 @@ int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
return DKS_ERROR; return DKS_ERROR;
} }
/* Dispatch random number generation to the active device backend.
 * The CUDA backend is tried first, then OpenCL; when neither API is
 * selected DKS_ERROR is returned. Otherwise the result of the backend
 * call (as wrapped by the corresponding SAFECALL macro) is returned.
 */
int DKSBase::callCreateRandomNumbers(void *mem_ptr, int size) {
  int ierr = DKS_ERROR;

  if (apiCuda()) {
    ierr = CUDA_SAFECALL(cbase->cuda_createRandomNumbers(mem_ptr, size));
  } else if (apiOpenCL()) {
    ierr = OPENCL_SAFECALL(oclbase->ocl_createRandomNumbers(mem_ptr, size));
  }

  return ierr;
}

View File

@ -1090,6 +1090,11 @@ public:
double dt, double c, bool usedt = false, double dt, double c, bool usedt = false,
int streamId = -1); int streamId = -1);
/**
* Create 'size' random numbers on the device and fill the mem_ptr array
*/
int callCreateRandomNumbers(void *mem_ptr, int size);
/** /**
* Print memory information on device (total, used, available) * Print memory information on device (total, used, available)
* TODO: opencl and mic implementation * TODO: opencl and mic implementation

View File

@ -7,21 +7,13 @@ cl_device_id OpenCLBase::m_device_id = NULL;
cl_event OpenCLBase::m_last_event = NULL; cl_event OpenCLBase::m_last_event = NULL;
OpenCLBase::OpenCLBase() { OpenCLBase::OpenCLBase() {
//m_context = NULL;
//m_command_queue = NULL;
m_program = NULL; m_program = NULL;
m_kernel = NULL; m_kernel = NULL;
//m_device_id = NULL;
//m_platform_id = NULL;
m_kernel_file = NULL; m_kernel_file = NULL;
m_last_event = NULL; m_last_event = NULL;
//m_events = new cl_event[500];
//m_num_events = 0;
defaultRndSet = 0; defaultRndSet = 0;
} }
OpenCLBase::~OpenCLBase() { OpenCLBase::~OpenCLBase() {
@ -41,11 +33,11 @@ int OpenCLBase::ocl_createRndStates(int size) {
strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl"); strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl");
ocl_loadKernel(kernel_file); ocl_loadKernel(kernel_file);
delete[] kernel_file; delete[] kernel_file;
//allocate memory for rand states //allocate memory for rand states
int ierr; int ierr;
defaultRndState = ocl_allocateMemory(sizeof(RNDState)*size, ierr); defaultRndState = ocl_allocateMemory(sizeof(RNDState)*size, ierr);
//exec kernel //exec kernel
int seed = 0; int seed = 0;
ocl_createKernel("initRand"); ocl_createKernel("initRand");
@ -55,13 +47,34 @@ int OpenCLBase::ocl_createRndStates(int size) {
size_t work_items = size; size_t work_items = size;
size_t work_group_size = 1; size_t work_group_size = 1;
ocl_executeKernel(1, &work_items, &work_group_size); ocl_executeKernel(1, &work_items, &work_group_size);
defaultRndSet = 1; defaultRndSet = 1;
return DKS_SUCCESS;
}
return OCL_SUCCESS; int OpenCLBase::ocl_createRandomNumbers(void *mem_ptr, int size) {
//load kernel
char * kernel_file = new char[500];
kernel_file[0] = '\0';
strcat(kernel_file, OPENCL_KERNELS);
strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl");
ocl_loadKernel(kernel_file);
delete[] kernel_file;
//set kernel variables
cl_mem tmp_data = (cl_mem) mem_ptr;
ocl_createKernel("createRandoms");
ocl_setKernelArg(0, sizeof(cl_mem), &defaultRndState);
ocl_setKernelArg(1, sizeof(cl_mem), &tmp_data);
ocl_setKernelArg(2, sizeof(int), &size);
size_t work_size = 128;
size_t work_items = (size % work_size + 1) * work_size;
ocl_executeKernel(1, &work_items, &work_size);
return DKS_SUCCESS;
} }
/* destroy rnd states */ /* destroy rnd states */
@ -70,7 +83,7 @@ int OpenCLBase::ocl_deleteRndStates() {
ocl_freeMemory(defaultRndState); ocl_freeMemory(defaultRndState);
defaultRndSet = 0; defaultRndSet = 0;
return OCL_SUCCESS; return DKS_SUCCESS;
} }

View File

@ -30,11 +30,20 @@
#include <CL/cl_ext.h> #include <CL/cl_ext.h>
#endif #endif
#include "clRNG/clRNG.h"
#include "clRNG/mrg31k3p.h"
#include "../DKSDefinitions.h" #include "../DKSDefinitions.h"
/* Host-side description of one random number generator state.
 * The host code shown here only uses sizeof(RNDState) to size the device
 * buffer allocated in ocl_createRndStates; the fields are filled in on the
 * device by the initRand kernel.
 * NOTE(review): the OpenCL kernels use their own RNDState definition - the two
 * layouts (including the size and padding of the trailing bool) presumably
 * have to match; confirm against OpenCLCollimatorPhysics.cl.
 */
typedef struct {
  /* six generator state values, all seeded by initRand */
  double s10;
  double s11;
  double s12;
  double s20;
  double s21;
  double s22;
  /* set to 0 by initRand; presumably a cached generated value - TODO confirm */
  double z;
  /* set to true by initRand; presumably tells the generator whether a new
     value has to be produced - TODO confirm */
  bool gen;
} RNDState;
class OpenCLBase { class OpenCLBase {
private: private:
@ -122,6 +131,11 @@ public:
*/ */
int ocl_createRndStates(int size); int ocl_createRndStates(int size);
/* Create an array of random numbers on the device
*
*/
int ocl_createRandomNumbers(void *mem_ptr, int size);
/* /*
Destroy rnd states Destroy rnd states
Return: success or error code Return: success or error code

View File

@ -1,6 +1,4 @@
#pragma OPENCL EXTENSION cl_khr_fp64 : enable #pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION
/******Random numbers********/ /******Random numbers********/
@ -89,13 +87,14 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) {
if (id < N) { if (id < N) {
RNDState tmp; RNDState tmp;
int tmp_seed = id;// * 0x100000000ULL; int tmp_seed = 2*id;// * 0x100000000ULL;
tmp.s10 = 12345 + tmp_seed; tmp.s10 = 12345 + tmp_seed;
tmp.s11 = 12345 + tmp_seed; tmp.s11 = 12345 + tmp_seed;
tmp.s12 = 123 + tmp_seed; tmp.s12 = 12345 + tmp_seed;
tmp.s20 = 12345 + tmp_seed; tmp.s20 = 12345 + tmp_seed;
tmp.s21 = 12345 + tmp_seed; tmp.s21 = 12345 + tmp_seed;
tmp.s22 = 123 + tmp_seed; tmp.s22 = 12345 + tmp_seed;
tmp.z = 0; tmp.z = 0;
tmp.gen = true; tmp.gen = true;
@ -105,6 +104,19 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) {
} }
/* Fill data with random numbers, one per work item.
 * Work item i copies states[i] into a private variable, draws one value
 * from it with rand_uniform, stores the value in data[i] and writes the
 * advanced state back to states[i]. Work items with a global id >= size
 * do nothing, so the global work size may exceed size.
 */
__kernel void createRandoms(__global RNDState *states, __global double *data, int size) {

  const int gid = get_global_id(0);
  if (gid >= size)
    return;

  RNDState rng = states[gid];
  data[gid] = rand_uniform(&rng);
  states[gid] = rng;

}
/**********Degrader**********/ /**********Degrader**********/
enum PARAMS { POSITION, enum PARAMS { POSITION,

View File

@ -22,6 +22,7 @@ ADD_EXECUTABLE(testFFT3DRC testFFT3DRC.cpp)
#ADD_EXECUTABLE(testGather testGather.cpp) #ADD_EXECUTABLE(testGather testGather.cpp)
#ADD_EXECUTABLE(testGatherAsync testGatherAsync.cpp) #ADD_EXECUTABLE(testGatherAsync testGatherAsync.cpp)
#ADD_EXECUTABLE(testTranspose testTranspose.cpp) #ADD_EXECUTABLE(testTranspose testTranspose.cpp)
ADD_EXECUTABLE(testRandom testRandom.cpp)
ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp) ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp) ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp)
#ADD_EXECUTABLE(testPush testPush.cpp) #ADD_EXECUTABLE(testPush testPush.cpp)
@ -53,6 +54,7 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testGather dks) #TARGET_LINK_LIBRARIES(testGather dks)
#TARGET_LINK_LIBRARIES(testGatherAsync dks) #TARGET_LINK_LIBRARIES(testGatherAsync dks)
#TARGET_LINK_LIBRARIES(testTranspose dks) #TARGET_LINK_LIBRARIES(testTranspose dks)
TARGET_LINK_LIBRARIES(testRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testPush dks) #TARGET_LINK_LIBRARIES(testPush dks)