add function to generate list of random numbers with cuda and opencl on the device
This commit is contained in:
@ -46,6 +46,16 @@ MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}")
|
||||
INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
|
||||
LINK_DIRECTORIES (${CLFFT_LIBRARIES})
|
||||
|
||||
#find clRNG
|
||||
#SET (clRNG_USE_STATIC_LIBS OFF)
|
||||
#FIND_PACKAGE(clRng REQUIRED HINTS &ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG})
|
||||
#MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}")
|
||||
#MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}")
|
||||
#INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
|
||||
#LINK_DIRECTORIES (${CLRNG_LIBRARIES})
|
||||
#find_package(PkgConfig)
|
||||
#pkg_check_modules(clRng REQUIRED)
|
||||
|
||||
#enable UQTK
|
||||
OPTION (USE_UQTK "Use UQTK" OFF)
|
||||
|
||||
|
@ -13,6 +13,13 @@ __global__ void initcuRandState(curandState *state, int size, int seed = 0) {
|
||||
|
||||
}
|
||||
|
||||
__global__ void kernelCreateRandNumbers(curandState *state, double *data, int size) {
|
||||
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < size)
|
||||
data[idx] = curand_uniform_double(&state[idx]);
|
||||
}
|
||||
|
||||
|
||||
//=====================================//
|
||||
//==========Private functions==========//
|
||||
@ -68,6 +75,15 @@ int CudaBase::cuda_deleteCurandStates() {
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
int CudaBase::cuda_createRandomNumbers(void *mem_ptr, int size) {
|
||||
int threads = BLOCK_SIZE;
|
||||
int blocks = size / threads + 1;
|
||||
|
||||
kernelCreateRandNumbers<<<blocks, threads>>>(defaultRndState, (double *)mem_ptr, size);
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
curandState* CudaBase::cuda_getCurandStates() {
|
||||
return defaultRndState;
|
||||
}
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include <nvToolsExt.h>
|
||||
#include <time.h>
|
||||
|
||||
#define BLOCK_SIZE 128
|
||||
|
||||
class CudaBase {
|
||||
|
||||
private:
|
||||
@ -50,6 +52,11 @@ public:
|
||||
*/
|
||||
int cuda_deleteCurandStates();
|
||||
|
||||
/** Create 'size' random numbers on the device and save in mem_ptr array
|
||||
*
|
||||
*/
|
||||
int cuda_createRandomNumbers(void *mem_ptr, int size);
|
||||
|
||||
/** Get a pointer to curand states
|
||||
*
|
||||
*/
|
||||
|
@ -878,3 +878,12 @@ int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callCreateRandomNumbers(void *mem_ptr, int size) {
|
||||
if (apiCuda())
|
||||
return CUDA_SAFECALL(cbase->cuda_createRandomNumbers(mem_ptr, size));
|
||||
if (apiOpenCL())
|
||||
return OPENCL_SAFECALL(oclbase->ocl_createRandomNumbers(mem_ptr, size));
|
||||
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
@ -1090,6 +1090,11 @@ public:
|
||||
double dt, double c, bool usedt = false,
|
||||
int streamId = -1);
|
||||
|
||||
/**
|
||||
* Create random numbers on the device and fille mem_data array
|
||||
*/
|
||||
int callCreateRandomNumbers(void *mem_ptr, int size);
|
||||
|
||||
/**
|
||||
* Print memory information on device (total, used, available)
|
||||
* TODO: opencl and mic imlementation
|
||||
|
@ -7,21 +7,13 @@ cl_device_id OpenCLBase::m_device_id = NULL;
|
||||
cl_event OpenCLBase::m_last_event = NULL;
|
||||
|
||||
OpenCLBase::OpenCLBase() {
|
||||
//m_context = NULL;
|
||||
//m_command_queue = NULL;
|
||||
m_program = NULL;
|
||||
m_kernel = NULL;
|
||||
//m_device_id = NULL;
|
||||
//m_platform_id = NULL;
|
||||
m_kernel_file = NULL;
|
||||
|
||||
m_last_event = NULL;
|
||||
|
||||
//m_events = new cl_event[500];
|
||||
//m_num_events = 0;
|
||||
|
||||
defaultRndSet = 0;
|
||||
|
||||
}
|
||||
|
||||
OpenCLBase::~OpenCLBase() {
|
||||
@ -41,11 +33,11 @@ int OpenCLBase::ocl_createRndStates(int size) {
|
||||
strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl");
|
||||
ocl_loadKernel(kernel_file);
|
||||
delete[] kernel_file;
|
||||
|
||||
|
||||
//allocate memory for rand states
|
||||
int ierr;
|
||||
defaultRndState = ocl_allocateMemory(sizeof(RNDState)*size, ierr);
|
||||
|
||||
|
||||
//exec kernel
|
||||
int seed = 0;
|
||||
ocl_createKernel("initRand");
|
||||
@ -55,13 +47,34 @@ int OpenCLBase::ocl_createRndStates(int size) {
|
||||
|
||||
size_t work_items = size;
|
||||
size_t work_group_size = 1;
|
||||
|
||||
ocl_executeKernel(1, &work_items, &work_group_size);
|
||||
|
||||
defaultRndSet = 1;
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
return OCL_SUCCESS;
|
||||
int OpenCLBase::ocl_createRandomNumbers(void *mem_ptr, int size) {
|
||||
//load kernel
|
||||
char * kernel_file = new char[500];
|
||||
kernel_file[0] = '\0';
|
||||
strcat(kernel_file, OPENCL_KERNELS);
|
||||
strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl");
|
||||
ocl_loadKernel(kernel_file);
|
||||
delete[] kernel_file;
|
||||
|
||||
//set kernel variables
|
||||
cl_mem tmp_data = (cl_mem) mem_ptr;
|
||||
|
||||
ocl_createKernel("createRandoms");
|
||||
ocl_setKernelArg(0, sizeof(cl_mem), &defaultRndState);
|
||||
ocl_setKernelArg(1, sizeof(cl_mem), &tmp_data);
|
||||
ocl_setKernelArg(2, sizeof(int), &size);
|
||||
|
||||
size_t work_size = 128;
|
||||
size_t work_items = (size % work_size + 1) * work_size;
|
||||
ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
/* destroy rnd states */
|
||||
@ -70,7 +83,7 @@ int OpenCLBase::ocl_deleteRndStates() {
|
||||
ocl_freeMemory(defaultRndState);
|
||||
defaultRndSet = 0;
|
||||
|
||||
return OCL_SUCCESS;
|
||||
return DKS_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
|
@ -30,11 +30,20 @@
|
||||
#include <CL/cl_ext.h>
|
||||
#endif
|
||||
|
||||
#include "clRNG/clRNG.h"
|
||||
#include "clRNG/mrg31k3p.h"
|
||||
|
||||
#include "../DKSDefinitions.h"
|
||||
|
||||
/* struct for random number state */
|
||||
typedef struct {
|
||||
double s10;
|
||||
double s11;
|
||||
double s12;
|
||||
double s20;
|
||||
double s21;
|
||||
double s22;
|
||||
double z;
|
||||
bool gen;
|
||||
} RNDState;
|
||||
|
||||
class OpenCLBase {
|
||||
|
||||
private:
|
||||
@ -122,6 +131,11 @@ public:
|
||||
*/
|
||||
int ocl_createRndStates(int size);
|
||||
|
||||
/* Create an array of random numbers on the device
|
||||
*
|
||||
*/
|
||||
int ocl_createRandomNumbers(void *mem_ptr, int size);
|
||||
|
||||
/*
|
||||
Destroy rnd states
|
||||
Return: success or error code
|
||||
|
@ -1,6 +1,4 @@
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#pragma OPENCL EXTENSION
|
||||
|
||||
|
||||
/******Random numbers********/
|
||||
|
||||
@ -89,13 +87,14 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) {
|
||||
|
||||
if (id < N) {
|
||||
RNDState tmp;
|
||||
int tmp_seed = id;// * 0x100000000ULL;
|
||||
int tmp_seed = 2*id;// * 0x100000000ULL;
|
||||
tmp.s10 = 12345 + tmp_seed;
|
||||
tmp.s11 = 12345 + tmp_seed;
|
||||
tmp.s12 = 123 + tmp_seed;
|
||||
tmp.s12 = 12345 + tmp_seed;
|
||||
tmp.s20 = 12345 + tmp_seed;
|
||||
tmp.s21 = 12345 + tmp_seed;
|
||||
tmp.s22 = 123 + tmp_seed;
|
||||
tmp.s22 = 12345 + tmp_seed;
|
||||
|
||||
|
||||
tmp.z = 0;
|
||||
tmp.gen = true;
|
||||
@ -105,6 +104,19 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) {
|
||||
|
||||
}
|
||||
|
||||
/* create random numbers and fill an array */
|
||||
__kernel void createRandoms(__global RNDState *states, __global double *data, int size) {
|
||||
|
||||
int idx = get_global_id(0);
|
||||
|
||||
if (idx < size) {
|
||||
RNDState s = states[idx];
|
||||
data[idx] = rand_uniform(&s);
|
||||
states[idx] = s;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**********Degrader**********/
|
||||
enum PARAMS { POSITION,
|
||||
|
@ -22,6 +22,7 @@ ADD_EXECUTABLE(testFFT3DRC testFFT3DRC.cpp)
|
||||
#ADD_EXECUTABLE(testGather testGather.cpp)
|
||||
#ADD_EXECUTABLE(testGatherAsync testGatherAsync.cpp)
|
||||
#ADD_EXECUTABLE(testTranspose testTranspose.cpp)
|
||||
ADD_EXECUTABLE(testRandom testRandom.cpp)
|
||||
ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
|
||||
ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp)
|
||||
#ADD_EXECUTABLE(testPush testPush.cpp)
|
||||
@ -53,6 +54,7 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testGather dks)
|
||||
#TARGET_LINK_LIBRARIES(testGatherAsync dks)
|
||||
#TARGET_LINK_LIBRARIES(testTranspose dks)
|
||||
TARGET_LINK_LIBRARIES(testRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testPush dks)
|
||||
|
Reference in New Issue
Block a user