updated documentation

This commit is contained in:
Uldis Locans
2017-08-10 14:57:48 +02:00
parent 7ca93a3a49
commit ccc4329bef
38 changed files with 939 additions and 673 deletions

View File

@ -1,16 +1,3 @@
/*
Name: OpenCLBase
Author: Uldis Locans
Info: OpenCL base class to handle all the common details associated
with kernel launch on OpenCL device
Date: 2014.09.18
*/
#ifndef H_OPENCL_BASE
#define H_OPENCL_BASE
@ -32,7 +19,7 @@
#include "../DKSDefinitions.h"
/* struct for random number state */
/** struct for random number state. */
typedef struct {
double s10;
double s11;
@ -44,168 +31,194 @@ typedef struct {
bool gen;
} RNDState;
/**
* OpenCL base class to handle device setup and basic communication with the device.
* Handles initialization of OpenCL device, memory management, data transfer and kernel launch.
* The OpenCL kernels are located in separate files in OpenCLKernels folder, the OpenCLBase
* class contains methods to read the kernel files, compile the kernel codes and launch kernels
* from the compiled codes. Which kernel file needs to be loaded for the specific function is
* handled by the base class that is launching the kernel.
*/
class OpenCLBase {
private:
//variables containing OpenCL device and platform ids
static cl_platform_id m_platform_id;
static cl_device_id m_device_id;
//variables containing compiled OpenCL program and kernel
cl_context_properties m_context_properties[3];
cl_program m_program;
cl_kernel m_kernel;
//variables for tracking OpenCL events
static cl_event m_last_event;
cl_int m_num_events;
std::vector<cl_event> m_events;
//currently loaded kernel file
char * m_kernel_file;
//type of device used by OpenCL
cl_device_type m_device_type;
/*
Name: getPlatforms
Info: get all avaialble platforms and save in m_platform_ids, save number of platforms
Return: success or error code
*/
/**
* Get all available OpenCL platforms.
* Get all available platforms and save in m_platform_ids, save number of platforms
* Return: success or error code
*/
int ocl_getPlatforms();
/*
Name: getDevice
Info: get first avaialble devices and save device id and platform id for this device, device name: (-gpu, -mic, -cpu)
ReturnL success or error code
*/
/**
* Get first available OpenCL device of specified type.
* Get first available devices and save device id and platform id for this device,
* device name: (-gpu, -mic, -cpu)
* Return: success or error code
*/
int ocl_getDevice(const char* device_name);
/*
Name getDeviceType
Info: get device type from device name (-gpu, -cpu, -mic)
Return: success or error code
*/
/**
* Get cl_device_type from the specified device name.
* get device type from device name (-gpu, -cpu, -mic)
* Return: success or error code
*/
int ocl_getDeviceType(const char* device_name, cl_device_type &device_type);
/*
Name: createContext
Info: create context with specified device
Return: success or error code
*/
/**
* Create OpenCL context with specified device.
* Return: success or error code
*/
int ocl_createContext();
/*
Name: buildProgram
Info: build program from specified kernel file
Return: success or error code
/**
* Build program from specified kernel file.
* Return: success or error code.
*/
int ocl_buildProgram(const char* kernel_file);
/** Compile program from kernel source string
*
/**
* Compile program from kernel source string.
* Takes a string read from OpenCL kernel file saved in kernel_source and compiles the
* OpenCL program, that can be then executed on the device.
* opts is a string specifying additional compiler flags.
*/
int ocl_compileProgram(const char* kernel_source, const char* opts = NULL);
protected:
//memory for random number states
int defaultRndSet;
cl_mem defaultRndState;
public:
//OpenCL context and command queue
static cl_context m_context;
static cl_command_queue m_command_queue;
static cl_command_queue m_command_queue;
/*
constructor
*/
/**
* constructor
*/
OpenCLBase();
/*
destructor
*/
/**
* destructor
*/
~OpenCLBase();
/*
Create RND states
Return: success or error code
*/
/**
* Allocate memory for size random number states and init the rnd states.
* Uses AMD clRng library for random numbers.
* This library is only compatible with AMD devices.
*/
int ocl_createRndStates(int size);
/* Create an array of random numbers on the device
*
/**
* Create an array of random numbers on the device.
* Fills the mem_ptr with random numbers.
*/
int ocl_createRandomNumbers(void *mem_ptr, int size);
/*
Destroy rnd states
Return: success or error code
*/
/**
* Destroy rnd states and free device memory.
* Return: success or error code
*/
int ocl_deleteRndStates();
/*
Name: getAllDevices
Info: get all available devices
ReturnL success or error code
/**
* Prints info about all the available platforms and devices.
* Can be used for information purposes to see what devices are available on the system.
* Return: success or error code.
*/
int ocl_getAllDevices();
/** Get the OpenCL device count for the set type of device
*
/**
* Get the OpenCL device count for the set type of device.
* Device count is set in ndev parameter, returns success or error code.
*/
int ocl_getDeviceCount(int &ndev);
/** Get the name of the device used
/**
* Get the name of the device currently in use.
*/
int ocl_getDeviceName(std::string &device_name);
/** Set the device to use for OpenCL kernels.
* device id to use is passed as integer.
/**
* Set the device to use for OpenCL kernels.
* Device id to use is passed as integer.
*/
int ocl_setDevice(int device);
/** Get a list of all the unique devices of the same type that can run OpenCL kernels
* Used when GPUs of different types might be pressent on the system.
/**
* Get a list of all the unique devices of the same type that can run OpenCL kernels.
* Used when GPUs of different types might be present on the system.
*/
int ocl_getUniqueDevices(std::vector<int> &devices);
/*
Name: setUp
Info: set up opencl resources
Return: success or error code
*/
/**
* Initialize OpenCL connection with a device of specified type.
* Find if specified device is available, creates a context and command queue.
* Returns success or error code.
*/
int ocl_setUp(const char* device_name);
/*
Name: loadKernel
Info: load and compile opencl kernel file if it has changed
Return: success or error code
/**
* Given an OpenCL kernel file name, loads the content and compiles the OpenCL code.
* Load and compile opencl kernel file if it has changed.
* Return: success or error code
*/
int ocl_loadKernel(const char* kernel_file);
/** Build program from kernel source.
/**
* Build program from kernel source.
* Builds a program from source code provided in kernel_source.
* If compilation fails will return DKS_ERROR
*/
int ocl_loadKernelFromSource(const char* kernel_source, const char* opts = NULL);
/*
Name: allocateMemory
Info: allocate memory on device
Return: return pointer to memory
/**
* Allocate memory on the device.
* Return: return pointer to memory
*/
cl_mem ocl_allocateMemory(size_t size, int &ierr);
/*
Name: allocateMemory
Info: allocate memory on device
Return: return pointer to memory
/**
* Allocate memory of specific type on device.
* The available types are cl_mem_flags type listed in OpenCL documentation:
* CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_USE_HOST_PTR,
* CL_MEM_ALLOC_HOST_PTR and CL_MEM_COPY_HOST_PTR.
* Return: return pointer to memory
*/
cl_mem ocl_allocateMemory(size_t size, int type, int &ierr);
/** Zero OpenCL memory buffer
* Set all the elemetns in the device array to zero
/**
* Zero OpenCL memory buffer.
* Set all the elements in the device array to zero.
*/
template <typename T>
int ocl_fillMemory(cl_mem mem_ptr, size_t size, T value, int offset = 0) {
@ -218,92 +231,90 @@ public:
return DKS_SUCCESS;
}
/*
Name: writeData
Info: write data to device memory (needs ptr to mem object)
Return: success or error code
*/
/**
* Write data to device memory (needs ptr to mem object)
* Return: success or error code
*/
int ocl_writeData(cl_mem mem_ptr, const void * in_data, size_t size, size_t offset = 0, int blocking = CL_TRUE);
/*
Name: copyData
Info: copy data from one buffer on the device to another
Return: success or error code
*/
/**
* Copy data from one buffer on the device to another
* Return: success or error code
*/
int ocl_copyData(cl_mem src_ptr, cl_mem dst_ptr, size_t size);
/*
Name: createKernel
Info: create kernel from program
Return: success or error code
*/
/**
* Create kernel from compiled OpenCL program.
* Return: success or error code
*/
int ocl_createKernel(const char* kernel_name);
/*
Name: setKernelArgs
Info: set opencl kernel arguments
Return: success or error code
*/
/**
* Set arguments for the kernel that will be launched.
* Return: success or error code
*/
int ocl_setKernelArg(int idx, size_t size, const void *arg_value);
/*
Name: executeKernel
Info: execute selected kernel (needs kernel parameters)
Return: success or error code
/**
* Execute selected kernel.
* Before kernel can be executed buildProgram must be executed, create kernel must be executed
* and kernel specified in execute kernel must be in compiled source, and the necessary
* kernel arguments must be set.
* Return: success or error code
*/
int ocl_executeKernel(cl_uint, const size_t *work_items, const size_t *work_grou_size = NULL);
/*
Name: readData
Info: read data from device (needs pointer to mem object)
Return: success or error code
*/
/**
* Read data from device (needs pointer to mem object).
* Return: success or error code
*/
int ocl_readData(cl_mem mem_ptr, void * out_data, size_t size, size_t offset = 0, int blocking = CL_TRUE);
/*
Name: freeMemory
Info: free device memory (needs ptr to mem object)
Return: success or error code
*/
/**
* Free device memory (needs ptr to mem object).
* Return: success or error code
*/
int ocl_freeMemory(cl_mem mem_ptr);
/*
Name: cleanUp
Info: free opencl resources
Return: success or error code
*/
/**
* Free opencl resources.
* Deletes the kernel, compiled program, command queue and closes the connection
* to device by releasing the context.
* Return: success or error code
*/
int ocl_cleanUp();
/*
Name: deviceInfo
Info: print device info (mostly for debugging purposes)
Return: success or error code
*/
/**
* Print info of currently selected device.
* Mostly for debugging purposes, but in verbose mode can be used to see device properties.
* Return: success or error code
*/
int ocl_deviceInfo(bool verbose = true);
/* Check OpenCL kernel.
* Query device and check if it can run the kernel with required parameters
/**
* Check OpenCL kernel.
* Query device and check if it can run the kernel with required parameters.
* Also check the available OpenCL extensions - useful for checking the supported device
* features, like double precision.
*/
int ocl_checkKernel(const char* kernel_name, int work_group_size,
bool double_precision, int &threadsPerBlock);
/*
Name: clearEvents
Info: clear saved events (for debuging purposes)
Return: nothing
*/
/**
* Clear the event list.
* Events can be used for timing and synchronization purposes.
*/
void ocl_clearEvents();
/*
Name: eventInfo
Info: print information about kernel timings (for debuging purposes)
Return: nothing
*/
/**
* Print information about kernel timings from event list.
* For debugging purposes.
*/
void ocl_eventInfo();
/*
Return current command queue
*/
/**
* Return current command queue.
*/
cl_command_queue ocl_getQueue() {
  // hand back the static command queue handle held by the class
  return m_command_queue;
}
};

View File

@ -14,7 +14,7 @@
#define DKS_SUCCESS 0
#define DKS_ERROR 1
/** Deprecated, SimpleFit implementation of ChiSquare. */
class OpenCLChiSquare {
private:

View File

@ -226,6 +226,7 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
}
int OpenCLChiSquareRuntime::writeParams(const double *params, int numparams) {
  // size in bytes of the host parameter array that is sent to the device
  const size_t nbytes = sizeof(double) * numparams;
  // copy the parameters into mem_param_m and pass the status code straight back
  return m_oclbase->ocl_writeData((cl_mem)mem_param_m, params, nbytes);
}
@ -235,6 +236,7 @@ int OpenCLChiSquareRuntime::writeFunc(const double *func, int numfunc) {
if (numfunc == 0)
return DKS_SUCCESS;
//write function values to the GPU
int ierr = m_oclbase->ocl_writeData( (cl_mem)mem_func_m, func, sizeof(double)*numfunc);
return ierr;
}
@ -243,6 +245,7 @@ int OpenCLChiSquareRuntime::writeMap(const int *map, int nummap) {
if (nummap == 0)
return DKS_SUCCESS;
//write map values to the GPU
int ierr = m_oclbase->ocl_writeData( (cl_mem)mem_map_m, map, sizeof(int)*nummap);
return ierr;
}
@ -257,7 +260,7 @@ int OpenCLChiSquareRuntime::initChiSquare(int size_data, int size_param,
freeChiSquare();
}
//allocate temporary memory
//allocate temporary memory, memory is allocated for the data set, parameters, functions and maps
mem_chisq_m = m_oclbase->ocl_allocateMemory(size_data*sizeof(double), ierr);
mem_param_m = m_oclbase->ocl_allocateMemory(size_param*sizeof(double), ierr);
if (size_func == 0)
@ -277,7 +280,7 @@ int OpenCLChiSquareRuntime::freeChiSquare() {
int ierr = DKS_ERROR;
if (initDone_m) {
//free memory
//free GPU memory
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_chisq_m);
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_param_m);
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_func_m);
@ -308,6 +311,7 @@ int OpenCLChiSquareRuntime::checkChiSquareKernels(int fitType, int &threadsPerBl
return DKS_ERROR;
}
//check the GPU kernel
ierr = m_oclbase->ocl_checkKernel(kernel, 128, true, threadsPerBlock);
return ierr;

View File

@ -17,44 +17,54 @@ const std::string openclFunctHeader = "double fTheory(double t, __local double *
const std::string openclFunctFooter = "}\n";
/**
* OpenCL implementation of ChiSquareRuntime class.
* Implements ChiSquareRuntime interface to allow musrfit to target devices that
* support OpenCL - Nvidia and AMD GPUs, Intel and AMD CPUs, Intel Xeon Phi.
*/
class OpenCLChiSquareRuntime : public ChiSquareRuntime {
private:
OpenCLBase *m_oclbase;
/** Private function to add user defined function to kernel string
*
/**
* Private function to add user defined function to kernel string.
*/
std::string buildProgram(std::string function);
/**
* Launch parallel reduction kernel to calculate the sum of data array
*/
double calculateSum(cl_mem data, int length);
public:
/** Constructor wiht openclbase argument
*
/**
* Constructor with openclbase argument.
*/
OpenCLChiSquareRuntime(OpenCLBase *base);
/** Default constructor
*
/**
* Default constructor
*/
OpenCLChiSquareRuntime();
/** Default destructor
*
/**
* Default destructor
*/
~OpenCLChiSquareRuntime();
/** Compile program and save ptx.
/**
* Compile program and save ptx.
* Add function string to the calcFunction kernel and compile the program
* Function must be valid C math expression. Parameters can be addressed in
* a form par[map[idx]]
*/
int compileProgram(std::string function, bool mlh = false);
/** Launch selected kernel
/**
* Launch selected kernel.
* Launches the selected kernel from the compiled code.
* Result is put in &result variable
*/
@ -64,22 +74,26 @@ public:
double timeStart, double timeStep,
double &result);
/** Write params to device.
/**
* Write params to device.
* Write params from double array to mem_param_m memory on the device.
*/
int writeParams(const double *params, int numparams);
/** Write functions to device.
/**
* Write functions to device.
* Write function values from double array to mem_func_m memory on the device.
*/
int writeFunc(const double *func, int numfunc);
/** Write maps to device.
/**
* Write maps to device.
* Write map values from int array to mem_map_m memory on the device.
*/
int writeMap(const int *map, int nummap);
/** Allocate temporary memory needed for chi square.
/**
* Allocate temporary memory needed for chi square.
* Initializes the necessary temporary memory for the chi square calculations. Size_data needs to be
* the maximum number of elements in any datasets that will be used for calculations. Size_param,
* size_func and size_map are the maximum number of parameters, functions and maps used in
@ -87,14 +101,16 @@ public:
*/
int initChiSquare(int size_data, int size_param, int size_func, int size_map);
/** Free temporary memory allocated for chi square.
/**
* Free temporary memory allocated for chi square.
* Frees the chisq temporary memory and memory for params, functions and maps
*/
int freeChiSquare();
/** Check MuSR kernels for necessary resources.
/**
* Check MuSR kernels for necessary resources.
* Query device properties to get if sufficient resources are
* available to run the kernels
* available to run the kernels. Also checks if double precision is enabled on the device.
*/
int checkChiSquareKernels(int fitType, int &threadsPerBlock);

View File

@ -17,12 +17,16 @@
#include "boost/compute/core.hpp"
*/
/** Double3 structure for use in OpenCL code. */
struct Double3 {
  double x;  // first component
  double y;  // second component
  double z;  // third component
};
/**
* Structure for storing particles in OpenCL code.
*/
typedef struct {
int label;
unsigned localID;
@ -35,6 +39,10 @@ typedef struct {
//BOOST_COMPUTE_ADAPT_STRUCT(Double3, Double3, (x, y, z));
//BOOST_COMPUTE_ADAPT_STRUCT(PART_OPENCL, PART_OPENCL, (label, localID, Rincol, Pincol));
/**
* OpenCLCollimatorPhysics class based on DKSCollimatorPhysics interface.
* Implements CollimatorPhysics for OPAL using OpenCL for execution on AMD GPUs.
*/
class OpenCLCollimatorPhysics : public DKSCollimatorPhysics {
private:
@ -42,16 +50,20 @@ private:
public:
/* constructor */
/**
* Constructor with OpenCLBase as argument.
* Create a new instance of the OpenCLCollimatorPhysics using existing OpenCLBase object.
*/
OpenCLCollimatorPhysics(OpenCLBase *base) {
// store the caller-supplied OpenCLBase pointer; only the pointer is kept, no copy is made here
m_oclbase = base;
}
/* destructor */
/**
* Destructor.
*/
~OpenCLCollimatorPhysics() {
// empty body: nothing is released here, so the stored m_oclbase pointer is left untouched
}
/* execute degrader code on device */
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
bool enableRutherforScattering = true);

View File

@ -1,14 +1,3 @@
/*
Name: OpenCLFFT
Author: Uldis Locans
Info:Extend OpenCLBase class to implement fft and ifft functions using OpenCL
Data: 19.09.2014
*/
#ifndef H_OPENCL_FFT
#define H_OPENCL_FFT
@ -22,6 +11,12 @@
#include "clFFT.h"
/**
* OpenCL FFT class based on BaseFFT interface.
* Uses clFFT library to perform FFTs on AMD gpus.
* clFFT library works also on Nvidia GPUs and other devices that
* support OpenCL.
*/
class OpenCLFFT : public BaseFFT {
private:

View File

@ -7,6 +7,7 @@
#include "../Algorithms/GreensFunction.h"
#include "OpenCLBase.h"
/** OpenCL implementation of GreensFunction calculation for OPALs Poisson Solver. */
class OpenCLGreensFunction : public GreensFunction {
private:
@ -31,7 +32,7 @@ public:
int buildProgram();
/**
Info: calc itegral on device memory (taken from OPAL src code)
Info: calc integral on device memory (taken from OPAL src code).
Return: success or error code
*/
int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
@ -39,20 +40,20 @@ public:
int streamId = -1);
/**
Info: integration of rho2_m field (taken from OPAL src code)
Info: integration of rho2_m field (taken from OPAL src code).
Return: success or error code
*/
int integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1);
/**
Info: mirror rho field (taken from OPAL src code)
Info: mirror rho field (taken from OPAL src code).
Return: success or error code
*/
int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1);
/**
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
Info: multiply complex fields already on the GPU memory, result will be put in ptr1.
Return: success or error code
*/
int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);