updated documentation
This commit is contained in:
@ -1,16 +1,3 @@
|
||||
/*
|
||||
|
||||
Name: OpenCLBase
|
||||
|
||||
Author: Uldis Locans
|
||||
|
||||
Info: OpenCL base class to handle all the common details associated
|
||||
with kernel launch on OpenCL device
|
||||
|
||||
Date: 2014.09.18
|
||||
|
||||
*/
|
||||
|
||||
#ifndef H_OPENCL_BASE
|
||||
#define H_OPENCL_BASE
|
||||
|
||||
@ -32,7 +19,7 @@
|
||||
|
||||
#include "../DKSDefinitions.h"
|
||||
|
||||
/* struct for random number state */
|
||||
/** struct for random number state. */
|
||||
typedef struct {
|
||||
double s10;
|
||||
double s11;
|
||||
@ -44,168 +31,194 @@ typedef struct {
|
||||
bool gen;
|
||||
} RNDState;
|
||||
|
||||
/**
|
||||
* OpenCL base class to handle device setup and basic communication with the device.
|
||||
* Handles initialization of OpenCL device, memory management, data transfer and kernel launch.
|
||||
* The OpenCL kernels are located in separate files in OpenCLKernels folder, the OpenCLBase
|
||||
* class contains methods to read the kernel files, compile the kernel codes and launch kernels
|
||||
* from the compiled codes. Which kernel file needs to be loaded for the specific function is
|
||||
* handled by the base class that is launching the kernel.
|
||||
*/
|
||||
class OpenCLBase {
|
||||
|
||||
private:
|
||||
|
||||
|
||||
//variables containing OpenCL device and platform ids
|
||||
static cl_platform_id m_platform_id;
|
||||
static cl_device_id m_device_id;
|
||||
|
||||
//variables containing compiled OpenCL program and kernel
|
||||
cl_context_properties m_context_properties[3];
|
||||
cl_program m_program;
|
||||
cl_kernel m_kernel;
|
||||
|
||||
//variables for tracking OpenCL events
|
||||
static cl_event m_last_event;
|
||||
cl_int m_num_events;
|
||||
std::vector<cl_event> m_events;
|
||||
|
||||
//currently loaded kernel file
|
||||
char * m_kernel_file;
|
||||
|
||||
//type of device used by OpenCL
|
||||
cl_device_type m_device_type;
|
||||
|
||||
/*
|
||||
Name: getPlatforms
|
||||
Info: get all avaialble platforms and save in m_platform_ids, save number of platforms
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Get all available OpenCL platforms.
|
||||
* Get all available platforms and save in m_platform_ids, save number of platforms
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_getPlatforms();
|
||||
|
||||
|
||||
/*
|
||||
Name: getDevice
|
||||
Info: get first avaialble devices and save device id and platform id for this device, device name: (-gpu, -mic, -cpu)
|
||||
ReturnL success or error code
|
||||
*/
|
||||
/**
|
||||
* Get first available OpenCL device of specified type.
|
||||
* Get first available devices and save device id and platform id for this device,
|
||||
* device name: (-gpu, -mic, -cpu)
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_getDevice(const char* device_name);
|
||||
|
||||
/*
|
||||
Name getDeviceType
|
||||
Info: get device type from device name (-gpu, -cpu, -mic)
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Get cl_device_type from the specified device name.
|
||||
* get device type from device name (-gpu, -cpu, -mic)
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_getDeviceType(const char* device_name, cl_device_type &device_type);
|
||||
|
||||
/*
|
||||
Name: createContext
|
||||
Info: create context with specified device
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Create OpenCL context with specified device.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_createContext();
|
||||
|
||||
/*
|
||||
Name: buildProgram
|
||||
Info: build program from specified kernel file
|
||||
Return: success or error code
|
||||
/**
|
||||
* Build program from specified kernel file.
|
||||
* Return: success or error code.
|
||||
*/
|
||||
int ocl_buildProgram(const char* kernel_file);
|
||||
|
||||
/** Compile program from kernel source string
|
||||
*
|
||||
/**
|
||||
* Compile program from kernel source string.
|
||||
* Takes a string read from OpenCL kernel file saved in kernel_source and compiles the
|
||||
* OpenCL program, that can be then executed on the device.
|
||||
* opts is a string specifying additional compiler flags.
|
||||
*/
|
||||
int ocl_compileProgram(const char* kernel_source, const char* opts = NULL);
|
||||
|
||||
protected:
|
||||
|
||||
//memory for random number states
|
||||
int defaultRndSet;
|
||||
cl_mem defaultRndState;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
//OpenCL context and command queue
|
||||
static cl_context m_context;
|
||||
static cl_command_queue m_command_queue;
|
||||
static cl_command_queue m_command_queue;
|
||||
|
||||
/*
|
||||
constructor
|
||||
*/
|
||||
/**
|
||||
* constructor
|
||||
*/
|
||||
OpenCLBase();
|
||||
|
||||
/*
|
||||
destructor
|
||||
*/
|
||||
/**
|
||||
* destructor
|
||||
*/
|
||||
~OpenCLBase();
|
||||
|
||||
/*
|
||||
Create RND states
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Allocate memory for size random number states and init the rnd states.
|
||||
* Uses AMD clRng library for random numbers.
|
||||
* This library is only compatible with AMD devices.
|
||||
*/
|
||||
int ocl_createRndStates(int size);
|
||||
|
||||
/* Create an array of random numbers on the device
|
||||
*
|
||||
/**
|
||||
* Create an array of random numbers on the device.
|
||||
* Fills the mem_ptr with random numbers.
|
||||
*/
|
||||
int ocl_createRandomNumbers(void *mem_ptr, int size);
|
||||
|
||||
/*
|
||||
Destroy rnd states
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Destroy rnd states and free device memory.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_deleteRndStates();
|
||||
|
||||
|
||||
/*
|
||||
Name: getAllDevices
|
||||
Info: get all available devices
|
||||
ReturnL success or error code
|
||||
/**
|
||||
* Prints info about all the available platforms and devices.
|
||||
* Can be used for information purposes to see what devices are available on the system.
|
||||
* Return: success or error code.
|
||||
*/
|
||||
int ocl_getAllDevices();
|
||||
|
||||
/** Get the OpenCL device count for the set type of device
|
||||
*
|
||||
/**
|
||||
* Get the OpenCL device count for the set type of device.
|
||||
* Device count is set in ndev parameter, returns success or error code.
|
||||
*/
|
||||
int ocl_getDeviceCount(int &ndev);
|
||||
|
||||
/** Get the name of the device used
|
||||
/**
|
||||
* Get the name of the device currently in use.
|
||||
*/
|
||||
int ocl_getDeviceName(std::string &device_name);
|
||||
|
||||
/** Set the device to use for OpenCL kernels.
|
||||
* device id to use is passed as integer.
|
||||
/**
|
||||
* Set the device to use for OpenCL kernels.
|
||||
* Device id to use is passed as integer.
|
||||
*/
|
||||
int ocl_setDevice(int device);
|
||||
|
||||
/** Get a list of all the unique devices of the same type that can run OpenCL kernels
|
||||
* Used when GPUs of different types might be pressent on the system.
|
||||
/**
|
||||
* Get a list of all the unique devices of the same type that can run OpenCL kernels.
|
||||
* Used when GPUs of different types might be present on the system.
|
||||
*/
|
||||
int ocl_getUniqueDevices(std::vector<int> &devices);
|
||||
|
||||
/*
|
||||
Name: setUp
|
||||
Info: set up opencl resources
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Initialize OpenCL connection with a device of specified type.
|
||||
* Finds if the specified device is available, creates a context and command queue.
|
||||
* Returns success or error code.
|
||||
*/
|
||||
int ocl_setUp(const char* device_name);
|
||||
|
||||
/*
|
||||
Name: loadKernel
|
||||
Info: load and compile opencl kernel file if it has changed
|
||||
Return: success or error code
|
||||
/**
|
||||
* Given an OpenCL kernel file name, loads the content and compiles the OpenCL code.
|
||||
* Load and compile opencl kernel file if it has changed.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_loadKernel(const char* kernel_file);
|
||||
|
||||
|
||||
/** Build program from kernel source.
|
||||
/**
|
||||
* Build program from kernel source.
|
||||
* Builds a program from source code provided in kernel_source.
|
||||
* If compilation fails will return DKS_ERROR
|
||||
*/
|
||||
int ocl_loadKernelFromSource(const char* kernel_source, const char* opts = NULL);
|
||||
|
||||
/*
|
||||
Name: allocateMemory
|
||||
Info: allocate memory on device
|
||||
Return: return pointer to memory
|
||||
/**
|
||||
* Allocate memory on the device.
|
||||
* Return: return pointer to memory
|
||||
*/
|
||||
cl_mem ocl_allocateMemory(size_t size, int &ierr);
|
||||
|
||||
/*
|
||||
Name: allocateMemory
|
||||
Info: allocate memory on device
|
||||
Return: return pointer to memory
|
||||
/**
|
||||
* Allocate memory of specific type on device.
|
||||
* The available types are the cl_mem_flags values listed in the OpenCL documentation:
|
||||
* CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_USE_HOST_PTR,
|
||||
* CL_MEM_ALLOC_HOST_PTR and CL_MEM_COPY_HOST_PTR.
|
||||
* Return: return pointer to memory
|
||||
*/
|
||||
cl_mem ocl_allocateMemory(size_t size, int type, int &ierr);
|
||||
|
||||
/** Zero OpenCL memory buffer
|
||||
* Set all the elemetns in the device array to zero
|
||||
/**
|
||||
* Zero OpenCL memory buffer.
|
||||
* Set all the elements in the device array to zero.
|
||||
*/
|
||||
template <typename T>
|
||||
int ocl_fillMemory(cl_mem mem_ptr, size_t size, T value, int offset = 0) {
|
||||
@ -218,92 +231,90 @@ public:
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
Name: writeData
|
||||
Info: write data to device memory (needs ptr to mem object)
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Write data to device memory (needs ptr to mem object)
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_writeData(cl_mem mem_ptr, const void * in_data, size_t size, size_t offset = 0, int blocking = CL_TRUE);
|
||||
|
||||
/*
|
||||
Name: copyData
|
||||
Info: copy data from one buffer on the device to another
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Copy data from one buffer on the device to another
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_copyData(cl_mem src_ptr, cl_mem dst_ptr, size_t size);
|
||||
|
||||
/*
|
||||
Name: createKernel
|
||||
Info: create kernel from program
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Create kernel from compiled OpenCL program.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_createKernel(const char* kernel_name);
|
||||
|
||||
/*
|
||||
Name: setKernelArgs
|
||||
Info: set opencl kernel arguments
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Set arguments for the kernel that will be launched.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_setKernelArg(int idx, size_t size, const void *arg_value);
|
||||
|
||||
/*
|
||||
Name: executeKernel
|
||||
Info: execute selected kernel (needs kernel parameters)
|
||||
Return: success or error code
|
||||
/**
|
||||
* Execute selected kernel.
|
||||
* Before the kernel can be executed buildProgram must be executed, create kernel must be executed
|
||||
* and the kernel specified in execute kernel must be in compiled source, and the necessary
|
||||
* kernel arguments must be set.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_executeKernel(cl_uint, const size_t *work_items, const size_t *work_grou_size = NULL);
|
||||
|
||||
/*
|
||||
Name: readData
|
||||
Info: read data from device (needs pointer to mem object)
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Read data from device (needs pointer to mem object).
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_readData(cl_mem mem_ptr, void * out_data, size_t size, size_t offset = 0, int blocking = CL_TRUE);
|
||||
|
||||
/*
|
||||
Name: freeMemory
|
||||
Info: free device memory (needs ptr to mem object)
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Free device memory (needs ptr to mem object).
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_freeMemory(cl_mem mem_ptr);
|
||||
|
||||
/*
|
||||
Name: cleanUp
|
||||
Info: free opencl resources
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Free opencl resources.
|
||||
* Deletes the kernel, compiled program, command queue and closes the connection
|
||||
* to device by releasing the context.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_cleanUp();
|
||||
|
||||
/*
|
||||
Name: deviceInfo
|
||||
Info: print device info (mostly for debugging purposes)
|
||||
Return: success or error code
|
||||
*/
|
||||
/**
|
||||
* Print info of currently selected device.
|
||||
* Mostly for debugging purposes, but in verbose mode can be used to see device properties.
|
||||
* Return: success or error code
|
||||
*/
|
||||
int ocl_deviceInfo(bool verbose = true);
|
||||
|
||||
/* Check OpenCL kernel.
|
||||
* Query device and check if it can run the kernel with required parameters
|
||||
/**
|
||||
* Check OpenCL kernel.
|
||||
* Query device and check if it can run the kernel with required parameters.
|
||||
* Also check the available OpenCL extensions - useful for checking the supported device
|
||||
* features, like double precision.
|
||||
*/
|
||||
int ocl_checkKernel(const char* kernel_name, int work_group_size,
|
||||
bool double_precision, int &threadsPerBlock);
|
||||
|
||||
/*
|
||||
Name: clearEvents
|
||||
Info: clear saved events (for debuging purposes)
|
||||
Return: nothing
|
||||
*/
|
||||
/**
|
||||
* Clear the event list.
|
||||
* Events can be used for timing and synchronization purposes.
|
||||
*/
|
||||
void ocl_clearEvents();
|
||||
|
||||
/*
|
||||
Name: eventInfo
|
||||
Info: print information about kernel timings (for debuging purposes)
|
||||
Return: nothing
|
||||
*/
|
||||
/**
|
||||
* print information about kernel timings from event list.
|
||||
* for debugging purposes
|
||||
*/
|
||||
void ocl_eventInfo();
|
||||
|
||||
/*
|
||||
Return current command queue
|
||||
*/
|
||||
/**
|
||||
* Return current command queue.
|
||||
*/
|
||||
cl_command_queue ocl_getQueue() { return m_command_queue; }
|
||||
};
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
#define DKS_SUCCESS 0
|
||||
#define DKS_ERROR 1
|
||||
|
||||
|
||||
/** Deprecated, SimpleFit implementation of ChiSquare. */
|
||||
class OpenCLChiSquare {
|
||||
|
||||
private:
|
||||
|
@ -226,6 +226,7 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
|
||||
}
|
||||
|
||||
int OpenCLChiSquareRuntime::writeParams(const double *params, int numparams) {
|
||||
//write params to gpu
|
||||
int ierr = m_oclbase->ocl_writeData( (cl_mem)mem_param_m, params, sizeof(double)*numparams);
|
||||
return ierr;
|
||||
}
|
||||
@ -235,6 +236,7 @@ int OpenCLChiSquareRuntime::writeFunc(const double *func, int numfunc) {
|
||||
if (numfunc == 0)
|
||||
return DKS_SUCCESS;
|
||||
|
||||
//write function values to the GPU
|
||||
int ierr = m_oclbase->ocl_writeData( (cl_mem)mem_func_m, func, sizeof(double)*numfunc);
|
||||
return ierr;
|
||||
}
|
||||
@ -243,6 +245,7 @@ int OpenCLChiSquareRuntime::writeMap(const int *map, int nummap) {
|
||||
if (nummap == 0)
|
||||
return DKS_SUCCESS;
|
||||
|
||||
//write map values to the GPU
|
||||
int ierr = m_oclbase->ocl_writeData( (cl_mem)mem_map_m, map, sizeof(int)*nummap);
|
||||
return ierr;
|
||||
}
|
||||
@ -257,7 +260,7 @@ int OpenCLChiSquareRuntime::initChiSquare(int size_data, int size_param,
|
||||
freeChiSquare();
|
||||
}
|
||||
|
||||
//allocate temporary memory
|
||||
//allocate temporary memory, memory is allocated for the data set, parameters, functions and maps
|
||||
mem_chisq_m = m_oclbase->ocl_allocateMemory(size_data*sizeof(double), ierr);
|
||||
mem_param_m = m_oclbase->ocl_allocateMemory(size_param*sizeof(double), ierr);
|
||||
if (size_func == 0)
|
||||
@ -277,7 +280,7 @@ int OpenCLChiSquareRuntime::freeChiSquare() {
|
||||
int ierr = DKS_ERROR;
|
||||
if (initDone_m) {
|
||||
|
||||
//free memory
|
||||
//free GPU memory
|
||||
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_chisq_m);
|
||||
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_param_m);
|
||||
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_func_m);
|
||||
@ -308,6 +311,7 @@ int OpenCLChiSquareRuntime::checkChiSquareKernels(int fitType, int &threadsPerBl
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
//check the GPU kernel
|
||||
ierr = m_oclbase->ocl_checkKernel(kernel, 128, true, threadsPerBlock);
|
||||
|
||||
return ierr;
|
||||
|
@ -17,44 +17,54 @@ const std::string openclFunctHeader = "double fTheory(double t, __local double *
|
||||
|
||||
const std::string openclFunctFooter = "}\n";
|
||||
|
||||
/**
|
||||
* OpenCL implementation of ChiSquareRuntime class.
|
||||
* Implements ChiSquareRuntime interface to allow musrfit to target devices that
|
||||
* support OpenCL - Nvidia and AMD GPUs, Intel and AMD CPUs, Intel Xeon Phi.
|
||||
*/
|
||||
class OpenCLChiSquareRuntime : public ChiSquareRuntime {
|
||||
|
||||
private:
|
||||
|
||||
OpenCLBase *m_oclbase;
|
||||
|
||||
/** Private function to add user defined function to kernel string
|
||||
*
|
||||
/**
|
||||
* Private function to add user defined function to kernel string.
|
||||
*/
|
||||
std::string buildProgram(std::string function);
|
||||
|
||||
/**
|
||||
* Launch parallel reduction kernel to calculate the sum of data array
|
||||
*/
|
||||
double calculateSum(cl_mem data, int length);
|
||||
|
||||
public:
|
||||
|
||||
/** Constructor wiht openclbase argument
|
||||
*
|
||||
/**
|
||||
* Constructor with openclbase argument.
|
||||
*/
|
||||
OpenCLChiSquareRuntime(OpenCLBase *base);
|
||||
|
||||
/** Default constructor
|
||||
*
|
||||
/**
|
||||
* Default constructor
|
||||
*/
|
||||
OpenCLChiSquareRuntime();
|
||||
|
||||
/** Default destructor
|
||||
*
|
||||
/**
|
||||
* Default destructor
|
||||
*/
|
||||
~OpenCLChiSquareRuntime();
|
||||
|
||||
/** Compile program and save ptx.
|
||||
/**
|
||||
* Compile program and save ptx.
|
||||
* Add function string to the calcFunction kernel and compile the program
|
||||
* Function must be valid C math expression. Parameters can be addressed in
|
||||
* a form par[map[idx]]
|
||||
*/
|
||||
int compileProgram(std::string function, bool mlh = false);
|
||||
|
||||
/** Launch selected kernel
|
||||
/**
|
||||
* Launch selected kernel.
|
||||
* Launched the selected kernel from the compiled code.
|
||||
* Result is put in &result variable
|
||||
*/
|
||||
@ -64,22 +74,26 @@ public:
|
||||
double timeStart, double timeStep,
|
||||
double &result);
|
||||
|
||||
/** Write params to device.
|
||||
/**
|
||||
* Write params to device.
|
||||
* Write params from double array to mem_param_m memory on the device.
|
||||
*/
|
||||
int writeParams(const double *params, int numparams);
|
||||
|
||||
/** Write functions to device.
|
||||
/**
|
||||
* Write functions to device.
|
||||
* Write function values from double array to mem_func_m memory on the device.
|
||||
*/
|
||||
int writeFunc(const double *func, int numfunc);
|
||||
|
||||
/** Write maps to device.
|
||||
/**
|
||||
* Write maps to device.
|
||||
* Write map values from int array to mem_map_m memory on the device.
|
||||
*/
|
||||
int writeMap(const int *map, int nummap);
|
||||
|
||||
/** Allocate temporary memory needed for chi square.
|
||||
/**
|
||||
* Allocate temporary memory needed for chi square.
|
||||
* Initializes the necessary temporary memory for the chi square calculations. Size_data needs to be
|
||||
* the maximum number of elements in any datasets that will be used for calculations. Size_param,
|
||||
* size_func and size_map are the maximum number of parameters, functions and maps used in
|
||||
@ -87,14 +101,16 @@ public:
|
||||
*/
|
||||
int initChiSquare(int size_data, int size_param, int size_func, int size_map);
|
||||
|
||||
/** Free temporary memory allocated for chi square.
|
||||
/**
|
||||
* Free temporary memory allocated for chi square.
|
||||
* Frees the chisq temporary memory and memory for params, functions and maps
|
||||
*/
|
||||
int freeChiSquare();
|
||||
|
||||
/** Check MuSR kernels for necessary resources.
|
||||
/**
|
||||
* Check MuSR kernels for necessary resources.
|
||||
* Query device properties to get if sufficient resources are
|
||||
* available to run the kernels
|
||||
* available to run the kernels. Also checks if double precision is enabled on the device.
|
||||
*/
|
||||
int checkChiSquareKernels(int fitType, int &threadsPerBlock);
|
||||
|
||||
|
@ -17,12 +17,16 @@
|
||||
#include "boost/compute/core.hpp"
|
||||
*/
|
||||
|
||||
/** Double3 structure for use in OpenCL code. */
|
||||
typedef struct {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
} Double3;
|
||||
|
||||
/**
|
||||
* Structure for storing particles in OpenCL code.
|
||||
*/
|
||||
typedef struct {
|
||||
int label;
|
||||
unsigned localID;
|
||||
@ -35,6 +39,10 @@ typedef struct {
|
||||
//BOOST_COMPUTE_ADAPT_STRUCT(Double3, Double3, (x, y, z));
|
||||
//BOOST_COMPUTE_ADAPT_STRUCT(PART_OPENCL, PART_OPENCL, (label, localID, Rincol, Pincol));
|
||||
|
||||
/**
|
||||
* OpenCLCollimatorPhysics class based on DKSCollimatorPhysics interface.
|
||||
* Implements CollimatorPhysics for OPAL using OpenCL for execution on AMD GPUs.
|
||||
*/
|
||||
class OpenCLCollimatorPhysics : public DKSCollimatorPhysics {
|
||||
|
||||
private:
|
||||
@ -42,16 +50,20 @@ private:
|
||||
|
||||
public:
|
||||
|
||||
/* constructor */
|
||||
/**
|
||||
* Constructor with OpenCLBase as argument.
|
||||
* Create a new instance of the OpenCLCollimatorPhysics using existing OpenCLBase object.
|
||||
*/
|
||||
OpenCLCollimatorPhysics(OpenCLBase *base) {
|
||||
m_oclbase = base;
|
||||
}
|
||||
|
||||
/* destructor */
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~OpenCLCollimatorPhysics() {
|
||||
}
|
||||
|
||||
/* execute degrader code on device */
|
||||
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
|
||||
bool enableRutherforScattering = true);
|
||||
|
||||
|
@ -1,14 +1,3 @@
|
||||
/*
|
||||
|
||||
Name: OpenCLFFT
|
||||
|
||||
Author: Uldis Locans
|
||||
|
||||
Info:Extend OpenCLBase class to implement fft and ifft functions using OpenCL
|
||||
|
||||
Data: 19.09.2014
|
||||
|
||||
*/
|
||||
#ifndef H_OPENCL_FFT
|
||||
#define H_OPENCL_FFT
|
||||
|
||||
@ -22,6 +11,12 @@
|
||||
|
||||
#include "clFFT.h"
|
||||
|
||||
/**
|
||||
* OpenCL FFT class based on BaseFFT interface.
|
||||
* Uses clFFT library to perform FFTs on AMD gpus.
|
||||
* clFFT library works also on Nvidia GPUs and other devices that
|
||||
* support OpenCL.
|
||||
*/
|
||||
class OpenCLFFT : public BaseFFT {
|
||||
|
||||
private:
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "../Algorithms/GreensFunction.h"
|
||||
#include "OpenCLBase.h"
|
||||
|
||||
/** OpenCL implementation of GreensFunction calculation for OPALs Poisson Solver. */
|
||||
class OpenCLGreensFunction : public GreensFunction {
|
||||
|
||||
private:
|
||||
@ -31,7 +32,7 @@ public:
|
||||
int buildProgram();
|
||||
|
||||
/**
|
||||
Info: calc itegral on device memory (taken from OPAL src code)
|
||||
Info: calc integral on device memory (taken from OPAL src code).
|
||||
Return: success or error code
|
||||
*/
|
||||
int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
|
||||
@ -39,20 +40,20 @@ public:
|
||||
int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: integration of rho2_m field (taken from OPAL src code)
|
||||
Info: integration of rho2_m field (taken from OPAL src code).
|
||||
Return: success or error code
|
||||
*/
|
||||
int integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: mirror rho field (taken from OPAL src code)
|
||||
Info: mirror rho field (taken from OPAL src code).
|
||||
Return: success or error code
|
||||
*/
|
||||
int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
|
||||
Info: multiply complex fields already on the GPU memory, result will be put in ptr1.
|
||||
Return: success or error code
|
||||
*/
|
||||
int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);
|
||||
|
Reference in New Issue
Block a user