798 lines
20 KiB
C++
798 lines
20 KiB
C++
#include "DKSBase.h"
|
|
|
|
#define API_OPENCL "OpenCL"
|
|
#define API_CUDA "Cuda"
|
|
#define API_OPENMP "OpenMP"
|
|
|
|
#define DEVICE_GPU "-gpu"
|
|
#define DEVICE_CPU "-cpu"
|
|
#define DEVICE_MIC "-mic"
|
|
|
|
//=====================================//
|
|
//==========Private functions==========//
|
|
//=====================================//
|
|
|
|
bool DKSBase::apiOpenCL() {
|
|
|
|
if (!m_api_set)
|
|
return false;
|
|
|
|
if (strcmp(m_api_name, API_OPENCL) != 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DKSBase::apiCuda() {
|
|
|
|
if (!m_api_set)
|
|
return false;
|
|
|
|
if (strcmp(m_api_name, API_CUDA) != 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DKSBase::apiOpenMP() {
|
|
if (!m_api_set)
|
|
return false;
|
|
|
|
if (strcmp(m_api_name, API_OPENMP) != 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DKSBase::deviceGPU() {
|
|
if (!m_device_set)
|
|
return false;
|
|
if (strcmp(m_device_name, DEVICE_GPU) != 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DKSBase::deviceCPU() {
|
|
if (!m_device_set)
|
|
return false;
|
|
if (strcmp(m_device_name, DEVICE_CPU) != 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DKSBase::deviceMIC() {
|
|
if (!m_device_set)
|
|
return false;
|
|
if (strcmp(m_device_name, DEVICE_MIC) != 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
int DKSBase::loadOpenCLKernel(const char *kernel_name) {
|
|
//load kernel
|
|
char * kernel_file = new char[500];
|
|
kernel_file[0] = '\0';
|
|
strcat(kernel_file, OPENCL_KERNELS);
|
|
strcat(kernel_file, kernel_name);
|
|
int ierr = OPENCL_SAFECALL( oclbase->ocl_loadKernel(kernel_file) );
|
|
delete[] kernel_file;
|
|
|
|
return ierr;
|
|
}
|
|
|
|
//=====================================//
|
|
//==========Public functions===========//
|
|
//=====================================//
|
|
|
|
DKSBase::DKSBase() {
|
|
|
|
m_device_name = NULL;
|
|
m_api_name = NULL;
|
|
m_function_name = NULL;
|
|
|
|
m_device_set = false;
|
|
m_api_set = false;
|
|
m_function_set = false;
|
|
|
|
m_auto_tuning = false;
|
|
m_use_config = false;
|
|
|
|
#ifdef DKS_CUDA
|
|
cbase = new CudaBase();
|
|
cchi = new CudaChiSquare(cbase);
|
|
#endif
|
|
|
|
ls#ifdef DKS_OPENCL
|
|
oclbase = new OpenCLBase();
|
|
oclchi = new OpenCLChiSquare(oclbase);
|
|
#endif
|
|
|
|
#ifdef DKS_MIC
|
|
micbase = new MICBase();
|
|
micchi = new MICChiSquare(micbase);
|
|
#endif
|
|
|
|
}
|
|
|
|
DKSBase::DKSBase(const char* api_name, const char* device_name) {
|
|
|
|
setAPI(api_name, strlen(api_name));
|
|
setDevice(device_name, strlen(device_name));
|
|
m_function_name = NULL;
|
|
m_function_set = false;
|
|
|
|
m_auto_tuning = false;
|
|
m_use_config = false;
|
|
|
|
#ifdef DKS_CUDA
|
|
cbase = new CudaBase();
|
|
cchi = new CudaChiSquare(cbase);
|
|
#endif
|
|
|
|
#ifdef DKS_OPENCL
|
|
oclbase = new OpenCLBase();
|
|
oclchi = new OpenCLChiSquare(oclbase);
|
|
#endif
|
|
|
|
#ifdef DKS_MIC
|
|
micbase = new MICBase();
|
|
miccol = new MICCollimatorPhysics(micbase);
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
DKSBase::~DKSBase() {
|
|
|
|
if (m_device_name != NULL)
|
|
delete[] m_device_name;
|
|
|
|
if (m_api_name != NULL)
|
|
delete[] m_api_name;
|
|
|
|
if (m_function_name != NULL)
|
|
delete[] m_function_name;
|
|
|
|
delete dksfft;
|
|
delete dkscol;
|
|
delete dksgreens;
|
|
|
|
#ifdef DKS_CUDA
|
|
delete cchi;
|
|
delete cbase;
|
|
#endif
|
|
|
|
#ifdef DKS_OPENCL
|
|
delete oclchi;
|
|
delete oclbase;
|
|
#endif
|
|
|
|
|
|
#ifdef DKS_MIC
|
|
delete micchi;
|
|
delete micbase;
|
|
#endif
|
|
|
|
}
|
|
|
|
/*
|
|
Name: setDevice
|
|
Info: sets specific device to use. length specifies device_name string length (deprecated)
|
|
Return: success or error code
|
|
*/
|
|
int DKSBase::setDevice(const char* device_name, int length) {
|
|
|
|
if (m_device_set)
|
|
delete[] m_device_name;
|
|
|
|
int l = strlen(device_name);
|
|
m_device_name = new char[l+1];
|
|
|
|
for (int i = 0; i < l; i++)
|
|
m_device_name[i] = device_name[i];
|
|
m_device_name[l] = '\0';
|
|
|
|
m_device_set = true;
|
|
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|
|
/*
|
|
Name: setAPI
|
|
Info: sets specific api (OpenCL, CUDA, OpenACC, OpenMP) to use
|
|
Return: success or error code
|
|
*/
|
|
int DKSBase::setAPI(const char* api_name, int length) {
|
|
|
|
if (m_api_set)
|
|
delete[] m_api_name;
|
|
|
|
int l = strlen(api_name);
|
|
m_api_name = new char[l+1];
|
|
|
|
for (int i = 0; i < l; i++)
|
|
m_api_name[i] = api_name[i];
|
|
m_api_name[l] = '\0';
|
|
|
|
m_api_set = true;
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
Name: getDevices
|
|
Info: get all available devices
|
|
Return: success or error code
|
|
*/
|
|
int DKSBase::getDevices() {
|
|
|
|
int ierr1 = OPENCL_SAFECALL( oclbase->ocl_getAllDevices() );
|
|
int ierr2 = CUDA_SAFECALL( cbase->cuda_getDevices() );
|
|
int ierr3 = MIC_SAFECALL( micbase->mic_getDevices() );
|
|
|
|
if (ierr1 + ierr2 + ierr3 != DKS_SUCCESS)
|
|
return DKS_ERROR;
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
int DKSBase::getDeviceCount(int &ndev) {
|
|
ndev = 0;
|
|
if (apiOpenCL())
|
|
return OPENCL_SAFECALL( oclbase->ocl_getDeviceCount(ndev) );
|
|
else if (apiCuda())
|
|
return CUDA_SAFECALL( cbase->cuda_getDeviceCount(ndev) );
|
|
else if (apiOpenMP())
|
|
return DKS_ERROR;
|
|
else
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
int DKSBase::getDeviceName(std::string &device_name) {
|
|
if (apiOpenCL())
|
|
return OPENCL_SAFECALL( oclbase->ocl_getDeviceName(device_name) );
|
|
else if (apiCuda())
|
|
return CUDA_SAFECALL( cbase->cuda_getDeviceName(device_name) );
|
|
else if (apiOpenMP())
|
|
return DKS_ERROR;
|
|
else
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
int DKSBase::setDefaultDevice(int device) {
|
|
std::cout << "Set device " << device << std::endl;
|
|
if (apiOpenCL())
|
|
return OPENCL_SAFECALL( oclbase->ocl_setDevice(device) );
|
|
else if (apiCuda())
|
|
return CUDA_SAFECALL( cbase->cuda_setDevice(device) );
|
|
else if (apiOpenMP())
|
|
return DKS_ERROR;
|
|
else
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
int DKSBase::getDeviceList(std::vector<int> &devices) {
|
|
if (apiOpenCL())
|
|
return OPENCL_SAFECALL( oclbase->ocl_getUniqueDevices(devices) );
|
|
else if (apiCuda())
|
|
return CUDA_SAFECALL( cbase->cuda_getUniqueDevices(devices) );
|
|
else if (apiOpenMP())
|
|
return DKS_ERROR;
|
|
else
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
int DKSBase::setup() {
|
|
|
|
int ierr = DKS_ERROR;
|
|
|
|
if (apiOpenCL()) {
|
|
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
|
//TODO: only enable if AMD libraries are available
|
|
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(oclbase) );
|
|
dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(oclbase) );
|
|
dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(oclbase) );
|
|
} else if (apiCuda()) {
|
|
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
|
dksfft = CUDA_SAFEINIT( new CudaFFT(cbase) );
|
|
dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(cbase) );
|
|
dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(cbase) );
|
|
} else if (apiOpenMP()) {
|
|
ierr = MIC_SAFECALL( DKS_SUCCESS );
|
|
dksfft = MIC_SAFEINIT( new MICFFT(micbase) );
|
|
dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(micbase) );
|
|
dksgreens = MIC_SAFEINIT( new MICGreensFunction(micbase) );
|
|
} else {
|
|
ierr = DKS_ERROR;
|
|
}
|
|
|
|
return ierr;
|
|
}
|
|
|
|
/*
|
|
init device
|
|
*/
|
|
int DKSBase::initDevice() {
|
|
|
|
int ierr = DKS_ERROR;
|
|
|
|
//if api is not set default is OpenCL
|
|
if (!m_api_set) {
|
|
setDevice("-gpu", 4);
|
|
setAPI(API_OPENCL, 6);
|
|
ierr = OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") );
|
|
} else {
|
|
if (apiOpenCL()) {
|
|
if (!m_device_set) {
|
|
setDevice("-gpu", 4);
|
|
setAPI(API_OPENCL, 6);
|
|
ierr = OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") );
|
|
} else {
|
|
setAPI(API_OPENCL, 6);
|
|
ierr = OPENCL_SAFECALL( oclbase->ocl_setUp(m_device_name) );
|
|
}
|
|
} else if (apiCuda()) {
|
|
setDevice("-gpu", 4);
|
|
setAPI(API_CUDA, 4);
|
|
ierr = CUDA_SAFECALL(DKS_SUCCESS);
|
|
} else if (apiOpenMP()) {
|
|
setDevice("-mic", 4);
|
|
setAPI(API_OPENMP, 6);
|
|
ierr = MIC_SAFECALL(DKS_SUCCESS);
|
|
}
|
|
}
|
|
|
|
if (ierr == DKS_SUCCESS)
|
|
ierr = setup();
|
|
|
|
return ierr;
|
|
}
|
|
|
|
/*
|
|
set up cuda, opencl and mic to allow async data transfer and kernel execution.
|
|
name stream 'stolen' from cuda. opencl context ~ cuda stream.
|
|
TODO: implementations for OpenCL and MIC still needed
|
|
*/
|
|
int DKSBase::createStream(int &streamId) {
|
|
|
|
if (apiCuda())
|
|
return CUDA_SAFECALL( cbase->cuda_createStream(streamId) );
|
|
else if (apiOpenMP())
|
|
return MIC_SAFECALL( micbase->mic_createStream(streamId) );
|
|
|
|
DEBUG_MSG("Streams not enbled for this platforms jet");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
/* send device pointer to other processes */
|
|
#ifdef DKS_MPI
|
|
int DKSBase::sendPointer(void *mem_ptr, int dest, MPI_Comm comm) {
|
|
|
|
if ( apiCuda() ) {
|
|
#ifdef DKS_CUDA
|
|
cudaError cerror;
|
|
cudaIpcMemHandle_t shandle;
|
|
cerror = cudaIpcGetMemHandle(&shandle, mem_ptr);
|
|
MPI_Send(&shandle, sizeof(cudaIpcMemHandle_t), MPI_BYTE, dest, 100, comm);
|
|
if (cerror != cudaSuccess) {
|
|
DEBUG_MSG("Error geting mem handle");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
#endif
|
|
}
|
|
else if (apiOpenMP()) {
|
|
#ifdef DKS_MIC
|
|
//BENI:
|
|
DEBUG_MSG("No SendPointer for MIC is implemented");
|
|
return DKS_ERROR;
|
|
#endif
|
|
}
|
|
else {
|
|
DEBUG_MSG("Send device pointer not implemented on selected platform");
|
|
return DKS_ERROR;
|
|
}
|
|
return DKS_ERROR;
|
|
}
|
|
#endif
|
|
|
|
/* receive device pointer */
|
|
#ifdef DKS_MPI
|
|
void * DKSBase::receivePointer(int hostproc, MPI_Comm comm, int &ierr) {
|
|
|
|
void *mem_ptr;
|
|
if (apiCuda()) {
|
|
#ifdef DKS_CUDA
|
|
cudaError cerror;
|
|
cudaIpcMemHandle_t rhandle;
|
|
MPI_Recv(&rhandle, sizeof(cudaIpcMemHandle_t), MPI_BYTE, hostproc, 100, comm, NULL);
|
|
cerror = cudaIpcOpenMemHandle(&mem_ptr, rhandle, cudaIpcMemLazyEnablePeerAccess);
|
|
if (cerror != cudaSuccess) {
|
|
DEBUG_MSG("Error opening received handle");
|
|
ierr = DKS_ERROR;
|
|
}
|
|
#endif
|
|
return mem_ptr;
|
|
}
|
|
else if (apiOpenMP()) {
|
|
#ifdef DKS_MIC
|
|
//BENI:
|
|
DEBUG_MSG("No ReceivePointer for MIC is implemented");
|
|
return DKS_SUCCESS;
|
|
#endif
|
|
return mem_ptr;
|
|
}
|
|
else {
|
|
ierr = DKS_ERROR;
|
|
DEBUG_MSG("Receive device pointer not implemented for selected platform");
|
|
return mem_ptr;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* close received handle */
|
|
int DKSBase::closeHandle(void *mem_ptr) {
|
|
|
|
if (apiCuda()) {
|
|
#ifdef DKS_CUDA
|
|
cudaError cerror;
|
|
cerror = cudaIpcCloseMemHandle(mem_ptr);
|
|
if (cerror != cudaSuccess) {
|
|
DEBUG_MSG("Error closing memory handle");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
#endif
|
|
}
|
|
|
|
DEBUG_MSG("Memory handles not implemented for selected platform");
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
|
|
/* sync device calls */
|
|
int DKSBase::syncDevice() {
|
|
|
|
if (apiCuda())
|
|
return CUDA_SAFECALL( cbase->cuda_syncDevice() );
|
|
else if (apiOpenMP())
|
|
return MIC_SAFECALL( micbase->mic_syncDevice() );
|
|
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
/* setup fft plans to reuse if multiple ffts of same size are needed */
|
|
int DKSBase::setupFFT(int ndim, int N[3]) {
|
|
|
|
if (apiCuda()) {
|
|
return dksfft->setupFFT(ndim, N);
|
|
} else if (apiOpenCL()) {
|
|
int ierr1 = dksfft->setupFFT(ndim, N);
|
|
int ierr2 = dksfft->setupFFTRC(ndim, N);
|
|
int ierr3 = dksfft->setupFFTCR(ndim, N);
|
|
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
|
|
return DKS_ERROR;
|
|
|
|
return DKS_SUCCESS;
|
|
} else if (apiOpenMP()) {
|
|
//micbase.mic_setupFFT(ndim, N);
|
|
//BENI: setting up RC and CR transformations on MIC
|
|
int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
|
|
int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
|
|
if (ierr1 != DKS_SUCCESS)
|
|
return ierr1;
|
|
if (ierr2 != DKS_SUCCESS)
|
|
return ierr2;
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
//BENI:
|
|
int DKSBase::setupFFTRC(int ndim, int N[3], double scale) {
|
|
|
|
if (apiCuda())
|
|
return dksfft->setupFFT(ndim, N);
|
|
if (apiOpenCL())
|
|
return dksfft->setupFFTRC(ndim, N);
|
|
else if (apiOpenMP())
|
|
return dksfft->setupFFTRC(ndim, N, scale);
|
|
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
|
|
//BENI:
|
|
int DKSBase::setupFFTCR(int ndim, int N[3], double scale) {
|
|
|
|
if (apiCuda())
|
|
return dksfft->setupFFT(ndim, N);
|
|
if (apiOpenCL())
|
|
return dksfft->setupFFTCR(ndim, N);
|
|
else if (apiOpenMP())
|
|
return dksfft->setupFFTCR(ndim, N, scale);
|
|
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
|
|
/* call OpenCL FFT function for selected platform */
|
|
int DKSBase::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
|
|
|
if (apiOpenCL() || apiOpenMP())
|
|
return dksfft->executeFFT(data_ptr, ndim, dimsize);
|
|
else if (apiCuda())
|
|
return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
|
|
|
|
DEBUG_MSG("No implementation for selected platform");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
/* call OpenCL IFFT function for selected platform */
|
|
int DKSBase::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
|
if (apiOpenCL() || apiOpenMP())
|
|
return dksfft->executeIFFT(data_ptr, ndim, dimsize);
|
|
else if (apiCuda())
|
|
return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
|
|
|
|
DEBUG_MSG("No implementation for selected platform");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
/* call normalize FFT function for selected platform */
|
|
int DKSBase::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
|
|
|
if (apiOpenCL()) {
|
|
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
|
|
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
|
else
|
|
return DKS_ERROR;
|
|
} else if (apiCuda()) {
|
|
return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
|
|
} else if (apiOpenMP()) {
|
|
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
|
}
|
|
|
|
DEBUG_MSG("No implementation for selected platform");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
/* call real to complex FFT */
|
|
int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
|
|
|
if (apiCuda())
|
|
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
|
else if (apiOpenCL() || apiOpenMP())
|
|
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
|
|
|
|
DEBUG_MSG("No implementation for selected platform");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
/* call complex to real FFT */
|
|
int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
|
if (apiCuda())
|
|
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
|
else if (apiOpenCL() || apiOpenMP())
|
|
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
|
|
|
|
DEBUG_MSG("No implementation for selected platform");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
/* normalize complex to real iFFT */
|
|
int DKSBase::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
|
|
if (apiCuda())
|
|
return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
|
|
else if (apiOpenCL())
|
|
return DKS_ERROR;
|
|
else if (apiOpenMP())
|
|
return DKS_ERROR;
|
|
|
|
DEBUG_MSG("No implementation for selected platform");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
|
|
double hz_m0, double hz_m1, double hz_m2, int streamId) {
|
|
|
|
return dksgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
|
|
hz_m0, hz_m1, hz_m2, streamId);
|
|
|
|
}
|
|
|
|
int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
|
|
int I, int J, int K, int streamId) {
|
|
|
|
return dksgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId);
|
|
}
|
|
|
|
int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
|
|
|
|
return dksgreens->mirrorRhoField(mem_ptr, I, J, K, streamId);
|
|
}
|
|
|
|
int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) {
|
|
|
|
return dksgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId);
|
|
}
|
|
|
|
|
|
int DKSBase::callPHistoTFFcn(void *mem_data, void *mem_par, void *mem_chisq,
|
|
double fTimeResolution, double fRebin,
|
|
int sensors, int length, int numpar, double &result)
|
|
{
|
|
|
|
if (apiCuda()) {
|
|
return CUDA_SAFECALL(cchi->cuda_PHistoTFFcn(mem_data, mem_par, mem_chisq,
|
|
fTimeResolution, fRebin,
|
|
sensors, length, numpar,
|
|
result));
|
|
} else if (apiOpenCL()) {
|
|
|
|
if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLChiSquare.cl") == DKS_SUCCESS)
|
|
return OPENCL_SAFECALL(oclchi->ocl_PHistoTFFcn(mem_data, mem_par, mem_chisq,
|
|
fTimeResolution, fRebin,
|
|
sensors, length, numpar, result));
|
|
else
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
DEBUG_MSG("No implementation for selceted platform");
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
|
|
int DKSBase::callSingleGaussTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
|
double fTimeResolution, double fRebin, double fGoodBinOffset,
|
|
int sensors, int length, int numpar,
|
|
double &result)
|
|
{
|
|
if (apiCuda()) {
|
|
return CUDA_SAFECALL(cchi->cuda_singleGaussTF(mem_data, mem_t0, mem_par, mem_result,
|
|
fTimeResolution, fRebin, fGoodBinOffset,
|
|
sensors, length, numpar,
|
|
result));
|
|
} else if (apiOpenCL()) {
|
|
if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLChiSquare.cl") == DKS_SUCCESS)
|
|
return OPENCL_SAFECALL(oclchi->ocl_singleGaussTF(mem_data, mem_t0, mem_par, mem_result,
|
|
fTimeResolution, fRebin, fGoodBinOffset,
|
|
sensors, length, numpar, result));
|
|
else
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
DEBUG_MSG("No implementation for selceted platform");
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
|
|
int DKSBase::callDoubleLorentzTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
|
double fTimeResolution, double fRebin, double fGoodBinOffset,
|
|
int sensors, int length, int numpar,
|
|
double &result)
|
|
{
|
|
if (apiCuda()) {
|
|
return CUDA_SAFECALL(cchi->cuda_doubleLorentzTF(mem_data, mem_t0, mem_par, mem_result,
|
|
fTimeResolution, fRebin, fGoodBinOffset,
|
|
sensors, length, numpar,
|
|
result));
|
|
} else if (apiOpenCL()) {
|
|
|
|
if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLChiSquare.cl") == DKS_SUCCESS)
|
|
return OPENCL_SAFECALL(oclchi->ocl_doubleLorentzTF(mem_data, mem_t0, mem_par, mem_result,
|
|
fTimeResolution, fRebin, fGoodBinOffset,
|
|
sensors, length, numpar, result));
|
|
else
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
DEBUG_MSG("No implementation for selceted platform");
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
|
|
int DKSBase::callCollimatorPhysics(void *mem_ptr, void *par_ptr,
|
|
int numparticles, int numparams,
|
|
int &numaddback, int &numdead)
|
|
{
|
|
|
|
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
|
|
|
}
|
|
|
|
|
|
int DKSBase::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles)
|
|
{
|
|
|
|
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
|
|
|
}
|
|
|
|
int DKSBase::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
|
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
|
void *px_ptr, void *py_ptr, void *pz_ptr,
|
|
void *par_ptr, int numparticles)
|
|
{
|
|
|
|
|
|
return dkscol->CollimatorPhysicsSoA(label_ptr, localID_ptr,
|
|
rx_ptr, ry_ptr, rz_ptr,
|
|
px_ptr, py_ptr, pz_ptr,
|
|
par_ptr, numparticles);
|
|
|
|
}
|
|
|
|
|
|
int DKSBase::callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback)
|
|
{
|
|
|
|
|
|
return dkscol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback);
|
|
|
|
}
|
|
|
|
int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
|
|
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
|
void *px_ptr, void *py_ptr, void *pz_ptr,
|
|
void *par_ptr, int numparticles, int &numaddback)
|
|
{
|
|
|
|
return MIC_SAFECALL(dkscol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr,
|
|
rx_ptr, ry_ptr, rz_ptr,
|
|
px_ptr, py_ptr, pz_ptr,
|
|
par_ptr, numparticles, numaddback));
|
|
|
|
}
|
|
|
|
|
|
int DKSBase::callInitRandoms(int size) {
|
|
if (apiCuda())
|
|
return CUDA_SAFECALL(cbase->cuda_createCurandStates(size));
|
|
else if (apiOpenCL())
|
|
return OPENCL_SAFECALL(oclbase->ocl_createRndStates(size));
|
|
else if (apiOpenMP())
|
|
return MIC_SAFECALL(micbase->mic_createRandStreams(size));
|
|
|
|
DEBUG_MSG("No implementation for selceted platform");
|
|
return DKS_ERROR;
|
|
|
|
}
|
|
|
|
int DKSBase::callParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart,
|
|
void *dt_ptr, double dt, double c,
|
|
bool usedt, int streamId)
|
|
{
|
|
|
|
return dkscol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, c, usedt, streamId);
|
|
|
|
}
|
|
|
|
int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
|
|
void *lastSec_ptr, void *orient_ptr,
|
|
int npart, int nsec, void *dt_ptr, double dt,
|
|
double c, bool usedt, int streamId)
|
|
{
|
|
|
|
return dkscol->ParallelTTrackerPushTransform(x_ptr, p_ptr, lastSec_ptr, orient_ptr,
|
|
npart, nsec, dt_ptr, dt, c, usedt, streamId);
|
|
|
|
}
|
|
|
|
int DKSBase::callCreateRandomNumbers(void *mem_ptr, int size) {
|
|
if (apiCuda())
|
|
return CUDA_SAFECALL(cbase->cuda_createRandomNumbers(mem_ptr, size));
|
|
if (apiOpenCL())
|
|
return OPENCL_SAFECALL(oclbase->ocl_createRandomNumbers(mem_ptr, size));
|
|
|
|
return DKS_ERROR;
|
|
}
|