separate OPAL DKS functions from base
This commit is contained in:
363
src/DKSBase.cpp
363
src/DKSBase.cpp
@ -103,17 +103,14 @@ DKSBase::DKSBase() {
|
||||
|
||||
#ifdef DKS_CUDA
|
||||
cbase = new CudaBase();
|
||||
cchi = new CudaChiSquare(cbase);
|
||||
#endif
|
||||
|
||||
ls#ifdef DKS_OPENCL
|
||||
#ifdef DKS_OPENCL
|
||||
oclbase = new OpenCLBase();
|
||||
oclchi = new OpenCLChiSquare(oclbase);
|
||||
#endif
|
||||
|
||||
#ifdef DKS_MIC
|
||||
micbase = new MICBase();
|
||||
micchi = new MICChiSquare(micbase);
|
||||
#endif
|
||||
|
||||
}
|
||||
@ -157,10 +154,6 @@ DKSBase::~DKSBase() {
|
||||
if (m_function_name != NULL)
|
||||
delete[] m_function_name;
|
||||
|
||||
delete dksfft;
|
||||
delete dkscol;
|
||||
delete dksgreens;
|
||||
|
||||
#ifdef DKS_CUDA
|
||||
delete cchi;
|
||||
delete cbase;
|
||||
@ -287,37 +280,7 @@ int DKSBase::getDeviceList(std::vector<int> &devices) {
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
int DKSBase::setup() {
|
||||
|
||||
int ierr = DKS_ERROR;
|
||||
|
||||
if (apiOpenCL()) {
|
||||
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
||||
//TODO: only enable if AMD libraries are available
|
||||
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(oclbase) );
|
||||
dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(oclbase) );
|
||||
dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(oclbase) );
|
||||
} else if (apiCuda()) {
|
||||
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
||||
dksfft = CUDA_SAFEINIT( new CudaFFT(cbase) );
|
||||
dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(cbase) );
|
||||
dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(cbase) );
|
||||
} else if (apiOpenMP()) {
|
||||
ierr = MIC_SAFECALL( DKS_SUCCESS );
|
||||
dksfft = MIC_SAFEINIT( new MICFFT(micbase) );
|
||||
dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(micbase) );
|
||||
dksgreens = MIC_SAFEINIT( new MICGreensFunction(micbase) );
|
||||
} else {
|
||||
ierr = DKS_ERROR;
|
||||
}
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
/*
|
||||
init device
|
||||
*/
|
||||
int DKSBase::initDevice() {
|
||||
int DKSBase::setupDevice() {
|
||||
|
||||
int ierr = DKS_ERROR;
|
||||
|
||||
@ -347,10 +310,15 @@ int DKSBase::initDevice() {
|
||||
}
|
||||
}
|
||||
|
||||
if (ierr == DKS_SUCCESS)
|
||||
ierr = setup();
|
||||
|
||||
return ierr;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
init device
|
||||
*/
|
||||
int DKSBase::initDevice() {
|
||||
return setupDevice();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -468,292 +436,16 @@ int DKSBase::syncDevice() {
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* setup fft plans to reuse if multiple ffts of same size are needed */
|
||||
int DKSBase::setupFFT(int ndim, int N[3]) {
|
||||
|
||||
if (apiCuda()) {
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
} else if (apiOpenCL()) {
|
||||
int ierr1 = dksfft->setupFFT(ndim, N);
|
||||
int ierr2 = dksfft->setupFFTRC(ndim, N);
|
||||
int ierr3 = dksfft->setupFFTCR(ndim, N);
|
||||
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
|
||||
return DKS_ERROR;
|
||||
|
||||
return DKS_SUCCESS;
|
||||
} else if (apiOpenMP()) {
|
||||
//micbase.mic_setupFFT(ndim, N);
|
||||
//BENI: setting up RC and CR transformations on MIC
|
||||
int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
|
||||
int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
|
||||
if (ierr1 != DKS_SUCCESS)
|
||||
return ierr1;
|
||||
if (ierr2 != DKS_SUCCESS)
|
||||
return ierr2;
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
//BENI:
|
||||
int DKSBase::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
|
||||
int DKSBase::callCreateRandomNumbers(void *mem_ptr, int size) {
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
return CUDA_SAFECALL(cbase->cuda_createRandomNumbers(mem_ptr, size));
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTRC(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTRC(ndim, N, scale);
|
||||
return OPENCL_SAFECALL(oclbase->ocl_createRandomNumbers(mem_ptr, size));
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
//BENI:
|
||||
int DKSBase::setupFFTCR(int ndim, int N[3], double scale) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTCR(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTCR(ndim, N, scale);
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
/* call OpenCL FFT function for selected platform */
|
||||
int DKSBase::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call OpenCL IFFT function for selected platform */
|
||||
int DKSBase::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call normalize FFT function for selected platform */
|
||||
int DKSBase::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL()) {
|
||||
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
else
|
||||
return DKS_ERROR;
|
||||
} else if (apiCuda()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
} else if (apiOpenMP()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call real to complex FFT */
|
||||
int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call complex to real FFT */
|
||||
int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* normalize complex to real iFFT */
|
||||
int DKSBase::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL())
|
||||
return DKS_ERROR;
|
||||
else if (apiOpenMP())
|
||||
return DKS_ERROR;
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
|
||||
double hz_m0, double hz_m1, double hz_m2, int streamId) {
|
||||
|
||||
return dksgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
|
||||
hz_m0, hz_m1, hz_m2, streamId);
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
|
||||
int I, int J, int K, int streamId) {
|
||||
|
||||
return dksgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId);
|
||||
}
|
||||
|
||||
int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
|
||||
|
||||
return dksgreens->mirrorRhoField(mem_ptr, I, J, K, streamId);
|
||||
}
|
||||
|
||||
int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) {
|
||||
|
||||
return dksgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId);
|
||||
}
|
||||
|
||||
|
||||
int DKSBase::callPHistoTFFcn(void *mem_data, void *mem_par, void *mem_chisq,
|
||||
double fTimeResolution, double fRebin,
|
||||
int sensors, int length, int numpar, double &result)
|
||||
{
|
||||
|
||||
if (apiCuda()) {
|
||||
return CUDA_SAFECALL(cchi->cuda_PHistoTFFcn(mem_data, mem_par, mem_chisq,
|
||||
fTimeResolution, fRebin,
|
||||
sensors, length, numpar,
|
||||
result));
|
||||
} else if (apiOpenCL()) {
|
||||
|
||||
if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLChiSquare.cl") == DKS_SUCCESS)
|
||||
return OPENCL_SAFECALL(oclchi->ocl_PHistoTFFcn(mem_data, mem_par, mem_chisq,
|
||||
fTimeResolution, fRebin,
|
||||
sensors, length, numpar, result));
|
||||
else
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selceted platform");
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callSingleGaussTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
||||
double fTimeResolution, double fRebin, double fGoodBinOffset,
|
||||
int sensors, int length, int numpar,
|
||||
double &result)
|
||||
{
|
||||
if (apiCuda()) {
|
||||
return CUDA_SAFECALL(cchi->cuda_singleGaussTF(mem_data, mem_t0, mem_par, mem_result,
|
||||
fTimeResolution, fRebin, fGoodBinOffset,
|
||||
sensors, length, numpar,
|
||||
result));
|
||||
} else if (apiOpenCL()) {
|
||||
if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLChiSquare.cl") == DKS_SUCCESS)
|
||||
return OPENCL_SAFECALL(oclchi->ocl_singleGaussTF(mem_data, mem_t0, mem_par, mem_result,
|
||||
fTimeResolution, fRebin, fGoodBinOffset,
|
||||
sensors, length, numpar, result));
|
||||
else
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selceted platform");
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callDoubleLorentzTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
||||
double fTimeResolution, double fRebin, double fGoodBinOffset,
|
||||
int sensors, int length, int numpar,
|
||||
double &result)
|
||||
{
|
||||
if (apiCuda()) {
|
||||
return CUDA_SAFECALL(cchi->cuda_doubleLorentzTF(mem_data, mem_t0, mem_par, mem_result,
|
||||
fTimeResolution, fRebin, fGoodBinOffset,
|
||||
sensors, length, numpar,
|
||||
result));
|
||||
} else if (apiOpenCL()) {
|
||||
|
||||
if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLChiSquare.cl") == DKS_SUCCESS)
|
||||
return OPENCL_SAFECALL(oclchi->ocl_doubleLorentzTF(mem_data, mem_t0, mem_par, mem_result,
|
||||
fTimeResolution, fRebin, fGoodBinOffset,
|
||||
sensors, length, numpar, result));
|
||||
else
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selceted platform");
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callCollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numparticles, int numparams,
|
||||
int &numaddback, int &numdead)
|
||||
{
|
||||
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int DKSBase::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles)
|
||||
{
|
||||
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles)
|
||||
{
|
||||
|
||||
|
||||
return dkscol->CollimatorPhysicsSoA(label_ptr, localID_ptr,
|
||||
rx_ptr, ry_ptr, rz_ptr,
|
||||
px_ptr, py_ptr, pz_ptr,
|
||||
par_ptr, numparticles);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int DKSBase::callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback)
|
||||
{
|
||||
|
||||
|
||||
return dkscol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback);
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles, int &numaddback)
|
||||
{
|
||||
|
||||
return MIC_SAFECALL(dkscol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr,
|
||||
rx_ptr, ry_ptr, rz_ptr,
|
||||
px_ptr, py_ptr, pz_ptr,
|
||||
par_ptr, numparticles, numaddback));
|
||||
|
||||
}
|
||||
|
||||
|
||||
int DKSBase::callInitRandoms(int size) {
|
||||
if (apiCuda())
|
||||
return CUDA_SAFECALL(cbase->cuda_createCurandStates(size));
|
||||
@ -766,32 +458,3 @@ int DKSBase::callInitRandoms(int size) {
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart,
|
||||
void *dt_ptr, double dt, double c,
|
||||
bool usedt, int streamId)
|
||||
{
|
||||
|
||||
return dkscol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, c, usedt, streamId);
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
|
||||
void *lastSec_ptr, void *orient_ptr,
|
||||
int npart, int nsec, void *dt_ptr, double dt,
|
||||
double c, bool usedt, int streamId)
|
||||
{
|
||||
|
||||
return dkscol->ParallelTTrackerPushTransform(x_ptr, p_ptr, lastSec_ptr, orient_ptr,
|
||||
npart, nsec, dt_ptr, dt, c, usedt, streamId);
|
||||
|
||||
}
|
||||
|
||||
int DKSBase::callCreateRandomNumbers(void *mem_ptr, int size) {
|
||||
if (apiCuda())
|
||||
return CUDA_SAFECALL(cbase->cuda_createRandomNumbers(mem_ptr, size));
|
||||
if (apiOpenCL())
|
||||
return OPENCL_SAFECALL(oclbase->ocl_createRandomNumbers(mem_ptr, size));
|
||||
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
220
src/DKSBase.h
220
src/DKSBase.h
@ -76,10 +76,6 @@ private:
|
||||
bool m_auto_tuning;
|
||||
bool m_use_config;
|
||||
|
||||
DKSFFT *dksfft;
|
||||
DKSCollimatorPhysics *dkscol;
|
||||
GreensFunction *dksgreens;
|
||||
|
||||
#ifdef DKS_OPENCL
|
||||
OpenCLBase *oclbase;
|
||||
OpenCLChiSquare *oclchi;
|
||||
@ -140,6 +136,12 @@ protected:
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DKS_MIC
|
||||
MICBase *getMICBase() {
|
||||
return micbase;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Call OpenCL base to load the specified kernel file.
|
||||
*
|
||||
*/
|
||||
@ -155,10 +157,6 @@ protected:
|
||||
return device_name;
|
||||
}
|
||||
|
||||
/** Private function to initialize objects based on the device used.
|
||||
*
|
||||
*/
|
||||
int setup();
|
||||
|
||||
public:
|
||||
|
||||
@ -179,6 +177,11 @@ public:
|
||||
*/
|
||||
~DKSBase();
|
||||
|
||||
/** Function to initialize objects based on the device used.
|
||||
*
|
||||
*/
|
||||
int setupDevice();
|
||||
|
||||
/** Turn on auto tuning */
|
||||
void setAutoTuningOn() { m_auto_tuning = true; }
|
||||
|
||||
@ -891,184 +894,10 @@ public:
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////
|
||||
///////Function library part of dksbase////////
|
||||
///////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Setup FFT function.
|
||||
* Initializes parameters for fft execution. If ndim > 0, initializes handles for fft calls.
|
||||
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
|
||||
* each fft will do its own setup according to fft size and dimensions.
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int setupFFT(int ndim, int N[3]);
|
||||
//BENI:
|
||||
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
|
||||
//BENI:
|
||||
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex fft.
|
||||
* Executes an in-place complex-to-complex fft on the device on the data pointed to by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation
|
||||
*/
|
||||
int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex ifft.
|
||||
* Executes an in-place complex-to-complex ifft on the device on the data pointed to by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize complex to complex ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call real to complex FFT.
|
||||
* Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex to real iFFT.
|
||||
* Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize complex to real ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
|
||||
* For specifics check OPAL docs.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
|
||||
double hz_m0, double hz_m1, double hz_m2, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
|
||||
* For specifics check OPAL docs.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callGreensIntegration(void *mem_ptr, void *tmp_ptr,
|
||||
int I, int J, int K, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
|
||||
* For specifics check OPAL docs.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Element by element multiplication.
|
||||
* Multiplies each element of mem_ptr1 with corresponding element of mem_ptr2, size specifies
|
||||
* the number of elements in mem_ptr1 and mem_ptr2 to use. Results are put in mem_ptr1.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Chi square for parameter fitting on device.
|
||||
* mem_data - measurement data, mem_par - pointer to parameter set, mem_chisq - pointer for
|
||||
* intermediate results. Chi square results are put in &results
|
||||
*/
|
||||
int callPHistoTFFcn(void *mem_data, void *mem_par, void *mem_chisq,
|
||||
double fTimeResolution, double fRebin,
|
||||
int sensors, int length, int numpar, double &result);
|
||||
|
||||
/**
|
||||
* max-log-likelihood for parameter fitting on device.
|
||||
* mem_data - measurement data, mem_t0 - pointer to time 0 for each sensor,
|
||||
* mem_par - pointer to parameter set, mem_results - pointer for
|
||||
* intermediate results. Chi square results are put in &results.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callSingleGaussTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
||||
double fTimeResolution, double fRebin, double fGoodBinOffser,
|
||||
int sensors, int length, int numpar,
|
||||
double &result);
|
||||
|
||||
/**
|
||||
* max-log-likelihood for parameter fitting on device.
|
||||
* mem_data - measurement data, mem_t0 - pointer to time 0 for each sensor,
|
||||
* mem_par - pointer to parameter set, mem_results - pointer for
|
||||
* intermediate results. Chi square results are put in &results.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callDoubleLorentzTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
||||
double fTimeResolution, double fRebin, double fGoodBinOffser,
|
||||
int sensors, int length, int numpar,
|
||||
double &result);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numparticles, int numparams,
|
||||
int &numaddback, int &numdead);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* Test function for the MIC to test SoA layout vs AoS layout used in previous versions
|
||||
*/
|
||||
int callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
* Create random numbers on the device and fill mem_data array
|
||||
*/
|
||||
int callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles, int &numaddback);
|
||||
int callCreateRandomNumbers(void *mem_ptr, int size);
|
||||
|
||||
/**
|
||||
* Init random number states and save for reuse on device.
|
||||
@ -1076,29 +905,6 @@ public:
|
||||
*/
|
||||
int callInitRandoms(int size);
|
||||
|
||||
/**
|
||||
* Integration code from ParallelTTracker from OPAL.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class docs
|
||||
*/
|
||||
int callParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart,
|
||||
void *dt_ptr, double dt, double c,
|
||||
bool usedt = false, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integration code from ParallelTTracker from OPAL.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class docs
|
||||
*/
|
||||
int callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
|
||||
void *lastSec_ptr, void *orient_ptr,
|
||||
int npart, int nsec, void *dt_ptr,
|
||||
double dt, double c, bool usedt = false,
|
||||
int streamId = -1);
|
||||
|
||||
/**
|
||||
* Create random numbers on the device and fill mem_data array
|
||||
*/
|
||||
int callCreateRandomNumbers(void *mem_ptr, int size);
|
||||
|
||||
/**
|
||||
* Print memory information on device (total, used, available)
|
||||
* TODO: opencl and mic implementation
|
||||
|
277
src/DKSOPAL.cpp
Normal file
277
src/DKSOPAL.cpp
Normal file
@ -0,0 +1,277 @@
|
||||
#include "DKSOPAL.h"
|
||||
|
||||
/**
 * Construct the OPAL interface with no helper objects allocated yet.
 * The FFT, collimator-physics and Green's-function pointers start out null;
 * they are assigned later by setupOPAL().
 */
DKSOPAL::DKSOPAL()
  : dksfft(nullptr),
    dkscol(nullptr),
    dksgreens(nullptr)
{
}
|
||||
|
||||
/**
 * Release the helper objects reachable through dksfft, dkscol and dksgreens.
 * Applying delete to a null pointer is a no-op, so this is also fine when
 * the pointers still hold the nullptr assigned by the constructor.
 */
DKSOPAL::~DKSOPAL()
{
  delete dksfft;     // FFT helper
  delete dkscol;     // collimator-physics helper
  delete dksgreens;  // Green's-function helper
}
|
||||
|
||||
int DKSOPAL::setupOPAL() {
|
||||
int ierr = DKS_ERROR;
|
||||
|
||||
if (apiOpenCL()) {
|
||||
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
||||
//TODO: only enable if AMD libraries are available
|
||||
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
|
||||
dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) );
|
||||
dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) );
|
||||
} else if (apiCuda()) {
|
||||
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
||||
dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
|
||||
dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) );
|
||||
dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) );
|
||||
} else if (apiOpenMP()) {
|
||||
ierr = MIC_SAFECALL( DKS_SUCCESS );
|
||||
dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
|
||||
dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) );
|
||||
dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) );
|
||||
} else {
|
||||
ierr = DKS_ERROR;
|
||||
}
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
int DKSOPAL::initDevice() {
|
||||
int ierr = setupDevice();
|
||||
if (ierr == DKS_ERROR)
|
||||
ierr = setupOPAL();
|
||||
return ierr;
|
||||
}
|
||||
|
||||
/* setup fft plans to reuse if multiple ffts of same size are needed */
|
||||
int DKSOPAL::setupFFT(int ndim, int N[3]) {
|
||||
|
||||
if (apiCuda()) {
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
} else if (apiOpenCL()) {
|
||||
int ierr1 = dksfft->setupFFT(ndim, N);
|
||||
int ierr2 = dksfft->setupFFTRC(ndim, N);
|
||||
int ierr3 = dksfft->setupFFTCR(ndim, N);
|
||||
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
|
||||
return DKS_ERROR;
|
||||
|
||||
return DKS_SUCCESS;
|
||||
} else if (apiOpenMP()) {
|
||||
//micbase.mic_setupFFT(ndim, N);
|
||||
//BENI: setting up RC and CR transformations on MIC
|
||||
int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
|
||||
int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
|
||||
if (ierr1 != DKS_SUCCESS)
|
||||
return ierr1;
|
||||
if (ierr2 != DKS_SUCCESS)
|
||||
return ierr2;
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
//BENI:
|
||||
int DKSOPAL::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTRC(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTRC(ndim, N, scale);
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
//BENI:
|
||||
int DKSOPAL::setupFFTCR(int ndim, int N[3], double scale) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTCR(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTCR(ndim, N, scale);
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
/* call OpenCL FFT function for selected platform */
|
||||
int DKSOPAL::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call OpenCL IFFT function for selected platform */
|
||||
int DKSOPAL::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call normalize FFT function for selected platform */
|
||||
int DKSOPAL::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL()) {
|
||||
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
else
|
||||
return DKS_ERROR;
|
||||
} else if (apiCuda()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
} else if (apiOpenMP()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call real to complex FFT */
|
||||
int DKSOPAL::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call complex to real FFT */
|
||||
int DKSOPAL::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* normalize complex to real iFFT */
|
||||
int DKSOPAL::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL())
|
||||
return DKS_ERROR;
|
||||
else if (apiOpenMP())
|
||||
return DKS_ERROR;
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
|
||||
double hz_m0, double hz_m1, double hz_m2, int streamId) {
|
||||
|
||||
return dksgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
|
||||
hz_m0, hz_m1, hz_m2, streamId);
|
||||
|
||||
}
|
||||
|
||||
int DKSOPAL::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
|
||||
int I, int J, int K, int streamId) {
|
||||
|
||||
return dksgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId);
|
||||
}
|
||||
|
||||
int DKSOPAL::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
|
||||
|
||||
return dksgreens->mirrorRhoField(mem_ptr, I, J, K, streamId);
|
||||
}
|
||||
|
||||
int DKSOPAL::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) {
|
||||
|
||||
return dksgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId);
|
||||
}
|
||||
|
||||
int DKSOPAL::callCollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numparticles, int numparams,
|
||||
int &numaddback, int &numdead)
|
||||
{
|
||||
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int DKSOPAL::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles)
|
||||
{
|
||||
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
||||
|
||||
}
|
||||
|
||||
int DKSOPAL::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles)
|
||||
{
|
||||
|
||||
|
||||
return dkscol->CollimatorPhysicsSoA(label_ptr, localID_ptr,
|
||||
rx_ptr, ry_ptr, rz_ptr,
|
||||
px_ptr, py_ptr, pz_ptr,
|
||||
par_ptr, numparticles);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int DKSOPAL::callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback)
|
||||
{
|
||||
|
||||
|
||||
return dkscol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback);
|
||||
|
||||
}
|
||||
|
||||
int DKSOPAL::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles, int &numaddback)
|
||||
{
|
||||
|
||||
return MIC_SAFECALL(dkscol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr,
|
||||
rx_ptr, ry_ptr, rz_ptr,
|
||||
px_ptr, py_ptr, pz_ptr,
|
||||
par_ptr, numparticles, numaddback));
|
||||
|
||||
}
|
||||
|
||||
|
||||
int DKSOPAL::callParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart,
|
||||
void *dt_ptr, double dt, double c,
|
||||
bool usedt, int streamId)
|
||||
{
|
||||
|
||||
return dkscol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, c, usedt, streamId);
|
||||
|
||||
}
|
||||
|
||||
int DKSOPAL::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
|
||||
void *lastSec_ptr, void *orient_ptr,
|
||||
int npart, int nsec, void *dt_ptr, double dt,
|
||||
double c, bool usedt, int streamId)
|
||||
{
|
||||
|
||||
return dkscol->ParallelTTrackerPushTransform(x_ptr, p_ptr, lastSec_ptr, orient_ptr,
|
||||
npart, nsec, dt_ptr, dt, c, usedt, streamId);
|
||||
|
||||
}
|
217
src/DKSOPAL.h
Normal file
217
src/DKSOPAL.h
Normal file
@ -0,0 +1,217 @@
|
||||
#ifndef H_DKS_OPAL
|
||||
#define H_DKS_OPAL
|
||||
|
||||
#include <iostream>
|
||||
#include "AutoTuning/DKSAutoTuning.h"
|
||||
|
||||
#include "DKSBase.h"
|
||||
|
||||
#include "DKSDefinitions.h"
|
||||
|
||||
#include "Algorithms/GreensFunction.h"
|
||||
#include "Algorithms/CollimatorPhysics.h"
|
||||
#include "Algorithms/FFT.h"
|
||||
|
||||
|
||||
#ifdef DKS_AMD
|
||||
#include "OpenCL/OpenCLFFT.h"
|
||||
#include "OpenCL/OpenCLGreensFunction.h"
|
||||
#include "OpenCL/OpenCLCollimatorPhysics.h"
|
||||
#endif
|
||||
|
||||
#ifdef DKS_CUDA
|
||||
#include "CUDA/CudaFFT.cuh"
|
||||
#include "CUDA/CudaGreensFunction.cuh"
|
||||
#include "CUDA/CudaCollimatorPhysics.cuh"
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef DKS_MIC
|
||||
#include "MIC/MICFFT.h"
|
||||
#include "MIC/MICGreensFunction.hpp"
|
||||
#include "MIC/MICCollimatorPhysics.h"
|
||||
#endif
|
||||
|
||||
class DKSOPAL : public DKSBase {
|
||||
|
||||
private:
|
||||
|
||||
DKSFFT *dksfft;
|
||||
DKSCollimatorPhysics *dkscol;
|
||||
GreensFunction *dksgreens;
|
||||
|
||||
int setupOPAL();
|
||||
|
||||
public:
|
||||
|
||||
DKSOPAL();
|
||||
|
||||
~DKSOPAL();
|
||||
|
||||
int initDevice();
|
||||
|
||||
///////////////////////////////////////////////
|
||||
///////Function library part of dksbase////////
|
||||
///////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Setup FFT function.
|
||||
* Initializes parameters for fft executuin. If ndim > 0 initializes handles for fft calls.
|
||||
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
|
||||
* each fft will do its own setup according to fft size and dimensions.
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int setupFFT(int ndim, int N[3]);
|
||||
//BENI:
|
||||
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
|
||||
//BENI:
|
||||
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex fft.
|
||||
* Executes in place complex to compelx fft on the device on data pointed by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation
|
||||
*/
|
||||
int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex ifft.
|
||||
* Executes in place complex to compelx ifft on the device on data pointed by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize complex to complex ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call real to complex FFT.
|
||||
* Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex to real iFFT.
|
||||
* Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize compelx to real ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
|
||||
* For specifics check OPAL docs.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
|
||||
double hz_m0, double hz_m1, double hz_m2, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
|
||||
* For specifics check OPAL docs.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callGreensIntegration(void *mem_ptr, void *tmp_ptr,
|
||||
int I, int J, int K, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
|
||||
* For specifics check OPAL docs.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Element by element multiplication.
|
||||
* Multiplies each element of mem_ptr1 with corresponding element of mem_ptr2, size specifies
|
||||
* the number of elements in mem_ptr1 and mem_ptr2 to use. Results are put in mem_ptr1.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numparticles, int numparams,
|
||||
int &numaddback, int &numdead);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* Test function for the MIC to test SoA layout vs AoS layout used in previous versions
|
||||
*/
|
||||
int callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
void *px_ptr, void *py_ptr, void *pz_ptr,
|
||||
void *par_ptr, int numparticles, int &numaddback);
|
||||
|
||||
/**
|
||||
* Integration code from ParallelTTracker from OPAL.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class docs
|
||||
*/
|
||||
int callParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart,
|
||||
void *dt_ptr, double dt, double c,
|
||||
bool usedt = false, int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integration code from ParallelTTracker from OPAL.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class docs
|
||||
*/
|
||||
int callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
|
||||
void *lastSec_ptr, void *orient_ptr,
|
||||
int npart, int nsec, void *dt_ptr,
|
||||
double dt, double c, bool usedt = false,
|
||||
int streamId = -1);
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user