DKSBase: use Algorithms base classes for FFT, collimator physics and Green's function

This commit is contained in:
Uldis Locans
2017-02-15 09:00:55 +01:00
parent e9d411235c
commit 7c7c2e240b
7 changed files with 174 additions and 242 deletions

View File

@ -39,22 +39,27 @@ IF (Boost_FOUND)
ENDIF (Boost_FOUND) ENDIF (Boost_FOUND)
#find clFFT #find clFFT
SET (clFFT_USE_STATIC_LIBS OFF) OPTION (ENABLE_AMD "Enable AMD libraries" OFF)
FIND_PACKAGE(clFFT REQUIRED HINTS $ENV{CLFFT_PREFIX} $ENV{CLFFT_DIR} $ENV{CLFFT}) IF (ENABLE_AMD)
MESSAGE (STATUS "Found clFFT library: ${CLFFT_LIBRARIES}") SET (clFFT_USE_STATIC_LIBS OFF)
MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}") FIND_PACKAGE(clFFT REQUIRED HINTS $ENV{CLFFT_PREFIX} $ENV{CLFFT_DIR} $ENV{CLFFT})
INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) MESSAGE (STATUS "Found clFFT library: ${CLFFT_LIBRARIES}")
LINK_DIRECTORIES (${CLFFT_LIBRARIES}) MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}")
INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
LINK_DIRECTORIES (${CLFFT_LIBRARIES})
#find clRNG #find clRNG
#SET (clRNG_USE_STATIC_LIBS OFF) #SET (clRNG_USE_STATIC_LIBS OFF)
#FIND_PACKAGE(clRng REQUIRED HINTS &ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG}) #FIND_PACKAGE(clRng REQUIRED HINTS &ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG})
#MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}") #MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}")
#MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}") #MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}")
#INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) #INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
#LINK_DIRECTORIES (${CLRNG_LIBRARIES}) #LINK_DIRECTORIES (${CLRNG_LIBRARIES})
#find_package(PkgConfig) #find_package(PkgConfig)
#pkg_check_modules(clRng REQUIRED) #pkg_check_modules(clRng REQUIRED)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_AMD")
ENDIF (ENABLE_AMD)
#enable UQTK #enable UQTK
OPTION (USE_UQTK "Use UQTK" OFF) OPTION (USE_UQTK "Use UQTK" OFF)

View File

@ -5,10 +5,7 @@
#include <string> #include <string>
#include "../DKSDefinitions.h" #include "../DKSDefinitions.h"
class DKSBaseMuSR;
class DKSCollimatorPhysics { class DKSCollimatorPhysics {
friend class DKSBaseMuSR;
protected: protected:

View File

@ -103,25 +103,16 @@ DKSBase::DKSBase() {
#ifdef DKS_CUDA #ifdef DKS_CUDA
cbase = new CudaBase(); cbase = new CudaBase();
cfft = new CudaFFT(cbase);
cgreens = new CudaGreensFunction(cbase);
cchi = new CudaChiSquare(cbase); cchi = new CudaChiSquare(cbase);
ccol = new CudaCollimatorPhysics(cbase);
#endif #endif
#ifdef DKS_OPENCL ls#ifdef DKS_OPENCL
oclbase = new OpenCLBase(); oclbase = new OpenCLBase();
oclfft = new OpenCLFFT(oclbase);
oclchi = new OpenCLChiSquare(oclbase); oclchi = new OpenCLChiSquare(oclbase);
oclcol = new OpenCLCollimatorPhysics(oclbase);
oclgreens = new OpenCLGreensFunction(oclbase);
#endif #endif
#ifdef DKS_MIC #ifdef DKS_MIC
micbase = new MICBase(); micbase = new MICBase();
micfft = new MICFFT(micbase);
miccol = new MICCollimatorPhysics(micbase);
micgreens = new MICGreensFunction(micbase);
micchi = new MICChiSquare(micbase); micchi = new MICChiSquare(micbase);
#endif #endif
@ -139,26 +130,17 @@ DKSBase::DKSBase(const char* api_name, const char* device_name) {
#ifdef DKS_CUDA #ifdef DKS_CUDA
cbase = new CudaBase(); cbase = new CudaBase();
cfft = new CudaFFT(cbase);
cgreens = new CudaGreensFunction(cbase);
cchi = new CudaChiSquare(cbase); cchi = new CudaChiSquare(cbase);
ccol = new CudaCollimatorPhysics(cbase);
#endif #endif
#ifdef DKS_OPENCL #ifdef DKS_OPENCL
oclbase = new OpenCLBase(); oclbase = new OpenCLBase();
oclfft = new OpenCLFFT(oclbase);
oclchi = new OpenCLChiSquare(oclbase); oclchi = new OpenCLChiSquare(oclbase);
oclcol = new OpenCLCollimatorPhysics(oclbase);
oclgreens = new OpenCLGreensFunction(oclbase);
#endif #endif
#ifdef DKS_MIC #ifdef DKS_MIC
micbase = new MICBase(); micbase = new MICBase();
micfft = new MICFFT(micbase);
miccol = new MICCollimatorPhysics(micbase); miccol = new MICCollimatorPhysics(micbase);
micgreens = new MICGreensFunction(micbase);
micchi = new MICChiSquare(micbase);
#endif #endif
} }
@ -175,28 +157,22 @@ DKSBase::~DKSBase() {
if (m_function_name != NULL) if (m_function_name != NULL)
delete[] m_function_name; delete[] m_function_name;
delete dksfft;
delete dkscol;
delete dksgreens;
#ifdef DKS_CUDA #ifdef DKS_CUDA
delete cfft;
delete cgreens;
delete cchi; delete cchi;
delete ccol;
delete cbase; delete cbase;
#endif #endif
#ifdef DKS_OPENCL #ifdef DKS_OPENCL
delete oclfft;
delete oclchi; delete oclchi;
delete oclcol;
delete oclbase; delete oclbase;
delete oclgreens;
#endif #endif
#ifdef DKS_MIC #ifdef DKS_MIC
delete micfft;
delete miccol;
delete micgreens;
delete micchi; delete micchi;
delete micbase; delete micbase;
#endif #endif
@ -311,38 +287,70 @@ int DKSBase::getDeviceList(std::vector<int> &devices) {
return DKS_ERROR; return DKS_ERROR;
} }
int DKSBase::setup() {
int ierr = DKS_ERROR;
if (apiOpenCL()) {
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
//TODO: only enable if AMD libraries are available
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(oclbase) );
dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(oclbase) );
dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(oclbase) );
} else if (apiCuda()) {
ierr = CUDA_SAFECALL( DKS_SUCCESS );
dksfft = CUDA_SAFEINIT( new CudaFFT(cbase) );
dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(cbase) );
dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(cbase) );
} else if (apiOpenMP()) {
ierr = MIC_SAFECALL( DKS_SUCCESS );
dksfft = MIC_SAFEINIT( new MICFFT(micbase) );
dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(micbase) );
dksgreens = MIC_SAFEINIT( new MICGreensFunction(micbase) );
} else {
ierr = DKS_ERROR;
}
return ierr;
}
/* /*
init device init device
*/ */
int DKSBase::initDevice() { int DKSBase::initDevice() {
int ierr = DKS_ERROR;
//if api is not set default is OpenCL //if api is not set default is OpenCL
if (!m_api_set) { if (!m_api_set) {
setDevice("-gpu", 4); setDevice("-gpu", 4);
setAPI(API_OPENCL, 6); setAPI(API_OPENCL, 6);
return OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") ); ierr = OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") );
} else { } else {
if (apiOpenCL()) { if (apiOpenCL()) {
if (!m_device_set) { if (!m_device_set) {
setDevice("-gpu", 4); setDevice("-gpu", 4);
setAPI(API_OPENCL, 6); setAPI(API_OPENCL, 6);
return OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") ); ierr = OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") );
} else { } else {
setAPI(API_OPENCL, 6); setAPI(API_OPENCL, 6);
return OPENCL_SAFECALL( oclbase->ocl_setUp(m_device_name) ); ierr = OPENCL_SAFECALL( oclbase->ocl_setUp(m_device_name) );
} }
} else if (apiCuda()) { } else if (apiCuda()) {
setDevice("-gpu", 4); setDevice("-gpu", 4);
setAPI(API_CUDA, 4); setAPI(API_CUDA, 4);
return CUDA_SAFECALL(DKS_SUCCESS); ierr = CUDA_SAFECALL(DKS_SUCCESS);
} else if (apiOpenMP()) { } else if (apiOpenMP()) {
setDevice("-mic", 4); setDevice("-mic", 4);
setAPI(API_OPENMP, 6); setAPI(API_OPENMP, 6);
return MIC_SAFECALL(DKS_SUCCESS); ierr = MIC_SAFECALL(DKS_SUCCESS);
} }
} }
return DKS_ERROR; if (ierr == DKS_SUCCESS)
ierr = setup();
return ierr;
} }
/* /*
@ -464,11 +472,11 @@ int DKSBase::syncDevice() {
int DKSBase::setupFFT(int ndim, int N[3]) { int DKSBase::setupFFT(int ndim, int N[3]) {
if (apiCuda()) { if (apiCuda()) {
return CUDA_SAFECALL( cfft->setupFFT(ndim, N) ); return dksfft->setupFFT(ndim, N);
} else if (apiOpenCL()) { } else if (apiOpenCL()) {
int ierr1 = OPENCL_SAFECALL( oclfft->setupFFT(ndim, N) ); int ierr1 = dksfft->setupFFT(ndim, N);
int ierr2 = OPENCL_SAFECALL( oclfft->setupFFTRC(ndim, N) ); int ierr2 = dksfft->setupFFTRC(ndim, N);
int ierr3 = OPENCL_SAFECALL( oclfft->setupFFTCR(ndim, N) ); int ierr3 = dksfft->setupFFTCR(ndim, N);
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS) if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
return DKS_ERROR; return DKS_ERROR;
@ -476,8 +484,8 @@ int DKSBase::setupFFT(int ndim, int N[3]) {
} else if (apiOpenMP()) { } else if (apiOpenMP()) {
//micbase.mic_setupFFT(ndim, N); //micbase.mic_setupFFT(ndim, N);
//BENI: setting up RC and CR transformations on MIC //BENI: setting up RC and CR transformations on MIC
int ierr1 = MIC_SAFECALL( micfft->setupFFTRC(ndim, N, 1.) ); int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
int ierr2 = MIC_SAFECALL( micfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2])) ); int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
if (ierr1 != DKS_SUCCESS) if (ierr1 != DKS_SUCCESS)
return ierr1; return ierr1;
if (ierr2 != DKS_SUCCESS) if (ierr2 != DKS_SUCCESS)
@ -492,11 +500,11 @@ int DKSBase::setupFFT(int ndim, int N[3]) {
int DKSBase::setupFFTRC(int ndim, int N[3], double scale) { int DKSBase::setupFFTRC(int ndim, int N[3], double scale) {
if (apiCuda()) if (apiCuda())
return CUDA_SAFECALL(cfft->setupFFT(ndim, N)); return dksfft->setupFFT(ndim, N);
if (apiOpenCL()) if (apiOpenCL())
return OPENCL_SAFECALL(oclfft->setupFFTRC(ndim, N)); return dksfft->setupFFTRC(ndim, N);
else if (apiOpenMP()) else if (apiOpenMP())
return MIC_SAFECALL(micfft->setupFFTRC(ndim, N, scale)); return dksfft->setupFFTRC(ndim, N, scale);
return DKS_ERROR; return DKS_ERROR;
@ -506,11 +514,11 @@ int DKSBase::setupFFTRC(int ndim, int N[3], double scale) {
int DKSBase::setupFFTCR(int ndim, int N[3], double scale) { int DKSBase::setupFFTCR(int ndim, int N[3], double scale) {
if (apiCuda()) if (apiCuda())
return CUDA_SAFECALL(cfft->setupFFT(ndim, N)); return dksfft->setupFFT(ndim, N);
if (apiOpenCL()) if (apiOpenCL())
return OPENCL_SAFECALL(oclfft->setupFFTCR(ndim, N)); return dksfft->setupFFTCR(ndim, N);
else if (apiOpenMP()) else if (apiOpenMP())
return MIC_SAFECALL(micfft->setupFFTCR(ndim, N, scale)); return dksfft->setupFFTCR(ndim, N, scale);
return DKS_ERROR; return DKS_ERROR;
@ -519,17 +527,10 @@ int DKSBase::setupFFTCR(int ndim, int N[3], double scale) {
/* call OpenCL FFT function for selected platform */ /* call OpenCL FFT function for selected platform */
int DKSBase::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { int DKSBase::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
if (apiOpenCL()) { if (apiOpenCL() || apiOpenMP())
//load kernel and execute return dksfft->executeFFT(data_ptr, ndim, dimsize);
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) else if (apiCuda())
return OPENCL_SAFECALL( oclfft->executeFFT(data_ptr, ndim, dimsize) ); return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
else
return DKS_ERROR;
} else if (apiCuda()) {
return CUDA_SAFECALL(cfft->executeFFT(data_ptr, ndim, dimsize, streamId));
} else if (apiOpenMP()) {
return MIC_SAFECALL(micfft->executeFFT(data_ptr, ndim, dimsize));
}
DEBUG_MSG("No implementation for selected platform"); DEBUG_MSG("No implementation for selected platform");
return DKS_ERROR; return DKS_ERROR;
@ -537,16 +538,10 @@ int DKSBase::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
/* call OpenCL IFFT function for selected platform */ /* call OpenCL IFFT function for selected platform */
int DKSBase::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { int DKSBase::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
if (apiOpenCL()) { if (apiOpenCL() || apiOpenMP())
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) return dksfft->executeIFFT(data_ptr, ndim, dimsize);
return OPENCL_SAFECALL( oclfft->executeIFFT(data_ptr, ndim, dimsize) ); else if (apiCuda())
else return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
return DKS_ERROR;
} else if (apiCuda()) {
return CUDA_SAFECALL( cfft->executeIFFT(data_ptr, ndim, dimsize, streamId) );
} else if (apiOpenMP()) {
return MIC_SAFECALL( micfft->executeIFFT(data_ptr, ndim, dimsize) );
}
DEBUG_MSG("No implementation for selected platform"); DEBUG_MSG("No implementation for selected platform");
return DKS_ERROR; return DKS_ERROR;
@ -557,13 +552,13 @@ int DKSBase::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int str
if (apiOpenCL()) { if (apiOpenCL()) {
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
return OPENCL_SAFECALL( oclfft->normalizeFFT(data_ptr, ndim, dimsize) ); return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
else else
return DKS_ERROR; return DKS_ERROR;
} else if (apiCuda()) { } else if (apiCuda()) {
return CUDA_SAFECALL( cfft->normalizeFFT(data_ptr, ndim, dimsize, streamId) ); return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
} else if (apiOpenMP()) { } else if (apiOpenMP()) {
return MIC_SAFECALL( micfft->normalizeFFT(data_ptr, ndim, dimsize) ); return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
} }
DEBUG_MSG("No implementation for selected platform"); DEBUG_MSG("No implementation for selected platform");
@ -574,11 +569,9 @@ int DKSBase::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int str
int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
if (apiCuda()) if (apiCuda())
return CUDA_SAFECALL( cfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId) ); return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
else if (apiOpenCL()) else if (apiOpenCL() || apiOpenMP())
return OPENCL_SAFECALL( oclfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize) ); return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
else if (apiOpenMP())
return MIC_SAFECALL( micfft->executeRCFFT(real_ptr,comp_ptr, ndim, dimsize) );
DEBUG_MSG("No implementation for selected platform"); DEBUG_MSG("No implementation for selected platform");
return DKS_ERROR; return DKS_ERROR;
@ -587,11 +580,9 @@ int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[
/* call complex to real FFT */ /* call complex to real FFT */
int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
if (apiCuda()) if (apiCuda())
return CUDA_SAFECALL( cfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId) ); return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
else if (apiOpenCL()) else if (apiOpenCL() || apiOpenMP())
return OPENCL_SAFECALL( oclfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize) ); return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
else if (apiOpenMP())
return MIC_SAFECALL( micfft->executeCRFFT(comp_ptr,real_ptr, ndim, dimsize) );
DEBUG_MSG("No implementation for selected platform"); DEBUG_MSG("No implementation for selected platform");
return DKS_ERROR; return DKS_ERROR;
@ -600,72 +591,38 @@ int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[
/* normalize complex to real iFFT */ /* normalize complex to real iFFT */
int DKSBase::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) { int DKSBase::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
if (apiCuda()) if (apiCuda())
return CUDA_SAFECALL( cfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId) ); return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
else if (apiOpenCL()) else if (apiOpenCL())
return DKS_SUCCESS; return DKS_ERROR;
else if (apiOpenMP()) else if (apiOpenMP())
return DKS_SUCCESS; return DKS_ERROR;
DEBUG_MSG("No implementation for selected platform"); DEBUG_MSG("No implementation for selected platform");
return DKS_SUCCESS; return DKS_ERROR;
} }
int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ, int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
double hz_m0, double hz_m1, double hz_m2, int streamId) { double hz_m0, double hz_m1, double hz_m2, int streamId) {
if (apiCuda()) { return dksgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
return CUDA_SAFECALL(cgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ, hz_m0, hz_m1, hz_m2, streamId);
hz_m0, hz_m1, hz_m2, streamId) );
} else if (apiOpenCL()) {
return OPENCL_SAFECALL(oclgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
hz_m0, hz_m1, hz_m2) );
} else if (apiOpenMP()) {
//BENI:
return MIC_SAFECALL(micgreens->greensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2));
}
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr, int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
int I, int J, int K, int streamId) { int I, int J, int K, int streamId) {
if (apiCuda()) return dksgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId);
return CUDA_SAFECALL(cgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId));
else if (apiOpenCL())
return OPENCL_SAFECALL(oclgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K));
else if (apiOpenMP())
return MIC_SAFECALL(micgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K));
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) { int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
if (apiCuda()) return dksgreens->mirrorRhoField(mem_ptr, I, J, K, streamId);
return CUDA_SAFECALL(cgreens->mirrorRhoField(mem_ptr, I, J, K, streamId));
else if (apiOpenCL())
return OPENCL_SAFECALL(oclgreens->mirrorRhoField(mem_ptr, I, J, K, streamId));
else if (apiOpenMP())
return MIC_SAFECALL(micgreens->mirrorRhoField(mem_ptr, I, J, K));
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) { int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) {
if (apiCuda()) return dksgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId);
return CUDA_SAFECALL(cgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId));
else if (apiOpenCL())
return OPENCL_SAFECALL(oclgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size));
else if (apiOpenMP())
return MIC_SAFECALL(micgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size));
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
@ -748,19 +705,7 @@ int DKSBase::callCollimatorPhysics(void *mem_ptr, void *par_ptr,
int &numaddback, int &numdead) int &numaddback, int &numdead)
{ {
if (apiCuda()) { return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
return CUDA_SAFECALL(ccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles));
} else if (apiOpenCL()) {
if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl") == DKS_SUCCESS)
return OPENCL_SAFECALL(oclcol->CollimatorPhysics(mem_ptr, par_ptr, numparticles));
else
return DKS_ERROR;
} else if (apiOpenMP()) {
return MIC_SAFECALL(miccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles));
}
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
@ -768,13 +713,8 @@ int DKSBase::callCollimatorPhysics(void *mem_ptr, void *par_ptr,
int DKSBase::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles) int DKSBase::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles)
{ {
if (apiCuda()) return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
return CUDA_SAFECALL( ccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles) );
else if (apiOpenMP())
return MIC_SAFECALL( miccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles) );
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
int DKSBase::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr, int DKSBase::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
@ -783,28 +723,21 @@ int DKSBase::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
void *par_ptr, int numparticles) void *par_ptr, int numparticles)
{ {
if (apiOpenMP()) {
return MIC_SAFECALL( miccol->CollimatorPhysicsSoA(label_ptr, localID_ptr, return dkscol->CollimatorPhysicsSoA(label_ptr, localID_ptr,
rx_ptr, ry_ptr, rz_ptr, rx_ptr, ry_ptr, rz_ptr,
px_ptr, py_ptr, pz_ptr, px_ptr, py_ptr, pz_ptr,
par_ptr, numparticles) ); par_ptr, numparticles);
}
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
int DKSBase::callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback) int DKSBase::callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback)
{ {
if (apiCuda())
return CUDA_SAFECALL(ccol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback));
else if (apiOpenMP())
return MIC_SAFECALL(miccol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback));
DEBUG_MSG("No implementation for selceted platform"); return dkscol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback);
return DKS_ERROR;
} }
int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr, int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
@ -813,15 +746,10 @@ int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
void *par_ptr, int numparticles, int &numaddback) void *par_ptr, int numparticles, int &numaddback)
{ {
if (apiOpenMP()) { return MIC_SAFECALL(dkscol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr,
return MIC_SAFECALL(miccol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr,
rx_ptr, ry_ptr, rz_ptr, rx_ptr, ry_ptr, rz_ptr,
px_ptr, py_ptr, pz_ptr, px_ptr, py_ptr, pz_ptr,
par_ptr, numparticles, numaddback)); par_ptr, numparticles, numaddback));
}
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
@ -844,15 +772,7 @@ int DKSBase::callParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart,
bool usedt, int streamId) bool usedt, int streamId)
{ {
if (apiCuda()) return dkscol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, c, usedt, streamId);
return CUDA_SAFECALL(ccol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, c,
usedt, streamId));
else if (apiOpenMP())
return MIC_SAFECALL(miccol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt,
c, usedt, streamId));
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }
@ -862,20 +782,8 @@ int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
double c, bool usedt, int streamId) double c, bool usedt, int streamId)
{ {
if (apiCuda()) { return dkscol->ParallelTTrackerPushTransform(x_ptr, p_ptr, lastSec_ptr, orient_ptr,
return CUDA_SAFECALL(ccol->ParallelTTrackerPushTransform(x_ptr, p_ptr, npart, nsec, dt_ptr, dt, c, usedt, streamId);
lastSec_ptr, orient_ptr,
npart, nsec, dt_ptr, dt,
c, usedt, streamId));
} else if (apiOpenMP()) {
return MIC_SAFECALL(miccol->ParallelTTrackerPushTransform(x_ptr, p_ptr,
lastSec_ptr, orient_ptr,
npart, nsec, dt_ptr, dt,
c, usedt, streamId));
}
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
} }

View File

@ -29,8 +29,11 @@
#endif #endif
#include "OpenCL/OpenCLBase.h" #include "OpenCL/OpenCLBase.h"
#include "OpenCL/OpenCLFFT.h"
#include "OpenCL/OpenCLChiSquare.h" #include "OpenCL/OpenCLChiSquare.h"
#endif
#ifdef DKS_AMD
#include "OpenCL/OpenCLFFT.h"
#include "OpenCL/OpenCLCollimatorPhysics.h" #include "OpenCL/OpenCLCollimatorPhysics.h"
#include "OpenCL/OpenCLGreensFunction.h" #include "OpenCL/OpenCLGreensFunction.h"
#endif #endif
@ -52,6 +55,7 @@
#include "MIC/MICGreensFunction.hpp" #include "MIC/MICGreensFunction.hpp"
#endif #endif
#include "Algorithms/GreensFunction.h"
#include "Algorithms/CollimatorPhysics.h" #include "Algorithms/CollimatorPhysics.h"
#include "Algorithms/FFT.h" #include "Algorithms/FFT.h"
@ -72,27 +76,22 @@ private:
bool m_auto_tuning; bool m_auto_tuning;
bool m_use_config; bool m_use_config;
DKSFFT *dksfft;
DKSCollimatorPhysics *dkscol;
GreensFunction *dksgreens;
#ifdef DKS_OPENCL #ifdef DKS_OPENCL
OpenCLBase *oclbase; OpenCLBase *oclbase;
OpenCLFFT *oclfft;
OpenCLChiSquare *oclchi; OpenCLChiSquare *oclchi;
OpenCLCollimatorPhysics *oclcol;
OpenCLGreensFunction *oclgreens;
#endif #endif
#ifdef DKS_CUDA #ifdef DKS_CUDA
CudaBase *cbase; CudaBase *cbase;
CudaFFT *cfft;
CudaGreensFunction *cgreens;
CudaChiSquare *cchi; CudaChiSquare *cchi;
CudaCollimatorPhysics *ccol;
#endif #endif
#ifdef DKS_MIC #ifdef DKS_MIC
MICBase *micbase; MICBase *micbase;
MICFFT *micfft;
MICCollimatorPhysics *miccol;
MICGreensFunction *micgreens;
MICChiSquare *micchi; MICChiSquare *micchi;
#endif #endif
@ -156,6 +155,11 @@ protected:
return device_name; return device_name;
} }
/** Private function to initialize objects based on the device used.
*
*/
int setup();
public: public:
/** /**

View File

@ -62,6 +62,12 @@
#define OPENCL_SAFEINIT(x) ( NULL ) #define OPENCL_SAFEINIT(x) ( NULL )
#endif #endif
#ifdef DKS_AMD
#define OPENCL_SAFEINIT_AMD(x) ( x )
#else
#define OPENCL_SAFEINIT_AMD(x) ( NULL )
#endif
#ifdef DKS_MIC #ifdef DKS_MIC
#define MIC_SAFEINIT(x) ( x ) #define MIC_SAFEINIT(x) ( x )
#else #else

View File

@ -29,8 +29,8 @@ public:
~MICGreensFunction(); ~MICGreensFunction();
/* compute greens integral analytically */ /* compute greens integral analytically */
int greensIntegral(void * tmpgreen_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2, int greensIntegral(void * tmpgreen_, int I, int J, int K, int NI, int NJ,
int streamId = -1); double hr_m0, double hr_m1, double hr_m2, int streamId = -1);
/* perform the actual integration */ /* perform the actual integration */
int integrationGreensFunction(void * rho2_m, void * tmpgreen,int I,int J, int K, int integrationGreensFunction(void * rho2_m, void * tmpgreen,int I,int J, int K,

View File

@ -1,4 +1,7 @@
SET (_SRCS #dont include FFT, GreensFunction and CollimatorPhysics if clFFT and clRNG not found
IF (ENABLE_AMD)
SET (_SRCS
OpenCLBase.cpp OpenCLBase.cpp
OpenCLFFT.cpp OpenCLFFT.cpp
OpenCLChiSquare.cpp OpenCLChiSquare.cpp
@ -7,7 +10,7 @@ SET (_SRCS
OpenCLGreensFunction.cpp OpenCLGreensFunction.cpp
) )
SET (_HDRS SET (_HDRS
OpenCLBase.h OpenCLBase.h
OpenCLFFT.h OpenCLFFT.h
OpenCLChiSquare.h OpenCLChiSquare.h
@ -15,10 +18,19 @@ SET (_HDRS
OpenCLChiSquareRuntime.h OpenCLChiSquareRuntime.h
OpenCLGreensFunction.h OpenCLGreensFunction.h
) )
ELSE (ENABLE_AMD)
SET (_SRCS
OpenCLBase.cpp
OpenCLChiSquare.cpp
OpenCLChiSquareRuntime.cpp
)
#INCLUDE_DIRECTORIES ( SET (_HDRS
# ${CMAKE_CURRENT_SOURCE_DIR} OpenCLBase.h
#) OpenCLChiSquare.h
OpenCLChiSquareRuntime.h
)
ENDIF (ENABLE_AMD)
SET (_KERNELS SET (_KERNELS
OpenCLKernels/OpenCLChiSquare.cl OpenCLKernels/OpenCLChiSquare.cl