diff --git a/CMakeLists.txt b/CMakeLists.txt index c2a8fb7..4df9aab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,22 +39,27 @@ IF (Boost_FOUND) ENDIF (Boost_FOUND) #find clFFT -SET (clFFT_USE_STATIC_LIBS OFF) -FIND_PACKAGE(clFFT REQUIRED HINTS $ENV{CLFFT_PREFIX} $ENV{CLFFT_DIR} $ENV{CLFFT}) -MESSAGE (STATUS "Found clFFT library: ${CLFFT_LIBRARIES}") -MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}") -INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) -LINK_DIRECTORIES (${CLFFT_LIBRARIES}) +OPTION (ENABLE_AMD "Enable AMD libraries" OFF) +IF (ENABLE_AMD) + SET (clFFT_USE_STATIC_LIBS OFF) + FIND_PACKAGE(clFFT REQUIRED HINTS $ENV{CLFFT_PREFIX} $ENV{CLFFT_DIR} $ENV{CLFFT}) + MESSAGE (STATUS "Found clFFT library: ${CLFFT_LIBRARIES}") + MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}") + INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) + LINK_DIRECTORIES (${CLFFT_LIBRARIES}) -#find clRNG -#SET (clRNG_USE_STATIC_LIBS OFF) -#FIND_PACKAGE(clRng REQUIRED HINTS &ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG}) -#MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}") -#MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}") -#INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS}) -#LINK_DIRECTORIES (${CLRNG_LIBRARIES}) -#find_package(PkgConfig) -#pkg_check_modules(clRng REQUIRED) + #find clRNG + #SET (clRNG_USE_STATIC_LIBS OFF) + #FIND_PACKAGE(clRng REQUIRED HINTS $ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG}) + #MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}") + #MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}") + #INCLUDE_DIRECTORIES (${CLRNG_INCLUDE_DIRS}) + #LINK_DIRECTORIES (${CLRNG_LIBRARIES}) + #find_package(PkgConfig) + #pkg_check_modules(clRng REQUIRED) + + SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_AMD") +ENDIF (ENABLE_AMD) #enable UQTK OPTION (USE_UQTK "Use UQTK" OFF) diff --git a/src/Algorithms/CollimatorPhysics.h b/src/Algorithms/CollimatorPhysics.h index b7e8190..a2f19ae 100644 --- 
a/src/Algorithms/CollimatorPhysics.h +++ b/src/Algorithms/CollimatorPhysics.h @@ -5,10 +5,7 @@ #include #include "../DKSDefinitions.h" -class DKSBaseMuSR; - class DKSCollimatorPhysics { - friend class DKSBaseMuSR; protected: diff --git a/src/DKSBase.cpp b/src/DKSBase.cpp index c66a003..57567db 100644 --- a/src/DKSBase.cpp +++ b/src/DKSBase.cpp @@ -103,25 +103,16 @@ DKSBase::DKSBase() { #ifdef DKS_CUDA cbase = new CudaBase(); - cfft = new CudaFFT(cbase); - cgreens = new CudaGreensFunction(cbase); cchi = new CudaChiSquare(cbase); - ccol = new CudaCollimatorPhysics(cbase); #endif #ifdef DKS_OPENCL oclbase = new OpenCLBase(); - oclfft = new OpenCLFFT(oclbase); oclchi = new OpenCLChiSquare(oclbase); - oclcol = new OpenCLCollimatorPhysics(oclbase); - oclgreens = new OpenCLGreensFunction(oclbase); #endif #ifdef DKS_MIC micbase = new MICBase(); - micfft = new MICFFT(micbase); - miccol = new MICCollimatorPhysics(micbase); - micgreens = new MICGreensFunction(micbase); micchi = new MICChiSquare(micbase); #endif @@ -139,26 +130,17 @@ DKSBase::DKSBase(const char* api_name, const char* device_name) { #ifdef DKS_CUDA cbase = new CudaBase(); - cfft = new CudaFFT(cbase); - cgreens = new CudaGreensFunction(cbase); cchi = new CudaChiSquare(cbase); - ccol = new CudaCollimatorPhysics(cbase); #endif #ifdef DKS_OPENCL oclbase = new OpenCLBase(); - oclfft = new OpenCLFFT(oclbase); oclchi = new OpenCLChiSquare(oclbase); - oclcol = new OpenCLCollimatorPhysics(oclbase); - oclgreens = new OpenCLGreensFunction(oclbase); #endif #ifdef DKS_MIC micbase = new MICBase(); - micfft = new MICFFT(micbase); - miccol = new MICCollimatorPhysics(micbase); - micgreens = new MICGreensFunction(micbase); micchi = new MICChiSquare(micbase); #endif } @@ -175,28 +157,22 @@ DKSBase::~DKSBase() { if (m_function_name != NULL) delete[] m_function_name; + delete dksfft; + delete dkscol; + delete dksgreens; #ifdef DKS_CUDA - delete cfft; - delete cgreens; delete cchi; - delete ccol; delete cbase; 
#endif #ifdef DKS_OPENCL - delete oclfft; delete oclchi; - delete oclcol; delete oclbase; - delete oclgreens; #endif #ifdef DKS_MIC - delete micfft; - delete miccol; - delete micgreens; delete micchi; delete micbase; #endif @@ -311,38 +287,70 @@ int DKSBase::getDeviceList(std::vector &devices) { return DKS_ERROR; } +int DKSBase::setup() { + + int ierr = DKS_ERROR; + + if (apiOpenCL()) { + ierr = OPENCL_SAFECALL( DKS_SUCCESS ); + //TODO: only enable if AMD libraries are available + dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(oclbase) ); + dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(oclbase) ); + dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(oclbase) ); + } else if (apiCuda()) { + ierr = CUDA_SAFECALL( DKS_SUCCESS ); + dksfft = CUDA_SAFEINIT( new CudaFFT(cbase) ); + dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(cbase) ); + dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(cbase) ); + } else if (apiOpenMP()) { + ierr = MIC_SAFECALL( DKS_SUCCESS ); + dksfft = MIC_SAFEINIT( new MICFFT(micbase) ); + dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(micbase) ); + dksgreens = MIC_SAFEINIT( new MICGreensFunction(micbase) ); + } else { + ierr = DKS_ERROR; + } + + return ierr; +} + /* init device */ int DKSBase::initDevice() { + int ierr = DKS_ERROR; + //if api is not set default is OpenCL if (!m_api_set) { setDevice("-gpu", 4); setAPI(API_OPENCL, 6); - return OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") ); + ierr = OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") ); } else { if (apiOpenCL()) { if (!m_device_set) { setDevice("-gpu", 4); setAPI(API_OPENCL, 6); - return OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") ); + ierr = OPENCL_SAFECALL( oclbase->ocl_setUp("-gpu") ); } else { setAPI(API_OPENCL, 6); - return OPENCL_SAFECALL( oclbase->ocl_setUp(m_device_name) ); + ierr = OPENCL_SAFECALL( oclbase->ocl_setUp(m_device_name) ); } } else if (apiCuda()) { setDevice("-gpu", 4); setAPI(API_CUDA, 4); - return CUDA_SAFECALL(DKS_SUCCESS); + ierr = 
CUDA_SAFECALL(DKS_SUCCESS); } else if (apiOpenMP()) { setDevice("-mic", 4); setAPI(API_OPENMP, 6); - return MIC_SAFECALL(DKS_SUCCESS); + ierr = MIC_SAFECALL(DKS_SUCCESS); } } - return DKS_ERROR; + if (ierr == DKS_SUCCESS) + ierr = setup(); + + return ierr; } /* @@ -464,11 +472,11 @@ int DKSBase::syncDevice() { int DKSBase::setupFFT(int ndim, int N[3]) { if (apiCuda()) { - return CUDA_SAFECALL( cfft->setupFFT(ndim, N) ); + return dksfft->setupFFT(ndim, N); } else if (apiOpenCL()) { - int ierr1 = OPENCL_SAFECALL( oclfft->setupFFT(ndim, N) ); - int ierr2 = OPENCL_SAFECALL( oclfft->setupFFTRC(ndim, N) ); - int ierr3 = OPENCL_SAFECALL( oclfft->setupFFTCR(ndim, N) ); + int ierr1 = dksfft->setupFFT(ndim, N); + int ierr2 = dksfft->setupFFTRC(ndim, N); + int ierr3 = dksfft->setupFFTCR(ndim, N); if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS) return DKS_ERROR; @@ -476,8 +484,8 @@ int DKSBase::setupFFT(int ndim, int N[3]) { } else if (apiOpenMP()) { //micbase.mic_setupFFT(ndim, N); //BENI: setting up RC and CR transformations on MIC - int ierr1 = MIC_SAFECALL( micfft->setupFFTRC(ndim, N, 1.) 
); - int ierr2 = MIC_SAFECALL( micfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2])) ); + int ierr1 = dksfft->setupFFTRC(ndim, N, 1.); + int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2])); if (ierr1 != DKS_SUCCESS) return ierr1; if (ierr2 != DKS_SUCCESS) @@ -492,11 +500,11 @@ int DKSBase::setupFFT(int ndim, int N[3]) { int DKSBase::setupFFTRC(int ndim, int N[3], double scale) { if (apiCuda()) - return CUDA_SAFECALL(cfft->setupFFT(ndim, N)); + return dksfft->setupFFT(ndim, N); if (apiOpenCL()) - return OPENCL_SAFECALL(oclfft->setupFFTRC(ndim, N)); + return dksfft->setupFFTRC(ndim, N); else if (apiOpenMP()) - return MIC_SAFECALL(micfft->setupFFTRC(ndim, N, scale)); + return dksfft->setupFFTRC(ndim, N, scale); return DKS_ERROR; @@ -506,11 +514,11 @@ int DKSBase::setupFFTRC(int ndim, int N[3], double scale) { int DKSBase::setupFFTCR(int ndim, int N[3], double scale) { if (apiCuda()) - return CUDA_SAFECALL(cfft->setupFFT(ndim, N)); + return dksfft->setupFFT(ndim, N); if (apiOpenCL()) - return OPENCL_SAFECALL(oclfft->setupFFTCR(ndim, N)); + return dksfft->setupFFTCR(ndim, N); else if (apiOpenMP()) - return MIC_SAFECALL(micfft->setupFFTCR(ndim, N, scale)); + return dksfft->setupFFTCR(ndim, N, scale); return DKS_ERROR; @@ -519,34 +527,21 @@ int DKSBase::setupFFTCR(int ndim, int N[3], double scale) { /* call OpenCL FFT function for selected platform */ int DKSBase::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { - if (apiOpenCL()) { - //load kernel and execute - if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) - return OPENCL_SAFECALL( oclfft->executeFFT(data_ptr, ndim, dimsize) ); - else - return DKS_ERROR; - } else if (apiCuda()) { - return CUDA_SAFECALL(cfft->executeFFT(data_ptr, ndim, dimsize, streamId)); - } else if (apiOpenMP()) { - return MIC_SAFECALL(micfft->executeFFT(data_ptr, ndim, dimsize)); - } - + if (apiOpenCL() || apiOpenMP()) + return dksfft->executeFFT(data_ptr, ndim, dimsize); + else if (apiCuda()) + 
return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId); + DEBUG_MSG("No implementation for selected platform"); return DKS_ERROR; } /* call OpenCL IFFT function for selected platform */ int DKSBase::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { - if (apiOpenCL()) { - if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) - return OPENCL_SAFECALL( oclfft->executeIFFT(data_ptr, ndim, dimsize) ); - else - return DKS_ERROR; - } else if (apiCuda()) { - return CUDA_SAFECALL( cfft->executeIFFT(data_ptr, ndim, dimsize, streamId) ); - } else if (apiOpenMP()) { - return MIC_SAFECALL( micfft->executeIFFT(data_ptr, ndim, dimsize) ); - } + if (apiOpenCL() || apiOpenMP()) + return dksfft->executeIFFT(data_ptr, ndim, dimsize); + else if (apiCuda()) + return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId); DEBUG_MSG("No implementation for selected platform"); return DKS_ERROR; @@ -557,13 +552,13 @@ int DKSBase::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int str if (apiOpenCL()) { if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) - return OPENCL_SAFECALL( oclfft->normalizeFFT(data_ptr, ndim, dimsize) ); + return dksfft->normalizeFFT(data_ptr, ndim, dimsize); else return DKS_ERROR; } else if (apiCuda()) { - return CUDA_SAFECALL( cfft->normalizeFFT(data_ptr, ndim, dimsize, streamId) ); + return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId); } else if (apiOpenMP()) { - return MIC_SAFECALL( micfft->normalizeFFT(data_ptr, ndim, dimsize) ); + return dksfft->normalizeFFT(data_ptr, ndim, dimsize); } DEBUG_MSG("No implementation for selected platform"); @@ -574,11 +569,9 @@ int DKSBase::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int str int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { if (apiCuda()) - return CUDA_SAFECALL( cfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId) ); - else if (apiOpenCL()) - 
return OPENCL_SAFECALL( oclfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize) ); - else if (apiOpenMP()) - return MIC_SAFECALL( micfft->executeRCFFT(real_ptr,comp_ptr, ndim, dimsize) ); + return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId); + else if (apiOpenCL() || apiOpenMP()) + return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize); DEBUG_MSG("No implementation for selected platform"); return DKS_ERROR; @@ -587,11 +580,9 @@ int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[ /* call complex to real FFT */ int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { if (apiCuda()) - return CUDA_SAFECALL( cfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId) ); - else if (apiOpenCL()) - return OPENCL_SAFECALL( oclfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize) ); - else if (apiOpenMP()) - return MIC_SAFECALL( micfft->executeCRFFT(comp_ptr,real_ptr, ndim, dimsize) ); + return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId); + else if (apiOpenCL() || apiOpenMP()) + return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize); DEBUG_MSG("No implementation for selected platform"); return DKS_ERROR; @@ -600,72 +591,38 @@ int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[ /* normalize complex to real iFFT */ int DKSBase::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) { if (apiCuda()) - return CUDA_SAFECALL( cfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId) ); + return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId); else if (apiOpenCL()) - return DKS_SUCCESS; + return DKS_ERROR; else if (apiOpenMP()) - return DKS_SUCCESS; + return DKS_ERROR; DEBUG_MSG("No implementation for selected platform"); - return DKS_SUCCESS; + return DKS_ERROR; } int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ, double hz_m0, double hz_m1, double hz_m2, 
int streamId) { - if (apiCuda()) { - return CUDA_SAFECALL(cgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ, - hz_m0, hz_m1, hz_m2, streamId) ); - } else if (apiOpenCL()) { - return OPENCL_SAFECALL(oclgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ, - hz_m0, hz_m1, hz_m2) ); - } else if (apiOpenMP()) { - //BENI: - return MIC_SAFECALL(micgreens->greensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2)); - } + return dksgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ, + hz_m0, hz_m1, hz_m2, streamId); - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; } int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr, int I, int J, int K, int streamId) { - if (apiCuda()) - return CUDA_SAFECALL(cgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId)); - else if (apiOpenCL()) - return OPENCL_SAFECALL(oclgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K)); - else if (apiOpenMP()) - return MIC_SAFECALL(micgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K)); - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + return dksgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId); } int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) { - if (apiCuda()) - return CUDA_SAFECALL(cgreens->mirrorRhoField(mem_ptr, I, J, K, streamId)); - else if (apiOpenCL()) - return OPENCL_SAFECALL(oclgreens->mirrorRhoField(mem_ptr, I, J, K, streamId)); - else if (apiOpenMP()) - return MIC_SAFECALL(micgreens->mirrorRhoField(mem_ptr, I, J, K)); - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + return dksgreens->mirrorRhoField(mem_ptr, I, J, K, streamId); } int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) { - - if (apiCuda()) - return CUDA_SAFECALL(cgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId)); - else if (apiOpenCL()) - return 
OPENCL_SAFECALL(oclgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size)); - else if (apiOpenMP()) - return MIC_SAFECALL(micgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size)); - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + + return dksgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId); } @@ -748,19 +705,7 @@ int DKSBase::callCollimatorPhysics(void *mem_ptr, void *par_ptr, int &numaddback, int &numdead) { - if (apiCuda()) { - return CUDA_SAFECALL(ccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles)); - } else if (apiOpenCL()) { - if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl") == DKS_SUCCESS) - return OPENCL_SAFECALL(oclcol->CollimatorPhysics(mem_ptr, par_ptr, numparticles)); - else - return DKS_ERROR; - - } else if (apiOpenMP()) { - return MIC_SAFECALL(miccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles)); - } - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles); } @@ -768,13 +713,8 @@ int DKSBase::callCollimatorPhysics(void *mem_ptr, void *par_ptr, int DKSBase::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles) { - if (apiCuda()) - return CUDA_SAFECALL( ccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles) ); - else if (apiOpenMP()) - return MIC_SAFECALL( miccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles) ); - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles); + } int DKSBase::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr, @@ -783,28 +723,21 @@ int DKSBase::callCollimatorPhysicsSoA(void *label_ptr, void *localID_ptr, void *par_ptr, int numparticles) { - if (apiOpenMP()) { - return MIC_SAFECALL( miccol->CollimatorPhysicsSoA(label_ptr, localID_ptr, - rx_ptr, ry_ptr, rz_ptr, - px_ptr, py_ptr, pz_ptr, - par_ptr, numparticles) ); - } + + return 
dkscol->CollimatorPhysicsSoA(label_ptr, localID_ptr, + rx_ptr, ry_ptr, rz_ptr, + px_ptr, py_ptr, pz_ptr, + par_ptr, numparticles); - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; } int DKSBase::callCollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback) { - if (apiCuda()) - return CUDA_SAFECALL(ccol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback)); - else if (apiOpenMP()) - return MIC_SAFECALL(miccol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback)); - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + + return dkscol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback); + } int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr, @@ -813,15 +746,10 @@ int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr, void *par_ptr, int numparticles, int &numaddback) { - if (apiOpenMP()) { - return MIC_SAFECALL(miccol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr, - rx_ptr, ry_ptr, rz_ptr, - px_ptr, py_ptr, pz_ptr, - par_ptr, numparticles, numaddback)); - } - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + return MIC_SAFECALL(dkscol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr, + rx_ptr, ry_ptr, rz_ptr, + px_ptr, py_ptr, pz_ptr, + par_ptr, numparticles, numaddback)); } @@ -844,16 +772,8 @@ int DKSBase::callParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart, bool usedt, int streamId) { - if (apiCuda()) - return CUDA_SAFECALL(ccol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, c, - usedt, streamId)); - else if (apiOpenMP()) - return MIC_SAFECALL(miccol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, - c, usedt, streamId)); - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; - + return dkscol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr, dt, c, usedt, streamId); + } int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr, @@ -862,20 +782,8 
@@ int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr, double c, bool usedt, int streamId) { - if (apiCuda()) { - return CUDA_SAFECALL(ccol->ParallelTTrackerPushTransform(x_ptr, p_ptr, - lastSec_ptr, orient_ptr, - npart, nsec, dt_ptr, dt, - c, usedt, streamId)); - } else if (apiOpenMP()) { - return MIC_SAFECALL(miccol->ParallelTTrackerPushTransform(x_ptr, p_ptr, - lastSec_ptr, orient_ptr, - npart, nsec, dt_ptr, dt, - c, usedt, streamId)); - } - - DEBUG_MSG("No implementation for selceted platform"); - return DKS_ERROR; + return dkscol->ParallelTTrackerPushTransform(x_ptr, p_ptr, lastSec_ptr, orient_ptr, + npart, nsec, dt_ptr, dt, c, usedt, streamId); } diff --git a/src/DKSBase.h b/src/DKSBase.h index 6c13e50..ebd8d78 100644 --- a/src/DKSBase.h +++ b/src/DKSBase.h @@ -29,8 +29,11 @@ #endif #include "OpenCL/OpenCLBase.h" -#include "OpenCL/OpenCLFFT.h" #include "OpenCL/OpenCLChiSquare.h" +#endif + +#ifdef DKS_AMD +#include "OpenCL/OpenCLFFT.h" #include "OpenCL/OpenCLCollimatorPhysics.h" #include "OpenCL/OpenCLGreensFunction.h" #endif @@ -52,6 +55,7 @@ #include "MIC/MICGreensFunction.hpp" #endif +#include "Algorithms/GreensFunction.h" #include "Algorithms/CollimatorPhysics.h" #include "Algorithms/FFT.h" @@ -72,27 +76,22 @@ private: bool m_auto_tuning; bool m_use_config; + DKSFFT *dksfft; + DKSCollimatorPhysics *dkscol; + GreensFunction *dksgreens; + #ifdef DKS_OPENCL OpenCLBase *oclbase; - OpenCLFFT *oclfft; OpenCLChiSquare *oclchi; - OpenCLCollimatorPhysics *oclcol; - OpenCLGreensFunction *oclgreens; #endif #ifdef DKS_CUDA CudaBase *cbase; - CudaFFT *cfft; - CudaGreensFunction *cgreens; CudaChiSquare *cchi; - CudaCollimatorPhysics *ccol; #endif #ifdef DKS_MIC MICBase *micbase; - MICFFT *micfft; - MICCollimatorPhysics *miccol; - MICGreensFunction *micgreens; MICChiSquare *micchi; #endif @@ -156,6 +155,11 @@ protected: return device_name; } + /** Private function to initialize objects based on the device used. 
+ * + */ + int setup(); + public: /** diff --git a/src/DKSDefinitions.h b/src/DKSDefinitions.h index 63fba34..452e276 100644 --- a/src/DKSDefinitions.h +++ b/src/DKSDefinitions.h @@ -62,6 +62,12 @@ #define OPENCL_SAFEINIT(x) ( NULL ) #endif +#ifdef DKS_AMD +#define OPENCL_SAFEINIT_AMD(x) ( x ) +#else +#define OPENCL_SAFEINIT_AMD(x) ( NULL ) +#endif + #ifdef DKS_MIC #define MIC_SAFEINIT(x) ( x ) #else diff --git a/src/MIC/MICGreensFunction.hpp b/src/MIC/MICGreensFunction.hpp index dc2641c..db3944f 100644 --- a/src/MIC/MICGreensFunction.hpp +++ b/src/MIC/MICGreensFunction.hpp @@ -29,8 +29,8 @@ public: ~MICGreensFunction(); /* compute greens integral analytically */ - int greensIntegral(void * tmpgreen_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2, - int streamId = -1); + int greensIntegral(void * tmpgreen_, int I, int J, int K, int NI, int NJ, + double hr_m0, double hr_m1, double hr_m2, int streamId = -1); /* perform the actual integration */ int integrationGreensFunction(void * rho2_m, void * tmpgreen,int I,int J, int K, diff --git a/src/OpenCL/CMakeLists.txt b/src/OpenCL/CMakeLists.txt index b0a4792..8ff5a2f 100644 --- a/src/OpenCL/CMakeLists.txt +++ b/src/OpenCL/CMakeLists.txt @@ -1,24 +1,36 @@ -SET (_SRCS - OpenCLBase.cpp - OpenCLFFT.cpp - OpenCLChiSquare.cpp - OpenCLCollimatorPhysics.cpp - OpenCLChiSquareRuntime.cpp - OpenCLGreensFunction.cpp - ) +#dont include FFT, GreensFunction and CollimatorPhysics if clFFT and clRNG not found -SET (_HDRS - OpenCLBase.h - OpenCLFFT.h - OpenCLChiSquare.h - OpenCLCollimatorPhysics.h - OpenCLChiSquareRuntime.h - OpenCLGreensFunction.h - ) +IF (ENABLE_AMD) + SET (_SRCS + OpenCLBase.cpp + OpenCLFFT.cpp + OpenCLChiSquare.cpp + OpenCLCollimatorPhysics.cpp + OpenCLChiSquareRuntime.cpp + OpenCLGreensFunction.cpp + ) -#INCLUDE_DIRECTORIES ( -# ${CMAKE_CURRENT_SOURCE_DIR} -#) + SET (_HDRS + OpenCLBase.h + OpenCLFFT.h + OpenCLChiSquare.h + OpenCLCollimatorPhysics.h + OpenCLChiSquareRuntime.h + OpenCLGreensFunction.h + 
) +ELSE (ENABLE_AMD) + SET (_SRCS + OpenCLBase.cpp + OpenCLChiSquare.cpp + OpenCLChiSquareRuntime.cpp + ) + + SET (_HDRS + OpenCLBase.h + OpenCLChiSquare.h + OpenCLChiSquareRuntime.h + ) +ENDIF (ENABLE_AMD) SET (_KERNELS OpenCLKernels/OpenCLChiSquare.cl