13 Commits

23 changed files with 585 additions and 295 deletions

View File

@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
PROJECT (DKS) PROJECT (DKS)
SET (DKS_VERSION_MAJOR 1) SET (DKS_VERSION_MAJOR 1)
SET (DKS_VERSION_MINOR 1) SET (DKS_VERSION_MINOR 1)
SET (DKS_VERSION_PATCH 1) SET (DKS_VERSION_PATCH 2)
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH}) set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
SET (PACKAGE \"dks\") SET (PACKAGE \"dks\")
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\") SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
@ -28,11 +28,13 @@ MESSAGE (STATUS "OpenCL kernel files: ${OPENCL_KERNELS}")
set (BOOSTROOT $ENV{BOOST_DIR}) set (BOOSTROOT $ENV{BOOST_DIR})
SET (Boost_USE_STATIC_LIBS OFF) SET (Boost_USE_STATIC_LIBS OFF)
SET (Boost_USE_STATIC_RUNTIME OFF) SET (Boost_USE_STATIC_RUNTIME OFF)
FIND_PACKAGE(Boost 1.55.0 REQUIRED COMPONENTS filesystem system) #FIND_PACKAGE(Boost 1.55 REQUIRED COMPONENTS filesystem system)
FIND_PACKAGE(Boost 1.41 REQUIRED)
IF (Boost_FOUND) IF (Boost_FOUND)
MESSAGE (STATUS "Boost version: ${Boost_VERSION}")
MESSAGE (STATUS "Found boost include dir: ${Boost_INCLUDE_DIRS}") MESSAGE (STATUS "Found boost include dir: ${Boost_INCLUDE_DIRS}")
MESSAGE (STATUS "Found boost library dir: ${Boost_LIBRARY_DIRS}") MESSAGE (STATUS "Found boost library dir: ${Boost_LIBRARY_DIRS}")
MESSAGE (STATUS "Found boost libraries: ${Boost_LIBRARIES}") #MESSAGE (STATUS "Found boost libraries: ${Boost_LIBRARIES}")
INCLUDE_DIRECTORIES (${Boost_INCLUDE_DIRS}) INCLUDE_DIRECTORIES (${Boost_INCLUDE_DIRS})
LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
ENDIF (Boost_FOUND) ENDIF (Boost_FOUND)
@ -79,7 +81,7 @@ OPTION (USE_UQTK "Use UQTK" OFF)
IF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL) IF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL)
#for intel compiler turn on openmp and opencl #for intel compiler turn on openmp and opencl
OPTION (USE_OPENCL "Use OpenCL" ON) OPTION (USE_OPENCL "Use OpenCL" OFF)
OPTION (USE_CUDA "Use CUDA" OFF) OPTION (USE_CUDA "Use CUDA" OFF)
OPTION (USE_MIC "Use intel MIC" ON) OPTION (USE_MIC "Use intel MIC" ON)
@ -113,15 +115,21 @@ ENDIF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL)
IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") AND NOT USE_INTEL) IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") AND NOT USE_INTEL)
OPTION (USE_OPENCL "Use OpenCL" ON) OPTION (USE_OPENCL "Use OpenCL" OFF)
OPTION (USE_CUDA "Use CUDA" OFF) OPTION (USE_CUDA "Use CUDA" OFF)
OPTION (USE_MIC "Use intel MIC" OFF) OPTION (USE_MIC "Use intel MIC" OFF)
OPTION (STATIC_CUDA "Link static cuda libraries" OFF)
IF (ENABLE_MUSR)
SET (USE_OPENCL ON)
ENDIF (ENABLE_MUSR)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG -O3 -Wall -fopenmp -std=c++11 -D__wsu") SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG -O3 -Wall -fopenmp -std=c++11 -D__wsu")
FIND_PACKAGE(CUDA) FIND_PACKAGE(CUDA)
IF (CUDA_FOUND) IF (CUDA_FOUND)
SET (USE_CUDA ON) SET (USE_CUDA ON)
OPTION(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cuda libraries" OFF)
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64) LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs) LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs)
@ -131,20 +139,27 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
MESSAGE (STATUS "cuda version: ${CUDA_VERSION}") MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
SET(CUDA_PROPAGATE_HOST_FLAGS OFF) SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lcudart -lcufft -lcublas -lnvToolsExt -DDKS_CUDA") SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
SET (CUDA_NVCC_FLAGS "-arch=sm_35 -DDEBUG -lcufft -lcublas -lcudart -fmad=false") SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${OPENCL_KERNELS}")
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DDEBUG -std=c++11 -D__wsu")
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${OPENCL_KERNELS}") IF (NOT STATIC_CUDA)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_CUDA")
SET (DKS_CUDA_LIBS "-lcudadevrt -lcudart -lcufft -lcublas")
ELSE (NOT STATIC_CUDA)
SET (CUDA_SEPARABLE_COMPILATION ON)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_CUDA -fPIC")
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-rdc=true;-lcufft_static;-lcublas_static;-lcurand_static")
SET (DKS_CUDA_LIBS "-lcudadevrt -lcudart_static -lcufft_static -lcublas_static -lculibos")
ENDIF (NOT STATIC_CUDA)
#if cuda version >= 7.0 add runtime commpilation flags #if cuda version >= 7.0 add runtime commpilation flags
IF (NOT CUDA_VERSION VERSION_LESS "7.0") IF (NOT CUDA_VERSION VERSION_LESS "7.0" AND ENABLE_MUSR)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lnvrtc -lcuda") SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lnvrtc -lcuda")
ENDIF (NOT CUDA_VERSION VERSION_LESS "7.0") ENDIF (NOT CUDA_VERSION VERSION_LESS "7.0" AND ENABLE_MUSR)
MESSAGE (STATUS "nvcc flags: ${CUDA_NVCC_FLAGS}") MESSAGE (STATUS "nvcc flags: ${CUDA_NVCC_FLAGS}")
SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
#set(CUDA_SEPARABLE_COMPILATION ON)
SET(BUILD_SHARED_LIBS OFF) SET(BUILD_SHARED_LIBS OFF)
ENDIF (CUDA_FOUND) ENDIF (CUDA_FOUND)
@ -171,9 +186,9 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
ENDIF(APPLE AND NOT CUDA_FOUND) ENDIF(APPLE AND NOT CUDA_FOUND)
#if cuda found set cuda opencl flags #if cuda found set cuda opencl flags
IF (CUDA_FOUND) IF (CUDA_FOUND AND USE_OPENCL)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL -lpthread -DDKS_OPENCL") SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL -lpthread -DDKS_OPENCL")
ENDIF (CUDA_FOUND) ENDIF (CUDA_FOUND AND USE_OPENCL)
#if cuda not found but amd opencl found set opencl flags #if cuda not found but amd opencl found set opencl flags
IF (NOT CUDA_FOUND AND OpenCL_FOUND) IF (NOT CUDA_FOUND AND OpenCL_FOUND)

View File

@ -29,30 +29,30 @@ Intel MIC compilers (optional)
######Source###### ######Source######
https://gitlab.psi.ch/uldis_l/DKS https://gitlab.psi.ch/uldis_l/DKS
######Changes from DKS-1.0.x version######
DKS is split into three modules that can be enabled/disabled at compile time depending on which software it is used for.
By default only the DKSBase and DKSFFT modules are enabled. In order to install other modules the necessary option needs to be enabled.
Supported options are:
-DENABLE_OPAL option should be enabled if DKS will be used for OPAL
-DENABLE_MUSR option should be enabled if DKS will be used for musrfit
-DENABLE_PET option should be enabled if DKS will be used for PET image reconstruction
See install instructions for more details on how to enable the necessary options in DKS
######Install###### ######Install######
#consult https://gitlab.psi.ch/uldis_l/DKS/wikis/home for full install instructions
#clone DKS #clone DKS
git clone git@gitlab.psi.ch:uldis_l/DKS.git DKS git clone git@gitlab.psi.ch:uldis_l/DKS.git DKS
#set compilers to use #switch to the desired version (OPTIONAL)
#supported c++ compilers: g++, icpc, mpicxx whith g++ git checkout DKS-1.1.0
#supported c compilers: gcc, icc, mpicc whith gcc
export CXX_COMPILER=cpp_compiler_name
export CC_COMPILER=c_compiler_name
#set dks root directory directory #configure installation in build directory
cd DKS #enable DKS modules to compile -DENABLE_OPAL, -DENABLE_MUSR, -DENABLE_PET
export DKS_ROOT = $PWD CXX=<c++ compiler> CC=<c compiler> -DCMAKE_INSTALL_PREFIX=<install dir> <path to DKS source> [-DENABLE_OPAL=1 -DENABLE_MUSR=1 -DENABLE_PET=1]
#set build directory
mkdir $DKS_BUILD_DIR
cd $DKS_BUILD_DIR
#set install directory
export DKS_INSTALL_DIR = $DKS_BUILD_DIR #default is /usr/local/
CXX=$CXX_COMPILER CC=$CC_COMPILER cmake -DCMAKE_INSTALL_PREFIX=$DKS_BUILD_DIR $DKS_ROOT
#install DKS
make make
make install make install

View File

@ -4,28 +4,30 @@ LINK_DIRECTORIES( ${CMAKE_SOURCE_DIR}/src )
#chi square kernel tests #chi square kernel tests
IF (ENABLE_MUSR) IF (ENABLE_MUSR)
ADD_EXECUTABLE(testChiSquareRT testChiSquareRT.cpp) ADD_EXECUTABLE(testChiSquareRT testChiSquareRT.cpp)
TARGET_LINK_LIBRARIES(testChiSquareRT dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testChiSquareRT dks ${CLFFT_LIBRARIES})
ADD_EXECUTABLE(testChiSquareRTRandom testChiSquareRTRandom.cpp) ADD_EXECUTABLE(testChiSquareRTRandom testChiSquareRTRandom.cpp)
TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${CLFFT_LIBRARIES})
IF (USE_UQTK) IF (USE_UQTK)
ADD_EXECUTABLE(testChiSquareRTUQTK testChiSquareRTUQTK.cpp) ADD_EXECUTABLE(testChiSquareRTUQTK testChiSquareRTUQTK.cpp)
TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran) TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${CLFFT_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
ENDIF (USE_UQTK) ENDIF (USE_UQTK)
#TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES})
#test to verify search functions #test to verify search functions
ADD_EXECUTABLE(testSearch testSearch.cpp) ADD_EXECUTABLE(testSearch testSearch.cpp)
TARGET_LINK_LIBRARIES(testSearch dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testSearch dks ${CLFFT_LIBRARIES})
ENDIF (ENABLE_MUSR) ENDIF (ENABLE_MUSR)
IF (ENABLE_OPAL) IF (ENABLE_OPAL)
ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp) ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${CLFFT_LIBRARIES})
ADD_EXECUTABLE(testPushKick testPushKick.cpp) ADD_EXECUTABLE(testPushKick testPushKick.cpp)
TARGET_LINK_LIBRARIES(testPushKick dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testPushKick dks ${CLFFT_LIBRARIES})
ENDIF(ENABLE_OPAL) ENDIF(ENABLE_OPAL)
ADD_EXECUTABLE(testFFT testFFT.cpp)
TARGET_LINK_LIBRARIES(testFFT dks ${CLFFT_LIBRARIES})

214
auto-tuning/testFFT.cpp Normal file
View File

@ -0,0 +1,214 @@
#include <cmath>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include "Utility/TimeStamp.h"
#include "DKSFFT.h"
using namespace std;
void compareData(complex<double>* data1, complex<double>* data2, int N, int dim);
void compareData(double* data1, double *data2, int N, int dim);
void initData(complex<double> *data, int dimsize[3], int dim);
void initData(double *data, int dimsize[3], int dim);
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim,
char *api_name, char *device_name);
void printHelp();
/**
 * Round-trip self test for the DKSFFT interface.
 *
 * Parses the command line, initialises the selected device and then runs
 *   1. an out-of-place real -> complex -> real transform, and
 *   2. an in-place complex -> complex transform followed by its inverse,
 * printing the summed absolute difference between the input and the
 * transformed-back data for each case.
 *
 * Returns 0 on a completed run and 1 when the command line does not describe
 * a usable configuration (no backend selected or a non-positive grid size).
 */
int main(int argc, char *argv[]) {

  int ierr;
  int N1 = 8;
  int N2 = 8;
  int N3 = 8;
  int dim = 3;

  // Zero-initialised stack buffers: readParams() only writes them when a
  // backend flag is present, so they must start out as valid empty strings.
  // Stack storage also means nothing has to be freed on the early returns.
  char api_name[10] = "";
  char device_name[10] = "";

  if ( readParams(argc, argv, N1, N2, N3, dim, api_name, device_name) )
    return 0;

  // Without a backend flag there is no API/device name to hand to DKS.
  if (api_name[0] == '\0' || device_name[0] == '\0') {
    cout << "No backend selected, use one of: -cuda, -opencl, -mic, -cpu" << endl;
    return 1;
  }

  // atoi() yields 0 for non-numeric input; reject sizes that would make the
  // buffer sizes below zero or negative.
  if (N1 <= 0 || N2 <= 0 || N3 <= 0) {
    cout << "Grid sizes must be positive" << endl;
    return 1;
  }

  cout << "Use api: " << api_name << ", " << device_name << endl;

  int dimsize[3] = {N1, N2, N3};
  int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
  int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2];

  // compareData() walks an N x N x N cube. For a non-cubic grid that would
  // read past the end of the buffers, so fall back to a flat comparison over
  // every element (dim == 1 with N == total element count).
  const bool cubic = (N1 == N2 && N2 == N3);
  const int cmpN = cubic ? N1 : sizereal;
  const int cmpDim = cubic ? 3 : 1;

  double *rdata = new double[sizereal];
  double *ordata = new double[sizereal];
  complex<double> *cdata = new complex<double>[sizereal];
  complex<double> *codata = new complex<double>[sizereal];

  // Host data is always filled as a full 3D array, independent of -dim.
  initData(rdata, dimsize, 3);
  initData(cdata, dimsize, 3);

  /* init DKSBase */
  cout << "Init device and set function" << endl;
  DKSFFT base;
  base.setAPI(api_name, strlen(api_name));
  base.setDevice(device_name, strlen(device_name));
  cout << "init device" << endl;
  base.initDevice();
  cout << "setup fft" << endl;
  base.setupFFT(dim, dimsize);

  //Test RC FFT -> CR FFT
  void *real_ptr, *comp_ptr, *res_ptr;

  cout << "allocate memory" << endl;
  // NOTE(review): ierr is filled by allocateMemory() but never inspected here.
  real_ptr = base.allocateMemory<double>(sizereal, ierr);
  res_ptr = base.allocateMemory<double>(sizereal, ierr);
  comp_ptr = base.allocateMemory< complex<double> >(sizecomp, ierr);

  cout << "write data" << endl;
  base.writeData<double>(real_ptr, rdata, sizereal);

  cout << "perform fft" << endl;
  base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
  base.callC2RFFT(res_ptr, comp_ptr, dim, dimsize);
  base.callNormalizeC2RFFT(res_ptr, dim, dimsize);

  cout << "read data" << endl;
  base.readData<double>(res_ptr, ordata, sizereal);

  compareData(rdata, ordata, cmpN, cmpDim);

  base.freeMemory<double>(real_ptr, sizereal);
  base.freeMemory<double>(res_ptr, sizereal);
  base.freeMemory< complex<double> >(comp_ptr, sizecomp);

  //Test CC FFT
  void *mem_ptr;
  mem_ptr = base.allocateMemory< complex<double> >(sizereal, ierr);
  base.writeData< complex<double> >(mem_ptr, cdata, sizereal);
  base.callFFT(mem_ptr, 3, dimsize);
  base.callIFFT(mem_ptr, 3, dimsize);
  base.callNormalizeFFT(mem_ptr, 3, dimsize);
  base.readData< complex<double> >(mem_ptr, codata, sizereal);

  compareData(cdata, codata, cmpN, cmpDim);

  base.freeMemory< complex<double> > (mem_ptr, sizereal);

  delete[] rdata;
  delete[] ordata;
  delete[] cdata;
  delete[] codata;

  return 0;
}
/*
 * Print the summed absolute difference (real and imaginary parts separately)
 * between two complex arrays.
 *
 * The arrays are treated as N (dim 1), N*N (dim 2) or N*N*N (dim > 2)
 * contiguous elements and are walked in storage order.
 */
void compareData(complex<double>* data1, complex<double>* data2, int N, int dim) {
  const int planes = (dim > 2) ? N : 1;
  const int rows = (dim > 1) ? N : 1;

  double diff = 0;
  int pos = 0;  // linear element index, advanced once per inner iteration
  for (int p = 0; p < planes; ++p) {
    for (int r = 0; r < rows; ++r) {
      for (int c = 0; c < N; ++c, ++pos) {
        const complex<double> &lhs = data1[pos];
        const complex<double> &rhs = data2[pos];
        diff += fabs(lhs.real() - rhs.real());
        diff += fabs(lhs.imag() - rhs.imag());
      }
    }
  }

  cout << "Size " << N << " CC <--> CC diff: " << diff << endl;
}
/*
 * Print the summed absolute difference between two real arrays.
 *
 * The arrays are treated as N (dim 1), N*N (dim 2) or N*N*N (dim > 2)
 * contiguous elements and are walked in storage order.
 */
void compareData(double* data1, double* data2, int N, int dim) {
  const int planes = (dim > 2) ? N : 1;
  const int rows = (dim > 1) ? N : 1;

  double diff = 0;
  int pos = 0;  // linear element index, advanced once per inner iteration
  for (int p = 0; p < planes; ++p) {
    for (int r = 0; r < rows; ++r) {
      for (int c = 0; c < N; ++c, ++pos) {
        diff += fabs(data1[pos] - data2[pos]);
      }
    }
  }

  cout << "Size " << N << " RC <--> CR diff: " << diff << endl;
}
/*
 * Fill a complex array with sin(x) + 0i, where x is the index along the
 * fastest-varying axis (dimsize[0]).
 *
 * dim == 3 fills dimsize[2] * dimsize[1] rows, dim == 2 fills dimsize[1] rows,
 * any other value fills a single row of dimsize[0] elements.
 */
void initData(complex<double> *data, int dimsize[3], int dim) {
  const int nx = dimsize[0];
  const int ny = (dim == 3 || dim == 2) ? dimsize[1] : 1;
  const int nz = (dim == 3) ? dimsize[2] : 1;

  for (int z = 0; z < nz; ++z) {
    for (int y = 0; y < ny; ++y) {
      const int base = z*ny*nx + y*nx;  // offset of the first element of this row
      for (int x = 0; x < nx; ++x) {
        data[base + x] = complex<double>(sin(x), 0.0);
      }
    }
  }
}
/*
 * Fill a real array with sin(x), where x is the index along the
 * fastest-varying axis (dimsize[0]).
 *
 * dim == 3 fills dimsize[2] * dimsize[1] rows, dim == 2 fills dimsize[1] rows,
 * any other value fills a single row of dimsize[0] elements.
 */
void initData(double *data, int dimsize[3], int dim) {
  const int nx = dimsize[0];
  const int ny = (dim == 3 || dim == 2) ? dimsize[1] : 1;
  const int nz = (dim == 3) ? dimsize[2] : 1;

  for (int z = 0; z < nz; ++z) {
    for (int y = 0; y < ny; ++y) {
      const int base = z*ny*nx + y*nx;  // offset of the first element of this row
      for (int x = 0; x < nx; ++x) {
        data[base + x] = sin(x);
      }
    }
  }
}
/*
 * Parse the command line of the FFT test.
 *
 * Recognised options:
 *   -dim <n>              number of FFT dimensions, stored in dim
 *   -grid <n1> <n2> <n3>  grid size, stored in N1, N2, N3
 *   -cuda | -opencl | -mic | -cpu
 *                         select backend; writes api_name and device_name
 *
 * api_name and device_name must point to buffers of at least 7 characters;
 * they are left untouched when no backend option is given. Unknown arguments
 * are ignored.
 *
 * Returns true when the caller should stop (an option is missing its
 * values), false when parsing succeeded.
 */
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim,
		char *api_name, char *device_name)
{
  for (int i = 1; i < argc; i++) {
    const std::string arg(argv[i]);

    if (arg == "-dim") {
      // The value must exist; argv[argc] is a null pointer and must not be parsed.
      if (i + 1 >= argc) {
        std::cerr << "Option -dim requires a value" << std::endl;
        return true;
      }
      dim = std::atoi(argv[i + 1]);
      i++;
    } else if (arg == "-grid") {
      if (i + 3 >= argc) {
        std::cerr << "Option -grid requires three values" << std::endl;
        return true;
      }
      N1 = std::atoi(argv[i + 1]);
      N2 = std::atoi(argv[i + 2]);
      N3 = std::atoi(argv[i + 3]);
      i += 3;
    } else if (arg == "-cuda") {
      std::strcpy(api_name, "Cuda");
      std::strcpy(device_name, "-gpu");
    } else if (arg == "-opencl") {
      std::strcpy(api_name, "OpenCL");
      std::strcpy(device_name, "-gpu");
    } else if (arg == "-mic") {
      std::strcpy(api_name, "OpenMP");
      std::strcpy(device_name, "-mic");
    } else if (arg == "-cpu") {
      std::strcpy(api_name, "OpenCL");
      std::strcpy(device_name, "-cpu");
    }
  }

  return false;
}

View File

@ -3,5 +3,7 @@ SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib") SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
SET(${PROJECT_NAME}_LIBRARY "dks") SET(${PROJECT_NAME}_LIBRARY "dks")
SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH}) SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
SET(DKS_CUDA_STATIC ${STATIC_CUDA})
SET(DKS_CUDA_LIBS "${DKS_CUDA_LIBS}")
SET(DKS_VERSION ${DKS_VERSION}) SET(DKS_VERSION ${DKS_VERSION})
SET(DKS_VERSION_STR ${DKS_VERSION_STR}) SET(DKS_VERSION_STR ${DKS_VERSION_STR})

Binary file not shown.

View File

@ -6,7 +6,7 @@
#include "../DKSDefinitions.h" #include "../DKSDefinitions.h"
class DKSFFT { class BaseFFT {
protected: protected:
int defaultN[3]; int defaultN[3];
@ -22,7 +22,7 @@ protected:
public: public:
virtual ~DKSFFT() { } virtual ~BaseFFT() { }
virtual int setupFFT(int ndim, int N[3]) = 0; virtual int setupFFT(int ndim, int N[3]) = 0;
virtual int setupFFTRC(int ndim, int N[3], double scale = 1.0) = 0; virtual int setupFFTRC(int ndim, int N[3], double scale = 1.0) = 0;

View File

@ -11,7 +11,7 @@
#include <boost/optional/optional.hpp> #include <boost/optional/optional.hpp>
#include <boost/property_tree/xml_parser.hpp> #include <boost/property_tree/xml_parser.hpp>
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
#include <boost/filesystem.hpp> //#include <boost/filesystem.hpp>
#include <string> #include <string>
#include <iostream> #include <iostream>
#include <cstdlib> #include <cstdlib>
@ -24,7 +24,7 @@
#include "../DKSDefinitions.h" #include "../DKSDefinitions.h"
namespace pt = boost::property_tree; namespace pt = boost::property_tree;
namespace fs = boost::filesystem; //namespace fs = boost::filesystem;
const std::string config_dir = "/.config/DKS"; const std::string config_dir = "/.config/DKS";
const std::string config_file = "/autotuning.xml"; const std::string config_file = "/autotuning.xml";

View File

@ -35,12 +35,12 @@ ENDMACRO ()
SET (DKS_BASEDIR_HDRS SET (DKS_BASEDIR_HDRS
DKSBase.h DKSBase.h
DKSDefinitions.h DKSDefinitions.h
DKSOPAL.h DKSFFT.h
) )
SET (DKS_BASEDIR_SRCS SET (DKS_BASEDIR_SRCS
DKSBase.cpp DKSBase.cpp
DKSOPAL.cpp DKSFFT.cpp
) )
#add opal to DKS if enable_opal is set #add opal to DKS if enable_opal is set
@ -112,26 +112,18 @@ IF (USE_CUDA)
CUDA_ADD_LIBRARY(dks ${DKS_SRCS}) CUDA_ADD_LIBRARY(dks ${DKS_SRCS})
CUDA_ADD_LIBRARY(dksshared SHARED ${DKS_SRCS}) CUDA_ADD_LIBRARY(dksshared SHARED ${DKS_SRCS})
IF (USE_UQTK) TARGET_LINK_LIBRARIES(dks ${DKS_CUDA_LIBS})
TARGET_LINK_LIBRARIES(dks cudadevrt lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran) TARGET_LINK_LIBRARIES(dksshared ${DKS_CUDA_LIBS})
TARGET_LINK_LIBRARIES(dksshared cudadevrt lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran) #TARGET_LINK_LIBRARIES(dks)
ELSE (USE_UQTK) #TARGET_LINK_LIBRARIES(dksshared)
TARGET_LINK_LIBRARIES(dks cudadevrt)
TARGET_LINK_LIBRARIES(dksshared cudadevrt)
ENDIF (USE_UQTK)
ELSE (USE_CUDA) ELSE (USE_CUDA)
MESSAGE (STATUS "DKS srcs: ${DKS_SRCS}") MESSAGE (STATUS "DKS srcs: ${DKS_SRCS}")
ADD_LIBRARY(dks ${DKS_SRCS}) ADD_LIBRARY(dks ${DKS_SRCS})
ADD_LIBRARY(dksshared SHARED ${DKS_SRCS}) ADD_LIBRARY(dksshared SHARED ${DKS_SRCS})
IF (USE_UQTK) TARGET_LINK_LIBRARIES(dks)
TARGET_LINK_LIBRARIES(dks lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran) TARGET_LINK_LIBRARIES(dksshared)
TARGET_LINK_LIBRARIES(dksshared lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
ELSE (USE_UQTK)
TARGET_LINK_LIBRARIES(dks)
TARGET_LINK_LIBRARIES(dksshared)
ENDIF(USE_UQTK)
ENDIF (USE_CUDA) ENDIF (USE_CUDA)

View File

@ -1,9 +1,9 @@
SET (_HDRS CudaBase.cuh) SET (_HDRS CudaBase.cuh CudaFFT.cuh)
SET (_SRCS CudaBase.cu) SET (_SRCS CudaBase.cu CudaFFT.cu)
IF (ENABLE_OPAL) IF (ENABLE_OPAL)
SET (_HDRS ${_HDRS} CudaFFT.cuh CudaGreensFunction.cuh CudaCollimatorPhysics.cuh) SET (_HDRS ${_HDRS} CudaGreensFunction.cuh CudaCollimatorPhysics.cuh)
SET (_SRCS ${_SRCS} CudaFFT.cu CudaGreensFunction.cu CudaCollimatorPhysics.cu) SET (_SRCS ${_SRCS} CudaGreensFunction.cu CudaCollimatorPhysics.cu)
ENDIF (ENABLE_OPAL) ENDIF (ENABLE_OPAL)
IF (ENABLE_MUSR) IF (ENABLE_MUSR)

View File

@ -12,7 +12,6 @@
#include <cufft.h> #include <cufft.h>
#include <cublas_v2.h> #include <cublas_v2.h>
#include <curand_kernel.h> #include <curand_kernel.h>
#include <nvToolsExt.h>
#include <time.h> #include <time.h>
#define BLOCK_SIZE 128 #define BLOCK_SIZE 128

View File

@ -10,7 +10,7 @@
#include "../Algorithms/FFT.h" #include "../Algorithms/FFT.h"
#include "CudaBase.cuh" #include "CudaBase.cuh"
class CudaFFT : public DKSFFT{ class CudaFFT : public BaseFFT {
private: private:

View File

@ -33,7 +33,6 @@
#ifdef DKS_CUDA #ifdef DKS_CUDA
#include "CUDA/CudaBase.cuh" #include "CUDA/CudaBase.cuh"
#include "nvToolsExt.h"
#endif #endif
#ifdef DKS_MIC #ifdef DKS_MIC
@ -889,9 +888,10 @@ public:
* TODO: opencl and mic imlementation * TODO: opencl and mic imlementation
*/ */
int callMemInfo() { int callMemInfo() {
#ifdef DKS_CUDA
if (apiCuda()) if (apiCuda())
return CUDA_SAFECALL(cbase->cuda_memInfo()); return CUDA_SAFECALL(cbase->cuda_memInfo());
#endif
return DKS_ERROR; return DKS_ERROR;
} }
@ -900,11 +900,13 @@ public:
* Used for debuging and timing purposes only. * Used for debuging and timing purposes only.
*/ */
void oclEventInfo() { void oclEventInfo() {
#ifdef DKS_OPENCL
if (apiOpenCL()) if (apiOpenCL())
return OPENCL_SAFECALL(oclbase->ocl_eventInfo()); return OPENCL_SAFECALL(oclbase->ocl_eventInfo());
#endif
} }
/** /**
* Test function to profile opencl kernel calls. * Test function to profile opencl kernel calls.
* Used for debuging and timing purposes only. * Used for debuging and timing purposes only.

View File

@ -8,6 +8,7 @@
#include "AutoTuning/DKSAutoTuningTester.h" #include "AutoTuning/DKSAutoTuningTester.h"
#include "DKSBase.h" #include "DKSBase.h"
#include "DKSFFT.h"
#include "Algorithms/ChiSquareRuntime.h" #include "Algorithms/ChiSquareRuntime.h"
@ -19,7 +20,7 @@
#include "OpenCL/OpenCLChiSquareRuntime.h" #include "OpenCL/OpenCLChiSquareRuntime.h"
#endif #endif
class DKSBaseMuSR : public DKSBase { class DKSBaseMuSR : public DKSFFT {
private: private:

147
src/DKSFFT.cpp Normal file
View File

@ -0,0 +1,147 @@
#include "DKSFFT.h"
/* Start without a backend FFT object. */
DKSFFT::DKSFFT() : dksfft(nullptr) { }

/* Release the backend FFT object; deleting a null pointer is a no-op. */
DKSFFT::~DKSFFT() {
  delete dksfft;
}
/*
 * Create the backend FFT object for the active API and set up its plans so
 * they can be reused when several FFTs of the same size are needed.
 *
 *   Cuda   : CudaFFT, setupFFT()
 *   OpenCL : OpenCLFFT, setupFFT() + setupFFTRC() + setupFFTCR()
 *   OpenMP : MICFFT, setupFFTRC() with scale 1 and setupFFTCR() with
 *            scale 1/(N[0]*N[1]*N[2])
 *
 * A backend object left over from an earlier call is released first, so the
 * function may be called repeatedly. Returns DKS_SUCCESS on success, the
 * backend error code or DKS_ERROR otherwise (including when no backend
 * object could be created or no API matches).
 */
int DKSFFT::setupFFT(int ndim, int N[3]) {

  // Release the object of a previous setup call; otherwise it would leak.
  delete dksfft;
  dksfft = nullptr;

  if (apiCuda()) {
    dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
    if (dksfft == nullptr)
      return DKS_ERROR;
    return dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
    if (dksfft == nullptr)
      return DKS_ERROR;
    int ierr1 = dksfft->setupFFT(ndim, N);
    int ierr2 = dksfft->setupFFTRC(ndim, N);
    int ierr3 = dksfft->setupFFTCR(ndim, N);
    if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
      return DKS_ERROR;

    return DKS_SUCCESS;
  } else if (apiOpenMP()) {
    //micbase.mic_setupFFT(ndim, N);
    //BENI: setting up RC and CR transformations on MIC
    dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
    if (dksfft == nullptr)
      return DKS_ERROR;
    // Form the element count in double precision: the int product overflows
    // for large grids, while the result is identical for sizes that fit.
    const double nelements = static_cast<double>(N[0]) * N[1] * N[2];
    int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
    int ierr2 = dksfft->setupFFTCR(ndim, N, 1./nelements);
    if (ierr1 != DKS_SUCCESS)
      return ierr1;
    if (ierr2 != DKS_SUCCESS)
      return ierr2;
    return DKS_SUCCESS;
  }

  return DKS_ERROR;
}
//BENI:
int DKSFFT::setupFFTRC(int ndim, int N[3], double scale) {
if (apiCuda())
return dksfft->setupFFT(ndim, N);
if (apiOpenCL())
return dksfft->setupFFTRC(ndim, N);
else if (apiOpenMP())
return dksfft->setupFFTRC(ndim, N, scale);
return DKS_ERROR;
}
//BENI:
int DKSFFT::setupFFTCR(int ndim, int N[3], double scale) {
if (apiCuda())
return dksfft->setupFFT(ndim, N);
if (apiOpenCL())
return dksfft->setupFFTCR(ndim, N);
else if (apiOpenMP())
return dksfft->setupFFTCR(ndim, N, scale);
return DKS_ERROR;
}
/*
 * Run the forward complex-to-complex FFT of the active backend on data_ptr.
 *
 * streamId is only forwarded for the Cuda API. Returns DKS_ERROR when no
 * backend object exists yet (setupFFT() creates it) or no API matches.
 */
int DKSFFT::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {

  // dksfft stays null until setupFFT() has run; never dereference it blindly.
  if (dksfft == nullptr) {
    DEBUG_MSG("FFT not initialized, call setupFFT first");
    return DKS_ERROR;
  }

  if (apiOpenCL() || apiOpenMP())
    return dksfft->executeFFT(data_ptr, ndim, dimsize);
  else if (apiCuda())
    return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/*
 * Run the inverse complex-to-complex FFT of the active backend on data_ptr.
 *
 * streamId is only forwarded for the Cuda API. Returns DKS_ERROR when no
 * backend object exists yet (setupFFT() creates it) or no API matches.
 */
int DKSFFT::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {

  // dksfft stays null until setupFFT() has run; never dereference it blindly.
  if (dksfft == nullptr) {
    DEBUG_MSG("FFT not initialized, call setupFFT first");
    return DKS_ERROR;
  }

  if (apiOpenCL() || apiOpenMP())
    return dksfft->executeIFFT(data_ptr, ndim, dimsize);
  else if (apiCuda())
    return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/*
 * Normalize the result of a complex-to-complex inverse FFT on data_ptr.
 *
 * On OpenCL the kernel file OpenCL/OpenCLKernels/OpenCLFFT.cl is loaded
 * before the backend is called; streamId is only forwarded for the Cuda API.
 * Returns DKS_ERROR when no backend object exists yet (setupFFT() creates
 * it), the kernel cannot be loaded, or no API matches.
 */
int DKSFFT::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {

  // dksfft stays null until setupFFT() has run; never dereference it blindly.
  if (dksfft == nullptr) {
    DEBUG_MSG("FFT not initialized, call setupFFT first");
    return DKS_ERROR;
  }

  if (apiOpenCL()) {
    if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
      return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
    else
      return DKS_ERROR;
  } else if (apiCuda()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
  } else if (apiOpenMP()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
  }

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/*
 * Run the out-of-place real-to-complex FFT: real_ptr is the input,
 * comp_ptr receives the complex result.
 *
 * streamId is only forwarded for the Cuda API. Returns DKS_ERROR when no
 * backend object exists yet (setupFFT() creates it) or no API matches.
 */
int DKSFFT::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {

  // dksfft stays null until setupFFT() has run; never dereference it blindly.
  if (dksfft == nullptr) {
    DEBUG_MSG("FFT not initialized, call setupFFT first");
    return DKS_ERROR;
  }

  if (apiCuda())
    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  else if (apiOpenCL() || apiOpenMP())
    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/*
 * Run the out-of-place complex-to-real inverse FFT: comp_ptr is the input,
 * real_ptr receives the real result.
 *
 * streamId is only forwarded for the Cuda API. Returns DKS_ERROR when no
 * backend object exists yet (setupFFT() creates it) or no API matches.
 */
int DKSFFT::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {

  // dksfft stays null until setupFFT() has run; never dereference it blindly.
  if (dksfft == nullptr) {
    DEBUG_MSG("FFT not initialized, call setupFFT first");
    return DKS_ERROR;
  }

  if (apiCuda())
    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  else if (apiOpenCL() || apiOpenMP())
    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/*
 * Normalize the result of a complex-to-real inverse FFT on real_ptr.
 *
 * Only the Cuda API is implemented; OpenCL and OpenMP return DKS_ERROR
 * without touching the data. Also returns DKS_ERROR when Cuda is selected
 * but no backend object exists yet (setupFFT() creates it).
 */
int DKSFFT::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {

  if (apiCuda()) {
    // dksfft stays null until setupFFT() has run; never dereference it blindly.
    if (dksfft == nullptr) {
      DEBUG_MSG("FFT not initialized, call setupFFT first");
      return DKS_ERROR;
    }
    return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
  }
  else if (apiOpenCL())
    return DKS_ERROR;
  else if (apiOpenMP())
    return DKS_ERROR;

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}

108
src/DKSFFT.h Normal file
View File

@ -0,0 +1,108 @@
#ifndef H_DKSBASE_FFT
#define H_DKSBASE_FFT
#include <iostream>
#include "AutoTuning/DKSAutoTuning.h"
#include "DKSBase.h"
#include "DKSDefinitions.h"
#include "Algorithms/GreensFunction.h"
#include "Algorithms/CollimatorPhysics.h"
#include "Algorithms/FFT.h"
#ifdef DKS_AMD
#include "OpenCL/OpenCLFFT.h"
#endif
#ifdef DKS_CUDA
#include "CUDA/CudaFFT.cuh"
#endif
#ifdef DKS_MIC
#include "MIC/MICFFT.h"
#endif
/**
 * FFT front end of DKS.
 * Extends DKSBase with FFT calls that are dispatched to the backend matching
 * the selected API (Cuda, OpenCL or OpenMP/MIC).
 */
class DKSFFT : public DKSBase {
private:
// Backend FFT object; null after construction, created in setupFFT() and
// deleted in the destructor.
BaseFFT *dksfft;
// NOTE(review): declared here, but no definition is visible in DKSFFT.cpp - confirm it exists or remove.
int initFFT();
public:
DKSFFT();
~DKSFFT();
/**
 * Setup FFT function.
 * Creates the backend FFT object for the selected API and initializes the
 * parameters for fft execution. If ndim > 0 initializes handles for fft calls.
 * If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
 * each fft will do its own setup according to fft size and dimensions.
 * The OpenCL path additionally sets up the real-to-complex and complex-to-real
 * transforms; the OpenMP (MIC) path sets up only those two.
 * Returns DKS_SUCCESS on success, an error code otherwise.
 */
int setupFFT(int ndim, int N[3]);
/**
 * BENI: setup real-to-complex FFT.
 * scale is only forwarded to the backend for the OpenMP (MIC) API; for Cuda
 * the call is forwarded to the backend setupFFT.
 * setupFFT must have been called before, since it creates the backend object.
 */
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
/**
 * BENI: setup complex-to-real FFT.
 * scale is only forwarded to the backend for the OpenMP (MIC) API; for Cuda
 * the call is forwarded to the backend setupFFT.
 * setupFFT must have been called before, since it creates the backend object.
 */
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
/**
 * Call complex-to-complex fft.
 * Executes in place complex to complex fft on the device on data pointed by data_ptr.
 * stream id can be specified to use other streams than default; it is only
 * forwarded for the Cuda API.
 */
int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
 * Call complex-to-complex ifft.
 * Executes in place complex to complex ifft on the device on data pointed by data_ptr.
 * stream id can be specified to use other streams than default; it is only
 * forwarded for the Cuda API.
 */
int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
 * Normalize complex to complex ifft.
 * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
 * fft size.
 * For OpenCL the kernel file OpenCL/OpenCLKernels/OpenCLFFT.cl is loaded first.
 */
int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
 * Call real to complex FFT.
 * Executes out of place real to complex fft, real_ptr points to real data, comp_ptr points
 * to complex data, ndim - dimension of data, dimsize - size of each dimension. real_ptr size
 * should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
 * (dimsize[0]/2+1)*dimsize[1]*dimsize[2].
 * streamId is only forwarded for the Cuda API.
 */
int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
 * Call complex to real iFFT.
 * Executes out of place complex to real ifft, real_ptr points to real data, comp_ptr points
 * to complex data, ndim - dimension of data, dimsize - size of each dimension. real_ptr size
 * should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
 * (dimsize[0]/2+1)*dimsize[1]*dimsize[2].
 * streamId is only forwarded for the Cuda API.
 */
int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
 * Normalize complex to real ifft.
 * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
 * fft size.
 * Only implemented for the Cuda API; returns DKS_ERROR for OpenCL and OpenMP (MIC).
 * TODO: opencl and mic implementations.
 */
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
};
#endif

View File

@ -1,7 +1,6 @@
#include "DKSOPAL.h" #include "DKSOPAL.h"
DKSOPAL::DKSOPAL() { DKSOPAL::DKSOPAL() {
dksfft = nullptr;
dkscol = nullptr; dkscol = nullptr;
dksgreens = nullptr; dksgreens = nullptr;
} }
@ -12,7 +11,6 @@ DKSOPAL::DKSOPAL(const char* api_name, const char* device_name) {
} }
DKSOPAL::~DKSOPAL() { DKSOPAL::~DKSOPAL() {
delete dksfft;
delete dkscol; delete dkscol;
delete dksgreens; delete dksgreens;
} }
@ -22,17 +20,14 @@ int DKSOPAL::setupOPAL() {
if (apiOpenCL()) { if (apiOpenCL()) {
ierr = OPENCL_SAFECALL( DKS_SUCCESS ); ierr = OPENCL_SAFECALL( DKS_SUCCESS );
//TODO: only enable if AMD libraries are available //TODO: only enable if AMD libraries are available
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) ); dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) );
dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) ); dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) );
} else if (apiCuda()) { } else if (apiCuda()) {
ierr = CUDA_SAFECALL( DKS_SUCCESS ); ierr = CUDA_SAFECALL( DKS_SUCCESS );
dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) ); dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) );
dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) ); dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) );
} else if (apiOpenMP()) { } else if (apiOpenMP()) {
ierr = MIC_SAFECALL( DKS_SUCCESS ); ierr = MIC_SAFECALL( DKS_SUCCESS );
dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) ); dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) );
dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) ); dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) );
} else { } else {
@ -50,139 +45,6 @@ int DKSOPAL::initDevice() {
} }
/* setup fft plans to reuse if multiple ffts of same size are needed */
int DKSOPAL::setupFFT(int ndim, int N[3]) {
if (apiCuda()) {
return dksfft->setupFFT(ndim, N);
} else if (apiOpenCL()) {
int ierr1 = dksfft->setupFFT(ndim, N);
int ierr2 = dksfft->setupFFTRC(ndim, N);
int ierr3 = dksfft->setupFFTCR(ndim, N);
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
return DKS_ERROR;
return DKS_SUCCESS;
} else if (apiOpenMP()) {
//micbase.mic_setupFFT(ndim, N);
//BENI: setting up RC and CR transformations on MIC
int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
if (ierr1 != DKS_SUCCESS)
return ierr1;
if (ierr2 != DKS_SUCCESS)
return ierr2;
return DKS_SUCCESS;
}
return DKS_ERROR;
}
// BENI: set up the real-to-complex transform only.
// CUDA uses the generic setupFFT; `scale` is forwarded to the OpenMP backend only.
// Returns DKS_ERROR when no backend matches.
int DKSOPAL::setupFFTRC(int ndim, int N[3], double scale) {
    if (apiCuda()) {
        return dksfft->setupFFT(ndim, N);
    }
    if (apiOpenCL()) {
        return dksfft->setupFFTRC(ndim, N);
    }
    if (apiOpenMP()) {
        return dksfft->setupFFTRC(ndim, N, scale);
    }
    return DKS_ERROR;
}
// BENI: set up the complex-to-real transform only.
// CUDA uses the generic setupFFT; `scale` is forwarded to the OpenMP backend only.
// Returns DKS_ERROR when no backend matches.
int DKSOPAL::setupFFTCR(int ndim, int N[3], double scale) {
    if (apiCuda()) {
        return dksfft->setupFFT(ndim, N);
    }
    if (apiOpenCL()) {
        return dksfft->setupFFTCR(ndim, N);
    }
    if (apiOpenMP()) {
        return dksfft->setupFFTCR(ndim, N, scale);
    }
    return DKS_ERROR;
}
/*
 * Run the FFT of the selected backend on data_ptr.
 * streamId is forwarded only on the CUDA path.
 * Returns DKS_ERROR when no backend matches.
 */
int DKSOPAL::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
    if (apiOpenCL() || apiOpenMP()) {
        return dksfft->executeFFT(data_ptr, ndim, dimsize);
    }
    if (apiCuda()) {
        return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
    }

    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
}
/*
 * Run the inverse FFT of the selected backend on data_ptr.
 * streamId is forwarded only on the CUDA path.
 * Returns DKS_ERROR when no backend matches.
 */
int DKSOPAL::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
    if (apiOpenCL() || apiOpenMP()) {
        return dksfft->executeIFFT(data_ptr, ndim, dimsize);
    }
    if (apiCuda()) {
        return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
    }

    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
}
/*
 * Run the FFT normalization of the selected backend on data_ptr.
 * On OpenCL the normalization runs only if the OpenCL FFT kernel file loads
 * successfully; streamId is forwarded only on the CUDA path.
 * Returns DKS_ERROR when the kernel cannot be loaded or no backend matches.
 */
int DKSOPAL::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
    if (apiOpenCL()) {
        if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") != DKS_SUCCESS ) {
            return DKS_ERROR;
        }
        return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
    }
    if (apiCuda()) {
        return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
    }
    if (apiOpenMP()) {
        return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
    }

    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
}
/*
 * Run the real-to-complex FFT of the selected backend, passing real_ptr and
 * comp_ptr through to it. streamId is forwarded only on the CUDA path.
 * Returns DKS_ERROR when no backend matches.
 */
int DKSOPAL::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
    if (apiCuda()) {
        return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
    }
    if (apiOpenCL() || apiOpenMP()) {
        return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
    }

    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
}
/*
 * Run the complex-to-real FFT of the selected backend, passing real_ptr and
 * comp_ptr through to it. streamId is forwarded only on the CUDA path.
 * Returns DKS_ERROR when no backend matches.
 */
int DKSOPAL::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
    if (apiCuda()) {
        return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
    }
    if (apiOpenCL() || apiOpenMP()) {
        return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
    }

    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
}
/*
 * Run the complex-to-real iFFT normalization on real_ptr.
 * Only the CUDA path calls into the backend. OpenCL and OpenMP return
 * DKS_ERROR directly, without a debug message; any other selection logs the
 * message and returns DKS_ERROR.
 */
int DKSOPAL::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
    if (apiCuda()) {
        return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
    }
    if (apiOpenCL() || apiOpenMP()) {
        return DKS_ERROR;
    }

    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
}
int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ, int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
double hz_m0, double hz_m1, double hz_m2, int streamId) { double hz_m0, double hz_m1, double hz_m2, int streamId) {

View File

@ -5,6 +5,7 @@
#include "AutoTuning/DKSAutoTuning.h" #include "AutoTuning/DKSAutoTuning.h"
#include "DKSBase.h" #include "DKSBase.h"
#include "DKSFFT.h"
#include "DKSDefinitions.h" #include "DKSDefinitions.h"
@ -32,11 +33,10 @@
#include "MIC/MICCollimatorPhysics.h" #include "MIC/MICCollimatorPhysics.h"
#endif #endif
class DKSOPAL : public DKSBase { class DKSOPAL : public DKSFFT {
private: private:
DKSFFT *dksfft;
DKSCollimatorPhysics *dkscol; DKSCollimatorPhysics *dkscol;
GreensFunction *dksgreens; GreensFunction *dksgreens;
@ -56,71 +56,6 @@ public:
///////Function library part of dksbase//////// ///////Function library part of dksbase////////
/////////////////////////////////////////////// ///////////////////////////////////////////////
/**
* Setup FFT function.
* Initializes parameters for fft execution. If ndim > 0 initializes handles for fft calls.
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
* each fft will do its own setup according to fft size and dimensions.
* TODO: opencl and mic implementations
*/
int setupFFT(int ndim, int N[3]);
//BENI:
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
//BENI:
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
/**
* Call complex-to-complex fft.
* Executes an in-place complex-to-complex fft on the device on the data pointed to by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation
*/
int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Call complex-to-complex ifft.
* Executes an in-place complex-to-complex ifft on the device on the data pointed to by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation.
*/
int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Normalize complex to complex ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size
* TODO: mic implementation.
*/
int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Call real to complex FFT.
* Executes out of place real to complex fft: real_ptr points to real data, comp_ptr points
* to complex data, ndim is the dimension of data, dimsize is the size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2].
* TODO: opencl and mic implementations
*/
int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Call complex to real iFFT.
* Executes out of place complex to real ifft: real_ptr points to real data, comp_ptr points
* to complex data, ndim is the dimension of data, dimsize is the size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2].
* TODO: opencl and mic implementations.
*/
int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Normalize complex to real ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size.
* TODO: opencl and mic implementations.
*/
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
/** /**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device. * Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs. * For specifics check OPAL docs.

View File

@ -1,11 +1,10 @@
SET (_SRCS MICBase.cpp) SET (_SRCS MICBase.cpp MICFFT.cpp)
SET (_HDRS MICBase.h) SET (_HDRS MICBase.h MICFFT.h)
IF (ENABLE_OPAL) IF (ENABLE_OPAL)
SET (_SRCS SET (_SRCS
${_SRCS} ${_SRCS}
MICChiSquare.cpp MICChiSquare.cpp
MICFFT.cpp
MICGreensFunction.cpp MICGreensFunction.cpp
MICCollimatorPhysics.cpp MICCollimatorPhysics.cpp
) )
@ -13,7 +12,6 @@ IF (ENABLE_OPAL)
SET (_HDRS SET (_HDRS
${_HDRS} ${_HDRS}
MICChiSquare.h MICChiSquare.h
MICFFT.h
MICCollimatorPhysics.h MICCollimatorPhysics.h
MICGreensFunction.hpp MICGreensFunction.hpp
MICMergeSort.h MICMergeSort.h

View File

@ -10,7 +10,7 @@
#include "../Algorithms/FFT.h" #include "../Algorithms/FFT.h"
#include "MICBase.h" #include "MICBase.h"
class MICFFT : public DKSFFT { class MICFFT : public BaseFFT {
private: private:

View File

@ -4,6 +4,25 @@ SET (_HDRS OpenCLBase.h)
SET (_SRCS OpenCLBase.cpp) SET (_SRCS OpenCLBase.cpp)
SET (_KERNELS "") SET (_KERNELS "")
# The OpenCL FFT sources, header and kernels are added whenever ENABLE_AMD is set.
if (ENABLE_AMD)
  set (_SRCS ${_SRCS} OpenCLFFT.cpp)
  set (_HDRS ${_HDRS} OpenCLFFT.h)
  set (_KERNELS
    ${_KERNELS}
    OpenCLKernels/OpenCLFFT.cl
    OpenCLKernels/OpenCLFFTStockham.cl
    OpenCLKernels/OpenCLTranspose.cl
  )
endif ()
IF (ENABLE_MUSR) IF (ENABLE_MUSR)
SET (_HDRS ${_HDRS} OpenCLChiSquareRuntime.h) SET (_HDRS ${_HDRS} OpenCLChiSquareRuntime.h)
SET (_SRCS ${_SRCS} OpenCLChiSquareRuntime.cpp) SET (_SRCS ${_SRCS} OpenCLChiSquareRuntime.cpp)
@ -13,23 +32,18 @@ ENDIF (ENABLE_MUSR)
IF (ENABLE_AMD AND ENABLE_OPAL) IF (ENABLE_AMD AND ENABLE_OPAL)
SET (_SRCS SET (_SRCS
${_SRCS} ${_SRCS}
OpenCLFFT.cpp
OpenCLCollimatorPhysics.cpp OpenCLCollimatorPhysics.cpp
OpenCLGreensFunction.cpp OpenCLGreensFunction.cpp
) )
SET (_HDRS SET (_HDRS
${_HDRS} ${_HDRS}
OpenCLFFT.h
OpenCLCollimatorPhysics.h OpenCLCollimatorPhysics.h
OpenCLGreensFunction.h OpenCLGreensFunction.h
) )
SET (_KERNELS SET (_KERNELS
${_KERNELS} ${_KERNELS}
OpenCLKernels/OpenCLFFT.cl
OpenCLKernels/OpenCLFFTStockham.cl
OpenCLKernels/OpenCLTranspose.cl
OpenCLKernels/OpenCLCollimatorPhysics.cl OpenCLKernels/OpenCLCollimatorPhysics.cl
OpenCLKernels/OpenCLGreensFunction.cl OpenCLKernels/OpenCLGreensFunction.cl
) )

View File

@ -22,7 +22,7 @@
#include "clFFT.h" #include "clFFT.h"
class OpenCLFFT : public DKSFFT { class OpenCLFFT : public BaseFFT {
private: private:
@ -112,10 +112,9 @@ public:
int streamId = -1); int streamId = -1);
int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3], int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3],
int streamId = -1); int streamId = -1);
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) {
{ return DKS_ERROR;
return DKS_ERROR; }
}
//void printData3DN4(cl_double2* &data, int N); //void printData3DN4(cl_double2* &data, int N);

View File

@ -39,8 +39,8 @@ ADD_EXECUTABLE(testFFTSolverMIC testFFTSolver_MIC.cpp)
#TARGET_LINK_LIBRARIES(testFFT dks) #TARGET_LINK_LIBRARIES(testFFT dks)
#TARGET_LINK_LIBRARIES(testMIC dks) #TARGET_LINK_LIBRARIES(testMIC dks)
#TARGET_LINK_LIBRARIES(testMICOpenCL dks) #TARGET_LINK_LIBRARIES(testMICOpenCL dks)
TARGET_LINK_LIBRARIES(testFFT3D dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testFFT3D dks ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testFFT3DRC dks ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testFFT3DRC_MIC dks) #TARGET_LINK_LIBRARIES(testFFT3DRC_MIC dks)
#TARGET_LINK_LIBRARIES(testFFT3DTiming dks) #TARGET_LINK_LIBRARIES(testFFT3DTiming dks)
#TARGET_LINK_LIBRARIES(testStockhamFFT dks) #TARGET_LINK_LIBRARIES(testStockhamFFT dks)
@ -54,11 +54,11 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testGather dks) #TARGET_LINK_LIBRARIES(testGather dks)
#TARGET_LINK_LIBRARIES(testGatherAsync dks) #TARGET_LINK_LIBRARIES(testGatherAsync dks)
#TARGET_LINK_LIBRARIES(testTranspose dks) #TARGET_LINK_LIBRARIES(testTranspose dks)
TARGET_LINK_LIBRARIES(testRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testRandom dks ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testPush dks) #TARGET_LINK_LIBRARIES(testPush dks)
TARGET_LINK_LIBRARIES(testFFTSolverMIC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testFFTSolverMIC dks ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testIntegration dks) #TARGET_LINK_LIBRARIES(testIntegration dks)
#TARGET_LINK_LIBRARIES(testImageReconstruction dks) #TARGET_LINK_LIBRARIES(testImageReconstruction dks)