Compare commits
16 Commits
Author | SHA1 | Date | |
---|---|---|---|
7ca93a3a49 | |||
aa14065994 | |||
50ecb31042 | |||
3d130aa01f | |||
5071ea5741 | |||
efe5f0db38 | |||
1d420504cc | |||
cc59f550ab | |||
d20fea2caa | |||
8b7d824b3a | |||
2c9fe4ea6f | |||
e32f9aaff2 | |||
f3527969cb | |||
cadd258668 | |||
a94ed9f3b8 | |||
61919ae53c |
@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||
PROJECT (DKS)
|
||||
SET (DKS_VERSION_MAJOR 1)
|
||||
SET (DKS_VERSION_MINOR 1)
|
||||
SET (DKS_VERSION_PATCH 0)
|
||||
SET (DKS_VERSION_PATCH 2)
|
||||
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
|
||||
SET (PACKAGE \"dks\")
|
||||
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
|
||||
@ -28,11 +28,13 @@ MESSAGE (STATUS "OpenCL kernel files: ${OPENCL_KERNELS}")
|
||||
set (BOOSTROOT $ENV{BOOST_DIR})
|
||||
SET (Boost_USE_STATIC_LIBS OFF)
|
||||
SET (Boost_USE_STATIC_RUNTIME OFF)
|
||||
FIND_PACKAGE(Boost 1.55.0 REQUIRED COMPONENTS filesystem system)
|
||||
#FIND_PACKAGE(Boost 1.55 REQUIRED COMPONENTS filesystem system)
|
||||
FIND_PACKAGE(Boost 1.41 REQUIRED)
|
||||
IF (Boost_FOUND)
|
||||
MESSAGE (STATUS "Boost version: ${Boost_VERSION}")
|
||||
MESSAGE (STATUS "Found boost include dir: ${Boost_INCLUDE_DIRS}")
|
||||
MESSAGE (STATUS "Found boost library dir: ${Boost_LIBRARY_DIRS}")
|
||||
MESSAGE (STATUS "Found boost libraries: ${Boost_LIBRARIES}")
|
||||
#MESSAGE (STATUS "Found boost libraries: ${Boost_LIBRARIES}")
|
||||
INCLUDE_DIRECTORIES (${Boost_INCLUDE_DIRS})
|
||||
LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
|
||||
ENDIF (Boost_FOUND)
|
||||
@ -79,7 +81,7 @@ OPTION (USE_UQTK "Use UQTK" OFF)
|
||||
IF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL)
|
||||
|
||||
#for intel compiler turn on openmp and opencl
|
||||
OPTION (USE_OPENCL "Use OpenCL" ON)
|
||||
OPTION (USE_OPENCL "Use OpenCL" OFF)
|
||||
OPTION (USE_CUDA "Use CUDA" OFF)
|
||||
OPTION (USE_MIC "Use intel MIC" ON)
|
||||
|
||||
@ -113,15 +115,21 @@ ENDIF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL)
|
||||
IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") AND NOT USE_INTEL)
|
||||
|
||||
|
||||
OPTION (USE_OPENCL "Use OpenCL" ON)
|
||||
OPTION (USE_OPENCL "Use OpenCL" OFF)
|
||||
OPTION (USE_CUDA "Use CUDA" OFF)
|
||||
OPTION (USE_MIC "Use intel MIC" OFF)
|
||||
|
||||
OPTION (STATIC_CUDA "Link static cuda libraries" OFF)
|
||||
|
||||
IF (ENABLE_MUSR)
|
||||
SET (USE_OPENCL ON)
|
||||
ENDIF (ENABLE_MUSR)
|
||||
|
||||
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG -O3 -Wall -fopenmp -std=c++11 -D__wsu")
|
||||
|
||||
FIND_PACKAGE(CUDA)
|
||||
IF (CUDA_FOUND)
|
||||
SET (USE_CUDA ON)
|
||||
OPTION(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cuda libraries" OFF)
|
||||
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
|
||||
LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
|
||||
LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs)
|
||||
@ -131,20 +139,27 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
|
||||
MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
|
||||
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||
|
||||
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lcudart -lcufft -lcublas -lnvToolsExt -DDKS_CUDA")
|
||||
SET (CUDA_NVCC_FLAGS "-arch=sm_35 -DDEBUG -lcufft -lcublas -lcudart -fmad=false")
|
||||
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DDEBUG -std=c++11 -D__wsu")
|
||||
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${OPENCL_KERNELS}")
|
||||
SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
|
||||
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${OPENCL_KERNELS}")
|
||||
|
||||
IF (NOT STATIC_CUDA)
|
||||
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_CUDA")
|
||||
SET (DKS_CUDA_LIBS "-lcudadevrt -lcudart -lcufft -lcublas")
|
||||
ELSE (NOT STATIC_CUDA)
|
||||
SET (CUDA_SEPARABLE_COMPILATION ON)
|
||||
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_CUDA -fPIC")
|
||||
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-rdc=true;-lcufft_static;-lcublas_static;-lcurand_static")
|
||||
SET (DKS_CUDA_LIBS "-lcudadevrt -lcudart_static -lcufft_static -lcublas_static -lculibos")
|
||||
ENDIF (NOT STATIC_CUDA)
|
||||
|
||||
#if cuda version >= 7.0 add runtime commpilation flags
|
||||
IF (NOT CUDA_VERSION VERSION_LESS "7.0")
|
||||
IF (NOT CUDA_VERSION VERSION_LESS "7.0" AND ENABLE_MUSR)
|
||||
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lnvrtc -lcuda")
|
||||
ENDIF (NOT CUDA_VERSION VERSION_LESS "7.0")
|
||||
ENDIF (NOT CUDA_VERSION VERSION_LESS "7.0" AND ENABLE_MUSR)
|
||||
|
||||
MESSAGE (STATUS "nvcc flags: ${CUDA_NVCC_FLAGS}")
|
||||
|
||||
SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
|
||||
#set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
SET(BUILD_SHARED_LIBS OFF)
|
||||
|
||||
ENDIF (CUDA_FOUND)
|
||||
@ -171,9 +186,9 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
|
||||
ENDIF(APPLE AND NOT CUDA_FOUND)
|
||||
|
||||
#if cuda found set cuda opencl flags
|
||||
IF (CUDA_FOUND)
|
||||
IF (CUDA_FOUND AND USE_OPENCL)
|
||||
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL -lpthread -DDKS_OPENCL")
|
||||
ENDIF (CUDA_FOUND)
|
||||
ENDIF (CUDA_FOUND AND USE_OPENCL)
|
||||
|
||||
#if cuda not found but amd opencl found set opencl flags
|
||||
IF (NOT CUDA_FOUND AND OpenCL_FOUND)
|
||||
|
34
ReadMe.first
34
ReadMe.first
@ -29,30 +29,30 @@ Intel MIC compilers (optional)
|
||||
######Source######
|
||||
https://gitlab.psi.ch/uldis_l/DKS
|
||||
|
||||
######Changes from DKS-1.0.x version######
|
||||
DKS is split into three modules that can be enabled/disabled at compile time depending on which software it is used for.
|
||||
By default only DKSBase and DKSFFT modules are enabled. In order to install other modules the necessary otion needs to be enabled.
|
||||
Supported options are:
|
||||
-DENABLE_OPAL option should be enabled if DKS will be used for OPAL
|
||||
-DENABLE_MUSR option should be enable if DKS will be used for musrfit
|
||||
-DENABLE_PET option should be enabled if DKS will be used for PET image reconstruction
|
||||
|
||||
See install instructions for more details on how to enable the necessary options in DKS
|
||||
|
||||
######Install######
|
||||
#consult the https://gitlab.psi.ch/uldis_l/DKS/wikis/home for full install isntructions
|
||||
|
||||
#clone DKS
|
||||
git clone git@gitlab.psi.ch:uldis_l/DKS.git DKS
|
||||
|
||||
#set compilers to use
|
||||
#supported c++ compilers: g++, icpc, mpicxx whith g++
|
||||
#supported c compilers: gcc, icc, mpicc whith gcc
|
||||
export CXX_COMPILER=cpp_compiler_name
|
||||
export CC_COMPILER=c_compiler_name
|
||||
#switch to the desired version (OPTIONAL)
|
||||
git checkout DKS-1.1.0
|
||||
|
||||
#set dks root directory directory
|
||||
cd DKS
|
||||
export DKS_ROOT = $PWD
|
||||
|
||||
#set build directory
|
||||
mkdir $DKS_BUILD_DIR
|
||||
cd $DKS_BUILD_DIR
|
||||
|
||||
#set install directory
|
||||
export DKS_INSTALL_DIR = $DKS_BUILD_DIR #default is /usr/local/
|
||||
|
||||
CXX=$CXX_COMPILER CC=$CC_COMPILER cmake -DCMAKE_INSTALL_PREFIX=$DKS_BUILD_DIR $DKS_ROOT
|
||||
#configure installation in build directory
|
||||
#enable DKS modules to compile -DENABLE_OPAL, -DENABLE_MUSR, -DENABLE_PET
|
||||
CXX=<c++ compiler> CC=<c compiler> -DCMAKE_INSTALL_PREFIX=<install dir> <path to DKS source> [-DENABLE_OPAL=1 -DENABLE_MUSR=1 -DENABLE_PET=1]
|
||||
|
||||
#install DKS
|
||||
make
|
||||
make install
|
||||
|
||||
|
@ -4,28 +4,30 @@ LINK_DIRECTORIES( ${CMAKE_SOURCE_DIR}/src )
|
||||
#chi square kernel tests
|
||||
IF (ENABLE_MUSR)
|
||||
ADD_EXECUTABLE(testChiSquareRT testChiSquareRT.cpp)
|
||||
TARGET_LINK_LIBRARIES(testChiSquareRT dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testChiSquareRT dks ${CLFFT_LIBRARIES})
|
||||
|
||||
ADD_EXECUTABLE(testChiSquareRTRandom testChiSquareRTRandom.cpp)
|
||||
TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${CLFFT_LIBRARIES})
|
||||
|
||||
IF (USE_UQTK)
|
||||
ADD_EXECUTABLE(testChiSquareRTUQTK testChiSquareRTUQTK.cpp)
|
||||
TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
|
||||
TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${CLFFT_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
|
||||
ENDIF (USE_UQTK)
|
||||
#TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES})
|
||||
|
||||
#test to verify search functions
|
||||
ADD_EXECUTABLE(testSearch testSearch.cpp)
|
||||
TARGET_LINK_LIBRARIES(testSearch dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testSearch dks ${CLFFT_LIBRARIES})
|
||||
ENDIF (ENABLE_MUSR)
|
||||
|
||||
IF (ENABLE_OPAL)
|
||||
ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${CLFFT_LIBRARIES})
|
||||
|
||||
ADD_EXECUTABLE(testPushKick testPushKick.cpp)
|
||||
TARGET_LINK_LIBRARIES(testPushKick dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testPushKick dks ${CLFFT_LIBRARIES})
|
||||
ENDIF(ENABLE_OPAL)
|
||||
|
||||
ADD_EXECUTABLE(testFFT testFFT.cpp)
|
||||
TARGET_LINK_LIBRARIES(testFFT dks ${CLFFT_LIBRARIES})
|
||||
|
||||
|
||||
|
214
auto-tuning/testFFT.cpp
Normal file
214
auto-tuning/testFFT.cpp
Normal file
@ -0,0 +1,214 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <complex>
|
||||
|
||||
#include "Utility/TimeStamp.h"
|
||||
#include "DKSFFT.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void compareData(complex<double>* data1, complex<double>* data2, int N, int dim);
|
||||
void compareData(double* data1, double *data2, int N, int dim);
|
||||
|
||||
void initData(complex<double> *data, int dimsize[3], int dim);
|
||||
void initData(double *data, int dimsize[3], int dim);
|
||||
|
||||
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim,
|
||||
char *api_name, char *device_name);
|
||||
|
||||
void printHelp();
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
int ierr;
|
||||
int N1 = 8;
|
||||
int N2 = 8;
|
||||
int N3 = 8;
|
||||
int dim = 3;
|
||||
char *api_name = new char[10];
|
||||
char *device_name = new char[10];
|
||||
|
||||
if ( readParams(argc, argv, N1, N2, N3, dim, api_name, device_name) )
|
||||
return 0;
|
||||
|
||||
cout << "Use api: " << api_name << ", " << device_name << endl;
|
||||
|
||||
int dimsize[3] = {N1, N2, N3};
|
||||
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
|
||||
int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2];
|
||||
|
||||
double *rdata = new double[sizereal];
|
||||
double *ordata = new double[sizereal];
|
||||
complex<double> *cdata = new complex<double>[sizereal];
|
||||
complex<double> *codata = new complex<double>[sizereal];
|
||||
|
||||
initData(rdata, dimsize, 3);
|
||||
initData(cdata, dimsize, 3);
|
||||
|
||||
/* init DKSBase */
|
||||
cout << "Init device and set function" << endl;
|
||||
DKSFFT base;
|
||||
base.setAPI(api_name, strlen(api_name));
|
||||
base.setDevice(device_name, strlen(device_name));
|
||||
cout << "init device" << endl;
|
||||
base.initDevice();
|
||||
cout << "setup fft" << endl;
|
||||
base.setupFFT(dim, dimsize);
|
||||
|
||||
//Test RC FFT -> CR FFT
|
||||
void *real_ptr, *comp_ptr, *res_ptr;
|
||||
cout << "allocate memory" << endl;
|
||||
real_ptr = base.allocateMemory<double>(sizereal, ierr);
|
||||
res_ptr = base.allocateMemory<double>(sizereal, ierr);
|
||||
comp_ptr = base.allocateMemory< complex<double> >(sizecomp, ierr);
|
||||
|
||||
cout << "write data" << endl;
|
||||
base.writeData<double>(real_ptr, rdata, sizereal);
|
||||
|
||||
cout << "perform fft" << endl;
|
||||
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
|
||||
base.callC2RFFT(res_ptr, comp_ptr, dim, dimsize);
|
||||
base.callNormalizeC2RFFT(res_ptr, dim, dimsize);
|
||||
|
||||
cout << "read data" << endl;
|
||||
base.readData<double>(res_ptr, ordata, sizereal);
|
||||
|
||||
compareData(rdata, ordata, N1, 3);
|
||||
|
||||
base.freeMemory<double>(real_ptr, sizereal);
|
||||
base.freeMemory<double>(res_ptr, sizereal);
|
||||
base.freeMemory< complex<double> >(comp_ptr, sizecomp);
|
||||
|
||||
//Test CC FFT
|
||||
void *mem_ptr;
|
||||
mem_ptr = base.allocateMemory< complex<double> >(sizereal, ierr);
|
||||
base.writeData< complex<double> >(mem_ptr, cdata, sizereal);
|
||||
base.callFFT(mem_ptr, 3, dimsize);
|
||||
base.callIFFT(mem_ptr, 3, dimsize);
|
||||
base.callNormalizeFFT(mem_ptr, 3, dimsize);
|
||||
base.readData< complex<double> >(mem_ptr, codata, sizereal);
|
||||
|
||||
compareData(cdata, codata, N1, 3);
|
||||
|
||||
base.freeMemory< complex<double> > (mem_ptr, sizereal);
|
||||
|
||||
delete[] rdata;
|
||||
delete[] ordata;
|
||||
delete[] cdata;
|
||||
delete[] codata;
|
||||
|
||||
}
|
||||
|
||||
void compareData(complex<double>* data1, complex<double>* data2, int N, int dim) {
|
||||
int ni, nj, nk, id;
|
||||
ni = (dim > 2) ? N : 1;
|
||||
nj = (dim > 1) ? N : 1;
|
||||
nk = N;
|
||||
double sum = 0;
|
||||
for (int i = 0; i < ni; i++) {
|
||||
for (int j = 0; j < nj; j++) {
|
||||
for (int k = 0; k < nk; k++) {
|
||||
id = i*ni*ni + j*nj + k;
|
||||
sum += fabs(data1[id].real() - data2[id].real());
|
||||
sum += fabs(data1[id].imag() - data2[id].imag());
|
||||
}
|
||||
}
|
||||
}
|
||||
cout << "Size " << N << " CC <--> CC diff: " << sum << endl;
|
||||
}
|
||||
|
||||
void compareData(double* data1, double* data2, int N, int dim) {
|
||||
int ni, nj, nk, id;
|
||||
ni = (dim > 2) ? N : 1;
|
||||
nj = (dim > 1) ? N : 1;
|
||||
nk = N;
|
||||
double sum = 0;
|
||||
for (int i = 0; i < ni; i++) {
|
||||
for (int j = 0; j < nj; j++) {
|
||||
for (int k = 0; k < nk; k++) {
|
||||
id = i*ni*ni + j*nj + k;
|
||||
sum += fabs(data1[id] - data2[id]);
|
||||
}
|
||||
}
|
||||
}
|
||||
cout << "Size " << N << " RC <--> CR diff: " << sum << endl;
|
||||
}
|
||||
|
||||
void initData(complex<double> *data, int dimsize[3], int dim) {
|
||||
if (dim == 3) {
|
||||
for (int i = 0; i < dimsize[2]; i++)
|
||||
for (int j = 0; j < dimsize[1]; j++)
|
||||
for (int k = 0; k < dimsize[0]; k++)
|
||||
data[i*dimsize[1]*dimsize[0] + j*dimsize[0] + k] = complex<double>(sin(k), 0.0);
|
||||
} else if (dim == 2) {
|
||||
for (int j = 0; j < dimsize[1]; j++) {
|
||||
for (int k = 0; k < dimsize[0]; k++) {
|
||||
data[j*dimsize[0] + k] = complex<double>(sin(k), 0.0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int k = 0; k < dimsize[0]; k++)
|
||||
data[k] = complex<double>(sin(k), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
void initData(double *data, int dimsize[3], int dim) {
|
||||
if (dim == 3) {
|
||||
for (int i = 0; i < dimsize[2]; i++)
|
||||
for (int j = 0; j < dimsize[1]; j++)
|
||||
for (int k = 0; k < dimsize[0]; k++)
|
||||
data[i*dimsize[1]*dimsize[0] + j*dimsize[0] + k] = sin(k);
|
||||
} else if (dim == 2) {
|
||||
for (int j = 0; j < dimsize[1]; j++) {
|
||||
for (int k = 0; k < dimsize[0]; k++) {
|
||||
data[j*dimsize[0] + k] = sin(k);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int k = 0; k < dimsize[0]; k++)
|
||||
data[k] = sin(k);
|
||||
}
|
||||
}
|
||||
|
||||
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim,
|
||||
char *api_name, char *device_name)
|
||||
{
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
|
||||
if ( argv[i] == std::string("-dim")) {
|
||||
dim = atoi(argv[i + 1]);
|
||||
i++;
|
||||
}
|
||||
|
||||
if ( argv[i] == std::string("-grid") ) {
|
||||
N1 = atoi(argv[i + 1]);
|
||||
N2 = atoi(argv[i + 2]);
|
||||
N3 = atoi(argv[i + 3]);
|
||||
i += 3;
|
||||
}
|
||||
|
||||
if (argv[i] == string("-cuda")) {
|
||||
strcpy(api_name, "Cuda");
|
||||
strcpy(device_name, "-gpu");
|
||||
}
|
||||
|
||||
if (argv[i] == string("-opencl")) {
|
||||
strcpy(api_name, "OpenCL");
|
||||
strcpy(device_name, "-gpu");
|
||||
}
|
||||
|
||||
if (argv[i] == string("-mic")) {
|
||||
strcpy(api_name, "OpenMP");
|
||||
strcpy(device_name, "-mic");
|
||||
}
|
||||
|
||||
if (argv[i] == string("-cpu")) {
|
||||
strcpy(api_name, "OpenCL");
|
||||
strcpy(device_name, "-cpu");
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -3,5 +3,7 @@ SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
|
||||
SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
|
||||
SET(${PROJECT_NAME}_LIBRARY "dks")
|
||||
SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
|
||||
SET(DKS_CUDA_STATIC ${STATIC_CUDA})
|
||||
SET(DKS_CUDA_LIBS "${DKS_CUDA_LIBS}")
|
||||
SET(DKS_VERSION ${DKS_VERSION})
|
||||
SET(DKS_VERSION_STR ${DKS_VERSION_STR})
|
||||
|
BIN
doc/refman.pdf
BIN
doc/refman.pdf
Binary file not shown.
@ -16,7 +16,8 @@ public:
|
||||
|
||||
virtual ~DKSCollimatorPhysics() { }
|
||||
|
||||
virtual int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numpartices) = 0;
|
||||
virtual int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numpartices,
|
||||
bool enableRutherforScattering = true) = 0;
|
||||
|
||||
virtual int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include "../DKSDefinitions.h"
|
||||
|
||||
class DKSFFT {
|
||||
class BaseFFT {
|
||||
|
||||
protected:
|
||||
int defaultN[3];
|
||||
@ -22,7 +22,7 @@ protected:
|
||||
|
||||
public:
|
||||
|
||||
virtual ~DKSFFT() { }
|
||||
virtual ~BaseFFT() { }
|
||||
|
||||
virtual int setupFFT(int ndim, int N[3]) = 0;
|
||||
virtual int setupFFTRC(int ndim, int N[3], double scale = 1.0) = 0;
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <boost/optional/optional.hpp>
|
||||
#include <boost/property_tree/xml_parser.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
//#include <boost/filesystem.hpp>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
@ -24,7 +24,7 @@
|
||||
#include "../DKSDefinitions.h"
|
||||
|
||||
namespace pt = boost::property_tree;
|
||||
namespace fs = boost::filesystem;
|
||||
//namespace fs = boost::filesystem;
|
||||
|
||||
const std::string config_dir = "/.config/DKS";
|
||||
const std::string config_file = "/autotuning.xml";
|
||||
|
@ -35,12 +35,12 @@ ENDMACRO ()
|
||||
SET (DKS_BASEDIR_HDRS
|
||||
DKSBase.h
|
||||
DKSDefinitions.h
|
||||
DKSOPAL.h
|
||||
DKSFFT.h
|
||||
)
|
||||
|
||||
SET (DKS_BASEDIR_SRCS
|
||||
DKSBase.cpp
|
||||
DKSOPAL.cpp
|
||||
DKSFFT.cpp
|
||||
)
|
||||
|
||||
#add opal to DKS if enable_opal is set
|
||||
@ -112,26 +112,18 @@ IF (USE_CUDA)
|
||||
CUDA_ADD_LIBRARY(dks ${DKS_SRCS})
|
||||
CUDA_ADD_LIBRARY(dksshared SHARED ${DKS_SRCS})
|
||||
|
||||
IF (USE_UQTK)
|
||||
TARGET_LINK_LIBRARIES(dks cudadevrt lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
|
||||
TARGET_LINK_LIBRARIES(dksshared cudadevrt lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
|
||||
ELSE (USE_UQTK)
|
||||
TARGET_LINK_LIBRARIES(dks cudadevrt)
|
||||
TARGET_LINK_LIBRARIES(dksshared cudadevrt)
|
||||
ENDIF (USE_UQTK)
|
||||
TARGET_LINK_LIBRARIES(dks ${DKS_CUDA_LIBS})
|
||||
TARGET_LINK_LIBRARIES(dksshared ${DKS_CUDA_LIBS})
|
||||
#TARGET_LINK_LIBRARIES(dks)
|
||||
#TARGET_LINK_LIBRARIES(dksshared)
|
||||
|
||||
ELSE (USE_CUDA)
|
||||
MESSAGE (STATUS "DKS srcs: ${DKS_SRCS}")
|
||||
ADD_LIBRARY(dks ${DKS_SRCS})
|
||||
ADD_LIBRARY(dksshared SHARED ${DKS_SRCS})
|
||||
|
||||
IF (USE_UQTK)
|
||||
TARGET_LINK_LIBRARIES(dks lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
|
||||
TARGET_LINK_LIBRARIES(dksshared lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
|
||||
ELSE (USE_UQTK)
|
||||
TARGET_LINK_LIBRARIES(dks)
|
||||
TARGET_LINK_LIBRARIES(dksshared)
|
||||
ENDIF(USE_UQTK)
|
||||
TARGET_LINK_LIBRARIES(dks)
|
||||
TARGET_LINK_LIBRARIES(dksshared)
|
||||
|
||||
ENDIF (USE_CUDA)
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
SET (_HDRS CudaBase.cuh)
|
||||
SET (_SRCS CudaBase.cu)
|
||||
SET (_HDRS CudaBase.cuh CudaFFT.cuh)
|
||||
SET (_SRCS CudaBase.cu CudaFFT.cu)
|
||||
|
||||
IF (ENABLE_OPAL)
|
||||
SET (_HDRS ${_HDRS} CudaFFT.cuh CudaGreensFunction.cuh CudaCollimatorPhysics.cuh)
|
||||
SET (_SRCS ${_SRCS} CudaFFT.cu CudaGreensFunction.cu CudaCollimatorPhysics.cu)
|
||||
SET (_HDRS ${_HDRS} CudaGreensFunction.cuh CudaCollimatorPhysics.cuh)
|
||||
SET (_SRCS ${_SRCS} CudaGreensFunction.cu CudaCollimatorPhysics.cu)
|
||||
ENDIF (ENABLE_OPAL)
|
||||
|
||||
IF (ENABLE_MUSR)
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <cufft.h>
|
||||
#include <cublas_v2.h>
|
||||
#include <curand_kernel.h>
|
||||
#include <nvToolsExt.h>
|
||||
#include <time.h>
|
||||
|
||||
#define BLOCK_SIZE 128
|
||||
|
@ -23,9 +23,10 @@
|
||||
#define X0_M 9
|
||||
#define I_M 10
|
||||
#define DT_M 11
|
||||
#define LOWENERGY_THR 12
|
||||
|
||||
#define BLOCK_SIZE 128
|
||||
#define NUMPAR 12
|
||||
#define NUMPAR 13
|
||||
|
||||
__device__ inline double dot(double3 &d1, double3 &d2) {
|
||||
|
||||
@ -41,6 +42,23 @@ __device__ inline double3 cross(double3 &lhs, double3 &rhs) {
|
||||
return tmp;
|
||||
}
|
||||
|
||||
__device__ inline double3 ArbitraryRotation(double3 &W, double3 &Rorg, double Theta) {
|
||||
double c=cos(Theta);
|
||||
double s=sin(Theta);
|
||||
double dotW = sqrt(dot(W,W));
|
||||
W.x = W.x / dotW;
|
||||
W.y = W.y / dotW;
|
||||
W.z = W.z / dotW;
|
||||
|
||||
double dotWR = dot(W, Rorg) * (1.0 - c);
|
||||
double3 crossW = cross(W, Rorg);
|
||||
double3 tmp;
|
||||
tmp.x = Rorg.x * c + crossW.x * s + W.x * dotWR;
|
||||
tmp.y = Rorg.y * c + crossW.y * s + W.y * dotWR;
|
||||
tmp.z = Rorg.z * c + crossW.z * s + W.z * dotWR;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
__device__ inline bool checkHit(double &z, double *par) {
|
||||
|
||||
/* check if particle is in the degrader material */
|
||||
@ -89,7 +107,7 @@ __device__ inline void energyLoss(double &Eng, bool &pdead, curandState &state,
|
||||
Eng = Eng + delta_E / 1E3;
|
||||
}
|
||||
|
||||
pdead = ((Eng<1E-4) || (dEdx>0));
|
||||
pdead = ( (Eng < par[LOWENERGY_THR]) || (dEdx > 0) );
|
||||
|
||||
}
|
||||
|
||||
@ -100,6 +118,7 @@ __device__ inline void Rot(double &px, double &pz, double &x, double &z, double
|
||||
double Psixz;
|
||||
double pxz;
|
||||
|
||||
/*
|
||||
if (px>=0 && pz>=0)
|
||||
Psixz = atan(px/pz);
|
||||
else if (px>0 && pz<0)
|
||||
@ -108,7 +127,8 @@ __device__ inline void Rot(double &px, double &pz, double &x, double &z, double
|
||||
Psixz = atan(px/pz) + 2*PI;
|
||||
else
|
||||
Psixz = atan(px/pz) + PI;
|
||||
|
||||
*/
|
||||
Psixz = atan2(px, pz);
|
||||
pxz = sqrt(px*px + pz*pz);
|
||||
|
||||
if(coord==1) {
|
||||
@ -125,7 +145,9 @@ __device__ inline void Rot(double &px, double &pz, double &x, double &z, double
|
||||
pz = -pxz*sin(Psixz)*sin(thetacou) + pxz*cos(Psixz)*cos(thetacou);
|
||||
}
|
||||
|
||||
__device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, double* par) {
|
||||
__device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, double* par,
|
||||
bool enableRutherfordScattering)
|
||||
{
|
||||
|
||||
double Eng = sqrt(dot(P, P) + 1.0) * M_P - M_P;
|
||||
double gamma = (Eng + M_P) / M_P;
|
||||
@ -148,20 +170,9 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
|
||||
}
|
||||
|
||||
//__syncthreads();
|
||||
|
||||
double xplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
|
||||
Rot(P.x, P.z, R.x, R.z, xplane, normP, thetacou, deltas, 1, par);
|
||||
|
||||
double P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
||||
if(P2 < 0.0047) {
|
||||
double P3 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
||||
double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
|
||||
double P4 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
||||
if(P4 > 0.5)
|
||||
thetaru = -thetaru;
|
||||
Rot(P.x,P.z,R.x,R.z, xplane, normP, thetaru, deltas, 0, par);
|
||||
}
|
||||
|
||||
// y-direction: See Physical Review, "Multiple Scattering"
|
||||
z1 = curand_normal_double(&state);//gsl_ran_gaussian(rGen_m,1.0);
|
||||
z2 = curand_normal_double(&state);//gsl_ran_gaussian(rGen_m,1.0);
|
||||
@ -178,14 +189,23 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
|
||||
double yplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
|
||||
Rot(P.y,P.z,R.y,R.z, yplane, normP, thetacou, deltas, 2, par);
|
||||
|
||||
P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
||||
if(P2 < 0.0047) {
|
||||
double P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
||||
if( (P2 < 0.0047) && enableRutherfordScattering) {
|
||||
double P3 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
||||
double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
|
||||
double P4 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
||||
if(P4 > 0.5)
|
||||
thetaru = -thetaru;
|
||||
Rot(P.y,P.z,R.y,R.z, yplane, normP, thetaru, deltas, 0, par);
|
||||
//double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
|
||||
double thetaru = 2.5 * sqrt(1 / P3) * 2.0 * theta0;
|
||||
double phiru = 2.0 * M_PI * curand_uniform_double(&state);
|
||||
double th0=atan2(sqrt(P.x*P.x+P.y*P.y),fabs(P.z));
|
||||
double3 W,X;
|
||||
|
||||
double dotP = sqrt(dot(P,P));
|
||||
X.x = cos(phiru)*sin(thetaru) * dotP;
|
||||
X.y = sin(phiru)*sin(thetaru) * dotP;
|
||||
X.z = cos(thetaru) * dotP;
|
||||
W.x = -P.y;
|
||||
W.y = P.x;
|
||||
W.z = 0.0;
|
||||
P = ArbitraryRotation(W, X, th0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -193,7 +213,7 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
|
||||
|
||||
template <typename T>
|
||||
__global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state,
|
||||
int numparticles)
|
||||
int numparticles, bool enableRutherfordScattering)
|
||||
{
|
||||
|
||||
//get global id and thread id
|
||||
@ -235,7 +255,7 @@ __global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state
|
||||
P.x = P.x * ptot / sq;
|
||||
P.y = P.y * ptot / sq;
|
||||
P.z = P.z * ptot / sq;
|
||||
coulombScat(R[tid], P, s, p);
|
||||
coulombScat(R[tid], P, s, p, enableRutherfordScattering);
|
||||
|
||||
data[idx].Pincol = P;
|
||||
} else {
|
||||
@ -258,7 +278,8 @@ __global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state
|
||||
}
|
||||
|
||||
__global__ void kernelCollimatorPhysics2(CUDA_PART2_SMALL data, double *par,
|
||||
curandState *state, int numparticles)
|
||||
curandState *state, int numparticles,
|
||||
bool enableRutherfordScattering)
|
||||
{
|
||||
|
||||
//get global id and thread id
|
||||
@ -296,7 +317,7 @@ __global__ void kernelCollimatorPhysics2(CUDA_PART2_SMALL data, double *par,
|
||||
P.x = P.x * ptot / sq;
|
||||
P.y = P.y * ptot / sq;
|
||||
P.z = P.z * ptot / sq;
|
||||
coulombScat(R[tid], P, s, p);
|
||||
coulombScat(R[tid], P, s, p, enableRutherfordScattering);
|
||||
|
||||
data.Pincol[idx] = P;
|
||||
} else {
|
||||
@ -663,7 +684,8 @@ struct less_then
|
||||
}
|
||||
};
|
||||
|
||||
int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles)
|
||||
int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
|
||||
bool enableRutherfordScattering)
|
||||
{
|
||||
|
||||
int threads = BLOCK_SIZE;
|
||||
@ -676,7 +698,8 @@ int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int n
|
||||
kernelCollimatorPhysics<<<blocks, threads, smem_size>>>((CUDA_PART_SMALL*)mem_ptr,
|
||||
(double*)par_ptr,
|
||||
m_base->cuda_getCurandStates(),
|
||||
numparticles);
|
||||
numparticles,
|
||||
enableRutherfordScattering);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
|
@ -110,7 +110,7 @@ public:
|
||||
*
|
||||
*/
|
||||
int CollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numpartices);
|
||||
int numpartices, bool enableRutherforScattering = true);
|
||||
|
||||
int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "../Algorithms/FFT.h"
|
||||
#include "CudaBase.cuh"
|
||||
|
||||
class CudaFFT : public DKSFFT{
|
||||
class CudaFFT : public BaseFFT {
|
||||
|
||||
private:
|
||||
|
||||
|
@ -33,7 +33,6 @@
|
||||
|
||||
#ifdef DKS_CUDA
|
||||
#include "CUDA/CudaBase.cuh"
|
||||
#include "nvToolsExt.h"
|
||||
#endif
|
||||
|
||||
#ifdef DKS_MIC
|
||||
@ -889,9 +888,10 @@ public:
|
||||
* TODO: opencl and mic imlementation
|
||||
*/
|
||||
int callMemInfo() {
|
||||
#ifdef DKS_CUDA
|
||||
if (apiCuda())
|
||||
return CUDA_SAFECALL(cbase->cuda_memInfo());
|
||||
|
||||
#endif
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
@ -900,11 +900,13 @@ public:
|
||||
* Used for debuging and timing purposes only.
|
||||
*/
|
||||
void oclEventInfo() {
|
||||
#ifdef DKS_OPENCL
|
||||
if (apiOpenCL())
|
||||
return OPENCL_SAFECALL(oclbase->ocl_eventInfo());
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test function to profile opencl kernel calls.
|
||||
* Used for debuging and timing purposes only.
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "AutoTuning/DKSAutoTuningTester.h"
|
||||
|
||||
#include "DKSBase.h"
|
||||
#include "DKSFFT.h"
|
||||
|
||||
#include "Algorithms/ChiSquareRuntime.h"
|
||||
|
||||
@ -19,7 +20,7 @@
|
||||
#include "OpenCL/OpenCLChiSquareRuntime.h"
|
||||
#endif
|
||||
|
||||
class DKSBaseMuSR : public DKSBase {
|
||||
class DKSBaseMuSR : public DKSFFT {
|
||||
|
||||
private:
|
||||
|
||||
|
147
src/DKSFFT.cpp
Normal file
147
src/DKSFFT.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
#include "DKSFFT.h"
|
||||
|
||||
DKSFFT::DKSFFT() {
|
||||
dksfft = nullptr;
|
||||
}
|
||||
|
||||
DKSFFT::~DKSFFT() {
|
||||
delete dksfft;
|
||||
}
|
||||
|
||||
/* setup fft plans to reuse if multiple ffts of same size are needed */
|
||||
int DKSFFT::setupFFT(int ndim, int N[3]) {
|
||||
|
||||
if (apiCuda()) {
|
||||
dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
} else if (apiOpenCL()) {
|
||||
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
|
||||
int ierr1 = dksfft->setupFFT(ndim, N);
|
||||
int ierr2 = dksfft->setupFFTRC(ndim, N);
|
||||
int ierr3 = dksfft->setupFFTCR(ndim, N);
|
||||
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
|
||||
return DKS_ERROR;
|
||||
|
||||
return DKS_SUCCESS;
|
||||
} else if (apiOpenMP()) {
|
||||
//micbase.mic_setupFFT(ndim, N);
|
||||
//BENI: setting up RC and CR transformations on MIC
|
||||
dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
|
||||
int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
|
||||
int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
|
||||
if (ierr1 != DKS_SUCCESS)
|
||||
return ierr1;
|
||||
if (ierr2 != DKS_SUCCESS)
|
||||
return ierr2;
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
//BENI:
|
||||
int DKSFFT::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTRC(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTRC(ndim, N, scale);
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
//BENI:
|
||||
int DKSFFT::setupFFTCR(int ndim, int N[3], double scale) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTCR(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTCR(ndim, N, scale);
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
/* call OpenCL FFT function for selected platform */
|
||||
int DKSFFT::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call OpenCL IFFT function for selected platform */
|
||||
int DKSFFT::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call normalize FFT function for selected platform */
|
||||
int DKSFFT::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL()) {
|
||||
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
else
|
||||
return DKS_ERROR;
|
||||
} else if (apiCuda()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
} else if (apiOpenMP()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call real to complex FFT */
|
||||
int DKSFFT::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call complex to real FFT */
|
||||
int DKSFFT::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* normalize complex to real iFFT */
|
||||
int DKSFFT::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL())
|
||||
return DKS_ERROR;
|
||||
else if (apiOpenMP())
|
||||
return DKS_ERROR;
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
|
108
src/DKSFFT.h
Normal file
108
src/DKSFFT.h
Normal file
@ -0,0 +1,108 @@
|
||||
#ifndef H_DKSBASE_FFT
|
||||
#define H_DKSBASE_FFT
|
||||
|
||||
#include <iostream>
|
||||
#include "AutoTuning/DKSAutoTuning.h"
|
||||
|
||||
#include "DKSBase.h"
|
||||
|
||||
#include "DKSDefinitions.h"
|
||||
|
||||
#include "Algorithms/GreensFunction.h"
|
||||
#include "Algorithms/CollimatorPhysics.h"
|
||||
#include "Algorithms/FFT.h"
|
||||
|
||||
#ifdef DKS_AMD
|
||||
#include "OpenCL/OpenCLFFT.h"
|
||||
#endif
|
||||
|
||||
#ifdef DKS_CUDA
|
||||
#include "CUDA/CudaFFT.cuh"
|
||||
#endif
|
||||
|
||||
#ifdef DKS_MIC
|
||||
#include "MIC/MICFFT.h"
|
||||
#endif
|
||||
|
||||
class DKSFFT : public DKSBase {
|
||||
|
||||
private:
|
||||
|
||||
BaseFFT *dksfft;
|
||||
|
||||
int initFFT();
|
||||
|
||||
public:
|
||||
|
||||
DKSFFT();
|
||||
~DKSFFT();
|
||||
|
||||
/**
|
||||
* Setup FFT function.
|
||||
* Initializes parameters for fft executuin. If ndim > 0 initializes handles for fft calls.
|
||||
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
|
||||
* each fft will do its own setup according to fft size and dimensions.
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int setupFFT(int ndim, int N[3]);
|
||||
//BENI:
|
||||
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
|
||||
//BENI:
|
||||
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex fft.
|
||||
* Executes in place complex to compelx fft on the device on data pointed by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation
|
||||
*/
|
||||
int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex ifft.
|
||||
* Executes in place complex to compelx ifft on the device on data pointed by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize complex to complex ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call real to complex FFT.
|
||||
* Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex to real iFFT.
|
||||
* Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize compelx to real ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
148
src/DKSOPAL.cpp
148
src/DKSOPAL.cpp
@ -1,7 +1,6 @@
|
||||
#include "DKSOPAL.h"
|
||||
|
||||
DKSOPAL::DKSOPAL() {
|
||||
dksfft = nullptr;
|
||||
dkscol = nullptr;
|
||||
dksgreens = nullptr;
|
||||
}
|
||||
@ -12,7 +11,6 @@ DKSOPAL::DKSOPAL(const char* api_name, const char* device_name) {
|
||||
}
|
||||
|
||||
DKSOPAL::~DKSOPAL() {
|
||||
delete dksfft;
|
||||
delete dkscol;
|
||||
delete dksgreens;
|
||||
}
|
||||
@ -22,17 +20,14 @@ int DKSOPAL::setupOPAL() {
|
||||
if (apiOpenCL()) {
|
||||
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
||||
//TODO: only enable if AMD libraries are available
|
||||
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
|
||||
dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) );
|
||||
dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) );
|
||||
} else if (apiCuda()) {
|
||||
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
||||
dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
|
||||
dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) );
|
||||
dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) );
|
||||
} else if (apiOpenMP()) {
|
||||
ierr = MIC_SAFECALL( DKS_SUCCESS );
|
||||
dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
|
||||
dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) );
|
||||
dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) );
|
||||
} else {
|
||||
@ -50,139 +45,6 @@ int DKSOPAL::initDevice() {
|
||||
|
||||
}
|
||||
|
||||
/* setup fft plans to reuse if multiple ffts of same size are needed */
|
||||
int DKSOPAL::setupFFT(int ndim, int N[3]) {
|
||||
|
||||
if (apiCuda()) {
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
} else if (apiOpenCL()) {
|
||||
int ierr1 = dksfft->setupFFT(ndim, N);
|
||||
int ierr2 = dksfft->setupFFTRC(ndim, N);
|
||||
int ierr3 = dksfft->setupFFTCR(ndim, N);
|
||||
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
|
||||
return DKS_ERROR;
|
||||
|
||||
return DKS_SUCCESS;
|
||||
} else if (apiOpenMP()) {
|
||||
//micbase.mic_setupFFT(ndim, N);
|
||||
//BENI: setting up RC and CR transformations on MIC
|
||||
int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
|
||||
int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
|
||||
if (ierr1 != DKS_SUCCESS)
|
||||
return ierr1;
|
||||
if (ierr2 != DKS_SUCCESS)
|
||||
return ierr2;
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
//BENI:
|
||||
int DKSOPAL::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTRC(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTRC(ndim, N, scale);
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
//BENI:
|
||||
int DKSOPAL::setupFFTCR(int ndim, int N[3], double scale) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->setupFFT(ndim, N);
|
||||
if (apiOpenCL())
|
||||
return dksfft->setupFFTCR(ndim, N);
|
||||
else if (apiOpenMP())
|
||||
return dksfft->setupFFTCR(ndim, N, scale);
|
||||
|
||||
return DKS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
/* call OpenCL FFT function for selected platform */
|
||||
int DKSOPAL::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call OpenCL IFFT function for selected platform */
|
||||
int DKSOPAL::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize);
|
||||
else if (apiCuda())
|
||||
return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call normalize FFT function for selected platform */
|
||||
int DKSOPAL::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiOpenCL()) {
|
||||
if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
else
|
||||
return DKS_ERROR;
|
||||
} else if (apiCuda()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
|
||||
} else if (apiOpenMP()) {
|
||||
return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call real to complex FFT */
|
||||
int DKSOPAL::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
|
||||
if (apiCuda())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* call complex to real FFT */
|
||||
int DKSOPAL::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL() || apiOpenMP())
|
||||
return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
/* normalize complex to real iFFT */
|
||||
int DKSOPAL::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
|
||||
if (apiCuda())
|
||||
return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
|
||||
else if (apiOpenCL())
|
||||
return DKS_ERROR;
|
||||
else if (apiOpenMP())
|
||||
return DKS_ERROR;
|
||||
|
||||
DEBUG_MSG("No implementation for selected platform");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
|
||||
double hz_m0, double hz_m1, double hz_m2, int streamId) {
|
||||
|
||||
@ -209,18 +71,20 @@ int DKSOPAL::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size,
|
||||
|
||||
int DKSOPAL::callCollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numparticles, int numparams,
|
||||
int &numaddback, int &numdead)
|
||||
int &numaddback, int &numdead,
|
||||
bool enableRutherforScattering)
|
||||
{
|
||||
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles, enableRutherforScattering);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int DKSOPAL::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles)
|
||||
int DKSOPAL::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles,
|
||||
bool enableRutherforScattering)
|
||||
{
|
||||
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
|
||||
return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles, enableRutherforScattering);
|
||||
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "AutoTuning/DKSAutoTuning.h"
|
||||
|
||||
#include "DKSBase.h"
|
||||
#include "DKSFFT.h"
|
||||
|
||||
#include "DKSDefinitions.h"
|
||||
|
||||
@ -32,11 +33,10 @@
|
||||
#include "MIC/MICCollimatorPhysics.h"
|
||||
#endif
|
||||
|
||||
class DKSOPAL : public DKSBase {
|
||||
class DKSOPAL : public DKSFFT {
|
||||
|
||||
private:
|
||||
|
||||
DKSFFT *dksfft;
|
||||
DKSCollimatorPhysics *dkscol;
|
||||
GreensFunction *dksgreens;
|
||||
|
||||
@ -56,71 +56,6 @@ public:
|
||||
///////Function library part of dksbase////////
|
||||
///////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Setup FFT function.
|
||||
* Initializes parameters for fft executuin. If ndim > 0 initializes handles for fft calls.
|
||||
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
|
||||
* each fft will do its own setup according to fft size and dimensions.
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int setupFFT(int ndim, int N[3]);
|
||||
//BENI:
|
||||
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
|
||||
//BENI:
|
||||
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex fft.
|
||||
* Executes in place complex to compelx fft on the device on data pointed by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation
|
||||
*/
|
||||
int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex-to-complex ifft.
|
||||
* Executes in place complex to compelx ifft on the device on data pointed by data_ptr.
|
||||
* stream id can be specified to use other streams than default.
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize complex to complex ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size
|
||||
* TODO: mic implementation.
|
||||
*/
|
||||
int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call real to complex FFT.
|
||||
* Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations
|
||||
*/
|
||||
int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Call complex to real iFFT.
|
||||
* Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points
|
||||
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
|
||||
* should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
|
||||
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Normalize compelx to real ifft.
|
||||
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
|
||||
* fft size.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
|
||||
|
||||
/**
|
||||
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
|
||||
* For specifics check OPAL docs.
|
||||
@ -159,14 +94,16 @@ public:
|
||||
*/
|
||||
int callCollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numparticles, int numparams,
|
||||
int &numaddback, int &numdead);
|
||||
int &numaddback, int &numdead,
|
||||
bool enableRutherfordScattering = true);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
|
||||
* TODO: opencl and mic implementations.
|
||||
*/
|
||||
int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles);
|
||||
int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles,
|
||||
bool enableRutherfordScattering = true);
|
||||
|
||||
/**
|
||||
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
|
||||
|
@ -1,11 +1,10 @@
|
||||
SET (_SRCS MICBase.cpp)
|
||||
SET (_HDRS MICBase.h)
|
||||
SET (_SRCS MICBase.cpp MICFFT.cpp)
|
||||
SET (_HDRS MICBase.h MICFFT.h)
|
||||
|
||||
IF (ENABLE_OPAL)
|
||||
SET (_SRCS
|
||||
${_SRCS}
|
||||
MICChiSquare.cpp
|
||||
MICFFT.cpp
|
||||
MICGreensFunction.cpp
|
||||
MICCollimatorPhysics.cpp
|
||||
)
|
||||
@ -13,7 +12,6 @@ IF (ENABLE_OPAL)
|
||||
SET (_HDRS
|
||||
${_HDRS}
|
||||
MICChiSquare.h
|
||||
MICFFT.h
|
||||
MICCollimatorPhysics.h
|
||||
MICGreensFunction.hpp
|
||||
MICMergeSort.h
|
||||
|
@ -368,7 +368,9 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {
|
||||
|
||||
}
|
||||
|
||||
int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles) {
|
||||
int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
|
||||
bool enableRutherforScattering)
|
||||
{
|
||||
|
||||
//cast device memory pointers to appropriate types
|
||||
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
|
||||
|
@ -40,7 +40,8 @@ public:
|
||||
|
||||
~MICCollimatorPhysics() { };
|
||||
|
||||
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);
|
||||
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
|
||||
bool enableRutherforScattering = true);
|
||||
|
||||
int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "../Algorithms/FFT.h"
|
||||
#include "MICBase.h"
|
||||
|
||||
class MICFFT : public DKSFFT {
|
||||
class MICFFT : public BaseFFT {
|
||||
|
||||
private:
|
||||
|
||||
|
@ -4,6 +4,25 @@ SET (_HDRS OpenCLBase.h)
|
||||
SET (_SRCS OpenCLBase.cpp)
|
||||
SET (_KERNELS "")
|
||||
|
||||
IF (ENABLE_AMD)
|
||||
SET (_SRCS
|
||||
${_SRCS}
|
||||
OpenCLFFT.cpp
|
||||
)
|
||||
|
||||
SET (_HDRS
|
||||
${_HDRS}
|
||||
OpenCLFFT.h
|
||||
)
|
||||
|
||||
SET (_KERNELS
|
||||
${_KERNELS}
|
||||
OpenCLKernels/OpenCLFFT.cl
|
||||
OpenCLKernels/OpenCLFFTStockham.cl
|
||||
OpenCLKernels/OpenCLTranspose.cl
|
||||
)
|
||||
ENDIF (ENABLE_AMD)
|
||||
|
||||
IF (ENABLE_MUSR)
|
||||
SET (_HDRS ${_HDRS} OpenCLChiSquareRuntime.h)
|
||||
SET (_SRCS ${_SRCS} OpenCLChiSquareRuntime.cpp)
|
||||
@ -13,23 +32,18 @@ ENDIF (ENABLE_MUSR)
|
||||
IF (ENABLE_AMD AND ENABLE_OPAL)
|
||||
SET (_SRCS
|
||||
${_SRCS}
|
||||
OpenCLFFT.cpp
|
||||
OpenCLCollimatorPhysics.cpp
|
||||
OpenCLGreensFunction.cpp
|
||||
)
|
||||
|
||||
SET (_HDRS
|
||||
${_HDRS}
|
||||
OpenCLFFT.h
|
||||
OpenCLCollimatorPhysics.h
|
||||
OpenCLGreensFunction.h
|
||||
)
|
||||
|
||||
SET (_KERNELS
|
||||
${_KERNELS}
|
||||
OpenCLKernels/OpenCLFFT.cl
|
||||
OpenCLKernels/OpenCLFFTStockham.cl
|
||||
OpenCLKernels/OpenCLTranspose.cl
|
||||
OpenCLKernels/OpenCLCollimatorPhysics.cl
|
||||
OpenCLKernels/OpenCLGreensFunction.cl
|
||||
)
|
||||
|
@ -34,7 +34,7 @@ TODO:
|
||||
2. boost.compute sort for user defined structure crashes
|
||||
*/
|
||||
int OpenCLCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr,
|
||||
int numparticles)
|
||||
int numparticles, bool enableRutherforScattering)
|
||||
{
|
||||
/*
|
||||
//set number of total threads, and number threads per block
|
||||
|
@ -52,7 +52,8 @@ public:
|
||||
}
|
||||
|
||||
/* execute degrader code on device */
|
||||
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);
|
||||
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
|
||||
bool enableRutherforScattering = true);
|
||||
|
||||
int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
||||
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
||||
|
@ -22,7 +22,7 @@
|
||||
|
||||
#include "clFFT.h"
|
||||
|
||||
class OpenCLFFT : public DKSFFT {
|
||||
class OpenCLFFT : public BaseFFT {
|
||||
|
||||
private:
|
||||
|
||||
@ -112,10 +112,9 @@ public:
|
||||
int streamId = -1);
|
||||
int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3],
|
||||
int streamId = -1);
|
||||
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1)
|
||||
{
|
||||
return DKS_ERROR;
|
||||
}
|
||||
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) {
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
//void printData3DN4(cl_double2* &data, int N);
|
||||
|
||||
|
@ -39,8 +39,8 @@ ADD_EXECUTABLE(testFFTSolverMIC testFFTSolver_MIC.cpp)
|
||||
#TARGET_LINK_LIBRARIES(testFFT dks)
|
||||
#TARGET_LINK_LIBRARIES(testMIC dks)
|
||||
#TARGET_LINK_LIBRARIES(testMICOpenCL dks)
|
||||
TARGET_LINK_LIBRARIES(testFFT3D dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testFFT3D dks ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testFFT3DRC dks ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testFFT3DRC_MIC dks)
|
||||
#TARGET_LINK_LIBRARIES(testFFT3DTiming dks)
|
||||
#TARGET_LINK_LIBRARIES(testStockhamFFT dks)
|
||||
@ -54,11 +54,11 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testGather dks)
|
||||
#TARGET_LINK_LIBRARIES(testGatherAsync dks)
|
||||
#TARGET_LINK_LIBRARIES(testTranspose dks)
|
||||
TARGET_LINK_LIBRARIES(testRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testRandom dks ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testPush dks)
|
||||
TARGET_LINK_LIBRARIES(testFFTSolverMIC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testFFTSolverMIC dks ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testIntegration dks)
|
||||
#TARGET_LINK_LIBRARIES(testImageReconstruction dks)
|
||||
|
||||
|
Reference in New Issue
Block a user