FFT for OpenCL using clFFT library

This commit is contained in:
Uldis Locans
2016-11-16 18:12:00 +01:00
parent e8386869dc
commit 027bdc01f5
10 changed files with 400 additions and 235 deletions

View File

@ -7,8 +7,8 @@ LINK_DIRECTORIES( ${CMAKE_SOURCE_DIR}/src )
#ADD_EXECUTABLE(testFFT testFFT.cpp)
#ADD_EXECUTABLE(testMIC testMIC.cpp)
#ADD_EXECUTABLE(testMICOpenCL testMICOpenCL.cpp)
#ADD_EXECUTABLE(testFFT3D testFFT3D.cpp)
#ADD_EXECUTABLE(testFFT3DRC testFFT3DRC.cpp)
ADD_EXECUTABLE(testFFT3D testFFT3D.cpp)
ADD_EXECUTABLE(testFFT3DRC testFFT3DRC.cpp)
#ADD_EXECUTABLE(testFFT3DRC_MIC testFFT3DRC_MIC.cpp)
#ADD_EXECUTABLE(testFFT3DTiming testFFT3DTiming.cpp)
#ADD_EXECUTABLE(testStockhamFFT testStockhamFFT.cpp)
@ -23,7 +23,7 @@ LINK_DIRECTORIES( ${CMAKE_SOURCE_DIR}/src )
#ADD_EXECUTABLE(testGatherAsync testGatherAsync.cpp)
#ADD_EXECUTABLE(testTranspose testTranspose.cpp)
ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
#ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp)
ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp)
#ADD_EXECUTABLE(testPush testPush.cpp)
#ADD_EXECUTABLE(testFFTSolverMIC testFFTSolver_MIC.cpp)
#ADD_EXECUTABLE(testIntegration testTimeIntegration.cpp)
@ -38,8 +38,8 @@ ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
#TARGET_LINK_LIBRARIES(testFFT dks)
#TARGET_LINK_LIBRARIES(testMIC dks)
#TARGET_LINK_LIBRARIES(testMICOpenCL dks)
#TARGET_LINK_LIBRARIES(testFFT3D dks)
#TARGET_LINK_LIBRARIES(testFFT3DRC dks)
TARGET_LINK_LIBRARIES(testFFT3D dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testFFT3DRC_MIC dks)
#TARGET_LINK_LIBRARIES(testFFT3DTiming dks)
#TARGET_LINK_LIBRARIES(testStockhamFFT dks)
@ -53,8 +53,8 @@ ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
#TARGET_LINK_LIBRARIES(testGather dks)
#TARGET_LINK_LIBRARIES(testGatherAsync dks)
#TARGET_LINK_LIBRARIES(testTranspose dks)
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks)
#TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks)
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testPush dks)
#TARGET_LINK_LIBRARIES(testFFTSolverMIC dks)
#TARGET_LINK_LIBRARIES(testIntegration dks)

View File

@ -1,6 +1,7 @@
#include <iostream>
#include <cstdlib>
#include <complex>
#include <string>
#include "Utility/TimeStamp.h"
#include "DKSBase.h"
@ -18,22 +19,30 @@ int main(int argc, char *argv[]) {
int N = 16;
char *api_name = new char[10];
char *device_name = new char[10];
if (argc == 2) {
N = atoi(argv[1]);
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
} else if (argc == 3) {
N = atoi(argv[1]);
strcpy(api_name, argv[2]);
strcpy(device_name, "-gpu");
} else if (argc == 4) {
N = atoi(argv[1]);
strcpy(api_name, argv[2]);
strcpy(device_name, argv[3]);
} else {
N = 16;
strcpy(api_name, "OpenCL");
strcpy(device_name, "-gpu");
for (int i = 1; i < argc; i++) {
if (argv[i] == string("-cuda")) {
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-opencl")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-mic")) {
strcpy(api_name, "OpenMP");
strcpy(device_name, "-mic");
}
if (argv[i] == string("-cpu")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-cpu");
}
if (argv[i] == string("-N"))
N = atoi(argv[i+1]);
}
cout << "Use api: " << api_name << ", " << device_name << endl;
@ -74,9 +83,16 @@ int main(int argc, char *argv[]) {
/* write data to device */
ierr = base.writeData< complex<double> >(mem_ptr, cdata, N*N*N);
if (N < 5)
printData3DN4(cdata, N, 3);
/* execute fft */
base.callFFT(mem_ptr, 3, dimsize);
if (N < 5) {
base.readData< complex<double> > (mem_ptr, cfft, N*N*N);
printData3DN4(cfft, N, 3);
}
/* execute ifft */
base.callIFFT(mem_ptr, 3, dimsize);
@ -86,7 +102,9 @@ int main(int argc, char *argv[]) {
/* read data from device */
base.readData< complex<double> >(mem_ptr, cifft, N*N*N);
if (N < 5)
printData3DN4(cifft, N, 3);
/* free device memory */
base.freeMemory< complex<double> >(mem_ptr, N*N*N);
@ -130,7 +148,7 @@ void printData3DN4(complex<double>* &data, int N, int dim) {
if (a < 10e-5 && a > -10e-5)
a = 0;
cout << d << "; " << a << "\t";
cout << "(" << d << "," << a << ") ";
}
}
cout << endl;
@ -157,3 +175,5 @@ void compareData(complex<double>* &data1, complex<double>* &data2, int N, int di
cout << "Size " << N << " CC <--> CC diff: " << sum << endl;
}

View File

@ -9,20 +9,29 @@ using namespace std;
void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim);
void initData(double *data, int dimsize[3]);
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop);
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop,
char *api_name, char *device_name);
void printHelp();
void printData3DN4(complex<double>* &data, int N, int dim);
void printData3DN4(double* &data, int N, int dim);
int main(int argc, char *argv[]) {
int N1 = 8;
int N2 = 8;
int N3 = 8;
int dim = 3;
int loop = 10;
int loop = 0;
char *api_name = new char[10];
char *device_name = new char[10];
if ( readParams(argc, argv, N1, N2, N3, loop) )
if ( readParams(argc, argv, N1, N2, N3, loop, api_name, device_name) )
return 0;
cout << "Use api: " << api_name << ", " << device_name << endl;
int dimsize[3] = {N3, N2, N1};
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2];
@ -30,32 +39,19 @@ int main(int argc, char *argv[]) {
double *rdata = new double[sizereal];
double *outdata = new double[sizereal];
complex<double> *cfft = new complex<double>[sizecomp];
for (int i=0; i<sizecomp; ++i) {
cfft[i].real() = 7.;
cfft[i].imag() = 3.33;
}
initData(rdata, dimsize);
/* init DKSBase */
cout << "Init device and set function" << endl;
#ifdef DKS_MIC
DKSBase base;
base.setAPI("OpenMP", 6);
base.setDevice("-mic", 4);
DKSBase base;
base.setAPI(api_name, strlen(api_name));
base.setDevice(device_name, strlen(device_name));
base.initDevice();
base.setupFFT(3, dimsize);
base.setupFFTRC(dim, dimsize);
/* setup backward fft (COMPLEX->REAL) */
base.setupFFTCR(dim, dimsize,1./(N1*N2*N3));
#endif
#ifdef DKS_CUDA
DKSBase base;
base.setAPI("Cuda", 4);
base.setDevice("-gpu", 4);
base.initDevice();
base.setupFFT(dim, dimsize);
#endif
// allocate memory on device
int ierr;
@ -68,6 +64,7 @@ int main(int argc, char *argv[]) {
base.writeData<double>(real_ptr, rdata, sizereal);
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize);
base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize);
base.readData<double>(real_res_ptr, outdata, sizereal);
//timer for total loop time, FFT and IFFT calls
@ -92,9 +89,7 @@ int main(int argc, char *argv[]) {
gettimeofday(&timeIFFTEnd[i], NULL);
//normalize
#ifdef DKS_CUDA
base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize);
#endif
// read IFFT data from device
base.readData<double>(real_res_ptr, outdata, sizereal);
@ -173,7 +168,9 @@ void printHelp() {
std::cout << std::endl;
}
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop) {
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop,
char *api_name, char *device_name)
{
for (int i = 1; i < argc; i++) {
@ -193,7 +190,68 @@ bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop) {
printHelp();
return true;
}
if (argv[i] == string("-cuda")) {
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-opencl")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-mic")) {
strcpy(api_name, "OpenMP");
strcpy(device_name, "-mic");
}
if (argv[i] == string("-cpu")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-cpu");
}
}
return false;
}
/**
 * Print a block of complex values to stdout as "(re,im) " pairs.
 *
 * One output line is written per value of the middle index; within a line the
 * outer index runs over 0..N-1 and the inner index over 0..N/2. The element
 * read for (outer, middle, inner) is data[outer*N*N + middle*N + inner].
 * Real and imaginary parts lying strictly between -10e-5 and 10e-5 are
 * printed as 0. An extra blank line is written after the last row.
 *
 * NOTE(review): the inner index only covers N/2 + 1 entries while the strides
 * are N and N*N, so the array must hold at least
 * (N-1)*N*N + (N-1)*N + N/2 + 1 elements -- confirm this matches the layout
 * of the buffer handed in by the caller.
 *
 * @param data pointer to the values to print (only read here)
 * @param N    extent used for the loop bounds and the index strides
 * @param dim  not used by this function
 */
void printData3DN4(complex<double>* &data, int N, int dim) {
    const int innerCount = N / 2 + 1;

    for (int row = 0; row < N; ++row) {
        for (int plane = 0; plane < N; ++plane) {
            // offset of the first element of this (plane, row) run
            const int base = plane * N * N + row * N;

            for (int col = 0; col < innerCount; ++col) {
                double re = data[base + col].real();
                double im = data[base + col].imag();

                // suppress numerical noise around zero
                re = (re > -10e-5 && re < 10e-5) ? 0.0 : re;
                im = (im > -10e-5 && im < 10e-5) ? 0.0 : im;

                std::cout << "(" << re << "," << im << ") ";
            }
        }
        std::cout << std::endl;
    }
    std::cout << std::endl;
}
/**
 * Print an N x N x N block of real values to stdout, space separated.
 *
 * One output line is written per value of the middle index; within a line the
 * outer index and then the inner index each run over 0..N-1. The element read
 * for (outer, middle, inner) is data[outer*N*N + middle*N + inner]. Values
 * lying strictly between -10e-5 and 10e-5 are printed as 0. An extra blank
 * line is written after the last row.
 *
 * @param data pointer to the values to print (only read here)
 * @param N    extent of each of the three loops and base of the index strides
 * @param dim  not used by this function
 */
void printData3DN4(double* &data, int N, int dim) {
    for (int row = 0; row < N; ++row) {
        for (int plane = 0; plane < N; ++plane) {
            // offset of the first element of this (plane, row) run
            const int base = plane * N * N + row * N;

            for (int col = 0; col < N; ++col) {
                const double raw = data[base + col];
                // suppress numerical noise around zero
                const double shown = (raw > -10e-5 && raw < 10e-5) ? 0.0 : raw;
                std::cout << shown << " ";
            }
        }
        std::cout << std::endl;
    }
    std::cout << std::endl;
}