200 lines
5.6 KiB
C++
200 lines
5.6 KiB
C++
#include <cmath>
#include <complex>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include <sys/time.h>

#include "Utility/TimeStamp.h"
#include "DKSBase.h"
|
|
|
|
using namespace std;
|
|
|
|
/* Forward declarations of the helpers used by main(). */

// Reads the -grid, -loop and -h/-help options; main() exits when this returns true.
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop);

// Writes the usage text to standard output.
void printHelp();

// Fills the host buffer `data`; `dimsize` holds the three grid extents.
void initData(double *data, int dimsize[3]);

// Prints the summed absolute difference between the two buffers.
void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim);
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
int N1 = 8;
|
|
int N2 = 8;
|
|
int N3 = 8;
|
|
int dim = 3;
|
|
int loop = 10;
|
|
|
|
if ( readParams(argc, argv, N1, N2, N3, loop) )
|
|
return 0;
|
|
|
|
int dimsize[3] = {N3, N2, N1};
|
|
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
|
|
int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2];
|
|
|
|
double *rdata = new double[sizereal];
|
|
double *outdata = new double[sizereal];
|
|
complex<double> *cfft = new complex<double>[sizecomp];
|
|
|
|
for (int i=0; i<sizecomp; ++i) {
|
|
cfft[i].real() = 7.;
|
|
cfft[i].imag() = 3.33;
|
|
}
|
|
initData(rdata, dimsize);
|
|
|
|
/* init DKSBase */
|
|
cout << "Init device and set function" << endl;
|
|
#ifdef DKS_MIC
|
|
DKSBase base;
|
|
base.setAPI("OpenMP", 6);
|
|
base.setDevice("-mic", 4);
|
|
base.initDevice();
|
|
base.setupFFTRC(dim, dimsize);
|
|
/* setup backward fft (COMPLEX->REAL) */
|
|
base.setupFFTCR(dim, dimsize,1./(N1*N2*N3));
|
|
#endif
|
|
|
|
#ifdef DKS_CUDA
|
|
DKSBase base;
|
|
base.setAPI("Cuda", 4);
|
|
base.setDevice("-gpu", 4);
|
|
base.initDevice();
|
|
base.setupFFT(dim, dimsize);
|
|
#endif
|
|
|
|
// allocate memory on device
|
|
int ierr;
|
|
void *real_ptr, *comp_ptr, *real_res_ptr;
|
|
real_ptr = base.allocateMemory<double>(sizereal, ierr);
|
|
real_res_ptr = base.allocateMemory<double>(sizereal, ierr);
|
|
comp_ptr = base.allocateMemory< std::complex<double> >(sizecomp, ierr);
|
|
|
|
// execute one run before starting the timers
|
|
base.writeData<double>(real_ptr, rdata, sizereal);
|
|
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
|
|
base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize);
|
|
base.readData<double>(real_res_ptr, outdata, sizereal);
|
|
|
|
//timer for total loop time, FFT and IFFT calls
|
|
struct timeval timeStart, timeEnd;
|
|
struct timeval timeFFTStart[loop], timeFFTEnd[loop];
|
|
struct timeval timeIFFTStart[loop], timeIFFTEnd[loop];
|
|
|
|
gettimeofday(&timeStart, NULL);
|
|
for (int i=0; i<loop; ++i){
|
|
|
|
// write data to device
|
|
base.writeData<double>(real_ptr, rdata, sizereal);
|
|
|
|
// execute rcfft
|
|
gettimeofday(&timeFFTStart[i], NULL);
|
|
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
|
|
gettimeofday(&timeFFTEnd[i], NULL);
|
|
|
|
// execute crfft
|
|
gettimeofday(&timeIFFTStart[i], NULL);
|
|
base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize);
|
|
gettimeofday(&timeIFFTEnd[i], NULL);
|
|
|
|
//normalize
|
|
#ifdef DKS_CUDA
|
|
base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize);
|
|
#endif
|
|
|
|
// read IFFT data from device
|
|
base.readData<double>(real_res_ptr, outdata, sizereal);
|
|
|
|
}
|
|
gettimeofday(&timeEnd, NULL);
|
|
|
|
// free device memory
|
|
base.freeMemory< std::complex<double> >(comp_ptr, sizecomp);
|
|
base.freeMemory<double>(real_ptr, sizereal);
|
|
base.freeMemory<double>(real_res_ptr, sizereal);
|
|
|
|
// compare in and out data to see if we get back the same results
|
|
compareData(rdata, outdata, N1, N2, N3, dim);
|
|
|
|
//calculate seconds for total time and fft times
|
|
double tfft = 0;
|
|
double tifft = 0;
|
|
double ttot = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1e6 +
|
|
(timeEnd.tv_usec - timeStart.tv_usec) ) * 1e-6;
|
|
|
|
for (int i = 0; i < loop; i++) {
|
|
tfft += ( (timeFFTEnd[i].tv_sec - timeFFTStart[i].tv_sec) * 1e6 +
|
|
(timeFFTEnd[i].tv_usec - timeFFTStart[i].tv_usec) ) * 1e-6;
|
|
|
|
tifft += ( (timeIFFTEnd[i].tv_sec - timeIFFTStart[i].tv_sec) * 1e6 +
|
|
(timeIFFTEnd[i].tv_usec - timeIFFTStart[i].tv_usec) ) * 1e-6;
|
|
}
|
|
|
|
//print timing results
|
|
std::cout << std::fixed << std::setprecision(5) << "\nTiming results"
|
|
<< "\nTotal time\t" << ttot << "s\tavg time\t" << ttot / loop << "s"
|
|
<< "\nFFT total\t" << tfft << "s\tFFT avg \t" << tfft / loop << "s"
|
|
<< "\nIFFT total\t" << tifft << "s\tIFFT avg\t" << tifft / loop << "s"
|
|
<< "\n\n";
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Sums |data1[idx] - data2[idx]| over an NI x NJ x NK grid and prints the
 * total as "RC <--> CR diff: <sum>" on standard output.
 *
 * The linear index is k*NI*NJ + j*NI + i, visited with i in the outermost
 * loop and k in the innermost one. The dim argument is not used.
 */
void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim) {
  const int rowStride = NI;
  const int planeStride = NI * NJ;

  double totalDiff = 0;
  for (int col = 0; col < NI; ++col) {
    for (int row = 0; row < NJ; ++row) {
      // offset of (col, row) inside one plane
      const int inPlane = row * rowStride + col;
      for (int plane = 0; plane < NK; ++plane) {
        const int offset = plane * planeStride + inPlane;
        totalDiff += std::fabs(data1[offset] - data2[offset]);
      }
    }
  }

  std::cout << "RC <--> CR diff: " << totalDiff << std::endl;
}
|
|
|
|
/*
 * Fills the buffer so that every element holds its position along the
 * dimsize[0] axis: element (i, j, k) at index
 * i*dimsize[1]*dimsize[0] + j*dimsize[0] + k receives the value k.
 */
void initData(double *data, int dimsize[3]) {
  const int rowLength = dimsize[0];
  const int rowCount = dimsize[1];
  const int planeCount = dimsize[2];

  // rows are laid out back to back, so a running cursor visits the same
  // indices in the same order as the explicit index formula
  double *cursor = data;
  for (int plane = 0; plane < planeCount; ++plane) {
    for (int row = 0; row < rowCount; ++row) {
      for (int pos = 0; pos < rowLength; ++pos) {
        *cursor++ = pos;
      }
    }
  }
}
|
|
|
|
/*
 * Prints the usage text of testFFT3DRC to standard output: what the program
 * does and how to pass the grid size (-grid) and repetition count (-loop).
 */
void printHelp() {

  std::cout << std::endl;

  // the trailing space keeps "real" and "function" apart when the two
  // literals are streamed back to back
  std::cout << "testFFT3DRC executes 3D real complex and 3D complex real "
            << "function on the Intel MIC.\n";
  std::cout << "Operations performed by testFFT3DRC are: "
            << "write data to MIC -> FFT -> IFFT -> read data from MIC.\n";
  std::cout << "To run testFFT3DRC execute: ./testFFT3DRC -grid $x $y $z "
            << "-loop $l\n";
  std::cout << "where $x $y $z are number of elements in each dimension and "
            << "$l is the number of times all the operations will be performed.\n";

  std::cout << std::endl;
}
|
|
|
|
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop) {
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
|
|
if ( argv[i] == std::string("-grid") ) {
|
|
N1 = atoi(argv[i + 1]);
|
|
N2 = atoi(argv[i + 2]);
|
|
N3 = atoi(argv[i + 3]);
|
|
i += 3;
|
|
}
|
|
|
|
if ( argv[i] == std::string("-loop") ) {
|
|
loop = atoi(argv[i + 1]);
|
|
i += 1;
|
|
}
|
|
|
|
if ( argv[i] == std::string("-h") || argv[i] == std::string("-help") ) {
|
|
printHelp();
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|