// Test/benchmark driver: pushes a 3D real array through a real-to-complex FFT
// and a complex-to-real FFT using DKSBase, then reports the round-trip error
// and timings.
//
// Flow of main(), as far as the visible text shows:
//   - N1, N2, N3 default to 8 and loop to 10; readParams() may override them
//     (-grid, -loop). When readParams() returns true (after -h / -help has
//     printed the help text) main returns 0 immediately.
//   - dimsize is {N3, N2, N1}; sizereal is the product of the three extents
//     and sizecomp is (dimsize[0]/2+1) * dimsize[1] * dimsize[2].
//   - Host buffers rdata, outdata (sizereal doubles) and cfft (sizecomp
//     elements) are allocated with new[].
//   - Under DKS_CUDA a DKSBase object is configured with setAPI("Cuda", 4),
//     setDevice("-gpu", 4), initDevice() and setupFFT(). A second, differently
//     guarded section ends with setupFFTCR(dim, dimsize, 1./(N1*N2*N3)).
//   - Three device buffers are allocated, one untimed write/R2C/C2R/read pass
//     is executed, and then the timed loop wraps callR2CFFT and callC2RFFT in
//     gettimeofday() pairs. Under DKS_CUDA callNormalizeC2RFFT runs before the
//     result is read back.
//   - Device memory is freed, compareData() prints the summed absolute
//     difference between rdata and outdata, and total/average times are
//     printed with 5 decimals.
//
// Helpers:
//   compareData - sums fabs(data1[id] - data2[id]) over NI*NJ*NK elements and
//                 prints "RC <--> CR diff: <sum>"; the dim argument is unused.
//   initData    - fills data so that each element equals its index k along
//                 dimsize[0].
//   printHelp   - prints usage text to std::cout.
//   readParams  - scans argv for -grid x y z, -loop l, -h / -help.
//
// NOTE(review): this text appears to have been damaged before it reached this
// file. Line breaks are collapsed, the header names after #include are gone,
// template argument lists are gone (complex, allocateMemory, freeMemory), and
// two spans of main are missing (after "for (int i=0; i" in both places).
// Restore from the original source before attempting to build; as written,
// each // comment swallows the rest of its physical line.
//
// NOTE(review): issues visible in the surviving code, to fix once restored:
//   - readParams reads argv[i + 1] .. argv[i + 3] without checking them
//     against argc, and atoi() gives no way to detect malformed numbers.
//   - loop is not validated; it sizes the timeval arrays (variable-length
//     arrays are a compiler extension in C++) and is used as the divisor for
//     the averages.
//   - ierr is passed to allocateMemory but never read afterwards.
//   - No delete[] for rdata, outdata or cfft is visible.
//   - The first help sentence concatenates "...3D complex real" and
//     "function on..." without a separating space.
#include #include #include #include "Utility/TimeStamp.h" #include "DKSBase.h" using namespace std; void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim); void initData(double *data, int dimsize[3]); bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop); void printHelp(); int main(int argc, char *argv[]) { int N1 = 8; int N2 = 8; int N3 = 8; int dim = 3; int loop = 10; if ( readParams(argc, argv, N1, N2, N3, loop) ) return 0; int dimsize[3] = {N3, N2, N1}; int sizereal = dimsize[0] * dimsize[1] * dimsize[2]; int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2]; double *rdata = new double[sizereal]; double *outdata = new double[sizereal]; complex *cfft = new complex[sizecomp]; for (int i=0; iREAL) */ base.setupFFTCR(dim, dimsize,1./(N1*N2*N3)); #endif #ifdef DKS_CUDA DKSBase base; base.setAPI("Cuda", 4); base.setDevice("-gpu", 4); base.initDevice(); base.setupFFT(dim, dimsize); #endif // allocate memory on device int ierr; void *real_ptr, *comp_ptr, *real_res_ptr; real_ptr = base.allocateMemory(sizereal, ierr); real_res_ptr = base.allocateMemory(sizereal, ierr); comp_ptr = base.allocateMemory< std::complex >(sizecomp, ierr); // execute one run before starting the timers base.writeData(real_ptr, rdata, sizereal); base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize); base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize); base.readData(real_res_ptr, outdata, sizereal); //timer for total loop time, FFT and IFFT calls struct timeval timeStart, timeEnd; struct timeval timeFFTStart[loop], timeFFTEnd[loop]; struct timeval timeIFFTStart[loop], timeIFFTEnd[loop]; gettimeofday(&timeStart, NULL); for (int i=0; i(real_ptr, rdata, sizereal); // execute rcfft gettimeofday(&timeFFTStart[i], NULL); base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize); gettimeofday(&timeFFTEnd[i], NULL); // execute crfft gettimeofday(&timeIFFTStart[i], NULL); base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize); gettimeofday(&timeIFFTEnd[i], NULL); 
//normalize #ifdef DKS_CUDA base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize); #endif // read IFFT data from device base.readData(real_res_ptr, outdata, sizereal); } gettimeofday(&timeEnd, NULL); // free device memory base.freeMemory< std::complex >(comp_ptr, sizecomp); base.freeMemory(real_ptr, sizereal); base.freeMemory(real_res_ptr, sizereal); // compare in and out data to see if we get back the same results compareData(rdata, outdata, N1, N2, N3, dim); //calculate seconds for total time and fft times double tfft = 0; double tifft = 0; double ttot = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1e6 + (timeEnd.tv_usec - timeStart.tv_usec) ) * 1e-6; for (int i = 0; i < loop; i++) { tfft += ( (timeFFTEnd[i].tv_sec - timeFFTStart[i].tv_sec) * 1e6 + (timeFFTEnd[i].tv_usec - timeFFTStart[i].tv_usec) ) * 1e-6; tifft += ( (timeIFFTEnd[i].tv_sec - timeIFFTStart[i].tv_sec) * 1e6 + (timeIFFTEnd[i].tv_usec - timeIFFTStart[i].tv_usec) ) * 1e-6; } //print timing results std::cout << std::fixed << std::setprecision(5) << "\nTiming results" << "\nTotal time\t" << ttot << "s\tavg time\t" << ttot / loop << "s" << "\nFFT total\t" << tfft << "s\tFFT avg \t" << tfft / loop << "s" << "\nIFFT total\t" << tifft << "s\tIFFT avg\t" << tifft / loop << "s" << "\n\n"; return 0; } void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim) { int id; double sum = 0; for (int i = 0; i < NI; i++) { for (int j = 0; j < NJ; j++) { for (int k = 0; k < NK; k++) { id = k*NI*NJ + j*NI + i; sum += fabs(data1[id] - data2[id]); } } } std::cout << "RC <--> CR diff: " << sum << std::endl; } void initData(double *data, int dimsize[3]) { for (int i = 0; i < dimsize[2]; i++) { for (int j = 0; j < dimsize[1]; j++) { for (int k = 0; k < dimsize[0]; k++) { data[i*dimsize[1]*dimsize[0] + j*dimsize[0] + k] = k; } } } } void printHelp() { std::cout << std::endl; std::cout << "testFFT3DRC executes 3D real complex and 3D complex real" << "function on the Intel MIC.\n"; std::cout << "Operations 
performed by testRC are: " << "write data to MIC -> FFT -> IFFT -> read data from MIC.\n"; std::cout << "To run testFFT3DRC execute: ./testFFT3DRC -grid $x $y $z " << "-loop $l\n"; std::cout << "where $x $y $z are number of elements in each dimension and " << "$l is the number of times all the operations will be performed.\n"; std::cout << std::endl; } bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop) { for (int i = 1; i < argc; i++) { if ( argv[i] == std::string("-grid") ) { N1 = atoi(argv[i + 1]); N2 = atoi(argv[i + 2]); N3 = atoi(argv[i + 3]); i += 3; } if ( argv[i] == std::string("-loop") ) { loop = atoi(argv[i + 1]); i += 1; } if ( argv[i] == std::string("-h") || argv[i] == std::string("-help") ) { printHelp(); return true; } } return false; }