#include #include #include #include "Utility/TimeStamp.h" #include "DKSBase.h" using namespace std; void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim); void initData(double *data, int dimsize[3]); bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop, char *api_name, char *device_name); void printHelp(); void printData3DN4(complex* &data, int N, int dim); void printData3DN4(double* &data, int N, int dim); int main(int argc, char *argv[]) { int N1 = 8; int N2 = 8; int N3 = 8; int dim = 3; int loop = 0; char *api_name = new char[10]; char *device_name = new char[10]; if ( readParams(argc, argv, N1, N2, N3, loop, api_name, device_name) ) return 0; cout << "Use api: " << api_name << ", " << device_name << endl; int dimsize[3] = {N3, N2, N1}; int sizereal = dimsize[0] * dimsize[1] * dimsize[2]; int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2]; double *rdata = new double[sizereal]; double *outdata = new double[sizereal]; complex *cfft = new complex[sizecomp]; initData(rdata, dimsize); /* init DKSBase */ cout << "Init device and set function" << endl; DKSBase base; base.setAPI(api_name, strlen(api_name)); base.setDevice(device_name, strlen(device_name)); base.initDevice(); base.setupFFT(3, dimsize); base.setupFFTRC(dim, dimsize); /* setup backward fft (COMPLEX->REAL) */ base.setupFFTCR(dim, dimsize,1./(N1*N2*N3)); // allocate memory on device int ierr; void *real_ptr, *comp_ptr, *real_res_ptr; real_ptr = base.allocateMemory(sizereal, ierr); real_res_ptr = base.allocateMemory(sizereal, ierr); comp_ptr = base.allocateMemory< std::complex >(sizecomp, ierr); // execute one run before starting the timers base.writeData(real_ptr, rdata, sizereal); base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize); base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize); base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize); base.readData(real_res_ptr, outdata, sizereal); //timer for total loop time, FFT and IFFT calls struct timeval timeStart, timeEnd; struct timeval timeFFTStart[loop], timeFFTEnd[loop]; struct timeval timeIFFTStart[loop], timeIFFTEnd[loop]; gettimeofday(&timeStart, NULL); for (int i=0; i(real_ptr, rdata, sizereal); // execute rcfft gettimeofday(&timeFFTStart[i], NULL); base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize); gettimeofday(&timeFFTEnd[i], NULL); // execute crfft gettimeofday(&timeIFFTStart[i], NULL); base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize); gettimeofday(&timeIFFTEnd[i], NULL); //normalize base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize); // read IFFT data from device base.readData(real_res_ptr, outdata, sizereal); } gettimeofday(&timeEnd, NULL); // free device memory base.freeMemory< std::complex >(comp_ptr, sizecomp); base.freeMemory(real_ptr, sizereal); base.freeMemory(real_res_ptr, sizereal); // compare in and out data to see if we get back the same results compareData(rdata, outdata, N1, N2, N3, dim); //calculate seconds for total time and fft times double tfft = 0; double tifft = 0; double ttot = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1e6 + (timeEnd.tv_usec - timeStart.tv_usec) ) * 1e-6; for (int i = 0; i < loop; i++) { tfft += ( (timeFFTEnd[i].tv_sec - timeFFTStart[i].tv_sec) * 1e6 + (timeFFTEnd[i].tv_usec - timeFFTStart[i].tv_usec) ) * 1e-6; tifft += ( (timeIFFTEnd[i].tv_sec - timeIFFTStart[i].tv_sec) * 1e6 + (timeIFFTEnd[i].tv_usec - timeIFFTStart[i].tv_usec) ) * 1e-6; } //print timing results std::cout << std::fixed << std::setprecision(5) << "\nTiming results" << "\nTotal time\t" << ttot << "s\tavg time\t" << ttot / loop << "s" << "\nFFT total\t" << tfft << "s\tFFT avg \t" << tfft / loop << "s" << "\nIFFT total\t" << tifft << "s\tIFFT avg\t" << tifft / loop << "s" << "\n\n"; return 0; } void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim) { int id; double sum = 0; for (int i = 0; i < NI; i++) { for (int j = 0; j < NJ; j++) { for (int k = 0; k < NK; k++) { id = k*NI*NJ + j*NI + i; sum += fabs(data1[id] - data2[id]); } } } std::cout << "RC <--> CR diff: " << sum << std::endl; } void initData(double *data, int dimsize[3]) { for (int i = 0; i < dimsize[2]; i++) { for (int j = 0; j < dimsize[1]; j++) { for (int k = 0; k < dimsize[0]; k++) { data[i*dimsize[1]*dimsize[0] + j*dimsize[0] + k] = k; } } } } void printHelp() { std::cout << std::endl; std::cout << "testFFT3DRC executes 3D real complex and 3D complex real" << "function on the Intel MIC.\n"; std::cout << "Operations performed by testRC are: " << "write data to MIC -> FFT -> IFFT -> read data from MIC.\n"; std::cout << "To run testFFT3DRC execute: ./testFFT3DRC -grid $x $y $z " << "-loop $l\n"; std::cout << "where $x $y $z are number of elements in each dimension and " << "$l is the number of times all the operations will be performed.\n"; std::cout << std::endl; } bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop, char *api_name, char *device_name) { for (int i = 1; i < argc; i++) { if ( argv[i] == std::string("-grid") ) { N1 = atoi(argv[i + 1]); N2 = atoi(argv[i + 2]); N3 = atoi(argv[i + 3]); i += 3; } if ( argv[i] == std::string("-loop") ) { loop = atoi(argv[i + 1]); i += 1; } if ( argv[i] == std::string("-h") || argv[i] == std::string("-help") ) { printHelp(); return true; } if (argv[i] == string("-cuda")) { strcpy(api_name, "Cuda"); strcpy(device_name, "-gpu"); } if (argv[i] == string("-opencl")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-gpu"); } if (argv[i] == string("-mic")) { strcpy(api_name, "OpenMP"); strcpy(device_name, "-mic"); } if (argv[i] == string("-cpu")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-cpu"); } } return false; } void printData3DN4(complex* &data, int N, int dim) { for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) { for (int k = 0; k < N/2 + 1; k++) { double d = data[i*N*N + j*N + k].real(); double a = data[i*N*N + j*N + k].imag(); if (d < 10e-5 && d > -10e-5) d = 0; if (a < 10e-5 && a > -10e-5) a = 0; cout << "(" << d << "," << a << ") "; } } cout << endl; } cout << endl; } void printData3DN4(double* &data, int N, int dim) { for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) { for (int k = 0; k < N; k++) { double d = data[i*N*N + j*N + k]; if (d < 10e-5 && d > -10e-5) d = 0; cout << d << " "; } } cout << endl; } cout << endl; }