#include #include #include #include #include #include "Utility/TimeStamp.h" #include "DKSBase.h" using namespace std; void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim); void initData(double *data, int dimsize[3], int dim); bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop, int &dim, char *api_name, char *device_name, char *file_name); void printHelp(); void printData3DN4(complex* &data, int N, int dim); void printData3DN4(double* &data, int N, int dim); double precision(double a) { //if (a < 1e-10) // return 0.0; //else return a; } int main(int argc, char *argv[]) { int N1 = 8; int N2 = 8; int N3 = 8; int dim = 3; int loop = 0; char *api_name = new char[10]; char *device_name = new char[10]; char *file_name = new char[50]; if ( readParams(argc, argv, N1, N2, N3, loop, dim, api_name, device_name, file_name) ) return 0; cout << "Use api: " << api_name << ", " << device_name << endl; int dimsize[3] = {N1, N2, N3}; int sizereal = dimsize[0] * dimsize[1] * dimsize[2]; int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2]; double *rdata = new double[sizereal]; double *outdata = new double[sizereal]; complex *cfft = new complex[sizecomp]; initData(rdata, dimsize, dim); /* init DKSBase */ cout << "Init device and set function" << endl; DKSBase base; base.setAPI(api_name, strlen(api_name)); base.setDevice(device_name, strlen(device_name)); base.initDevice(); base.setupFFT(dim, dimsize); // allocate memory on device int ierr; void *real_ptr, *comp_ptr, *real_res_ptr; real_ptr = base.allocateMemory(sizereal, ierr); real_res_ptr = base.allocateMemory(sizereal, ierr); comp_ptr = base.allocateMemory< std::complex >(sizecomp, ierr); // execute one run before starting the timers base.writeData(real_ptr, rdata, sizereal); base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize); base.readData< complex >(comp_ptr, cfft, sizecomp); base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize); base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize); base.readData(real_res_ptr, outdata, sizereal); ofstream myfile; myfile.open(file_name); myfile<< "in\tout\treal\timag\n"; for (int i = 0; i < sizereal; i++) { //myfile << precision(rdata[i]) << "\t"; //myfile << precision(outdata[i]) << "\t"; if (i < sizecomp) { myfile << precision(cfft[i].real()) << "\t"; myfile << precision(cfft[i].imag()); } myfile << "\n"; } myfile.close(); /* if (dim == 2) { for (int i = 0; i < N2; i++) { for (int j = 0; j < N1; j++) { cout << rdata[i*N1 + j] << " "; } cout << endl; } cout << endl; } if (dim == 2) { for (int i = 0; i < N2; i++) { for (int j = 0; j < N1 / 2 + 1; j++) { cout << cfft[i*(N1 / 2 + 1) + j] << " "; } cout << endl; } cout << endl; } */ // free device memory base.freeMemory< std::complex >(comp_ptr, sizecomp); base.freeMemory(real_ptr, sizereal); base.freeMemory(real_res_ptr, sizereal); // compare in and out data to see if we get back the same results cout << "comp" << endl; compareData(rdata, outdata, N1, N2, N3, dim); cout << "done" << endl; return 0; } void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim) { int id; double sum = 0; for (int i = 0; i < NK; i++) { for (int j = 0; j < NJ; j++) { for (int k = 0; k < NI; k++) { id = i*NI*NJ + j*NI + k; sum += fabs(data1[id] - data2[id]); } } } std::cout << "RC <--> CR diff: " << sum << std::endl; } void initData(double *data, int dimsize[3], int dim) { if (dim == 3) { for (int i = 0; i < dimsize[2]; i++) for (int j = 0; j < dimsize[1]; j++) for (int k = 0; k < dimsize[0]; k++) data[i*dimsize[1]*dimsize[0] + j*dimsize[0] + k] = sin(k); } else if (dim == 2) { for (int j = 0; j < dimsize[1]; j++) { for (int k = 0; k < dimsize[0]; k++) { data[j*dimsize[0] + k] = sin(k); } } } else { for (int k = 0; k < dimsize[0]; k++) data[k] = sin(k); } } void printHelp() { std::cout << std::endl; std::cout << "testFFT3DRC executes 3D real complex and 3D complex real" << "function on the Intel MIC.\n"; std::cout << "Operations performed by testRC are: " << "write data to MIC -> FFT -> IFFT -> read data from MIC.\n"; std::cout << "To run testFFT3DRC execute: ./testFFT3DRC -grid $x $y $z " << "-loop $l\n"; std::cout << "where $x $y $z are number of elements in each dimension and " << "$l is the number of times all the operations will be performed.\n"; std::cout << std::endl; } bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop, int &dim, char *api_name, char *device_name, char *file_name) { for (int i = 1; i < argc; i++) { if ( argv[i] == std::string("-dim")) { dim = atoi(argv[i + 1]); i++; } if ( argv[i] == std::string("-grid") ) { N1 = atoi(argv[i + 1]); N2 = atoi(argv[i + 2]); N3 = atoi(argv[i + 3]); i += 3; } if ( argv[i] == std::string("-loop") ) { loop = atoi(argv[i + 1]); i += 1; } if ( argv[i] == std::string("-h") || argv[i] == std::string("-help") ) { printHelp(); return true; } if (argv[i] == string("-cuda")) { strcpy(api_name, "Cuda"); strcpy(device_name, "-gpu"); strcpy(file_name, "cuda_fft.dat"); } if (argv[i] == string("-opencl")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-gpu"); strcpy(file_name, "opencl_fft.dat"); } if (argv[i] == string("-mic")) { strcpy(api_name, "OpenMP"); strcpy(device_name, "-mic"); strcpy(file_name, "openmp_fft.dat"); } if (argv[i] == string("-cpu")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-cpu"); strcpy(file_name, "opencl_cpu_fft.dat"); } } return false; } void printData3DN4(complex* &data, int N, int dim) { for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) { for (int k = 0; k < N/2 + 1; k++) { double d = data[i*N*N + j*N + k].real(); double a = data[i*N*N + j*N + k].imag(); if (d < 10e-5 && d > -10e-5) d = 0; if (a < 10e-5 && a > -10e-5) a = 0; cout << "(" << d << "," << a << ") "; } } cout << endl; } cout << endl; } void printData3DN4(double* &data, int N, int dim) { for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) { for (int k = 0; k < N; k++) { double d = data[i*N*N + j*N + k]; if (d < 10e-5 && d > -10e-5) d = 0; cout << d << " "; } } cout << endl; } cout << endl; }