#include #include #include #include "Utility/TimeStamp.h" #include "DKSBase.h" using namespace std; void printData(complex* &data, int N, int dim, bool normalize = false); void printData3DN4(complex* &data, int N, int dim); void printData3DN4(double* data, int N, int dim); void compareData(complex* &data1, complex* &data2, int N, int dim); void compareData(double* data1, double* data2, int N, int dim); /* Compute (K*L)%M accurately */ static double moda(int K, int L, int M) { return (double)(((long long)K * L) % M); } /* Initialize array x(N) to produce unit peaks at x(H) and x(N-H) */ static void init_r(double *x, int N1, int N2, int N3, int H1=-1, int H2=2, int H3=4) { double TWOPI = 6.2831853071795864769, phase, factor; int n1, n2, n3, S1, S2, S3, index; /* Generalized strides for row-major addressing of x */ S3 = 1; S2 = (N3/2+1)*2; S1 = N2*(N3/2+1)*2; factor = ((N1-H1%N1)==0 && (N2-H2%N2)==0 && (N3-H3%N3)==0) ? 1.0 : 2.0; for (n1 = 0; n1 < N1; n1++) { for (n2 = 0; n2 < N2; n2++) { for (n3 = 0; n3 < N3; n3++) { phase = moda(n1,H1,N1) / N1; phase += moda(n2,H2,N2) / N2; phase += moda(n3,H3,N3) / N3; index = n1*S1 + n2*S2 + n3*S3; //cout << "index = " << index << endl; x[index] = factor * cos( TWOPI * phase ) / (N1*N2*N3); } } } } int main(int argc, char *argv[]) { int N = atoi(argv[1]); int dim = 3; int dimsize[3] = {N, N, N}; int sizereal = dimsize[0] * dimsize[1] * dimsize[2]; int sizecomp = (dimsize[0]/2 + 1) * dimsize[1] * dimsize[2]; //double *rdata = new double[sizereal]; //double *outdata = new double[sizereal]; //complex *cfft = new complex[sizecomp]; double *rdata =(double *)malloc(N*N*(N/2+1)*2*sizeof(double)); double *outdata =(double *)malloc(N*N*(N/2+1)*2*sizeof(double)); complex *cfft = (complex *)malloc(sizecomp*sizeof(complex)); init_r(rdata, N,N,N); /* init DKSBase */ cout << "Init device and set function" << endl; DKSBase base; base.setAPI("OpenMP", 6); base.setDevice("-mic", 4); base.initDevice(); /* setup forward fft (REAL->COMPLEX) */ base.setupFFTRC(dim, dimsize); int ierr; void *real_ptr, *comp_ptr; /* allocate memory on device */; real_ptr = base.allocateMemory(sizereal, ierr); comp_ptr = base.allocateMemory< complex >(sizecomp, ierr); /* write data to device */ base.writeData(real_ptr, rdata, sizereal); //printData3DN4(rdata,N,3); /* execute rcfft */ base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize); /* read FFT data from device */ base.readData< complex >(comp_ptr, cfft, sizecomp); base.writeData(comp_ptr, cfft, sizereal); /* setup backward fft (COMPLEX->REAL) */ base.setupFFTCR(dim, dimsize,1./(N*N*N)); /* execute crfft */ base.callC2RFFT(real_ptr, comp_ptr, dim, dimsize); /* normalize */ //base.callNormalizeC2RFFT(real_ptr, dim, dimsize); /* read FFT data from device */ //base.readData< complex >(comp_ptr, cfft, sizecomp); /* read IFFT data from device */ base.readData(real_ptr, outdata, sizereal); /* free device memory */ base.freeMemory< complex >(comp_ptr, sizecomp); base.freeMemory(real_ptr, sizereal); /* compare data */ compareData(rdata, outdata, N, dim); return 0; } void printData(complex* &data, int N, int dim, bool normalize) { int ni, nj, nk; ni = (dim > 2) ? N : 1; nj = (dim > 1) ? N : 1; nk = N; for (int i = 0; i < ni; i++) { for (int j = 0; j < nj; j++) { for (int k = 0; k < nk; k++) { if (!normalize) cout << data[i*ni*ni + j*nj + k].real() << "\t"; else cout << data[i*ni*ni + j*nj + k].real() / N << "\t"; } cout << endl; } cout << endl; } } void printData3DN4(complex* &data, int N, int dim) { for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) { for (int k = 0; k < N; k++) { double d = data[i*N*N + j*N + k].real(); double a = data[i*N*N + j*N + k].imag(); if (d < 10e-5 && d > -10e-5) d = 0; if (a < 10e-5 && a > -10e-5) a = 0; cout << d << "; " << a << "\t"; } } cout << endl; } cout << endl; } void printData3DN4(double* data, int N, int dim) { for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) { for (int k = 0; k < N; k++) { double d = data[i*N*N + j*N + k]; //double a = data[i*N*N + j*N + k].imag(); if (d < 10e-5 && d > -10e-5) d = 0; //if (a < 10e-5 && a > -10e-5) // a = 0; cout << d << "\t"; } } cout << endl; } cout << endl; } void compareData(complex* &data1, complex* &data2, int N, int dim) { int ni, nj, nk, id; ni = (dim > 2) ? N : 1; nj = (dim > 1) ? N : 1; nk = N; double sum = 0; for (int i = 0; i < ni; i++) { for (int j = 0; j < nj; j++) { for (int k = 0; k < nk; k++) { id = i*ni*ni + j*nj + k; sum += fabs(data1[id].real() - data2[id].real()); sum += fabs(data1[id].imag() - data2[id].imag()); } } } cout << "Size " << N << " CC <--> CC diff: " << sum << endl; } void compareData(double* data1, double* data2, int N, int dim) { int ni, nj, nk, id; ni = (dim > 2) ? N : 1; nj = (dim > 1) ? N : 1; nk = N; double sum = 0; for (int i = 0; i < ni; i++) { for (int j = 0; j < nj; j++) { for (int k = 0; k < nk; k++) { id = i*ni*ni + j*nj + k; //sum += fabs(data1[id] - data2[id]/(N*N*N)); sum += fabs(data1[id] - data2[id]); } } } cout << "Size " << N << " RC <--> CR diff: " << sum << endl; }