221 lines
5.4 KiB
C++
221 lines
5.4 KiB
C++
#include <iostream>
|
|
#include <stdlib.h>
|
|
#include <complex>
|
|
|
|
#include "Utility/TimeStamp.h"
|
|
#include "DKSBase.h"
|
|
|
|
using namespace std;
|
|
|
|
void printData(complex<double>* &data, int N, int dim, bool normalize = false);
|
|
void printData3DN4(complex<double>* &data, int N, int dim);
|
|
void printData3DN4(double* data, int N, int dim);
|
|
|
|
void compareData(complex<double>* &data1, complex<double>* &data2, int N, int dim);
|
|
void compareData(double* data1, double* data2, int N, int dim);
|
|
|
|
/* Compute (K*L)%M accurately */
|
|
static double moda(int K, int L, int M)
|
|
{
|
|
return (double)(((long long)K * L) % M);
|
|
}
|
|
/* Initialize array x(N) to produce unit peaks at x(H) and x(N-H) */
|
|
static void init_r(double *x, int N1, int N2, int N3, int H1=-1, int H2=2, int H3=4)
|
|
{
|
|
double TWOPI = 6.2831853071795864769, phase, factor;
|
|
int n1, n2, n3, S1, S2, S3, index;
|
|
|
|
/* Generalized strides for row-major addressing of x */
|
|
S3 = 1;
|
|
S2 = (N3/2+1)*2;
|
|
S1 = N2*(N3/2+1)*2;
|
|
|
|
factor = ((N1-H1%N1)==0 && (N2-H2%N2)==0 && (N3-H3%N3)==0) ? 1.0 : 2.0;
|
|
for (n1 = 0; n1 < N1; n1++)
|
|
{
|
|
for (n2 = 0; n2 < N2; n2++)
|
|
{
|
|
for (n3 = 0; n3 < N3; n3++)
|
|
{
|
|
phase = moda(n1,H1,N1) / N1;
|
|
phase += moda(n2,H2,N2) / N2;
|
|
phase += moda(n3,H3,N3) / N3;
|
|
index = n1*S1 + n2*S2 + n3*S3;
|
|
//cout << "index = " << index << endl;
|
|
x[index] = factor * cos( TWOPI * phase ) / (N1*N2*N3);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
int N = atoi(argv[1]);
|
|
int dim = 3;
|
|
int dimsize[3] = {N, N, N};
|
|
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
|
|
int sizecomp = (dimsize[0]/2 + 1) * dimsize[1] * dimsize[2];
|
|
|
|
//double *rdata = new double[sizereal];
|
|
//double *outdata = new double[sizereal];
|
|
//complex<double> *cfft = new complex<double>[sizecomp];
|
|
double *rdata =(double *)malloc(N*N*(N/2+1)*2*sizeof(double));
|
|
double *outdata =(double *)malloc(N*N*(N/2+1)*2*sizeof(double));
|
|
complex<double> *cfft = (complex<double> *)malloc(sizecomp*sizeof(complex<double>));
|
|
|
|
init_r(rdata, N,N,N);
|
|
|
|
/* init DKSBase */
|
|
cout << "Init device and set function" << endl;
|
|
|
|
DKSBase base;
|
|
base.setAPI("OpenMP", 6);
|
|
base.setDevice("-mic", 4);
|
|
base.initDevice();
|
|
|
|
/* setup forward fft (REAL->COMPLEX) */
|
|
base.setupFFTRC(dim, dimsize);
|
|
|
|
int ierr;
|
|
void *real_ptr, *comp_ptr;
|
|
|
|
/* allocate memory on device */;
|
|
real_ptr = base.allocateMemory<double>(sizereal, ierr);
|
|
comp_ptr = base.allocateMemory< complex<double> >(sizecomp, ierr);
|
|
|
|
/* write data to device */
|
|
base.writeData<double>(real_ptr, rdata, sizereal);
|
|
|
|
//printData3DN4(rdata,N,3);
|
|
|
|
/* execute rcfft */
|
|
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
|
|
|
|
/* read FFT data from device */
|
|
base.readData< complex<double> >(comp_ptr, cfft, sizecomp);
|
|
base.writeData<double>(comp_ptr, cfft, sizereal);
|
|
|
|
|
|
/* setup backward fft (COMPLEX->REAL) */
|
|
base.setupFFTCR(dim, dimsize,1./(N*N*N));
|
|
/* execute crfft */
|
|
base.callC2RFFT(real_ptr, comp_ptr, dim, dimsize);
|
|
|
|
/* normalize */
|
|
//base.callNormalizeC2RFFT(real_ptr, dim, dimsize);
|
|
|
|
/* read FFT data from device */
|
|
//base.readData< complex<double> >(comp_ptr, cfft, sizecomp);
|
|
|
|
/* read IFFT data from device */
|
|
base.readData<double>(real_ptr, outdata, sizereal);
|
|
|
|
/* free device memory */
|
|
base.freeMemory< complex<double> >(comp_ptr, sizecomp);
|
|
base.freeMemory<double>(real_ptr, sizereal);
|
|
|
|
/* compare data */
|
|
compareData(rdata, outdata, N, dim);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void printData(complex<double>* &data, int N, int dim, bool normalize) {
|
|
int ni, nj, nk;
|
|
ni = (dim > 2) ? N : 1;
|
|
nj = (dim > 1) ? N : 1;
|
|
nk = N;
|
|
|
|
for (int i = 0; i < ni; i++) {
|
|
for (int j = 0; j < nj; j++) {
|
|
for (int k = 0; k < nk; k++) {
|
|
if (!normalize)
|
|
cout << data[i*ni*ni + j*nj + k].real() << "\t";
|
|
else
|
|
cout << data[i*ni*ni + j*nj + k].real() / N << "\t";
|
|
}
|
|
cout << endl;
|
|
}
|
|
cout << endl;
|
|
}
|
|
}
|
|
|
|
void printData3DN4(complex<double>* &data, int N, int dim) {
|
|
|
|
for (int j = 0; j < N; j++) {
|
|
for (int i = 0; i < N; i++) {
|
|
for (int k = 0; k < N; k++) {
|
|
double d = data[i*N*N + j*N + k].real();
|
|
double a = data[i*N*N + j*N + k].imag();
|
|
|
|
if (d < 10e-5 && d > -10e-5)
|
|
d = 0;
|
|
if (a < 10e-5 && a > -10e-5)
|
|
a = 0;
|
|
|
|
cout << d << "; " << a << "\t";
|
|
}
|
|
}
|
|
cout << endl;
|
|
}
|
|
cout << endl;
|
|
|
|
}
|
|
void printData3DN4(double* data, int N, int dim) {
|
|
|
|
for (int j = 0; j < N; j++) {
|
|
for (int i = 0; i < N; i++) {
|
|
for (int k = 0; k < N; k++) {
|
|
double d = data[i*N*N + j*N + k];
|
|
//double a = data[i*N*N + j*N + k].imag();
|
|
|
|
if (d < 10e-5 && d > -10e-5)
|
|
d = 0;
|
|
//if (a < 10e-5 && a > -10e-5)
|
|
// a = 0;
|
|
|
|
cout << d << "\t";
|
|
}
|
|
}
|
|
cout << endl;
|
|
}
|
|
cout << endl;
|
|
|
|
}
|
|
void compareData(complex<double>* &data1, complex<double>* &data2, int N, int dim) {
|
|
int ni, nj, nk, id;
|
|
ni = (dim > 2) ? N : 1;
|
|
nj = (dim > 1) ? N : 1;
|
|
nk = N;
|
|
double sum = 0;
|
|
for (int i = 0; i < ni; i++) {
|
|
for (int j = 0; j < nj; j++) {
|
|
for (int k = 0; k < nk; k++) {
|
|
id = i*ni*ni + j*nj + k;
|
|
sum += fabs(data1[id].real() - data2[id].real());
|
|
sum += fabs(data1[id].imag() - data2[id].imag());
|
|
}
|
|
}
|
|
}
|
|
cout << "Size " << N << " CC <--> CC diff: " << sum << endl;
|
|
}
|
|
|
|
void compareData(double* data1, double* data2, int N, int dim) {
|
|
int ni, nj, nk, id;
|
|
ni = (dim > 2) ? N : 1;
|
|
nj = (dim > 1) ? N : 1;
|
|
nk = N;
|
|
double sum = 0;
|
|
for (int i = 0; i < ni; i++) {
|
|
for (int j = 0; j < nj; j++) {
|
|
for (int k = 0; k < nk; k++) {
|
|
id = i*ni*ni + j*nj + k;
|
|
//sum += fabs(data1[id] - data2[id]/(N*N*N));
|
|
sum += fabs(data1[id] - data2[id]);
|
|
}
|
|
}
|
|
}
|
|
cout << "Size " << N << " RC <--> CR diff: " << sum << endl;
|
|
}
|