OpenCL FFT using clfft and tests
This commit is contained in:
@ -25,7 +25,7 @@ ADD_EXECUTABLE(testFFT3DRC testFFT3DRC.cpp)
|
||||
ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
|
||||
ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp)
|
||||
#ADD_EXECUTABLE(testPush testPush.cpp)
|
||||
#ADD_EXECUTABLE(testFFTSolverMIC testFFTSolver_MIC.cpp)
|
||||
ADD_EXECUTABLE(testFFTSolverMIC testFFTSolver_MIC.cpp)
|
||||
#ADD_EXECUTABLE(testIntegration testTimeIntegration.cpp)
|
||||
#ADD_EXECUTABLE(testImageReconstruction testImageReconstruction.cpp)
|
||||
|
||||
@ -56,7 +56,7 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testPush dks)
|
||||
#TARGET_LINK_LIBRARIES(testFFTSolverMIC dks)
|
||||
TARGET_LINK_LIBRARIES(testFFTSolverMIC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
|
||||
#TARGET_LINK_LIBRARIES(testIntegration dks)
|
||||
#TARGET_LINK_LIBRARIES(testImageReconstruction dks)
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <complex>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#include "Utility/TimeStamp.h"
|
||||
#include "DKSBase.h"
|
||||
@ -8,14 +10,20 @@
|
||||
using namespace std;
|
||||
|
||||
void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim);
|
||||
void initData(double *data, int dimsize[3]);
|
||||
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop,
|
||||
char *api_name, char *device_name);
|
||||
void initData(double *data, int dimsize[3], int dim);
|
||||
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop, int &dim,
|
||||
char *api_name, char *device_name, char *file_name);
|
||||
void printHelp();
|
||||
|
||||
void printData3DN4(complex<double>* &data, int N, int dim);
|
||||
void printData3DN4(double* &data, int N, int dim);
|
||||
|
||||
/**
 * Pass-through hook for values that are written to the result file.
 *
 * A cut-off that would map values below 1e-10 to 0.0 is kept below as a
 * disabled reference; with it disabled the argument is returned unchanged.
 *
 * @param a value to be written
 * @return  a, unmodified
 */
double precision(double a) {
  // Disabled cut-off, kept for reference:
  //   if (a < 1e-10) return 0.0;
  return a;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
@ -26,32 +34,29 @@ int main(int argc, char *argv[]) {
|
||||
int loop = 0;
|
||||
char *api_name = new char[10];
|
||||
char *device_name = new char[10];
|
||||
char *file_name = new char[50];
|
||||
|
||||
if ( readParams(argc, argv, N1, N2, N3, loop, api_name, device_name) )
|
||||
if ( readParams(argc, argv, N1, N2, N3, loop, dim, api_name, device_name, file_name) )
|
||||
return 0;
|
||||
|
||||
cout << "Use api: " << api_name << ", " << device_name << endl;
|
||||
|
||||
int dimsize[3] = {N3, N2, N1};
|
||||
int dimsize[3] = {N1, N2, N3};
|
||||
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
|
||||
int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2];
|
||||
|
||||
double *rdata = new double[sizereal];
|
||||
double *outdata = new double[sizereal];
|
||||
complex<double> *cfft = new complex<double>[sizecomp];
|
||||
initData(rdata, dimsize);
|
||||
initData(rdata, dimsize, dim);
|
||||
|
||||
/* init DKSBase */
|
||||
cout << "Init device and set function" << endl;
|
||||
DKSBase base;
|
||||
DKSBase base;
|
||||
base.setAPI(api_name, strlen(api_name));
|
||||
base.setDevice(device_name, strlen(device_name));
|
||||
base.initDevice();
|
||||
base.setupFFT(3, dimsize);
|
||||
|
||||
base.setupFFTRC(dim, dimsize);
|
||||
/* setup backward fft (COMPLEX->REAL) */
|
||||
base.setupFFTCR(dim, dimsize,1./(N1*N2*N3));
|
||||
base.setupFFT(dim, dimsize);
|
||||
|
||||
// allocate memory on device
|
||||
int ierr;
|
||||
@ -63,68 +68,59 @@ DKSBase base;
|
||||
// execute one run before starting the timers
|
||||
base.writeData<double>(real_ptr, rdata, sizereal);
|
||||
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
|
||||
base.readData< complex<double> >(comp_ptr, cfft, sizecomp);
|
||||
base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize);
|
||||
base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize);
|
||||
base.readData<double>(real_res_ptr, outdata, sizereal);
|
||||
|
||||
//timer for total loop time, FFT and IFFT calls
|
||||
struct timeval timeStart, timeEnd;
|
||||
struct timeval timeFFTStart[loop], timeFFTEnd[loop];
|
||||
struct timeval timeIFFTStart[loop], timeIFFTEnd[loop];
|
||||
|
||||
gettimeofday(&timeStart, NULL);
|
||||
for (int i=0; i<loop; ++i){
|
||||
|
||||
// write data to device
|
||||
base.writeData<double>(real_ptr, rdata, sizereal);
|
||||
|
||||
// execute rcfft
|
||||
gettimeofday(&timeFFTStart[i], NULL);
|
||||
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
|
||||
gettimeofday(&timeFFTEnd[i], NULL);
|
||||
|
||||
// execute crfft
|
||||
gettimeofday(&timeIFFTStart[i], NULL);
|
||||
base.callC2RFFT(real_res_ptr, comp_ptr, dim, dimsize);
|
||||
gettimeofday(&timeIFFTEnd[i], NULL);
|
||||
|
||||
//normalize
|
||||
base.callNormalizeC2RFFT(real_res_ptr, dim, dimsize);
|
||||
|
||||
// read IFFT data from device
|
||||
base.readData<double>(real_res_ptr, outdata, sizereal);
|
||||
|
||||
|
||||
ofstream myfile;
|
||||
myfile.open(file_name);
|
||||
myfile<< "in\tout\treal\timag\n";
|
||||
for (int i = 0; i < sizereal; i++) {
|
||||
//myfile << precision(rdata[i]) << "\t";
|
||||
//myfile << precision(outdata[i]) << "\t";
|
||||
if (i < sizecomp) {
|
||||
myfile << precision(cfft[i].real()) << "\t";
|
||||
myfile << precision(cfft[i].imag());
|
||||
}
|
||||
myfile << "\n";
|
||||
}
|
||||
gettimeofday(&timeEnd, NULL);
|
||||
myfile.close();
|
||||
|
||||
|
||||
/*
|
||||
if (dim == 2) {
|
||||
for (int i = 0; i < N2; i++) {
|
||||
for (int j = 0; j < N1; j++) {
|
||||
cout << rdata[i*N1 + j] << " ";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
|
||||
if (dim == 2) {
|
||||
for (int i = 0; i < N2; i++) {
|
||||
for (int j = 0; j < N1 / 2 + 1; j++) {
|
||||
cout << cfft[i*(N1 / 2 + 1) + j] << " ";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
*/
|
||||
// free device memory
|
||||
base.freeMemory< std::complex<double> >(comp_ptr, sizecomp);
|
||||
base.freeMemory<double>(real_ptr, sizereal);
|
||||
base.freeMemory<double>(real_res_ptr, sizereal);
|
||||
|
||||
// compare in and out data to see if we get back the same results
|
||||
cout << "comp" << endl;
|
||||
compareData(rdata, outdata, N1, N2, N3, dim);
|
||||
|
||||
//calculate seconds for total time and fft times
|
||||
double tfft = 0;
|
||||
double tifft = 0;
|
||||
double ttot = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1e6 +
|
||||
(timeEnd.tv_usec - timeStart.tv_usec) ) * 1e-6;
|
||||
|
||||
for (int i = 0; i < loop; i++) {
|
||||
tfft += ( (timeFFTEnd[i].tv_sec - timeFFTStart[i].tv_sec) * 1e6 +
|
||||
(timeFFTEnd[i].tv_usec - timeFFTStart[i].tv_usec) ) * 1e-6;
|
||||
|
||||
tifft += ( (timeIFFTEnd[i].tv_sec - timeIFFTStart[i].tv_sec) * 1e6 +
|
||||
(timeIFFTEnd[i].tv_usec - timeIFFTStart[i].tv_usec) ) * 1e-6;
|
||||
}
|
||||
|
||||
//print timing results
|
||||
std::cout << std::fixed << std::setprecision(5) << "\nTiming results"
|
||||
<< "\nTotal time\t" << ttot << "s\tavg time\t" << ttot / loop << "s"
|
||||
<< "\nFFT total\t" << tfft << "s\tFFT avg \t" << tfft / loop << "s"
|
||||
<< "\nIFFT total\t" << tifft << "s\tIFFT avg\t" << tifft / loop << "s"
|
||||
<< "\n\n";
|
||||
cout << "done" << endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -132,10 +128,10 @@ DKSBase base;
|
||||
void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim) {
|
||||
int id;
|
||||
double sum = 0;
|
||||
for (int i = 0; i < NI; i++) {
|
||||
for (int i = 0; i < NK; i++) {
|
||||
for (int j = 0; j < NJ; j++) {
|
||||
for (int k = 0; k < NK; k++) {
|
||||
id = k*NI*NJ + j*NI + i;
|
||||
for (int k = 0; k < NI; k++) {
|
||||
id = i*NI*NJ + j*NI + k;
|
||||
sum += fabs(data1[id] - data2[id]);
|
||||
}
|
||||
}
|
||||
@ -143,13 +139,21 @@ void compareData(double* data1, double* data2, int NI, int NJ, int NK, int dim)
|
||||
std::cout << "RC <--> CR diff: " << sum << std::endl;
|
||||
}
|
||||
|
||||
/**
 * Fill the real input field with sin(k), where k is the index along the
 * fastest-varying axis (extent dimsize[0]).
 *
 * dim == 3 : dimsize[2] planes of dimsize[1] rows are written.
 * dim == 2 : a single plane of dimsize[1] rows is written.
 * otherwise: a single row of dimsize[0] values is written.
 *
 * @param data    destination buffer; must hold every element written for
 *                the selected dim
 * @param dimsize extents, dimsize[0] being the contiguous axis
 * @param dim     number of dimensions of the transform
 */
void initData(double *data, int dimsize[3], int dim) {
  const int nx = dimsize[0];
  // Axes that are not part of the transform collapse to extent 1.
  const int ny = (dim == 3 || dim == 2) ? dimsize[1] : 1;
  const int nz = (dim == 3) ? dimsize[2] : 1;

  for (int i = 0; i < nz; i++) {
    for (int j = 0; j < ny; j++) {
      double *row = data + (i * ny + j) * nx;
      for (int k = 0; k < nx; k++)
        row[k] = sin(k);
    }
  }
}
|
||||
|
||||
@ -168,12 +172,17 @@ void printHelp() {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop,
|
||||
char *api_name, char *device_name)
|
||||
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop, int &dim,
|
||||
char *api_name, char *device_name, char *file_name)
|
||||
{
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
|
||||
if ( argv[i] == std::string("-dim")) {
|
||||
dim = atoi(argv[i + 1]);
|
||||
i++;
|
||||
}
|
||||
|
||||
if ( argv[i] == std::string("-grid") ) {
|
||||
N1 = atoi(argv[i + 1]);
|
||||
N2 = atoi(argv[i + 2]);
|
||||
@ -194,21 +203,25 @@ bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &loop,
|
||||
if (argv[i] == string("-cuda")) {
|
||||
strcpy(api_name, "Cuda");
|
||||
strcpy(device_name, "-gpu");
|
||||
strcpy(file_name, "cuda_fft.dat");
|
||||
}
|
||||
|
||||
if (argv[i] == string("-opencl")) {
|
||||
strcpy(api_name, "OpenCL");
|
||||
strcpy(device_name, "-gpu");
|
||||
strcpy(file_name, "opencl_fft.dat");
|
||||
}
|
||||
|
||||
if (argv[i] == string("-mic")) {
|
||||
strcpy(api_name, "OpenMP");
|
||||
strcpy(device_name, "-mic");
|
||||
strcpy(file_name, "openmp_fft.dat");
|
||||
}
|
||||
|
||||
if (argv[i] == string("-cpu")) {
|
||||
strcpy(api_name, "OpenCL");
|
||||
strcpy(device_name, "-cpu");
|
||||
strcpy(file_name, "opencl_cpu_fft.dat");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include <iostream>
|
||||
//#include <mpi.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "DKSBase.h"
|
||||
@ -11,309 +10,265 @@ using namespace std;
|
||||
|
||||
|
||||
/**
 * Print NI slices of N x N values to stdout.
 *
 * Values of one row are tab separated; every row ends with a newline and
 * every slice is followed by an empty line. A non-empty message is printed
 * first, exactly as given (no newline is appended).
 *
 * @param data    buffer with at least NI*N*N elements, slice-major
 * @param N       extent of the two inner axes
 * @param NI      number of slices to print
 * @param message optional prefix; nothing is printed when empty or null
 */
void printData3D(double* data, int N, int NI, const char *message = "") {

  if (message != nullptr && message[0] != '\0')
    std::cout << message;

  for (int i = 0; i < NI; i++) {
    for (int j = 0; j < N; j++) {
      const double *row = data + i*N*N + j*N;
      for (int k = 0; k < N; k++)
        std::cout << row[k] << "\t";
      std::cout << std::endl;
    }
    std::cout << std::endl;
  }

}
|
||||
|
||||
/**
 * Write the ramp 1, 2, ..., N/2+1 along the innermost axis of the leading
 * corner of an N*N-strided cube.
 *
 * Only indices i in [0, N/4], j in [0, N/2] and k in [0, N/2] are touched;
 * every other element of data is left as it was.
 *
 * @param data buffer addressed as data[i*N*N + j*N + k]
 * @param N    stride of the two inner axes
 */
void initData(double *data, int N) {

  const int ni = N/4 + 1;
  const int nj = N/2 + 1;
  const int nk = N/2 + 1;

  for (int i = 0; i < ni; i++) {
    for (int j = 0; j < nj; j++) {
      double *row = data + i*N*N + j*N;
      for (int k = 0; k < nk; k++)
        row[k] = k+1;
    }
  }
}
|
||||
|
||||
/**
 * Fill data[0..N-1] with the element's own index (0, 1, 2, ...).
 *
 * @param data destination buffer with at least N elements
 * @param N    number of elements to write; nothing is written when N <= 0
 */
void initData2(double *data, int N) {
  for (int i = 0; i < N; i++)
    data[i] = i;
}
|
||||
|
||||
/**
 * Set the first N elements of d to the constant 2 + 0i.
 *
 * @param d destination buffer with at least N elements
 * @param N number of elements to write
 */
void initComplex( std::complex<double> *d, int N) {

  const std::complex<double> value(2, 0);
  for (int i = 0; i < N; i++)
    d[i] = value;

}
|
||||
|
||||
/**
 * Print the first N complex values on one line of stdout, each followed by
 * a tab, and terminate the line.
 *
 * @param d values to print
 * @param N number of values
 */
void printComplex(std::complex<double> *d, int N) {

  for (int i = 0; i < N; i++)
    std::cout << d[i] << "\t";
  std::cout << std::endl;

}
|
||||
|
||||
/**
 * Print the first N values on one line of stdout, each followed by ", ",
 * and terminate the line.
 *
 * @param d values to print
 * @param N number of values
 */
void printDouble(double *d, int N) {

  for (int i = 0; i < N; i++)
    std::cout << d[i] << ", ";
  std::cout << std::endl;

}
|
||||
|
||||
/**
 * Initialise an n3 x n2 x n1 field (n1 contiguous).
 *
 * Elements whose indices all lie in the leading corner
 * (i <= n3/2, j <= n2/2, k <= n1/2) receive a running counter 1, 2, 3, ...
 * in storage order; every other element is set to 0.
 *
 * @param data destination buffer with at least n1*n2*n3 elements
 * @param n1   extent of the contiguous axis
 * @param n2   extent of the middle axis
 * @param n3   extent of the slowest axis
 */
void initMirror(double *data, int n1, int n2, int n3) {
  int counter = 1;
  for (int i = 0; i < n3; i++) {
    for (int j = 0; j < n2; j++) {
      // The i/j part of the corner test is constant along a row.
      const bool rowInCorner = (i < n3/2 + 1) && (j < n2/2 + 1);
      double *row = data + i * n2 * n1 + j * n1;
      for (int k = 0; k < n1; k++) {
        if (rowInCorner && k < n1/2 + 1)
          row[k] = counter++;
        else
          row[k] = 0;
      }
    }
  }
}
|
||||
|
||||
/**
 * Print a horizontal divider: c '-' characters followed by a newline.
 *
 * @param c number of dashes; for c <= 0 only the newline is printed
 */
void printDiv(int c) {
  for (int i = 0; i < c; i++)
    std::cout << "-";
  std::cout << std::endl;

}
|
||||
|
||||
/**
 * Dump an n3 x n2 x n1 field (n1 contiguous) to stdout.
 *
 * A 75-character divider is printed first. Values of one row are tab
 * separated, every row ends with a newline, every slice is followed by an
 * empty line, and one more empty line closes the dump.
 *
 * @param data field with at least n1*n2*n3 elements
 * @param n1   extent of the contiguous axis
 * @param n2   extent of the middle axis
 * @param n3   extent of the slowest axis
 */
void printMirror(double *data, int n1, int n2, int n3) {

  printDiv(75);
  for (int i = 0; i < n3; i++) {
    for (int j = 0; j < n2; j++) {
      const double *row = data + i * n2 * n1 + j * n1;
      for (int k = 0; k < n1; k++)
        std::cout << row[k] << "\t";
      std::cout << std::endl;
    }
    std::cout << std::endl;
  }
  std::cout << std::endl;
}
|
||||
|
||||
/**
 * Sum the first datasize elements of data in index order.
 *
 * @param data     values to add up
 * @param datasize number of values
 * @return the sum, or 0 when datasize <= 0
 */
double sumData(double *data, int datasize) {

  double sum = 0;
  for (int i = 0; i < datasize; i++)
    sum += data[i];

  return sum;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
/* mpi init */
|
||||
//int rank, nprocs;
|
||||
//MPI_Init(&argc, &argv);
|
||||
//MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
//MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||
char *api_name = new char[10];
|
||||
char *device_name = new char[10];
|
||||
|
||||
/*
|
||||
if (nprocs != 8) {
|
||||
cout << "example was set to run with 8 processes" << endl;
|
||||
cout << "exit..." << endl;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (argv[i] == string("-cuda")) {
|
||||
strcpy(api_name, "Cuda");
|
||||
strcpy(device_name, "-gpu");
|
||||
}
|
||||
|
||||
/* set domain size */
|
||||
int NG[3] = {64, 64, 32};
|
||||
int NL[3] = {NG[0], NG[1] / 4, NG[2] / 2};
|
||||
int ng[3] = {NG[0]/2 + 1, NG[1]/2 + 1, NG[2]/2 + 1};
|
||||
int sizerho = NG[0] * NG[1] * NG[2];
|
||||
int sizegreen = ng[0] * ng[1] * ng[2];
|
||||
int sizecomp = NG[0] * NG[1] * NG[2] / 2 + 1;
|
||||
int id[3];
|
||||
if (argv[i] == string("-opencl")) {
|
||||
strcpy(api_name, "OpenCL");
|
||||
strcpy(device_name, "-gpu");
|
||||
}
|
||||
|
||||
//id[0] = 0;
|
||||
//id[1] = NL[1] * (rank % 4);
|
||||
//id[2] = NL[2] * (rank / 4);
|
||||
if (argv[i] == string("-mic")) {
|
||||
strcpy(api_name, "OpenMP");
|
||||
strcpy(device_name, "-mic");
|
||||
}
|
||||
|
||||
/* print some messages about the example at the beginning */
|
||||
cout << "Global domain: " << NG[0] << ", " << NG[1] << ", " << NG[2] << endl;
|
||||
//cout << "Local domain: " << NL[0] << ", " << NL[1] << ", " << NL[2] << endl;
|
||||
cout << "Greens domain: " << ng[0] << ", " << ng[1] << ", " << ng[2] << endl;
|
||||
//cout << "Start idx0: " << id[0] << ", " << id[1] << ", " << id[2] << endl;
|
||||
int tmp[3];
|
||||
/* for (int p = 1; p < nprocs; p++) {
|
||||
MPI_Status mpistatus;
|
||||
MPI_Recv(tmp, 3, MPI_INT, p, 1001, MPI_COMM_WORLD, &mpistatus);
|
||||
cout << "Start idx" << p << ": " << tmp[0] << ", " << tmp[1] << ", " << tmp[2] << endl;
|
||||
}*/
|
||||
// } else {
|
||||
// MPI_Send(id, 3, MPI_INT, 0, 1001, MPI_COMM_WORLD);
|
||||
// }
|
||||
if (argv[i] == string("-cpu")) {
|
||||
strcpy(api_name, "OpenCL");
|
||||
strcpy(device_name, "-cpu");
|
||||
}
|
||||
}
|
||||
|
||||
/* dks init and create 2 streams */
|
||||
int dkserr;
|
||||
//int streamGreens, streamFFT;
|
||||
#ifdef DKS_MIC
|
||||
DKSBase base;
|
||||
base.setAPI("OpenMP", 6);
|
||||
base.setDevice("-mic", 4);
|
||||
base.initDevice();
|
||||
#endif
|
||||
cout << "Use api: " << api_name << ", " << device_name << endl;
|
||||
|
||||
#ifdef DKS_CUDA
|
||||
DKSBase base;
|
||||
base.setAPI("Cuda", 4);
|
||||
base.setDevice("-gpu", 4);
|
||||
base.initDevice();
|
||||
#endif
|
||||
/* set domain size */
|
||||
int NG[3] = {64, 64, 32};
|
||||
int NL[3] = {NG[0], NG[1] / 4, NG[2] / 2};
|
||||
int ng[3] = {NG[0]/2 + 1, NG[1]/2 + 1, NG[2]/2 + 1};
|
||||
int sizerho = NG[0] * NG[1] * NG[2];
|
||||
int sizegreen = ng[0] * ng[1] * ng[2];
|
||||
int sizecomp = NG[0] * NG[1] * NG[2] / 2 + 1;
|
||||
|
||||
//base.createStream(streamFFT);
|
||||
//if (rank == 0) {
|
||||
// base.createStream(streamGreens);
|
||||
base.setupFFT(3, NG);
|
||||
//}
|
||||
/* print some messages about the example at the beginning */
|
||||
cout << "Global domain: " << NG[0] << ", " << NG[1] << ", " << NG[2] << endl;
|
||||
cout << "Greens domain: " << ng[0] << ", " << ng[1] << ", " << ng[2] << endl;
|
||||
|
||||
/* allocate memory and init rho field */
|
||||
double *rho = new double[sizerho];
|
||||
double *rho_out = new double[sizerho];
|
||||
//double *green_out = new double[sizegreen];
|
||||
initMirror(rho, NL[0], NL[1], NL[2]);
|
||||
/* dks init and create 2 streams */
|
||||
int dkserr;
|
||||
DKSBase base;
|
||||
base.setAPI(api_name, strlen(api_name));
|
||||
base.setDevice(device_name, strlen(device_name));
|
||||
base.initDevice();
|
||||
base.setupFFT(3, NG);
|
||||
|
||||
/*
|
||||
allocate memory on device for
|
||||
- rho field
|
||||
- rho FFT
|
||||
- tmpgreen
|
||||
- greens integral
|
||||
- greens integral FFT
|
||||
*/
|
||||
void *tmpgreen_ptr, *rho2_ptr, *grn_ptr, *rho2tr_ptr, *grntr_ptr;
|
||||
// if (rank == 0) {
|
||||
tmpgreen_ptr = base.allocateMemory<double>(sizegreen, dkserr);
|
||||
rho2_ptr = base.allocateMemory<double>(sizerho, dkserr);
|
||||
grn_ptr = base.allocateMemory<double>(sizerho, dkserr);
|
||||
rho2tr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
|
||||
grntr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
|
||||
/* } else {
|
||||
grntr_ptr = NULL;
|
||||
rho2_ptr = NULL;
|
||||
grn_ptr = NULL;
|
||||
rho2tr_ptr = NULL;
|
||||
tmpgreen_ptr = NULL;
|
||||
}*/
|
||||
/* allocate memory and init rho field */
|
||||
double *rho = new double[sizerho];
|
||||
double *rho_out = new double[sizerho];
|
||||
//double *green_out = new double[sizegreen];
|
||||
double *mirror_out = new double[sizerho];
|
||||
//initMirror(rho, NL[0], NL[1], NL[2]);
|
||||
initMirror(rho, NG[0], NG[1], NG[2]);
|
||||
|
||||
/*
|
||||
allocate memory on device for
|
||||
- rho field
|
||||
- rho FFT
|
||||
- tmpgreen
|
||||
- greens integral
|
||||
- greens integral FFT
|
||||
*/
|
||||
void *tmpgreen_ptr, *rho2_ptr, *grn_ptr, *rho2tr_ptr, *grntr_ptr;
|
||||
tmpgreen_ptr = base.allocateMemory<double>(sizegreen, dkserr);
|
||||
rho2_ptr = base.allocateMemory<double>(sizerho, dkserr);
|
||||
grn_ptr = base.allocateMemory<double>(sizerho, dkserr);
|
||||
rho2tr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
|
||||
grntr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
|
||||
|
||||
/* send and receive pointer to allocated memory on device */
|
||||
/*
|
||||
if (rank == 0) {
|
||||
for (int p = 1; p < nprocs; p++)
|
||||
base.sendPointer( rho2_ptr, p, MPI_COMM_WORLD);
|
||||
} else {
|
||||
rho2_ptr = base.receivePointer(0, MPI_COMM_WORLD, dkserr);
|
||||
}
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
*/
|
||||
/* =================================================*/
|
||||
/* =================================================*/
|
||||
/* ====loop through fftpoisson solver iterations====*/
|
||||
/* =================================================*/
|
||||
/* =================================================*/
|
||||
|
||||
double old_sum = 0;
|
||||
|
||||
/* =================================================*/
|
||||
/* =================================================*/
|
||||
/* ====loop through fftpoisson solver iterations====*/
|
||||
/* =================================================*/
|
||||
/* =================================================*/
|
||||
int hr_m[3] = {1, 1, 1};
|
||||
base.callGreensIntegral(tmpgreen_ptr, ng[0], ng[1], ng[2], ng[0], ng[1], hr_m[0], hr_m[1], hr_m[2]);
|
||||
|
||||
double old_sum = 0;
|
||||
double tmp_sum = 0;
|
||||
for (int l = 0; l < 100; l++) {
|
||||
//MPI_Barrier(MPI_COMM_WORLD);
|
||||
/* on node 0, calculate tmpgreen on gpu */
|
||||
int hr_m[3] = {1, 1, 1};
|
||||
//if (rank == 0)
|
||||
base.callGreensIntegral(tmpgreen_ptr, ng[0], ng[1], ng[2], ng[0], ng[1],
|
||||
hr_m[0], hr_m[1], hr_m[2]);
|
||||
/* calculate greens integral on gpu */
|
||||
base.callGreensIntegration(grn_ptr, tmpgreen_ptr, ng[0], ng[1], ng[2]);
|
||||
|
||||
/* calculate greens integral on gpu */
|
||||
//if (rank == 0)
|
||||
base.callGreensIntegration(grn_ptr, tmpgreen_ptr, ng[0], ng[1], ng[2]);
|
||||
/* mirror the field */
|
||||
base.callMirrorRhoField(grn_ptr, ng[0], ng[1], ng[2]);
|
||||
/*
|
||||
base.readData<double>(grn_ptr, mirror_out, sizerho);
|
||||
for (int i = 0; i < sizerho; i++)
|
||||
cout << mirror_out[i] << " ";
|
||||
cout << endl << endl;
|
||||
|
||||
/* mirror the field */
|
||||
//if (rank == 0)
|
||||
base.callMirrorRhoField(grn_ptr, ng[0], ng[1], ng[2]);
|
||||
for (int i = 0; i < sizerho; i++)
|
||||
cout << rho[i] << " ";
|
||||
cout << endl << endl;
|
||||
*/
|
||||
/* transfer rho field to device */
|
||||
base.writeData<double>(rho2_ptr, rho, sizerho);
|
||||
|
||||
/* get FFT of rho field */
|
||||
base.callR2CFFT(rho2_ptr, rho2tr_ptr, 3, NG);
|
||||
|
||||
/* get FFT of mirrored greens integral */
|
||||
//if (rank == 0)
|
||||
base.callR2CFFT(grn_ptr, grntr_ptr, 3, NG);
|
||||
/* get FFT of mirrored greens integral */
|
||||
base.callR2CFFT(grn_ptr, grntr_ptr, 3, NG);
|
||||
|
||||
/* transfer rho field to device */
|
||||
//base.gather3DDataAsync<double> ( rho2_ptr, rho, NG, NL, id, streamFFT);
|
||||
base.writeData<double>(rho2_ptr, rho,NG[0]*NG[1]*NG[2]);
|
||||
//MPI_Barrier(MPI_COMM_WORLD);
|
||||
/* multiply both FFTs */
|
||||
base.callMultiplyComplexFields(rho2tr_ptr, grntr_ptr, sizecomp);
|
||||
|
||||
/* get FFT of rho field */
|
||||
//if (rank == 0) {
|
||||
//base.syncDevice();
|
||||
base.callR2CFFT(rho2_ptr, rho2tr_ptr, 3, NG);
|
||||
//}
|
||||
/*
|
||||
complex<double> *crho = new complex<double>[sizecomp];
|
||||
complex<double> *cgre = new complex<double>[sizecomp];
|
||||
base.readData< complex<double> >(rho2tr_ptr, crho, sizecomp);
|
||||
base.readData< complex<double> >(grntr_ptr, cgre, sizecomp);
|
||||
|
||||
/* multiply both FFTs */
|
||||
//if (rank == 0)
|
||||
base.callMultiplyComplexFields(rho2tr_ptr, grntr_ptr, sizecomp);
|
||||
//MPI_Barrier(MPI_COMM_WORLD);
|
||||
for (int i = 0; i < sizecomp; i++)
|
||||
cout << cgre[i].real() << " ";
|
||||
cout << endl << endl;
|
||||
|
||||
/* inverse fft and transfer data back */
|
||||
/*
|
||||
multiple device syncs and mpi barriers are used to make sure data
|
||||
transfer is started when results are ready and the program moves on
|
||||
only when data transfer is finished
|
||||
*/
|
||||
//if (rank == 0) {
|
||||
base.callC2RFFT(rho2tr_ptr, rho2_ptr, 3, NG);
|
||||
//base.syncDevice();
|
||||
//MPI_Barrier(MPI_COMM_WORLD);
|
||||
//base.scatter3DDataAsync<double> (rho2_ptr, rho_out, NG, NL, id);
|
||||
base.readData<double> (rho2_ptr, rho_out, NG[0]*NG[1]*NG[2]);
|
||||
//MPI_Barrier(MPI_COMM_WORLD);
|
||||
//base.syncDevice();
|
||||
//MPI_Barrier(MPI_COMM_WORLD);
|
||||
//cout << "result: " << sumData(rho_out, sizerho) << endl;
|
||||
if (l == 0) {
|
||||
old_sum = sumData(rho_out, sizerho);
|
||||
} else {
|
||||
tmp_sum = sumData(rho_out, sizerho);
|
||||
if (old_sum != tmp_sum) {
|
||||
cout << "diff in iteration: " << l << endl;
|
||||
}
|
||||
}
|
||||
/*} else {
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
base.scatter3DDataAsync<double> (rho2_ptr, rho_out, NG, NL, id);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
*/
|
||||
for (int i = 0; i < sizecomp; i++)
|
||||
cout << crho[i].real() << " ";
|
||||
cout << endl << endl;
|
||||
|
||||
delete[] crho;
|
||||
delete[] cgre;
|
||||
*/
|
||||
|
||||
/* inverse fft and transfer data back */
|
||||
/*
|
||||
multiple device syncs and mpi barriers are used to make sure data
|
||||
transfer is started when results are ready and the program moves on
|
||||
only when data transfer is finished
|
||||
*/
|
||||
base.callC2RFFT(rho2tr_ptr, rho2_ptr, 3, NG);
|
||||
|
||||
base.readData<double> (rho2_ptr, rho_out, sizerho);
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
cout << rho_out[i] << " ";
|
||||
cout << endl;
|
||||
|
||||
old_sum = sumData(rho_out, sizerho);
|
||||
|
||||
}
|
||||
/* =================================================*/
|
||||
/* =================================================*/
|
||||
/* =========end fftpoisson solver test run==========*/
|
||||
/* =================================================*/
|
||||
/* =================================================*/
|
||||
|
||||
base.freeMemory<double>(tmpgreen_ptr, sizegreen);
|
||||
base.freeMemory<double>(grn_ptr, sizerho);
|
||||
base.freeMemory< complex<double> >(rho2tr_ptr, sizecomp);
|
||||
base.freeMemory< complex<double> >(grntr_ptr, sizecomp);
|
||||
base.freeMemory<double>(rho2_ptr, sizerho);
|
||||
|
||||
|
||||
/* free memory on device */
|
||||
//if (rank == 0) {
|
||||
base.freeMemory<double>(tmpgreen_ptr, sizegreen);
|
||||
base.freeMemory<double>(grn_ptr, sizerho);
|
||||
base.freeMemory< complex<double> >(rho2tr_ptr, sizecomp);
|
||||
base.freeMemory< complex<double> >(grntr_ptr, sizecomp);
|
||||
//MPI_Barrier(MPI_COMM_WORLD);
|
||||
base.freeMemory<double>(rho2_ptr, sizerho);
|
||||
cout << "Final sum: " << old_sum << endl;
|
||||
/*} else {
|
||||
base.closeHandle(rho2_ptr);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}*/
|
||||
|
||||
//MPI_Finalize();
|
||||
|
||||
delete[] rho_out;
|
||||
delete[] rho;
|
||||
delete[] mirror_out;
|
||||
cout << "Final sum: " << old_sum << endl;
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user