snapshot of svn

2016-10-10 14:49:32 +02:00
commit 4fa529aaea
122 changed files with 23153 additions and 0 deletions
--- a/test/testFFTSolver_MIC.cpp
+++ b/test/testFFTSolver_MIC.cpp
@@ -0,0 +1,319 @@
+#include <iostream>
+//#include <mpi.h>
+#include <string.h>
+
+#include "DKSBase.h"
+#include "nvToolsExt.h"
+#include "cuda_profiler_api.h"
+#include "cuda_runtime.h"
+
+using namespace std;
+
+
+void printData3D(double* data, int N, int NI, const char *message = "") {
+	if (strcmp(message, "") != 0)
+		cout << message;
+
+	for (int i = 0; i < NI; i++) {
+		for (int j = 0; j < N; j++) {
+			for (int k = 0; k < N; k++) {
+				cout << data[i*N*N + j*N + k] << "\t";
+			}
+			cout << endl;
+		}
+		cout << endl;
+	}
+
+}
+
+void initData(double *data, int N) {
+
+	for (int i = 0; i < N/4 + 1; i++) {
+		for (int j = 0; j < N/2 + 1; j++) {
+			for (int k = 0; k < N/2 + 1; k++) {
+				data[i*N*N + j*N + k] = k+1;
+			}
+		}
+	}
+}
+
+void initData2(double *data, int N) {
+	for (int i = 0; i < N; i++)
+		data[i] = i;
+}
+
+void initComplex( complex<double> *d, int N) {
+
+	for (int i = 0; i < N; i++) {
+		d[i] = complex<double>(2, 0);
+	}
+
+}
+
+void printComplex(complex<double> *d, int N) {
+
+	for (int i = 0; i < N; i++)
+		cout << d[i] << "\t";
+	cout << endl;
+
+}
+
+void initMirror(double *data, int n1, int n2, int n3) {
+	int d = 1;
+	for (int i = 0; i < n3; i++) {
+		for (int j = 0; j < n2; j++) {
+			for (int k = 0; k < n1; k++) {
+				if (i < n3/2 + 1 && j < n2/2 + 1 && k < n1/2 + 1)
+					data[i * n2 * n1 + j * n1 + k] = d++;
+				else
+					data[i * n2 * n1 + j * n1 + k] = 0;
+			}
+		}
+	}
+}
+
+void printDiv(int c) {
+	for (int i = 0; i < c; i++)
+		cout << "-";
+	cout << endl;
+
+}
+
+void printMirror(double *data, int n1, int n2, int n3) {
+
+	printDiv(75);
+	for (int i = 0; i < n3; i++) {
+		for (int j = 0; j < n2; j++) {
+			for (int k = 0; k < n1; k++) {
+				cout << data[i * n2 * n1 + j * n1 + k] << "\t";
+			}
+			cout << endl;
+		}
+		cout << endl;
+	}
+	cout << endl;
+}
+
+double sumData(double *data, int datasize) {
+
+	double sum = 0;
+	for (int i = 0; i < datasize; i++)
+		sum += data[i];
+
+	return sum;
+}
+
+int main(int argc, char *argv[]) {
+
+	/* mpi init */
+	//int rank, nprocs;
+	//MPI_Init(&argc, &argv);
+	//MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	//MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+
+	/*
+	   if (nprocs != 8) {
+	   cout << "example was set to run with 8 processes" << endl;
+	   cout << "exit..." << endl;
+	   return 0;
+	   }
+	   */
+
+	/* set domain size */
+	int NG[3] = {64, 64, 32};
+	int NL[3] = {NG[0], NG[1] / 4, NG[2] / 2};
+	int ng[3] = {NG[0]/2 + 1, NG[1]/2 + 1, NG[2]/2 + 1};
+	int sizerho = NG[0] * NG[1] * NG[2];
+	int sizegreen = ng[0] * ng[1] * ng[2];
+	int sizecomp = NG[0] * NG[1] * NG[2] / 2 + 1;
+	int id[3];
+
+	//id[0] = 0;
+	//id[1] = NL[1] * (rank % 4);
+	//id[2] = NL[2] * (rank / 4);
+
+	/* print some messages bout the example in the begginig */
+	cout << "Global domain: " << NG[0] << ", " << NG[1] << ", " << NG[2] << endl;
+	//cout << "Local domain: " << NL[0] << ", " << NL[1] << ", " << NL[2] << endl;
+	cout << "Greens domain: " << ng[0] << ", " << ng[1] << ", " << ng[2] << endl;
+	//cout << "Start idx0: " << id[0] << ", " << id[1] << ", " << id[2] << endl;
+	int tmp[3];
+	/*  for (int p = 1; p < nprocs; p++) {
+		MPI_Status mpistatus;
+		MPI_Recv(tmp, 3, MPI_INT, p, 1001, MPI_COMM_WORLD, &mpistatus);
+		cout << "Start idx" << p << ": " << tmp[0] << ", " << tmp[1] << ", " << tmp[2] << endl;
+		}*/
+	// } else {
+	//   MPI_Send(id, 3, MPI_INT, 0, 1001, MPI_COMM_WORLD);
+	// }
+
+	/* dks init and create 2 streams */
+	int dkserr;
+	//int streamGreens, streamFFT;
+#ifdef DKS_MIC
+	DKSBase base;
+	base.setAPI("OpenMP", 6);
+	base.setDevice("-mic", 4);
+	base.initDevice();
+#endif
+
+#ifdef DKS_CUDA
+	DKSBase base;
+	base.setAPI("Cuda", 4);
+	base.setDevice("-gpu", 4);
+	base.initDevice();
+#endif
+
+	//base.createStream(streamFFT);
+	//if (rank == 0) {
+	//  base.createStream(streamGreens);
+	base.setupFFT(3, NG);
+	//}
+
+	/* allocate memory and init rho field */
+	double *rho = new double[sizerho];
+	double *rho_out = new double[sizerho];
+	//double *green_out = new double[sizegreen];
+	initMirror(rho, NL[0], NL[1], NL[2]);
+
+	/*
+	   allocate memory on device for 
+	   - rho field
+	   - rho FFT
+	   - tmpgreen
+	   - greens integral
+	   - greens integral FFT
+	   */
+	void *tmpgreen_ptr, *rho2_ptr, *grn_ptr, *rho2tr_ptr, *grntr_ptr;
+	// if (rank == 0) {
+	tmpgreen_ptr = base.allocateMemory<double>(sizegreen, dkserr);
+	rho2_ptr = base.allocateMemory<double>(sizerho, dkserr);
+	grn_ptr = base.allocateMemory<double>(sizerho, dkserr);
+	rho2tr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
+	grntr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
+	/* } else {
+	   grntr_ptr = NULL;
+	   rho2_ptr = NULL;
+	   grn_ptr = NULL;
+	   rho2tr_ptr = NULL;
+	   tmpgreen_ptr = NULL;
+	   }*/
+
+
+	/* send and receive pointer to allocated memory on device */
+	/*
+	   if (rank == 0) {
+	   for (int p = 1; p < nprocs; p++)
+	   base.sendPointer( rho2_ptr, p, MPI_COMM_WORLD);
+	   } else {
+	   rho2_ptr = base.receivePointer(0, MPI_COMM_WORLD, dkserr);
+	   }
+	   MPI_Barrier(MPI_COMM_WORLD);
+	   */
+
+
+	/* =================================================*/
+	/* =================================================*/
+	/* =====loop trough fftpoison solver iterations=====*/
+	/* =================================================*/
+	/* =================================================*/
+
+	double old_sum = 0;
+	double tmp_sum = 0;
+	for (int l = 0; l < 100; l++) {
+		//MPI_Barrier(MPI_COMM_WORLD);
+		/* on node 0, calculate tmpgreen on gpu */
+		int hr_m[3] = {1, 1, 1};
+		//if (rank == 0)
+		base.callGreensIntegral(tmpgreen_ptr, ng[0], ng[1], ng[2], ng[0], ng[1], 
+				hr_m[0], hr_m[1], hr_m[2]);
+
+		/* calculate greens integral on gpu */
+		//if (rank == 0)
+		base.callGreensIntegration(grn_ptr, tmpgreen_ptr, ng[0], ng[1], ng[2]);
+
+		/* mirror the field */
+		//if (rank == 0)
+		base.callMirrorRhoField(grn_ptr, ng[0], ng[1], ng[2]);
+
+
+		/* get FFT of mirrored greens integral */
+		//if (rank == 0) 
+		base.callR2CFFT(grn_ptr, grntr_ptr, 3, NG);
+
+		/* transfer rho field to device */
+		//base.gather3DDataAsync<double> ( rho2_ptr, rho, NG, NL, id, streamFFT);
+		base.writeData<double>(rho2_ptr, rho,NG[0]*NG[1]*NG[2]);
+		//MPI_Barrier(MPI_COMM_WORLD);
+
+		/* get FFT of rho field */
+		//if (rank == 0) {
+		//base.syncDevice();
+		base.callR2CFFT(rho2_ptr, rho2tr_ptr, 3, NG);
+		//}
+
+		/* multiply both FFTs */
+		//if (rank == 0)
+		base.callMultiplyComplexFields(rho2tr_ptr, grntr_ptr, sizecomp);
+		//MPI_Barrier(MPI_COMM_WORLD);
+
+		/* inverse fft and transfer data back */
+		/* 
+		   multiple device syncs and mpi barriers are used to make sure data 
+		   transfer is started when results are ready and progam moves on 
+		   only when data transfer is finished
+		   */
+		//if (rank == 0) {
+		base.callC2RFFT(rho2tr_ptr, rho2_ptr, 3, NG);
+		//base.syncDevice();
+		//MPI_Barrier(MPI_COMM_WORLD);
+		//base.scatter3DDataAsync<double> (rho2_ptr, rho_out, NG, NL, id);
+		base.readData<double> (rho2_ptr, rho_out, NG[0]*NG[1]*NG[2]);
+		//MPI_Barrier(MPI_COMM_WORLD);
+		//base.syncDevice();
+		//MPI_Barrier(MPI_COMM_WORLD);
+		//cout << "result: " << sumData(rho_out, sizerho) << endl;
+		if (l == 0) { 
+			old_sum = sumData(rho_out, sizerho);
+		} else {
+			tmp_sum = sumData(rho_out, sizerho);
+			if (old_sum != tmp_sum) {
+				cout << "diff in iteration: " << l << endl;
+			}
+		}
+		/*} else {
+		  MPI_Barrier(MPI_COMM_WORLD);
+		  base.scatter3DDataAsync<double> (rho2_ptr, rho_out, NG, NL, id);
+		  MPI_Barrier(MPI_COMM_WORLD);
+		  MPI_Barrier(MPI_COMM_WORLD);
+		  }
+		  */
+
+
+	}
+/* =================================================*/  
+/* =================================================*/
+/* ==========end fftpoison solver test run==========*/
+/* =================================================*/
+/* =================================================*/
+
+
+
+/* free memory on device */
+//if (rank == 0) {
+base.freeMemory<double>(tmpgreen_ptr, sizegreen);
+base.freeMemory<double>(grn_ptr, sizerho);
+base.freeMemory< complex<double> >(rho2tr_ptr, sizecomp);
+base.freeMemory< complex<double> >(grntr_ptr, sizecomp);
+//MPI_Barrier(MPI_COMM_WORLD);
+base.freeMemory<double>(rho2_ptr, sizerho);
+cout << "Final sum: " << old_sum << endl;
+/*} else {
+  base.closeHandle(rho2_ptr);
+  MPI_Barrier(MPI_COMM_WORLD);
+  }*/
+
+//MPI_Finalize();
+
+
+}