275 lines
6.9 KiB
C++
275 lines
6.9 KiB
C++
#include <iostream>
|
|
#include <string.h>
|
|
|
|
#include "DKSBase.h"
|
|
#include "nvToolsExt.h"
|
|
#include "cuda_profiler_api.h"
|
|
#include "cuda_runtime.h"
|
|
|
|
using namespace std;
|
|
|
|
|
|
void printData3D(double* data, int N, int NI, const char *message = "") {
|
|
if (strcmp(message, "") != 0)
|
|
cout << message;
|
|
|
|
for (int i = 0; i < NI; i++) {
|
|
for (int j = 0; j < N; j++) {
|
|
for (int k = 0; k < N; k++) {
|
|
cout << data[i*N*N + j*N + k] << "\t";
|
|
}
|
|
cout << endl;
|
|
}
|
|
cout << endl;
|
|
}
|
|
|
|
}
|
|
|
|
void initData(double *data, int N) {
|
|
|
|
for (int i = 0; i < N/4 + 1; i++) {
|
|
for (int j = 0; j < N/2 + 1; j++) {
|
|
for (int k = 0; k < N/2 + 1; k++) {
|
|
data[i*N*N + j*N + k] = k+1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void initData2(double *data, int N) {
|
|
for (int i = 0; i < N; i++)
|
|
data[i] = i;
|
|
}
|
|
|
|
void initComplex( complex<double> *d, int N) {
|
|
|
|
for (int i = 0; i < N; i++) {
|
|
d[i] = complex<double>(2, 0);
|
|
}
|
|
|
|
}
|
|
|
|
void printComplex(complex<double> *d, int N) {
|
|
|
|
for (int i = 0; i < N; i++)
|
|
cout << d[i] << "\t";
|
|
cout << endl;
|
|
|
|
}
|
|
|
|
void printDouble(double *d, int N) {
|
|
|
|
for (int i = 0; i < N; i++)
|
|
cout << d[i] << ", ";
|
|
cout << endl;
|
|
|
|
}
|
|
|
|
void initMirror(double *data, int n1, int n2, int n3) {
|
|
int d = 1;
|
|
for (int i = 0; i < n3; i++) {
|
|
for (int j = 0; j < n2; j++) {
|
|
for (int k = 0; k < n1; k++) {
|
|
if (i < n3/2 + 1 && j < n2/2 + 1 && k < n1/2 + 1)
|
|
data[i * n2 * n1 + j * n1 + k] = d++;
|
|
else
|
|
data[i * n2 * n1 + j * n1 + k] = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void printDiv(int c) {
|
|
for (int i = 0; i < c; i++)
|
|
cout << "-";
|
|
cout << endl;
|
|
|
|
}
|
|
|
|
void printMirror(double *data, int n1, int n2, int n3) {
|
|
|
|
printDiv(75);
|
|
for (int i = 0; i < n3; i++) {
|
|
for (int j = 0; j < n2; j++) {
|
|
for (int k = 0; k < n1; k++) {
|
|
cout << data[i * n2 * n1 + j * n1 + k] << "\t";
|
|
}
|
|
cout << endl;
|
|
}
|
|
cout << endl;
|
|
}
|
|
cout << endl;
|
|
}
|
|
|
|
double sumData(double *data, int datasize) {
|
|
|
|
double sum = 0;
|
|
for (int i = 0; i < datasize; i++)
|
|
sum += data[i];
|
|
|
|
return sum;
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
char *api_name = new char[10];
|
|
char *device_name = new char[10];
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
if (argv[i] == string("-cuda")) {
|
|
strcpy(api_name, "Cuda");
|
|
strcpy(device_name, "-gpu");
|
|
}
|
|
|
|
if (argv[i] == string("-opencl")) {
|
|
strcpy(api_name, "OpenCL");
|
|
strcpy(device_name, "-gpu");
|
|
}
|
|
|
|
if (argv[i] == string("-mic")) {
|
|
strcpy(api_name, "OpenMP");
|
|
strcpy(device_name, "-mic");
|
|
}
|
|
|
|
if (argv[i] == string("-cpu")) {
|
|
strcpy(api_name, "OpenCL");
|
|
strcpy(device_name, "-cpu");
|
|
}
|
|
}
|
|
|
|
cout << "Use api: " << api_name << ", " << device_name << endl;
|
|
|
|
/* set domain size */
|
|
int NG[3] = {64, 64, 32};
|
|
int NL[3] = {NG[0], NG[1] / 4, NG[2] / 2};
|
|
int ng[3] = {NG[0]/2 + 1, NG[1]/2 + 1, NG[2]/2 + 1};
|
|
int sizerho = NG[0] * NG[1] * NG[2];
|
|
int sizegreen = ng[0] * ng[1] * ng[2];
|
|
int sizecomp = NG[0] * NG[1] * NG[2] / 2 + 1;
|
|
|
|
/* print some messages bout the example in the begginig */
|
|
cout << "Global domain: " << NG[0] << ", " << NG[1] << ", " << NG[2] << endl;
|
|
cout << "Greens domain: " << ng[0] << ", " << ng[1] << ", " << ng[2] << endl;
|
|
|
|
/* dks init and create 2 streams */
|
|
int dkserr;
|
|
DKSBase base;
|
|
base.setAPI(api_name, strlen(api_name));
|
|
base.setDevice(device_name, strlen(device_name));
|
|
base.initDevice();
|
|
base.setupFFT(3, NG);
|
|
|
|
/* allocate memory and init rho field */
|
|
double *rho = new double[sizerho];
|
|
double *rho_out = new double[sizerho];
|
|
//double *green_out = new double[sizegreen];
|
|
double *mirror_out = new double[sizerho];
|
|
//initMirror(rho, NL[0], NL[1], NL[2]);
|
|
initMirror(rho, NG[0], NG[1], NG[2]);
|
|
|
|
/*
|
|
allocate memory on device for
|
|
- rho field
|
|
- rho FFT
|
|
- tmpgreen
|
|
- greens integral
|
|
- greens integral FFT
|
|
*/
|
|
void *tmpgreen_ptr, *rho2_ptr, *grn_ptr, *rho2tr_ptr, *grntr_ptr;
|
|
tmpgreen_ptr = base.allocateMemory<double>(sizegreen, dkserr);
|
|
rho2_ptr = base.allocateMemory<double>(sizerho, dkserr);
|
|
grn_ptr = base.allocateMemory<double>(sizerho, dkserr);
|
|
rho2tr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
|
|
grntr_ptr = base.allocateMemory< complex<double> >(sizecomp, dkserr);
|
|
|
|
/* =================================================*/
|
|
/* =================================================*/
|
|
/* =====loop trough fftpoison solver iterations=====*/
|
|
/* =================================================*/
|
|
/* =================================================*/
|
|
|
|
double old_sum = 0;
|
|
|
|
int hr_m[3] = {1, 1, 1};
|
|
base.callGreensIntegral(tmpgreen_ptr, ng[0], ng[1], ng[2], ng[0], ng[1], hr_m[0], hr_m[1], hr_m[2]);
|
|
|
|
/* calculate greens integral on gpu */
|
|
base.callGreensIntegration(grn_ptr, tmpgreen_ptr, ng[0], ng[1], ng[2]);
|
|
|
|
/* mirror the field */
|
|
base.callMirrorRhoField(grn_ptr, ng[0], ng[1], ng[2]);
|
|
/*
|
|
base.readData<double>(grn_ptr, mirror_out, sizerho);
|
|
for (int i = 0; i < sizerho; i++)
|
|
cout << mirror_out[i] << " ";
|
|
cout << endl << endl;
|
|
|
|
for (int i = 0; i < sizerho; i++)
|
|
cout << rho[i] << " ";
|
|
cout << endl << endl;
|
|
*/
|
|
/* transfer rho field to device */
|
|
base.writeData<double>(rho2_ptr, rho, sizerho);
|
|
|
|
/* get FFT of rho field */
|
|
base.callR2CFFT(rho2_ptr, rho2tr_ptr, 3, NG);
|
|
|
|
/* get FFT of mirrored greens integral */
|
|
base.callR2CFFT(grn_ptr, grntr_ptr, 3, NG);
|
|
|
|
/* multiply both FFTs */
|
|
base.callMultiplyComplexFields(rho2tr_ptr, grntr_ptr, sizecomp);
|
|
|
|
/*
|
|
complex<double> *crho = new complex<double>[sizecomp];
|
|
complex<double> *cgre = new complex<double>[sizecomp];
|
|
base.readData< complex<double> >(rho2tr_ptr, crho, sizecomp);
|
|
base.readData< complex<double> >(grntr_ptr, cgre, sizecomp);
|
|
|
|
for (int i = 0; i < sizecomp; i++)
|
|
cout << cgre[i].real() << " ";
|
|
cout << endl << endl;
|
|
|
|
for (int i = 0; i < sizecomp; i++)
|
|
cout << crho[i].real() << " ";
|
|
cout << endl << endl;
|
|
|
|
delete[] crho;
|
|
delete[] cgre;
|
|
*/
|
|
|
|
/* inverse fft and transfer data back */
|
|
/*
|
|
multiple device syncs and mpi barriers are used to make sure data
|
|
transfer is started when results are ready and progam moves on
|
|
only when data transfer is finished
|
|
*/
|
|
base.callC2RFFT(rho2tr_ptr, rho2_ptr, 3, NG);
|
|
|
|
base.readData<double> (rho2_ptr, rho_out, sizerho);
|
|
|
|
for (int i = 0; i < 10; i++)
|
|
cout << rho_out[i] << " ";
|
|
cout << endl;
|
|
|
|
old_sum = sumData(rho_out, sizerho);
|
|
|
|
/* =================================================*/
|
|
/* =================================================*/
|
|
/* ==========end fftpoison solver test run==========*/
|
|
/* =================================================*/
|
|
/* =================================================*/
|
|
|
|
base.freeMemory<double>(tmpgreen_ptr, sizegreen);
|
|
base.freeMemory<double>(grn_ptr, sizerho);
|
|
base.freeMemory< complex<double> >(rho2tr_ptr, sizecomp);
|
|
base.freeMemory< complex<double> >(grntr_ptr, sizecomp);
|
|
base.freeMemory<double>(rho2_ptr, sizerho);
|
|
|
|
delete[] rho_out;
|
|
delete[] rho;
|
|
delete[] mirror_out;
|
|
cout << "Final sum: " << old_sum << endl;
|
|
|
|
}
|