snapshot of svn

This commit is contained in:
Uldis Locans
2016-10-10 14:49:32 +02:00
commit 4fa529aaea
122 changed files with 23153 additions and 0 deletions

172
test/testGather.cpp Normal file
View File

@ -0,0 +1,172 @@
#include <iostream>
#include <mpi.h>
#include <string.h>
#include "nvToolsExt.h"
#include "cuda_profiler_api.h"
#include "DKSBase.h"
using namespace std;
// Dumps a cubic N x N x N block of integers to standard output.
//
// data    - flat array holding N*N*N values; element (i, j, k) lives at
//           data[i*N*N + j*N + k], so the values are read consecutively.
// N       - edge length of the cube.
// message - optional heading written first; nothing is written when empty.
//
// Each innermost run of N values becomes one tab-terminated line, and every
// group of N lines is followed by a blank line.
void printData3D(int* data, int N, const char *message = "") {
  // A non-empty C string is one whose first character is not the terminator.
  if (*message != '\0')
    std::cout << message;

  // The loop nest visits the array in storage order, so one cursor suffices.
  const int *cursor = data;
  for (int plane = 0; plane < N; ++plane) {
    for (int row = 0; row < N; ++row) {
      for (int col = 0; col < N; ++col)
        std::cout << *cursor++ << "\t";
      std::cout << std::endl;
    }
    std::cout << std::endl;
  }
}
// Dumps an nx x ny x nz block of integers to standard output.
//
// data       - flat array; the value at (x, y, z) is data[z*ny*nx + y*nx + x].
// nx, ny, nz - extents along the innermost, middle and outermost loop.
// message    - optional heading written first; nothing is written when empty.
//
// Every line holds nx tab-terminated values, and each group of ny lines is
// followed by a blank line.
void printData3D2(int* data, int nx, int ny, int nz, const char *message = "") {
  if (message[0] != '\0')
    std::cout << message;

  for (int z = 0; z < nz; ++z) {
    // Offset of the first element of this group of ny lines.
    const int planeBase = z * ny * nx;
    for (int y = 0; y < ny; ++y) {
      const int rowBase = planeBase + y * nx;
      for (int x = 0; x < nx; ++x)
        std::cout << data[rowBase + x] << "\t";
      std::cout << std::endl;
    }
    std::cout << std::endl;
  }
}
// Prints a flat integer array as nprocs lines of N values each.
//
// data    - flat array; line i shows data[i*N] .. data[i*N + N - 1].
// N       - number of values per line.
// nprocs  - number of lines to print.
// message - optional heading written first; nothing is written when empty.
void printData(int *data, int N, int nprocs, const char *message = "") {
  if (*message != '\0')
    std::cout << message;

  // Lines are laid out back to back, so a single cursor walks the whole array.
  const int *next = data;
  int line = 0;
  while (line < nprocs) {
    for (int col = 0; col < N; ++col)
      std::cout << *next++ << "\t";
    std::cout << std::endl;
    ++line;
  }
}
// Fills the first N entries of data with the value rank + 1.
//
// data - destination array with room for at least N ints.
// N    - number of entries to write; nothing is written when N <= 0.
// rank - the stored value is rank + 1.
void initData(int *data, int N, int rank) {
  const int fillValue = rank + 1;
  // Every slot receives the same value, so the visiting order is irrelevant.
  for (int pos = N - 1; pos >= 0; --pos)
    data[pos] = fillValue;
}
int main(int argc, char *argv[]) {
int ierr;
int rank, nprocs;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
cout << "Rank " << (rank+1) << " from " << nprocs << endl;
int N_global[3] = {64, 64, 32};
int N_local[3] = {64, 32, 16};
int n = N_local[0] * N_local[1] * N_local[2];
int idx[4] = {0, 0, 0, 0};
int idy[4] = {0, 32, 0, 32};
int idz[4] = {0, 0, 16, 16};
DKSBase base = DKSBase();
base.setAPI("Cuda", 4);
base.setDevice("-gpu", 4);
base.initDevice();
int *hdata_in;
if (base.allocateHostMemory(hdata_in, n) != DKS_SUCCESS) {
hdata_in = new int[n];
cout << "pinned allocation failed!" << endl;
}
initData(hdata_in, n, rank);
for (int i = 0; i < 2; i++) {
MPI_Barrier(MPI_COMM_WORLD);
if (i == 1)
nvtxMarkA("start gather");
if (rank == 0) {
void *mem_ptr, *tmpgreen_ptr;
mem_ptr = base.allocateMemory<int>(nprocs*n, ierr);
//call another kernel
int sizegreen = 33 * 33 * 17;
tmpgreen_ptr = base.allocateMemory<double>(sizegreen, ierr);
nvtxMarkA("call green");
base.callGreensIntegral(tmpgreen_ptr, 33, 33, 17, 33, 33, 0.001, 0.001, 0.00007);
nvtxMarkA("call gather");
base.gather3DData(mem_ptr, hdata_in, n, MPI_INT, N_global, N_local,
idx, idy, idz, nprocs, rank, 0, MPI_COMM_WORLD);
//read and print data once for debug only
/*
if (i == 0 && nprocs*n < 257) {
int *hdata_out_all = new int[nprocs*n];
base.readData<int>(mem_ptr, hdata_out_all, n*nprocs);
printData3D2(hdata_out_all, N_global[0], N_global[1], N_global[2]);
}
else {
int *hout_data = new int[nprocs*n];
base.readData<int>(mem_ptr, hout_data, nprocs*n);
int sum = 0;
for (int s = 0; s < nprocs*n; s++)
sum += hout_data[s];
cout << "Sum: " << sum << endl;
}
*/
MPI_Barrier(MPI_COMM_WORLD);
nvtxMarkA("call scatter");
base.scatter3DData(mem_ptr, hdata_in, n, MPI_INT, N_global, N_local,
idx, idy, idz, nprocs, rank, 0, MPI_COMM_WORLD);
base.freeMemory<int>(mem_ptr, n*nprocs);
base.freeMemory<double>(tmpgreen_ptr, sizegreen);
} else {
nvtxMarkA("call gather");
base.gather3DData(NULL, hdata_in, n, MPI_INT, N_global, N_local,
idx, idy, idz, nprocs, rank, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
nvtxMarkA("call scatter");
base.scatter3DData(NULL, hdata_in, n, MPI_INT, N_global, N_local,
idx, idy, idz, nprocs, rank, 0, MPI_COMM_WORLD);
}
if (i == 1)
nvtxMarkA("end gather");
}
MPI_Barrier(MPI_COMM_WORLD);
base.freeHostMemory(hdata_in, n);
MPI_Finalize();
return 0;
}