// DKS/test/testOffsetMPI.cpp

#include <mpi.h>

#include <cstdlib>
#include <iostream>
#include <vector>

#include "DKSBase.h"

using namespace std;

int main(int argc, char *argv[]) {

	int rank, size;

	MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    cout << "Rank " << rank << " from " << size << endl;


	int ierr, N, n;

	N = 8;
	n = N / 2;

	double *data_in = new double[n];

	for (int i = 0; i < n; i++)
		data_in[i] = (double)rank + 1.0 + (double)i / n;

	DKSBase base = DKSBase();
	base.setAPI("Cuda", 4);
	base.setDevice("-gpu", 4);
	base.initDevice();

	if (rank == 0) {
		//alocate memory of size N
		void *ptr1;
		ptr1 = base.allocateMemory<double>(size*N, ierr);
		cout << "Sent pointer: " << ptr1 << endl;

		//send ptr to other processes
		MPI_Send(&ptr1, sizeof(void*), MPI_BYTE, 1, 123, MPI_COMM_WORLD);

		//wrtie n data with no offset to device and wait for other processes
		ierr = base.writeData<double>(ptr1, data_in, n, rank*n);
		MPI_Barrier(MPI_COMM_WORLD);

		//read memory of size N from device
		double *data_out = new double[N];
		ierr = base.readData<double>(ptr1, data_out, N);

		//free device memory
		base.freeMemory<double>(ptr1, size*N);

		//print results
		for (int i = 0; i < n; i++)
			cout << data_in[i] << "\t";
		cout << endl;

		for (int i = 0; i < N; i++)
			cout << data_out[i] << "\t";
		cout << endl;

    } else {
    	//receive device memory pointer
    	void *ptr2;
    	MPI_Recv(&ptr2, sizeof(void*), MPI_BYTE, 0, 123, MPI_COMM_WORLD, NULL);
    	cout << "Received pointer: " << ptr2 << endl;
    	//write data with an offset
    	base.writeData<double>(ptr2, data_in, n, rank*n);

    	MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Finalize();


	return 0;
}