// Files
// DKS/test/testTimeIntegration.cpp
// 2016-10-10 14:49:32 +02:00
//
// 228 lines
// 5.7 KiB
// C++

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <time.h>
#include <sys/time.h>
#include "DKSBase.h"
#include <vector_types.h>
#include "cuda_runtime.h"
using namespace std;
/** Plain aggregate holding three double components: x, y and z. */
struct Vector {
  double x;
  double y;
  double z;
};
/**
 * Builds a Vector whose x, y and z components are all 0.5.
 *
 * @return the freshly initialized Vector, by value.
 */
Vector initVector() {
  Vector result = {0.5, 0.5, 0.5};
  return result;
}
/**
 * Overwrites the first N elements of v with the value returned by initVector().
 *
 * @param v  array with room for at least N elements
 * @param N  number of elements to initialize; nothing is written when N <= 0
 */
void initVectors(Vector *v, int N) {
  // initVector() always yields the same value, so build it once and copy it.
  const Vector fill = initVector();
  int idx = 0;
  while (idx < N) {
    v[idx] = fill;
    ++idx;
  }
}
/**
 * Sets the first N entries of data to 0.005.
 *
 * @param data  array with room for at least N doubles
 * @param N     number of entries to write; nothing is written when N <= 0
 */
void initDouble(double *data, int N) {
  const double fill = 0.005;
  int idx = 0;
  while (idx < N) {
    data[idx] = fill;
    ++idx;
  }
}
/**
 * Sets the first N entries of data to -1.
 *
 * @param data  array with room for at least N longs
 * @param N     number of entries to write; nothing is written when N <= 0
 */
void initLastSect(long *data, int N) {
  const long fill = -1;
  int idx = 0;
  while (idx < N) {
    data[idx] = fill;
    ++idx;
  }
}
/**
 * Prints "checksum: <sum>" to standard output, where <sum> is the total of the
 * x, y and z components of the first N elements of v.
 *
 * @param v  array with at least N elements
 * @param N  number of elements to include; the printed sum is 0 when N <= 0
 */
void checkSum(Vector *v, int N) {
  double total = 0;
  for (int idx = 0; idx < N; ++idx) {
    const Vector &item = v[idx];
    // Keep the per-element grouping (x + y + z first) so the floating-point
    // result is accumulated in the same order every time.
    total += item.x + item.y + item.z;
  }
  std::cout << "checksum: " << total << std::endl;
}
/**
 * Driver that exercises and times the DKS particle push calls.
 *
 * Command line options:
 *   -mic        use the "OpenMP" API with the "-mic" device
 *               (the default is the "Cuda" API with the "-gpu" device)
 *   -npart <n>  number of particles (default 10, must be positive)
 *   -loop <n>   number of timed iterations (default 10, must be positive)
 *
 * The program runs five untimed iterations, prints checksums of the r and x
 * arrays, then runs `loop` timed iterations and prints the elapsed time, the
 * average time per iteration and the checksums again.
 *
 * @return 0 on completion, 1 when the command line is invalid.
 */
int main(int argc, char *argv[]) {
  int loop = 10;
  int numpart = 10;

  // Stack buffers instead of new[]: nothing to free, and they are large enough
  // for every name assigned below ("Cuda", "OpenMP", "-gpu", "-mic").
  char api_name[16] = "Cuda";
  char device_name[16] = "-gpu";

  for (int i = 1; i < argc; i++) {
    const std::string arg(argv[i]);
    if (arg == "-mic") {
      strcpy(api_name, "OpenMP");
      strcpy(device_name, "-mic");
    } else if (arg == "-npart" || arg == "-loop") {
      // The option needs a value; reading argv[argc] would hand NULL to atoi.
      if (i + 1 >= argc) {
        std::cerr << "Missing value for option " << arg << std::endl;
        return 1;
      }
      const int value = atoi(argv[++i]);
      if (arg == "-npart")
        numpart = value;
      else
        loop = value;
    }
  }

  // Array sizes and the per-iteration average both need positive counts.
  if (numpart < 1 || loop < 1) {
    std::cerr << "-npart and -loop must be positive integers" << std::endl;
    return 1;
  }

  std::cout << "=========================BEGIN TEST=========================" << std::endl;
  std::cout << "Use api: " << api_name << "\t" << device_name << std::endl;
  std::cout << "Number of particles: " << numpart << std::endl;
  std::cout << "------------------------------------------------------------" << std::endl;

  // init r, p, x, ori, dt and ls host arrays used to test time integration
  Vector *r = new Vector[numpart];
  Vector *p = new Vector[numpart];
  Vector *x = new Vector[numpart];
  Vector *ori = new Vector[5];
  initVectors(r, numpart);
  initVectors(p, numpart);
  initVectors(x, numpart);
  initVectors(ori, 5);

  double *dt = new double[numpart];
  initDouble(dt, numpart);

  long *ls = new long[numpart];
  initLastSect(ls, numpart);

  // init dks
  // NOTE(review): ierr is filled by allocateMemory but never inspected, and the
  // return values of the other DKS calls are ignored - confirm whether this
  // test should abort on failure.
  int ierr;
  DKSBase base;
  base.setAPI(api_name, strlen(api_name));
  // Length must be that of device_name itself (it differs from api_name for -mic).
  base.setDevice(device_name, strlen(device_name));
  base.initDevice();

  int stream1, stream2;
  base.createStream(stream1);
  base.createStream(stream2);

  base.registerHostMemory(r, numpart);
  base.registerHostMemory(p, numpart);
  base.registerHostMemory(x, numpart);
  base.registerHostMemory(dt, numpart);
  base.registerHostMemory(ls, numpart);

  //*** test parallel tracker push ***//
  void *r_ptr, *p_ptr, *x_ptr, *dt_ptr, *ls_ptr, *ori_ptr;

  // allocate memory on the device
  r_ptr = base.allocateMemory<Vector>(numpart, ierr);
  p_ptr = base.allocateMemory<Vector>(numpart, ierr);
  x_ptr = base.allocateMemory<Vector>(numpart, ierr);
  dt_ptr = base.allocateMemory<double>(numpart, ierr);
  ls_ptr = base.allocateMemory<long>(numpart, ierr);
  ori_ptr = base.allocateMemory<Vector>(5, ierr);

  // transfer data to device
  // NOTE(review): dt is never copied into dt_ptr; presumably the push calls do
  // not read dt_ptr with the arguments used below - confirm against DKSBase.
  base.writeData<Vector>(r_ptr, r, numpart);
  base.writeData<Vector>(p_ptr, p, numpart);
  base.writeData<Vector>(x_ptr, x, numpart);
  base.writeData<Vector>(ori_ptr, ori, 5);

  // run a few integration loops before the timer is started
  for (int i = 0; i < 5; i++) {
    // calc push
    base.callParallelTTrackerPush(r_ptr, p_ptr, numpart, dt_ptr,
                                  0.05, 1, false, stream1);
    // read r from device
    base.readDataAsync<Vector>(r_ptr, r, numpart, stream1);
    // write LastSection to device
    base.writeDataAsync<long>(ls_ptr, ls, numpart, stream2);
    // calc push transform
    base.callParallelTTrackerPushTransform(x_ptr, p_ptr, ls_ptr, ori_ptr, numpart, 5,
                                           dt_ptr, 0.05, 1, false, stream2);
    // read x from device
    base.readDataAsync<Vector>(x_ptr, x, numpart, stream2);
    // sync and wait till all tasks and reads are complete
    base.syncDevice();
  }
  checkSum(r, numpart);
  checkSum(x, numpart);

  // start the timing of integration
  struct timeval timeStart, timeEnd;
  std::cout << "start integration" << std::endl;
  gettimeofday(&timeStart, NULL);
  for (int i = 0; i < loop; i++) {
    // calc push
    base.callParallelTTrackerPush(r_ptr, p_ptr, numpart, dt_ptr, 0.05, 1, false, stream1);
    // read r from device
    base.readDataAsync<Vector>(r_ptr, r, numpart, stream1);
    // write LastSection to device
    base.writeDataAsync<long>(ls_ptr, ls, numpart, stream2);
    // calc push transform
    base.callParallelTTrackerPushTransform(x_ptr, p_ptr, ls_ptr, ori_ptr, numpart, 5,
                                           dt_ptr, 0.05, 1, false, stream2);
    // read x from device
    base.readDataAsync<Vector>(x_ptr, x, numpart, stream2);
    // sync and wait till all tasks and reads are complete
    base.syncDevice();
  }
  gettimeofday(&timeEnd, NULL);
  std::cout << "end integration" << std::endl;

  // Elapsed wall-clock time in microseconds, computed in double precision so
  // the seconds-to-microseconds scaling cannot overflow an integer type.
  const double t = (timeEnd.tv_sec - timeStart.tv_sec) * 1e6 +
                   (timeEnd.tv_usec - timeStart.tv_usec);
  // The timed loop above ran `loop` iterations, so that is the count reported.
  std::cout << "Time for " << loop << " integrations: " << t * 1e-6 << "s" << std::endl;
  std::cout << "Average time for integration: " << t * 1e-6 / loop << std::endl;
  checkSum(r, numpart);
  checkSum(x, numpart);

  // free device memory
  base.freeMemory<Vector>(r_ptr, numpart);
  base.freeMemory<Vector>(p_ptr, numpart);
  base.freeMemory<Vector>(x_ptr, numpart);
  base.freeMemory<Vector>(ori_ptr, 5);
  base.freeMemory<double>(dt_ptr, numpart);
  base.freeMemory<long>(ls_ptr, numpart);

  // unregister host memory
  base.unregisterHostMemory(r);
  base.unregisterHostMemory(p);
  base.unregisterHostMemory(x);
  base.unregisterHostMemory(dt);
  base.unregisterHostMemory(ls);

  // free host memory
  delete[] r;
  delete[] x;
  delete[] p;
  delete[] dt;
  delete[] ls;
  delete[] ori;

  std::cout << "==========================END TEST==========================" << std::endl;
  return 0;
}