228 lines
5.7 KiB
C++
228 lines
5.7 KiB
C++
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <time.h>
#include <sys/time.h>
#include "DKSBase.h"

#include <vector_types.h>
#include "cuda_runtime.h"
|
|
|
|
using namespace std;
|
|
|
|
// Plain aggregate holding three double components (x, y, z).
// Kept free of constructors and default member initialisers so that it stays
// an aggregate and can be brace-initialised.
struct Vector {
  double x;
  double y;
  double z;
};
|
|
|
|
// Returns a Vector whose x, y and z components are all set to 0.5.
Vector initVector() {
  Vector tmp = {0.5, 0.5, 0.5};
  return tmp;
}
|
|
|
|
// Sets the first N elements of v to the value returned by initVector().
//
// v must point to at least N writable elements. Nothing is written when
// N is zero or negative.
void initVectors(Vector *v, int N) {
  std::fill_n(v, N, initVector());
}
|
|
|
|
// Sets the first N elements of data to 0.005.
//
// data must point to at least N writable elements. Nothing is written when
// N is zero or negative.
void initDouble(double *data, int N) {
  std::fill_n(data, N, 0.005);
}
|
|
|
|
// Sets the first N elements of data to -1.
//
// data must point to at least N writable elements. Nothing is written when
// N is zero or negative.
void initLastSect(long *data, int N) {
  std::fill_n(data, N, -1L);
}
|
|
|
|
// Prints "checksum: <sum>" to standard output, where <sum> is the sum of the
// x, y and z components of the first N elements of v.
//
// v must point to at least N readable elements. For N <= 0 the printed sum
// is 0.
void checkSum(Vector *v, int N) {
  double sum = 0;
  for (int i = 0; i < N; i++) {
    sum += v[i].x + v[i].y + v[i].z;
  }

  std::cout << "checksum: " << sum << std::endl;
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
int loop = 10;
|
|
int numpart = 10;
|
|
char *api_name = new char[10];
|
|
char *device_name = new char[10];
|
|
strcpy(api_name, "Cuda");
|
|
strcpy(device_name, "-gpu");
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
|
|
if (argv[i] == string("-mic")) {
|
|
strcpy(api_name, "OpenMP");
|
|
strcpy(device_name, "-mic");
|
|
}
|
|
|
|
if (argv[i] == string("-npart")) {
|
|
numpart = atoi(argv[i+1]);
|
|
i++;
|
|
}
|
|
|
|
if (argv[i] == string("-loop")) {
|
|
loop = atoi(argv[i+1]);
|
|
i++;
|
|
}
|
|
|
|
}
|
|
|
|
cout << "=========================BEGIN TEST=========================" << endl;
|
|
cout << "Use api: " << api_name << "\t" << device_name << endl;
|
|
cout << "Number of particles: " << numpart << endl;
|
|
cout << "------------------------------------------------------------" << endl;
|
|
|
|
//init p,r and dt arrays to test time integration
|
|
Vector *r = new Vector[numpart];
|
|
Vector *p = new Vector[numpart];
|
|
Vector *x = new Vector[numpart];
|
|
Vector *ori = new Vector[5];
|
|
initVectors(r, numpart);
|
|
initVectors(p, numpart);
|
|
initVectors(x, numpart);
|
|
initVectors(ori, 5);
|
|
|
|
double *dt = new double[numpart];
|
|
initDouble(dt, numpart);
|
|
|
|
long *ls = new long[numpart];
|
|
initLastSect(ls, numpart);
|
|
|
|
//init dks
|
|
int ierr;
|
|
DKSBase base;
|
|
base.setAPI(api_name, strlen(api_name));
|
|
base.setDevice(device_name, strlen(api_name));
|
|
base.initDevice();
|
|
|
|
int stream1, stream2;
|
|
base.createStream(stream1);
|
|
base.createStream(stream2);
|
|
|
|
base.registerHostMemory(r, numpart);
|
|
base.registerHostMemory(p, numpart);
|
|
base.registerHostMemory(x, numpart);
|
|
base.registerHostMemory(dt, numpart);
|
|
base.registerHostMemory(ls, numpart);
|
|
|
|
//***test parallelttrackerpush***//
|
|
void *r_ptr, *p_ptr, *x_ptr, *dt_ptr, *ls_ptr, *ori_ptr;
|
|
|
|
//allocate memory on the device
|
|
r_ptr = base.allocateMemory<Vector>(numpart, ierr);
|
|
p_ptr = base.allocateMemory<Vector>(numpart, ierr);
|
|
x_ptr = base.allocateMemory<Vector>(numpart, ierr);
|
|
dt_ptr = base.allocateMemory<double>(numpart, ierr);
|
|
ls_ptr = base.allocateMemory<long>(numpart, ierr);
|
|
ori_ptr = base.allocateMemory<Vector>(5, ierr);
|
|
|
|
//transfer data to device
|
|
base.writeData<Vector>(r_ptr, r, numpart);
|
|
base.writeData<Vector>(p_ptr, p, numpart);
|
|
base.writeData<Vector>(x_ptr, x, numpart);
|
|
base.writeData<Vector>(ori_ptr, ori, 5);
|
|
|
|
|
|
//do some couple of integration loops before the timer is started
|
|
for (int i = 0; i < 5; i++) {
|
|
//calc push
|
|
base.callParallelTTrackerPush (r_ptr, p_ptr, numpart, dt_ptr,
|
|
0.05, 1, false, stream1);
|
|
|
|
//read R from device
|
|
base.readDataAsync<Vector> (r_ptr, r, numpart, stream1);
|
|
|
|
//write LastSection to device
|
|
base.writeDataAsync<long> (ls_ptr, ls, numpart, stream2);
|
|
|
|
//calc push
|
|
base.callParallelTTrackerPushTransform(x_ptr, p_ptr, ls_ptr, ori_ptr, numpart, 5,
|
|
dt_ptr, 0.05, 1, false, stream2);
|
|
//read x from device
|
|
base.readDataAsync<Vector>(x_ptr, x, numpart, stream2);
|
|
|
|
//sync and wait till all tasks and reads are complete
|
|
base.syncDevice();
|
|
}
|
|
|
|
checkSum(r, numpart);
|
|
checkSum(x, numpart);
|
|
|
|
|
|
|
|
//start the timing of integration
|
|
struct timeval timeStart, timeEnd;
|
|
std::cout << "start integration" << std::endl;
|
|
|
|
gettimeofday(&timeStart, NULL);
|
|
for (int i = 0; i < loop; i++) {
|
|
|
|
//calc push
|
|
base.callParallelTTrackerPush(r_ptr, p_ptr, numpart, dt_ptr, 0.05, 1, false, stream1);
|
|
|
|
//read R from device
|
|
base.readDataAsync<Vector> (r_ptr, r, numpart, stream1);
|
|
|
|
//write LastSection to device
|
|
base.writeDataAsync<long> (ls_ptr, ls, numpart, stream2);
|
|
|
|
//calc push transform
|
|
base.callParallelTTrackerPushTransform(x_ptr, p_ptr, ls_ptr, ori_ptr, numpart, 5,
|
|
dt_ptr, 0.05, 1, false, stream2);
|
|
|
|
//read R from device
|
|
base.readDataAsync<Vector>(x_ptr, x, numpart, stream2);
|
|
|
|
//sync and wait till all tasks and reads are complete
|
|
base.syncDevice();
|
|
}
|
|
gettimeofday(&timeEnd, NULL);
|
|
|
|
std::cout << "end integration" << std::endl;
|
|
double t = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1000000 +
|
|
(timeEnd.tv_usec - timeStart.tv_usec));
|
|
|
|
std::cout << "Time for " << numpart << " integrations: " << t * 1e-6 << "s" << std::endl;
|
|
std::cout << "Average time for integration: " << t * 1e-6 / loop << std::endl;
|
|
|
|
checkSum(r, numpart);
|
|
checkSum(x, numpart);
|
|
|
|
|
|
|
|
//free memory
|
|
base.freeMemory<Vector>(r_ptr, numpart);
|
|
base.freeMemory<Vector>(p_ptr, numpart);
|
|
base.freeMemory<Vector>(x_ptr, numpart);
|
|
base.freeMemory<Vector>(ori_ptr, 5);
|
|
base.freeMemory<double>(dt_ptr, numpart);
|
|
base.freeMemory<long>(ls_ptr, numpart);
|
|
|
|
//unregister host memory
|
|
base.unregisterHostMemory(r);
|
|
base.unregisterHostMemory(p);
|
|
base.unregisterHostMemory(x);
|
|
base.unregisterHostMemory(dt);
|
|
base.unregisterHostMemory(ls);
|
|
|
|
//free host memory
|
|
delete[] r;
|
|
delete[] x;
|
|
delete[] p;
|
|
delete[] dt;
|
|
delete[] ls;
|
|
delete[] ori;
|
|
|
|
cout << "==========================END TEST==========================" << endl;
|
|
return 0;
|
|
|
|
}
|