#include #include #include #include #include "DKSBase.h" #include #include "cuda_runtime.h" using namespace std; typedef struct { double x; double y; double z; } Vector; Vector initVector() { Vector tmp; tmp.x = 0.5; tmp.y = 0.5; tmp.z = 0.5; return tmp; } void initVectors(Vector *v, int N) { for (int i = 0; i < N; i++) v[i] = initVector(); } void initDouble(double *data, int N) { for (int i = 0; i < N; i++) data[i] = 0.005; } void initLastSect(long *data, int N) { for (int i = 0; i < N; i++) data[i] = -1; } void checkSum(Vector *v, int N) { double sum = 0; for (int i = 0; i < N; i++) sum += v[i].x + v[i].y + v[i].z; std::cout << "checksum: " << sum << std::endl; } int main(int argc, char *argv[]) { int loop = 10; int numpart = 10; char *api_name = new char[10]; char *device_name = new char[10]; strcpy(api_name, "Cuda"); strcpy(device_name, "-gpu"); for (int i = 1; i < argc; i++) { if (argv[i] == string("-mic")) { strcpy(api_name, "OpenMP"); strcpy(device_name, "-mic"); } if (argv[i] == string("-npart")) { numpart = atoi(argv[i+1]); i++; } if (argv[i] == string("-loop")) { loop = atoi(argv[i+1]); i++; } } cout << "=========================BEGIN TEST=========================" << endl; cout << "Use api: " << api_name << "\t" << device_name << endl; cout << "Number of particles: " << numpart << endl; cout << "------------------------------------------------------------" << endl; //init p,r and dt arrays to test time integration Vector *r = new Vector[numpart]; Vector *p = new Vector[numpart]; Vector *x = new Vector[numpart]; Vector *ori = new Vector[5]; initVectors(r, numpart); initVectors(p, numpart); initVectors(x, numpart); initVectors(ori, 5); double *dt = new double[numpart]; initDouble(dt, numpart); long *ls = new long[numpart]; initLastSect(ls, numpart); //init dks int ierr; DKSBase base; base.setAPI(api_name, strlen(api_name)); base.setDevice(device_name, strlen(api_name)); base.initDevice(); int stream1, stream2; base.createStream(stream1); base.createStream(stream2); base.registerHostMemory(r, numpart); base.registerHostMemory(p, numpart); base.registerHostMemory(x, numpart); base.registerHostMemory(dt, numpart); base.registerHostMemory(ls, numpart); //***test parallelttrackerpush***// void *r_ptr, *p_ptr, *x_ptr, *dt_ptr, *ls_ptr, *ori_ptr; //allocate memory on the device r_ptr = base.allocateMemory(numpart, ierr); p_ptr = base.allocateMemory(numpart, ierr); x_ptr = base.allocateMemory(numpart, ierr); dt_ptr = base.allocateMemory(numpart, ierr); ls_ptr = base.allocateMemory(numpart, ierr); ori_ptr = base.allocateMemory(5, ierr); //transfer data to device base.writeData(r_ptr, r, numpart); base.writeData(p_ptr, p, numpart); base.writeData(x_ptr, x, numpart); base.writeData(ori_ptr, ori, 5); //do some couple of integration loops before the timer is started for (int i = 0; i < 5; i++) { //calc push base.callParallelTTrackerPush (r_ptr, p_ptr, numpart, dt_ptr, 0.05, 1, false, stream1); //read R from device base.readDataAsync (r_ptr, r, numpart, stream1); //write LastSection to device base.writeDataAsync (ls_ptr, ls, numpart, stream2); //calc push base.callParallelTTrackerPushTransform(x_ptr, p_ptr, ls_ptr, ori_ptr, numpart, 5, dt_ptr, 0.05, 1, false, stream2); //read x from device base.readDataAsync(x_ptr, x, numpart, stream2); //sync and wait till all tasks and reads are complete base.syncDevice(); } checkSum(r, numpart); checkSum(x, numpart); //start the timing of integration struct timeval timeStart, timeEnd; std::cout << "start integration" << std::endl; gettimeofday(&timeStart, NULL); for (int i = 0; i < loop; i++) { //calc push base.callParallelTTrackerPush(r_ptr, p_ptr, numpart, dt_ptr, 0.05, 1, false, stream1); //read R from device base.readDataAsync (r_ptr, r, numpart, stream1); //write LastSection to device base.writeDataAsync (ls_ptr, ls, numpart, stream2); //calc push transform base.callParallelTTrackerPushTransform(x_ptr, p_ptr, ls_ptr, ori_ptr, numpart, 5, dt_ptr, 0.05, 1, false, stream2); //read R from device base.readDataAsync(x_ptr, x, numpart, stream2); //sync and wait till all tasks and reads are complete base.syncDevice(); } gettimeofday(&timeEnd, NULL); std::cout << "end integration" << std::endl; double t = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1000000 + (timeEnd.tv_usec - timeStart.tv_usec)); std::cout << "Time for " << numpart << " integrations: " << t * 1e-6 << "s" << std::endl; std::cout << "Average time for integration: " << t * 1e-6 / loop << std::endl; checkSum(r, numpart); checkSum(x, numpart); //free memory base.freeMemory(r_ptr, numpart); base.freeMemory(p_ptr, numpart); base.freeMemory(x_ptr, numpart); base.freeMemory(ori_ptr, 5); base.freeMemory(dt_ptr, numpart); base.freeMemory(ls_ptr, numpart); //unregister host memory base.unregisterHostMemory(r); base.unregisterHostMemory(p); base.unregisterHostMemory(x); base.unregisterHostMemory(dt); base.unregisterHostMemory(ls); //free host memory delete[] r; delete[] x; delete[] p; delete[] dt; delete[] ls; delete[] ori; cout << "==========================END TEST==========================" << endl; return 0; }