253 lines
7.6 KiB
C++
253 lines
7.6 KiB
C++
#include <iostream>
|
|
#include <iomanip>
|
|
|
|
#include <vector>
|
|
#include <sys/time.h>
|
|
|
|
#include "DKSBase.h"
|
|
|
|
#include <vector_types.h>
|
|
#include "cuda_runtime.h"
|
|
#include <omp.h>
|
|
|
|
using namespace std;
|
|
|
|
/**
 * Host-side particle container in structure-of-arrays layout: one array per
 * attribute, all indexed by the same particle index.
 *
 * The struct does not own the arrays; whoever allocates them must free them.
 * All pointers start out null so an unallocated PART is never left holding
 * indeterminate addresses.
 */
struct PART {
  int *label;        // per-particle label
  unsigned *localID; // per-particle local id
  double *rx;        // r components (x, y, z)
  double *ry;
  double *rz;
  double *px;        // p components (x, y, z)
  double *py;
  double *pz;

  PART()
    : label(0), localID(0),
      rx(0), ry(0), rz(0),
      px(0), py(0), pz(0) {}
};
|
|
|
|
|
|
/**
 * Fill the particle arrays with the fixed initial state used by this test.
 *
 * Every particle gets label 0, localID equal to its index, r = (0, 0, 0.02)
 * and p = (0, 0, 3.9920183237269791e-01).
 *
 * @param label    output array of npart labels
 * @param localID  output array of npart local ids
 * @param rx,ry,rz output arrays of npart r components
 * @param px,py,pz output arrays of npart p components
 * @param npart    number of particles; nothing is written when npart <= 0
 */
void initParts(int *label, unsigned *localID, double *rx, double *ry, double *rz,
               double *px, double *py, double *pz, int npart) {

  for (int i = 0; i < npart; i++) {
    label[i] = 0;
    // i is non-negative inside the loop, so the conversion is value-preserving
    localID[i] = static_cast<unsigned>(i);

    rx[i] = 0.0;
    ry[i] = 0.0;
    rz[i] = 0.02;

    px[i] = 0.0;
    py[i] = 0.0;
    pz[i] = 3.9920183237269791e-01;
  }
}
|
|
|
|
/**
 * Write the 12 fixed parameters that main() uploads to the device and passes
 * to the collimator physics calls.
 *
 * NOTE(review): the meaning of the individual slots is defined by the DKS
 * kernels, not by this file — confirm against the kernel source before
 * changing any value.
 *
 * @param data output array with room for at least 12 doubles
 */
void initParams(double *data) {
  static const int kNumParams = 12;
  static const double kParams[kNumParams] = {
    0.0,                    // [0]  alternate value: 2.0000000000000000e-02
    1.0,                    // [1]  alternate value: 1.0000000000000000e-02
    2.2100000000000000e+00, // [2]
    6.0000000000000000e+00, // [3]
    1.2010700000000000e+01, // [4]
    2.6010000000000000e+00, // [5]
    1.7010000000000000e+03, // [6]
    1.2790000000000000e+03, // [7]
    1.6379999999999999e-02, // [8]
    1.9321266968325795e-01, // [9]
    7.9000000000000000e+01, // [10]
    1.0000000000000002e-12  // [11]
  };

  for (int i = 0; i < kNumParams; i++) {
    data[i] = kParams[i];
  }
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
int loop = 10;
|
|
int numpart = 1e5;
|
|
char *api_name = new char[10];
|
|
char *device_name = new char[10];
|
|
strcpy(api_name, "Cuda");
|
|
strcpy(device_name, "-gpu");
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
|
|
if (argv[i] == string("-mic")) {
|
|
strcpy(api_name, "OpenMP");
|
|
strcpy(device_name, "-mic");
|
|
}
|
|
|
|
if (argv[i] == string("-npart")) {
|
|
numpart = atoi(argv[i+1]);
|
|
i++;
|
|
}
|
|
|
|
if (argv[i] == string("-loop")) {
|
|
loop = atoi(argv[i+1]);
|
|
i++;
|
|
}
|
|
|
|
}
|
|
|
|
int threads = 0;
|
|
/*
|
|
#pragma offload target(mic:0) out(threads)
|
|
{
|
|
#pragma omp parallel
|
|
{
|
|
threads = omp_get_num_threads();
|
|
}
|
|
}
|
|
*/
|
|
|
|
cout << "=========================BEGIN TEST=========================" << endl;
|
|
cout << "Use api: " << api_name << "\t" << device_name << endl;
|
|
cout << "Number of particles: " << numpart << endl;
|
|
cout << "Number of loops: " << loop << endl;
|
|
cout << "Number of threads: " << threads << endl;
|
|
cout << "------------------------------------------------------------" << endl;
|
|
|
|
//init part vector to test mc
|
|
//int *label;
|
|
//unsigned *localID;
|
|
//double *rx, *ry, *rz, *px, *py, *pz;
|
|
PART p;
|
|
p.label = (int*) _mm_malloc(sizeof(int)*numpart, 64);
|
|
p.localID = (unsigned*) _mm_malloc(sizeof(int)*numpart, 64);
|
|
p.rx = (double*) _mm_malloc(sizeof(double)*numpart, 64);
|
|
p.ry = (double*) _mm_malloc(sizeof(double)*numpart, 64);
|
|
p.rz = (double*) _mm_malloc(sizeof(double)*numpart, 64);
|
|
p.px = (double*) _mm_malloc(sizeof(double)*numpart, 64);
|
|
p.py = (double*) _mm_malloc(sizeof(double)*numpart, 64);
|
|
p.pz = (double*) _mm_malloc(sizeof(double)*numpart, 64);
|
|
initParts(p.label, p.localID, p.rx, p.ry, p.rz, p.px, p.py, p.pz, numpart);
|
|
|
|
double *params = new double[12];
|
|
initParams(params);
|
|
|
|
//init dks
|
|
int ierr;
|
|
DKSBase base;
|
|
base.setAPI(api_name, strlen(api_name));
|
|
base.setDevice(device_name, strlen(api_name));
|
|
base.initDevice();
|
|
|
|
//init random
|
|
base.callInitRandoms(numpart);
|
|
|
|
|
|
//**test collimator physics and sort***//
|
|
|
|
void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr;
|
|
|
|
//allocate memory for particles
|
|
label_ptr = base.allocateMemory<int>(numpart, ierr);
|
|
localID_ptr = base.allocateMemory<unsigned>(numpart, ierr);
|
|
rx_ptr = base.allocateMemory<double>(numpart, ierr);
|
|
ry_ptr = base.allocateMemory<double>(numpart, ierr);
|
|
rz_ptr = base.allocateMemory<double>(numpart, ierr);
|
|
px_ptr = base.allocateMemory<double>(numpart, ierr);
|
|
py_ptr = base.allocateMemory<double>(numpart, ierr);
|
|
pz_ptr = base.allocateMemory<double>(numpart, ierr);
|
|
|
|
param_ptr = base.allocateMemory<double>(12, ierr);
|
|
|
|
//transfer data to device
|
|
base.writeData<int>(label_ptr, p.label, numpart);
|
|
base.writeData<unsigned>(localID_ptr, p.localID, numpart);
|
|
base.writeData<double>(rx_ptr, p.rx, numpart);
|
|
base.writeData<double>(ry_ptr, p.ry, numpart);
|
|
base.writeData<double>(rz_ptr, p.rz, numpart);
|
|
base.writeData<double>(px_ptr, p.px, numpart);
|
|
base.writeData<double>(py_ptr, p.py, numpart);
|
|
base.writeData<double>(pz_ptr, p.pz, numpart);
|
|
|
|
//transfer params to device
|
|
base.writeData<double>(param_ptr, params, 12);
|
|
|
|
std::cout << "test runs" << std::endl;
|
|
|
|
int numaddback;
|
|
//test calls to do some first executions
|
|
base.callCollimatorPhysicsSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
|
|
py_ptr, pz_ptr, param_ptr, numpart);
|
|
base.callCollimatorPhysicsSortSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
|
|
py_ptr, pz_ptr, param_ptr, numpart, numaddback);
|
|
base.syncDevice();
|
|
|
|
struct timeval timeStart, timeEnd;
|
|
std::cout << "Start MC" << std::endl;
|
|
|
|
gettimeofday(&timeStart, NULL);
|
|
for (int i = 0; i < loop; i++) {
|
|
base.callCollimatorPhysicsSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
|
|
py_ptr, pz_ptr, param_ptr, numpart);
|
|
base.callCollimatorPhysicsSortSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
|
|
py_ptr, pz_ptr, param_ptr, numpart, numaddback);
|
|
base.syncDevice();
|
|
}
|
|
gettimeofday(&timeEnd, NULL);
|
|
|
|
std::cout << "addback: " << numaddback << std::endl;
|
|
|
|
std::cout << "End MC" << std::endl;
|
|
double t = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1000000 +
|
|
(timeEnd.tv_usec - timeStart.tv_usec));
|
|
|
|
std::cout << "Time for " << numpart << " MC runs: " << t * 1e-6 << "s" << std::endl;
|
|
std::cout << "Average time for MC run: " << t * 1e-6 / loop << std::endl;
|
|
|
|
//read data from device
|
|
base.readData<int>(label_ptr, p.label, numpart);
|
|
base.readData<unsigned>(localID_ptr, p.localID, numpart);
|
|
base.readData<double>(rx_ptr, p.rx, numpart);
|
|
base.readData<double>(ry_ptr, p.ry, numpart);
|
|
base.readData<double>(rz_ptr, p.rz, numpart);
|
|
base.readData<double>(px_ptr, p.px, numpart);
|
|
base.readData<double>(py_ptr, p.py, numpart);
|
|
base.readData<double>(pz_ptr, p.pz, numpart);
|
|
|
|
//free memory
|
|
base.freeMemory<int>(label_ptr, numpart);
|
|
base.freeMemory<unsigned>(localID_ptr, numpart);
|
|
base.freeMemory<double>(rx_ptr, numpart);
|
|
base.freeMemory<double>(ry_ptr, numpart);
|
|
base.freeMemory<double>(rz_ptr, numpart);
|
|
base.freeMemory<double>(px_ptr, numpart);
|
|
base.freeMemory<double>(py_ptr, numpart);
|
|
base.freeMemory<double>(pz_ptr, numpart);
|
|
|
|
base.freeMemory<double>(param_ptr, 12);
|
|
|
|
/*
|
|
std::cout << std::fixed << std::setprecision(4);
|
|
for (int i = 0; i < 10; i++) {
|
|
std::cout << p.label[i] << "\t" << p.rx[i]
|
|
<< "\t" << p.ry[i] << "\t" << p.rz[i] << "\t" << p.px[i]
|
|
<< "\t" << p.py[i] << "\t" << p.pz[i] << std::endl;
|
|
}
|
|
std:: cout << "..." << std::endl;
|
|
|
|
for (int i = numpart - 10; i < numpart; i++) {
|
|
std::cout << p.label[i] << "\t" << p.rx[i]
|
|
<< "\t" << p.ry[i] << "\t" << p.rz[i] << "\t" << p.px[i]
|
|
<< "\t" << p.py[i] << "\t" << p.pz[i] << std::endl;
|
|
}
|
|
|
|
double arx = 0, ary = 0, arz = 0;
|
|
double apx = 0, apy = 0, apz = 0;
|
|
for (int i = 0; i < numpart; i++) {
|
|
|
|
arx += sqrt(p.rx[i] * p.rx[i]) / numpart;
|
|
ary += sqrt(p.ry[i] * p.ry[i]) / numpart;
|
|
arz += sqrt(p.rz[i] * p.rz[i]) / numpart;
|
|
|
|
apx += sqrt(p.px[i] * p.px[i]) / numpart;
|
|
apy += sqrt(p.py[i] * p.py[i]) / numpart;
|
|
apz += sqrt(p.pz[i] * p.pz[i]) / numpart;
|
|
|
|
}
|
|
|
|
std::cout << std::fixed << std::setprecision(10);
|
|
std::cout << "R (" << arx << ", " << ary << ", " << arz << ") " << std::endl
|
|
<< "P (" << apx << ", " << apy << ", " << apz << ") " << std::endl;
|
|
*/
|
|
cout << "==========================END TEST==========================" << endl;
|
|
return 0;
|
|
|
|
}
|