snapshot of svn

This commit is contained in:
Uldis Locans
2016-10-10 14:49:32 +02:00
commit 4fa529aaea
122 changed files with 23153 additions and 0 deletions

View File

@ -0,0 +1,250 @@
#include <iostream>
#include <iomanip>
#include <vector>
#include <sys/time.h>
#include "DKSBase.h"
#include <vector_types.h>
#include "cuda_runtime.h"
#include <omp.h>
using namespace std;
/**
 * Host-side particle container: one separately allocated array per
 * attribute. The struct only stores the pointers; allocation and release
 * are done by the code that uses it.
 */
struct PART {
  int *label;         // per-particle integer label
  unsigned *localID;  // per-particle index (initParts stores i in slot i)
  double *rx;         // rx, ry, rz: presumably position components - TODO confirm
  double *ry;
  double *rz;
  double *px;         // px, py, pz: presumably momentum components - TODO confirm
  double *py;
  double *pz;
};
/**
 * Fill the first npart entries of every particle array with the start
 * values used by this test.
 *
 * label, rx, ry, px and py are zeroed, localID[i] receives i, rz is set to
 * 0.02 and pz to 3.9920183237269791e-01. Nothing is written when
 * npart <= 0.
 *
 * @param label    per-particle labels (at least npart entries)
 * @param localID  per-particle ids (at least npart entries)
 * @param rx,ry,rz first group of three double arrays (at least npart entries each)
 * @param px,py,pz second group of three double arrays (at least npart entries each)
 * @param npart    number of entries to initialise
 */
void initParts(int *label, unsigned *localID, double *rx, double *ry, double *rz,
               double *px, double *py, double *pz, int npart) {
  const double startRz = 0.02;
  const double startPz = 3.9920183237269791e-01;

  for (int idx = 0; idx < npart; ++idx) {
    label[idx] = 0;
    localID[idx] = idx;

    rx[idx] = 0.0;
    ry[idx] = 0.0;
    rz[idx] = startRz;

    px[idx] = 0.0;
    py[idx] = 0.0;
    pz[idx] = startPz;
  }
}
/**
 * Write the twelve fixed parameter values used by this test into data.
 *
 * @param data destination array with room for at least 12 doubles;
 *             entries 0..11 are overwritten.
 */
void initParams(double *data) {
  static const double values[12] = {
    0.0,                     // alternative noted in the original source: 2.0000000000000000e-02
    1.0,                     // alternative noted in the original source: 1.0000000000000000e-02
    2.2100000000000000e+00,
    6.0000000000000000e+00,
    1.2010700000000000e+01,
    2.6010000000000000e+00,
    1.7010000000000000e+03,
    1.2790000000000000e+03,
    1.6379999999999999e-02,
    1.9321266968325795e-01,
    7.9000000000000000e+01,
    1.0000000000000002e-12
  };

  for (int slot = 0; slot < 12; ++slot) {
    data[slot] = values[slot];
  }
}
int main(int argc, char *argv[]) {
int loop = 10;
int numpart = 1e5;
char *api_name = new char[10];
char *device_name = new char[10];
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
for (int i = 1; i < argc; i++) {
if (argv[i] == string("-mic")) {
strcpy(api_name, "OpenMP");
strcpy(device_name, "-mic");
}
if (argv[i] == string("-npart")) {
numpart = atoi(argv[i+1]);
i++;
}
if (argv[i] == string("-loop")) {
loop = atoi(argv[i+1]);
i++;
}
}
int threads = 0;
/*
#pragma offload target(mic:0) out(threads)
{
#pragma omp parallel
{
threads = omp_get_num_threads();
}
}
*/
cout << "=========================BEGIN TEST=========================" << endl;
cout << "Use api: " << api_name << "\t" << device_name << endl;
cout << "Number of particles: " << numpart << endl;
cout << "Number of loops: " << loop << endl;
cout << "Number of threads: " << threads << endl;
cout << "------------------------------------------------------------" << endl;
//init part vector to test mc
//int *label;
//unsigned *localID;
//double *rx, *ry, *rz, *px, *py, *pz;
PART p;
p.label = (int*) _mm_malloc(sizeof(int)*numpart, 64);
p.localID = (unsigned*) _mm_malloc(sizeof(int)*numpart, 64);
p.rx = (double*) _mm_malloc(sizeof(double)*numpart, 64);
p.ry = (double*) _mm_malloc(sizeof(double)*numpart, 64);
p.rz = (double*) _mm_malloc(sizeof(double)*numpart, 64);
p.px = (double*) _mm_malloc(sizeof(double)*numpart, 64);
p.py = (double*) _mm_malloc(sizeof(double)*numpart, 64);
p.pz = (double*) _mm_malloc(sizeof(double)*numpart, 64);
initParts(p.label, p.localID, p.rx, p.ry, p.rz, p.px, p.py, p.pz, numpart);
double *params = new double[12];
initParams(params);
//init dks
int ierr;
DKSBase base;
base.setAPI(api_name, strlen(api_name));
base.setDevice(device_name, strlen(api_name));
base.initDevice();
//init random
base.callInitRandoms(numpart);
//**test collimator physics and sort***//
void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr;
//allocate memory for particles
label_ptr = base.allocateMemory<int>(numpart, ierr);
localID_ptr = base.allocateMemory<unsigned>(numpart, ierr);
rx_ptr = base.allocateMemory<double>(numpart, ierr);
ry_ptr = base.allocateMemory<double>(numpart, ierr);
rz_ptr = base.allocateMemory<double>(numpart, ierr);
px_ptr = base.allocateMemory<double>(numpart, ierr);
py_ptr = base.allocateMemory<double>(numpart, ierr);
pz_ptr = base.allocateMemory<double>(numpart, ierr);
param_ptr = base.allocateMemory<double>(12, ierr);
//transfer data to device
base.writeData<int>(label_ptr, p.label, numpart);
base.writeData<unsigned>(localID_ptr, p.localID, numpart);
base.writeData<double>(rx_ptr, p.rx, numpart);
base.writeData<double>(ry_ptr, p.ry, numpart);
base.writeData<double>(rz_ptr, p.rz, numpart);
base.writeData<double>(px_ptr, p.px, numpart);
base.writeData<double>(py_ptr, p.py, numpart);
base.writeData<double>(pz_ptr, p.pz, numpart);
//transfer params to device
base.writeData<double>(param_ptr, params, 12);
std::cout << "test runs" << std::endl;
int numaddback;
//test calls to do some first executions
base.callCollimatorPhysicsSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
py_ptr, pz_ptr, param_ptr, numpart);
base.callCollimatorPhysicsSortSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
py_ptr, pz_ptr, param_ptr, numpart, numaddback);
base.syncDevice();
struct timeval timeStart, timeEnd;
std::cout << "Start MC" << std::endl;
gettimeofday(&timeStart, NULL);
for (int i = 0; i < loop; i++) {
base.callCollimatorPhysicsSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
py_ptr, pz_ptr, param_ptr, numpart);
base.callCollimatorPhysicsSortSoA(label_ptr, localID_ptr, rx_ptr, ry_ptr, rz_ptr, px_ptr,
py_ptr, pz_ptr, param_ptr, numpart, numaddback);
base.syncDevice();
}
gettimeofday(&timeEnd, NULL);
std::cout << "addback: " << numaddback << std::endl;
std::cout << "End MC" << std::endl;
double t = ( (timeEnd.tv_sec - timeStart.tv_sec) * 1000000 +
(timeEnd.tv_usec - timeStart.tv_usec));
std::cout << "Time for " << numpart << " MC runs: " << t * 1e-6 << "s" << std::endl;
std::cout << "Average time for MC run: " << t * 1e-6 / loop << std::endl;
//read data from device
base.readData<int>(label_ptr, p.label, numpart);
base.readData<unsigned>(localID_ptr, p.localID, numpart);
base.readData<double>(rx_ptr, p.rx, numpart);
base.readData<double>(ry_ptr, p.ry, numpart);
base.readData<double>(rz_ptr, p.rz, numpart);
base.readData<double>(px_ptr, p.px, numpart);
base.readData<double>(py_ptr, p.py, numpart);
base.readData<double>(pz_ptr, p.pz, numpart);
//free memory
base.freeMemory<int>(label_ptr, numpart);
base.freeMemory<unsigned>(localID_ptr, numpart);
base.freeMemory<double>(rx_ptr, numpart);
base.freeMemory<double>(ry_ptr, numpart);
base.freeMemory<double>(rz_ptr, numpart);
base.freeMemory<double>(px_ptr, numpart);
base.freeMemory<double>(py_ptr, numpart);
base.freeMemory<double>(pz_ptr, numpart);
base.freeMemory<double>(param_ptr, 12);
/*
std::cout << std::fixed << std::setprecision(4);
for (int i = 0; i < 10; i++) {
std::cout << p.label[i] << "\t" << p.rx[i]
<< "\t" << p.ry[i] << "\t" << p.rz[i] << "\t" << p.px[i]
<< "\t" << p.py[i] << "\t" << p.pz[i] << std::endl;
}
std:: cout << "..." << std::endl;
for (int i = numpart - 10; i < numpart; i++) {
std::cout << p.label[i] << "\t" << p.rx[i]
<< "\t" << p.ry[i] << "\t" << p.rz[i] << "\t" << p.px[i]
<< "\t" << p.py[i] << "\t" << p.pz[i] << std::endl;
}
double arx = 0, ary = 0, arz = 0;
double apx = 0, apy = 0, apz = 0;
for (int i = 0; i < numpart; i++) {
arx += sqrt(p.rx[i] * p.rx[i]) / numpart;
ary += sqrt(p.ry[i] * p.ry[i]) / numpart;
arz += sqrt(p.rz[i] * p.rz[i]) / numpart;
apx += sqrt(p.px[i] * p.px[i]) / numpart;
apy += sqrt(p.py[i] * p.py[i]) / numpart;
apz += sqrt(p.pz[i] * p.pz[i]) / numpart;
}
std::cout << std::fixed << std::setprecision(10);
std::cout << "R (" << arx << ", " << ary << ", " << arz << ") " << std::endl
<< "P (" << apx << ", " << apy << ", " << apz << ") " << std::endl;
*/
cout << "==========================END TEST==========================" << endl;
return 0;
}