886 lines
26 KiB
C++
886 lines
26 KiB
C++
#include "MICCollimatorPhysics.h"
|
|
|
|
#define M_P 0.93827231e+00
|
|
#define C 299792458.0
|
|
#define PI 3.14159265358979323846
|
|
#define AVO 6.022e23
|
|
#define R_E 2.81794092e-15
|
|
#define eM_E 0.51099906e-03
|
|
#define Z_P 1
|
|
#define K 4.0*PI*AVO*R_E*R_E*eM_E*1e7
|
|
|
|
#define POSITION 0
|
|
#define ZSIZE 1
|
|
#define RHO_M 2
|
|
#define Z_M 3
|
|
#define A_M 4
|
|
#define A2_C 5
|
|
#define A3_C 6
|
|
#define A4_C 7
|
|
#define A5_C 8
|
|
#define X0_M 9
|
|
#define I_M 10
|
|
#define DT_M 11
|
|
|
|
__declspec(target(mic))
|
|
double dot(mic_double3 d1, mic_double3 d2) {
|
|
return (d1.x * d2.x + d1.y * d2.y + d1.z * d2.z);
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
double dot(double dx, double dy, double dz) {
|
|
return (dx * dx + dy * dy + dz * dz);
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
bool checkHit(double &z, double *par) {
|
|
return ( (z > par[POSITION]) && ( z <= par[POSITION] + par[ZSIZE]) );
|
|
}
|
|
|
|
|
|
__declspec(target(mic))
|
|
void Rot(double &px, double &pz, double &x, double &z, double xplane,
|
|
double normP, double thetacou, double deltas, int coord)
|
|
{
|
|
double Psixz = 1;
|
|
double pxz = 1;
|
|
|
|
if ( px >= 0 && pz >= 0 )
|
|
Psixz = atan(px/pz);
|
|
else if ( px > 0 && pz < 0 )
|
|
Psixz = atan(px/pz) + PI;
|
|
else if (px < 0 && pz > 0)
|
|
Psixz = atan(px/pz) + 2*PI;
|
|
else
|
|
Psixz = atan(px/pz) + PI;
|
|
|
|
pxz = sqrt(px*px + pz*pz);
|
|
|
|
if(coord == 1) {
|
|
x = x + deltas * px / normP + xplane*cos(Psixz);
|
|
z = z - xplane * sin(Psixz);
|
|
}
|
|
|
|
if(coord == 2) {
|
|
x = x + deltas * px / normP + xplane * cos(Psixz);
|
|
z = z - xplane * sin(Psixz) + deltas * pz / normP;
|
|
}
|
|
|
|
px = pxz*cos(Psixz)*sin(thetacou) + pxz*sin(Psixz)*cos(thetacou);
|
|
pz = -pxz*sin(Psixz)*sin(thetacou) + pxz*cos(Psixz)*cos(thetacou);
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
void coulombScat(mic_double3 &R, mic_double3 &P, double *par, VSLStreamStatePtr &stream) {
|
|
double Eng = sqrt(dot(P, P) + 1.0) * M_P - M_P;
|
|
double gamma = (Eng + M_P) / M_P;
|
|
double normP = sqrt(dot(P, P));
|
|
double beta = sqrt(1.0 - 1.0 / (gamma * gamma));
|
|
double deltas = par[DT_M] * beta * C;
|
|
|
|
double theta0 = 13.6e6 / (beta * normP * M_P * 1e9) *
|
|
Z_P * sqrt(deltas / par[X0_M]) * (1.0 + 0.038 * log(deltas / par[X0_M]));
|
|
|
|
// x-direction: See Physical Review, "Multiple Scattering"
|
|
double z1, z2;
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z1, 0.0, 1.0 );
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z2, 0.0, 1.0 );
|
|
double thetacou = z2 * theta0;
|
|
|
|
while(fabs(thetacou) > 3.5 * theta0) {
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z1, 0.0, 1.0 );
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z2, 0.0, 1.0 );
|
|
thetacou = z2 * theta0;
|
|
}
|
|
|
|
double xplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
|
|
Rot(P.x, P.z, R.x, R.z, xplane, normP, thetacou, deltas, 1);
|
|
|
|
double P2;//curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, &P2, 0, 1);
|
|
if(P2 < 0.0047) {
|
|
double P3, P4;
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, &P3, 0, 1);
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, &P4, 0, 1);
|
|
double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
|
|
|
|
if(P4 > 0.5)
|
|
thetaru = -thetaru;
|
|
Rot(P.x ,P.z, R.x, R.z, xplane, normP, thetaru, deltas, 0);
|
|
}
|
|
|
|
// y-direction: See Physical Review, "Multiple Scattering"
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z1, 0.0, 1.0 );
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z2, 0.0, 1.0 );
|
|
thetacou = z2 * theta0;
|
|
|
|
while(fabs(thetacou) > 3.5 * theta0) {
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z1, 0.0, 1.0 );
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z2, 0.0, 1.0 );
|
|
thetacou = z2 * theta0;
|
|
}
|
|
|
|
double yplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
|
|
Rot(P.y, P.z, R.y, R.z, yplane, normP, thetacou, deltas, 2);
|
|
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, &P2, 0, 1);
|
|
if(P2 < 0.0047) {
|
|
double P3, P4;
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, &P3, 0, 1);
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, &P4, 0, 1);
|
|
double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
|
|
if(P4 > 0.5)
|
|
thetaru = -thetaru;
|
|
Rot(P.y, P.z, R.y, R.z, yplane, normP, thetaru, deltas, 0);
|
|
}
|
|
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
void coulombScat(double *rx, double *ry, double *rz, double *px, double *py, double *pz, int *label,
|
|
double *par, VSLStreamStatePtr &stream, int ii, int size)
|
|
{
|
|
|
|
double normP[MIC_WIDTH] __attribute__((aligned(64)));
|
|
double deltas[MIC_WIDTH] __attribute__((aligned(64)));
|
|
double theta0[MIC_WIDTH] __attribute__((aligned(64)));
|
|
double P1[MIC_WIDTH] __attribute__((aligned(64)));
|
|
double P2[MIC_WIDTH] __attribute__((aligned(64)));
|
|
double P3[MIC_WIDTH] __attribute__((aligned(64)));
|
|
|
|
double z1[MIC_WIDTH] __attribute__((aligned(64)));
|
|
double z2[MIC_WIDTH] __attribute__((aligned(64)));
|
|
double thetacou[MIC_WIDTH] __attribute__((aligned(64)));
|
|
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
int idx = i - ii;
|
|
if (label[i] == 0) {
|
|
double dotp = dot(px[i], py[i], pz[i]);
|
|
double Eng = sqrt(dotp + 1.0) * M_P - M_P;
|
|
double gamma = (Eng + M_P) / M_P;
|
|
double beta = sqrt(1.0 - 1.0 / (gamma * gamma));
|
|
|
|
normP[idx] = sqrt(dotp);
|
|
deltas[idx] = par[DT_M] * beta * C;
|
|
theta0[idx] = 13.6e6 / (beta * normP[idx] * M_P * 1e9) *
|
|
Z_P * sqrt(deltas[idx] / par[X0_M]) * (1.0 + 0.038 * log(deltas[idx] / par[X0_M]));
|
|
}
|
|
}
|
|
|
|
// x-direction: See Physical Review, "Multiple Scattering"
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, MIC_WIDTH, z1, 0.0, 1.0);
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, MIC_WIDTH, z2, 0.0, 1.0);
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + size; i++) {
|
|
int idx = i - ii;
|
|
thetacou[idx] = z2[idx] * theta0[idx];
|
|
}
|
|
|
|
//unknown number of iterations, cannot vectorize
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
int idx = i - ii;
|
|
if (label[i] == 0) {
|
|
while(fabs(thetacou[idx]) > 3.5 * theta0[idx]) {
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z1[idx], 0.0, 1.0 );
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z2[idx], 0.0, 1.0 );
|
|
thetacou[idx] = z2[idx] * theta0[idx];
|
|
}
|
|
}
|
|
}
|
|
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + size; i++) {
|
|
int idx = i - ii;
|
|
if (label[i] == 0) {
|
|
double xplane = z1[idx] * deltas[idx] * theta0[idx] / sqrt(12.0) +
|
|
z2[idx] * deltas[idx] * theta0[idx] / 2.0;
|
|
Rot(px[i], pz[i], rx[i], rz[i], xplane, normP[idx], thetacou[idx], deltas[idx], 1);
|
|
}
|
|
}
|
|
|
|
|
|
//generate array of random numbers
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, MIC_WIDTH, P1, 0, 1);
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, MIC_WIDTH, P2, 0, 1);
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, MIC_WIDTH, P3, 0, 1);
|
|
|
|
//P2 = P[i], P3 = P[i+WIDTH], P4 = P[i+2*WIDTH]
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
int idx = i - ii;
|
|
if (label[i] == 0) {
|
|
if(P1[idx] < 0.0047) {
|
|
double thetaru = 2.5 * sqrt(1 / P2[idx]) * sqrt(2.0) * theta0[idx];
|
|
|
|
if(P3[idx] > 0.5)
|
|
thetaru = -thetaru;
|
|
|
|
Rot(px[i] ,pz[i], rx[i], rz[i], 0, 0, thetaru, 0, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
// y-direction: See Physical Review, "Multiple Scattering"
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, MIC_WIDTH, z1, 0.0, 1.0);
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, MIC_WIDTH, z2, 0.0, 1.0);
|
|
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
int idx = i - ii;
|
|
thetacou[idx] = z2[idx] * theta0[idx];
|
|
}
|
|
|
|
//unknown number of iterations, cannot vectorize
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
int idx = i - ii;
|
|
if (label[i] == 0) {
|
|
while(fabs(thetacou[idx]) > 3.5 * theta0[idx]) {
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z1[idx], 0.0, 1.0 );
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &z2[idx], 0.0, 1.0 );
|
|
thetacou[idx] = z2[idx] * theta0[idx];
|
|
}
|
|
}
|
|
}
|
|
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
int idx = i - ii;
|
|
if (label[i] == 0) {
|
|
double yplane = z1[idx] * deltas[idx] * theta0[idx] / sqrt(12.0)
|
|
+ z2[idx] * deltas[idx] * theta0[idx] / 2.0;
|
|
Rot(py[i], pz[i], ry[i], rz[i], yplane, normP[idx], thetacou[idx], deltas[idx], 2);
|
|
}
|
|
}
|
|
|
|
//generate array of random numbers
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, MIC_WIDTH, P1, 0, 1);
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, MIC_WIDTH, P2, 0, 1);
|
|
vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, MIC_WIDTH, P3, 0, 1);
|
|
|
|
//P2 = P[i], P3 = P[i+WIDTH], P4 = P[i+2*WIDTH]
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
int idx = i - ii;
|
|
if (label[i] == 0) {
|
|
if(P1[idx] < 0.0047) {
|
|
double thetaru = 2.5 * sqrt(1 / P2[idx]) * sqrt(2.0) * theta0[idx];
|
|
if(P3[idx] > 0.5)
|
|
thetaru = -thetaru;
|
|
Rot(py[i], pz[i], ry[i], rz[i], 0, 0, thetaru, 0, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
void energyLoss(double &Eng, int &pdead, double *par, VSLStreamStatePtr &stream) {
|
|
|
|
double dEdx = 0.0;
|
|
const double gamma = (Eng + M_P) / M_P;
|
|
const double gamma2 = gamma * gamma;
|
|
const double beta = sqrt(1.0 - 1.0 / gamma2);
|
|
const double beta2 = beta * beta;
|
|
|
|
const double deltas = par[DT_M] * beta * C;
|
|
const double deltasrho = deltas * 100 * par[RHO_M];
|
|
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
|
|
|
|
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
|
|
const double Ts = (Eng * 1E6) / 1.0073;
|
|
const double epsilon_low = par[A2_C] * pow(Ts, 0.45);
|
|
const double epsilon_high = (par[A3_C] / Ts) * log( 1 + ( par[A4_C] / Ts) + (par[A5_C] *Ts) );
|
|
const double epsilon = (epsilon_low * epsilon_high) / (epsilon_low + epsilon_high);
|
|
|
|
dEdx = -epsilon / (1E21 * (par[A_M] / AVO) );
|
|
|
|
double tmprnd;
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &tmprnd, 0.0, sigma_E );
|
|
const double delta_E = deltasrho * dEdx + tmprnd;
|
|
Eng = Eng + delta_E / 1E3;
|
|
}
|
|
|
|
if (Eng >= 0.0006) {
|
|
const double Tmax = 2.0 * eM_E * 1e9 * beta2 * gamma2 /
|
|
(1.0 + 2.0 * gamma * eM_E / M_P + (eM_E / M_P) * (eM_E / M_P));
|
|
|
|
dEdx = -K * Z_P * Z_P * par[Z_M] / (par[A_M] * beta2) *
|
|
(1.0 / 2.0 * log(2 * eM_E * 1e9 * beta2 * gamma2 *
|
|
Tmax / par[I_M] / par[I_M]) - beta2);
|
|
|
|
double tmprnd;
|
|
vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 1, &tmprnd, 0.0, sigma_E );
|
|
const double delta_E = deltasrho * dEdx + tmprnd;
|
|
|
|
Eng = Eng + delta_E / 1E3;
|
|
}
|
|
|
|
|
|
if ((Eng<1E-4) || (dEdx>0))
|
|
pdead = 1;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {
|
|
|
|
const double gamma = (Eng + M_P) / M_P;
|
|
const double gamma2 = gamma * gamma;
|
|
const double beta = sqrt(1.0 - 1.0 / gamma2);
|
|
const double beta2 = beta * beta;
|
|
|
|
const double deltas = par[DT_M] * beta * C;
|
|
const double deltasrho = deltas * 100 * par[RHO_M];
|
|
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
|
|
|
|
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
|
|
const double Ts = (Eng * 1E6) / 1.0073;
|
|
const double epsilon_low = par[A2_C] * pow(Ts, 0.45);
|
|
const double epsilon_high = (par[A3_C] / Ts) * log( 1 + ( par[A4_C] / Ts) + (par[A5_C] *Ts) );
|
|
const double epsilon = (epsilon_low * epsilon_high) / (epsilon_low + epsilon_high);
|
|
|
|
dEdx = -epsilon / (1E21 * (par[A_M] / AVO) );
|
|
|
|
const double delta_E = deltasrho * dEdx + sigma_E * randv[ri];
|
|
|
|
Eng = Eng + delta_E / 1E3;
|
|
}
|
|
|
|
if (Eng >= 0.0006) {
|
|
const double Tmax = 2.0 * eM_E * 1e9 * beta2 * gamma2 /
|
|
(1.0 + 2.0 * gamma * eM_E / M_P + (eM_E / M_P) * (eM_E / M_P));
|
|
|
|
dEdx = -K * Z_P * Z_P * par[Z_M] / (par[A_M] * beta2) *
|
|
(1.0 / 2.0 * log(2 * eM_E * 1e9 * beta2 * gamma2 *
|
|
Tmax / par[I_M] / par[I_M]) - beta2);
|
|
|
|
const double delta_E = deltasrho * dEdx + sigma_E * randv[ri + MIC_WIDTH];
|
|
|
|
Eng = Eng + delta_E / 1E3;
|
|
}
|
|
|
|
}
|
|
|
|
int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles) {
|
|
|
|
//cast device memory pointers to appropriate types
|
|
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
|
|
double *par = (double*) par_ptr;
|
|
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
|
|
|
|
#pragma offload target(mic:m_micbase->m_device_id) \
|
|
inout(data:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(par:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(numparticles)
|
|
{
|
|
|
|
#pragma omp parallel
|
|
{
|
|
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
|
|
|
|
//for loop trough particles if not checkhit set label to -2 and update R.x
|
|
|
|
#pragma omp for simd
|
|
for (int i = 0; i < numparticles; i++) {
|
|
if ( !checkHit(data[i].Rincol.z, par) ) {
|
|
double sq = sqrt(1.0 + dot(data[i].Pincol, data[i].Pincol));
|
|
data[i].Rincol.x = data[i].Rincol.x + par[DT_M] * C * data[i].Pincol.x / sq;
|
|
data[i].Rincol.y = data[i].Rincol.y + par[DT_M] * C * data[i].Pincol.y / sq;
|
|
data[i].Rincol.z = data[i].Rincol.z + par[DT_M] * C * data[i].Pincol.z / sq;
|
|
data[i].label = -2;
|
|
}
|
|
}
|
|
|
|
//for loop trough particles if label == 0 eneregy loss and if pdead update label to -1
|
|
#pragma omp for simd
|
|
for (int i = 0; i < numparticles; i++) {
|
|
|
|
int pdead = -1;
|
|
double sq = sqrt(1.0 + dot(data[i].Pincol, data[i].Pincol));
|
|
double Eng = (sq - 1) * M_P;
|
|
|
|
if (data[i].label == 0) {
|
|
energyLoss(Eng, pdead, par, stream);
|
|
}
|
|
|
|
if (pdead == -1) {
|
|
double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
|
|
sq = sqrt(dot(data[i].Pincol, data[i].Pincol));
|
|
data[i].Pincol.x = data[i].Pincol.x * ptot / sq;
|
|
data[i].Pincol.y = data[i].Pincol.y * ptot / sq;
|
|
data[i].Pincol.z = data[i].Pincol.z * ptot / sq;
|
|
}
|
|
|
|
if (pdead == 1)
|
|
data[i].label = -1;
|
|
}
|
|
|
|
//for loop trough particles if label == 0 coulomb scat
|
|
#pragma omp for
|
|
for (int i = 0; i < numparticles; i++) {
|
|
if (data[i].label == 0) {
|
|
coulombScat(data[i].Rincol, data[i].Pincol, par, stream);
|
|
}
|
|
}
|
|
|
|
} //end omp parallel
|
|
|
|
} //end offload
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
|
|
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
|
void *px_ptr, void *py_ptr, void *pz_ptr,
|
|
void *par_ptr, int numparticles)
|
|
{
|
|
|
|
|
|
|
|
int *label = (int*)label_ptr;
|
|
unsigned *localID = (unsigned*)localID_ptr;
|
|
double *rx = (double*)rx_ptr;
|
|
double *ry = (double*)ry_ptr;
|
|
double *rz = (double*)rz_ptr;
|
|
double *px = (double*)px_ptr;
|
|
double *py = (double*)py_ptr;
|
|
double *pz = (double*)pz_ptr;
|
|
double *par = (double*)par_ptr;
|
|
|
|
int padding = numparticles % MIC_WIDTH;
|
|
int totalpart = numparticles + padding;
|
|
|
|
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
|
|
|
|
#pragma offload target (mic:0) \
|
|
in(label:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(localID:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(rx:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(ry:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(rz:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(px:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(py:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(pz:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(par:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(totalpart)
|
|
{
|
|
|
|
|
|
#pragma omp parallel
|
|
{
|
|
//every thread gets its own rnd stream state
|
|
//VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
|
|
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
|
|
|
|
#pragma omp for nowait
|
|
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
|
|
//vectorize main loop
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
if ( !checkHit(rz[i], par) ) {
|
|
double sq = sqrt(1.0 + dot(px[i], py[i], pz[i]));
|
|
rx[i] = rx[i] + par[DT_M] * C * px[i] / sq;
|
|
ry[i] = ry[i] + par[DT_M] * C * py[i] / sq;
|
|
rz[i] = rz[i] + par[DT_M] * C * pz[i] / sq;
|
|
label[i] = -2;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//array of size 2*WIDTH for storing random values for the energyloss function
|
|
double randv[2*MIC_WIDTH] __attribute__((aligned(64)));
|
|
|
|
//for loop trough particles if label == 0 eneregy loss and if pdead update label to -1
|
|
#pragma omp for nowait
|
|
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
|
|
//create array of rand values (2 per thread)
|
|
vdRngGaussian (VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, 2*MIC_WIDTH, randv, 0.0, 1.0);
|
|
|
|
#pragma vector aligned
|
|
#pragma simd
|
|
for (int i = ii; i < ii + MIC_WIDTH; i++) {
|
|
|
|
double sq = sqrt(1.0 + dot(px[i], py[i], pz[i]));
|
|
double Eng = (sq - 1) * M_P;
|
|
double dEdx = 0;
|
|
|
|
|
|
if (label[i] == 0) {
|
|
energyLoss(Eng, dEdx, par, randv, i - ii);
|
|
}
|
|
|
|
|
|
if (Eng > 1e-4 && dEdx < 0) {
|
|
double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
|
|
sq = sqrt(dot(px[i], py[i], pz[i]));
|
|
px[i] = px[i] * ptot / sq;
|
|
py[i] = py[i] * ptot / sq;
|
|
pz[i] = pz[i] * ptot / sq;
|
|
}
|
|
|
|
if (Eng < 1e-4 || dEdx > 0)
|
|
label[i] = -1;
|
|
|
|
} //end inner energy loss loop
|
|
|
|
} //end outer energy loss loop
|
|
|
|
|
|
//vectorize coulomb scattering as much as possible
|
|
#pragma omp for nowait
|
|
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
|
|
coulombScat(rx, ry, rz, px, py, pz, label, par, stream, ii, MIC_WIDTH);
|
|
} //end coulomb scattering
|
|
|
|
} //end omp parallel
|
|
|
|
} //end offload
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
int MICCollimatorPhysics::CollimatorPhysicsSort(void *mem_ptr, int numparticles,
|
|
int &numaddback)
|
|
{
|
|
|
|
//cast device memory pointers to appropriate types
|
|
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
|
|
int privateback;
|
|
|
|
#pragma offload target(mic:m_micbase->m_device_id) \
|
|
in(data:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(numparticles) \
|
|
out(privateback)
|
|
{
|
|
//count dead and addback particles
|
|
int privateback = 0;
|
|
#pragma omp parallel for reduction(+:privateback)
|
|
for (int i = 0; i < numparticles; i++) {
|
|
if (data[i].label < 0)
|
|
privateback++;
|
|
}
|
|
//move particles with label < 0 to the end of the array (serial. can we do this parallel?)
|
|
if (privateback > 0) {
|
|
|
|
int moved = 0;
|
|
for (int i = numparticles - 1; i > 0; i--) {
|
|
if (data[i].label < 0) {
|
|
int idx = numparticles - 1 - moved;
|
|
if (i != idx) {
|
|
MIC_PART_SMALL tmp = data[i];
|
|
data[i] = data[idx];
|
|
data[idx] = tmp;
|
|
}
|
|
moved++;
|
|
}
|
|
}
|
|
}
|
|
numaddback = privateback;
|
|
}
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
void micmove(double &a, double &b) {
|
|
double tmp = a;
|
|
a = b;
|
|
b = tmp;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
void micmove(int &a, int &b) {
|
|
int tmp = a;
|
|
a = b;
|
|
b = tmp;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
void micmove(unsigned &a, unsigned &b) {
|
|
unsigned tmp = a;
|
|
a = b;
|
|
b = tmp;
|
|
}
|
|
|
|
|
|
int MICCollimatorPhysics::CollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
|
|
void *rx_ptr, void *ry_ptr, void *rz_ptr,
|
|
void *px_ptr, void *py_ptr, void *pz_ptr,
|
|
void *par_ptr, int numparticles,
|
|
int &numaddback)
|
|
{
|
|
|
|
int *label = (int*)label_ptr;
|
|
unsigned *localID = (unsigned*)localID_ptr;
|
|
double *rx = (double*)rx_ptr;
|
|
double *ry = (double*)ry_ptr;
|
|
double *rz = (double*)rz_ptr;
|
|
double *px = (double*)px_ptr;
|
|
double *py = (double*)py_ptr;
|
|
double *pz = (double*)pz_ptr;
|
|
double *par = (double*)par_ptr;
|
|
|
|
//int padding = numparticles % WIDTH;
|
|
//int totalpart = numparticles + padding;
|
|
|
|
int privateback;
|
|
|
|
#pragma offload target (mic:0) \
|
|
in(label:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(localID:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(rx:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(ry:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(rz:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(px:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(py:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(pz:length(0) DKS_REUSE DKS_RETAIN) \
|
|
in(par:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(numparticles) \
|
|
out(privateback)
|
|
{
|
|
|
|
//count dead and addback particles
|
|
int privateback = 0;
|
|
#pragma omp parallel for reduction(+:privateback)
|
|
for (int i = 0; i < numparticles; i++) {
|
|
if (label[i] < 0)
|
|
privateback++;
|
|
}
|
|
|
|
//move particles with label < 0 to the end of the array (serial. can we do this parallel?)
|
|
if (privateback > 0) {
|
|
int moved = 0;
|
|
for (int i = numparticles - 1; i >= 0; i--) {
|
|
if (label[i] < 0) {
|
|
int idx = numparticles - 1 - moved;
|
|
if (i != idx) {
|
|
micmove(rx[i], rx[idx]);
|
|
micmove(ry[i], ry[idx]);
|
|
micmove(rz[i], rz[idx]);
|
|
micmove(px[i], px[idx]);
|
|
micmove(py[i], py[idx]);
|
|
micmove(pz[i], pz[idx]);
|
|
micmove(label[i], label[idx]);
|
|
micmove(localID[i], localID[idx]);
|
|
}
|
|
moved++;
|
|
}
|
|
}
|
|
}
|
|
numaddback = privateback;
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
inline void unitlessOff(mic_double3 &a, const double c) {
|
|
a.x *= c;
|
|
a.y *= c;
|
|
a.z *= c;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
inline void unitlessOn(mic_double3 &a, const double c) {
|
|
a.x /= c;
|
|
a.y /= c;
|
|
a.z /= c;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
mic_double3 deviceTransformTo(const mic_double3 &vec, const mic_double3 &ori) {
|
|
const double sina = sin(ori.x);
|
|
const double cosa = cos(ori.x);
|
|
const double sinb = sin(ori.y);
|
|
const double cosb = cos(ori.y);
|
|
const double sinc = sin(ori.z);
|
|
const double cosc = cos(ori.z);
|
|
|
|
mic_double3 temp;
|
|
temp.x = 0.0;
|
|
temp.y = 0.0;
|
|
temp.z = 0.0;
|
|
|
|
temp.x = (cosa * cosc) * vec.x + (cosa * sinc) * vec.y - sina * vec.z;
|
|
temp.y = (-cosb * sinc - sina * sinb * cosc) * vec.x +
|
|
(cosb * cosc - sina * sinb * sinc) * vec.y - cosa * sinb * vec.z;
|
|
temp.z = (-sinb * sinc + sina * cosb * cosc) * vec.x +
|
|
(sinb * cosc + sina * cosb * sinc) * vec.y + cosa * cosb * vec.z;
|
|
|
|
return temp;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
inline void updateR(mic_double3 &R, mic_double3 &P, double dotp, double dtc) {
|
|
R.x /= dtc;
|
|
R.x += 0.5 * P.x / dotp;
|
|
R.x *= dtc;
|
|
|
|
R.y /= dtc;
|
|
R.y += 0.5 * P.y / dotp;
|
|
R.y *= dtc;
|
|
|
|
R.z /= dtc;
|
|
R.z += 0.5 * P.z / dotp;
|
|
R.z *= dtc;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
inline void push(mic_double3 *r, mic_double3 *p, double dtc, int npart) {
|
|
#pragma omp parallel for simd
|
|
for (int i = 0; i < npart; i++) {
|
|
mic_double3 R = r[i];
|
|
mic_double3 P = p[i];
|
|
double dotp = sqrt(1.0 + dot(P, P));
|
|
updateR(R, P, dotp, dtc);
|
|
r[i] = R;
|
|
}
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
inline void push(mic_double3 *r, mic_double3 *p, double *gdt, double c, int npart) {
|
|
#pragma omp parallel for simd
|
|
for (int i = 0; i < npart; i++) {
|
|
mic_double3 R = r[i];
|
|
mic_double3 P = p[i];
|
|
double dtc = gdt[i] * c;
|
|
double dotp = sqrt(1.0 + dot(P, P));
|
|
updateR(R, P, dotp, dtc);
|
|
r[i] = R;
|
|
}
|
|
}
|
|
|
|
|
|
int MICCollimatorPhysics::ParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart, void *dt_ptr,
|
|
double dt, double c, bool usedt, int streamId)
|
|
{
|
|
|
|
mic_double3 *r = (mic_double3*)r_ptr;
|
|
mic_double3 *p = (mic_double3*)p_ptr;
|
|
double *gdt = (double*)dt_ptr;
|
|
double dtc = dt * c;
|
|
|
|
if (!usedt) {
|
|
#pragma offload target(mic:m_micbase->m_device_id) in(r:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(p:length(0) DKS_RETAIN DKS_REUSE) in(npart, dtc)
|
|
{
|
|
push(r, p, dtc, npart);
|
|
}
|
|
|
|
} else {
|
|
|
|
#pragma offload target(mic:m_micbase->m_device_id) in(r:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(p:length(0) DKS_RETAIN DKS_REUSE) in(gdt:length(0) DKS_RETAIN DKS_REUSE) in(npart, c)
|
|
{
|
|
push(r, p, gdt, c, npart);
|
|
}
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
inline void pushTransform(mic_double3 *x, mic_double3 *p, mic_double3 *gOrient, long *gLastSect,
|
|
double dtc, int npart, int nsec)
|
|
{
|
|
|
|
#pragma omp parallel for simd
|
|
for (int i = 0; i < npart; i++) {
|
|
mic_double3 ori;
|
|
if (gLastSect[i] > -1 && gLastSect[i] < nsec) {
|
|
ori = gOrient[gLastSect[i]];
|
|
} else {
|
|
ori.x = 0.0;
|
|
ori.y = 0.0;
|
|
ori.z = 0.0;
|
|
}
|
|
|
|
mic_double3 tmp = deviceTransformTo(p[i], ori);
|
|
mic_double3 X = x[i];
|
|
double dotp = sqrt(1.0 + dot(tmp, tmp));
|
|
updateR(X, tmp, dotp, dtc);
|
|
x[i] = X;
|
|
}
|
|
|
|
}
|
|
|
|
__declspec(target(mic))
|
|
inline void pushTransform(mic_double3 *x, mic_double3 *p, mic_double3 *gOrient, long *gLastSect,
|
|
double *gdt, double c, int npart, int nsec)
|
|
{
|
|
|
|
#pragma omp parallel for simd
|
|
for (int i = 0; i < npart; i++) {
|
|
mic_double3 ori;
|
|
if (gLastSect[i] > -1 && gLastSect[i] < nsec) {
|
|
ori = gOrient[gLastSect[i]];
|
|
} else {
|
|
ori.x = 0.0;
|
|
ori.y = 0.0;
|
|
ori.z = 0.0;
|
|
}
|
|
|
|
mic_double3 tmp = deviceTransformTo(p[i], ori);
|
|
mic_double3 X = x[i];
|
|
double dotp = sqrt(1.0 + dot(tmp, tmp));
|
|
double dtc = gdt[i] * c;
|
|
|
|
updateR(X, tmp, dotp, dtc);
|
|
x[i] = X;
|
|
}
|
|
|
|
}
|
|
|
|
int MICCollimatorPhysics::ParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
|
|
void *lastSec_ptr,
|
|
void *orient_ptr, int npart,
|
|
int nsec, void *dt_ptr, double dt,
|
|
double c, bool usedt, int streamId)
|
|
{
|
|
|
|
mic_double3 *x = (mic_double3*)x_ptr;
|
|
mic_double3 *p = (mic_double3*)p_ptr;
|
|
mic_double3 *gOrient = (mic_double3*)orient_ptr;
|
|
double *gdt = (double*)dt_ptr;
|
|
long *gLastSect = (long*)lastSec_ptr;
|
|
double dtc = dt * c;
|
|
|
|
if (!usedt) {
|
|
|
|
#pragma offload target(mic:m_micbase->m_device_id) in(x:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(p:length(0) DKS_RETAIN DKS_REUSE) in(gOrient:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(gLastSect:length(0) DKS_RETAIN DKS_REUSE) in(npart, nsec, dtc)
|
|
{
|
|
pushTransform(x, p, gOrient, gLastSect, dtc, npart, nsec);
|
|
}
|
|
|
|
} else {
|
|
|
|
#pragma offload target(mic:m_micbase->m_device_id) in(x:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(p:length(0) DKS_RETAIN DKS_REUSE) in(gdt:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(gOrient:length(0) DKS_RETAIN DKS_REUSE) in(gLastSect:length(0) DKS_RETAIN DKS_REUSE) \
|
|
in(npart, nsec, c)
|
|
{
|
|
pushTransform(x, p, gOrient, gLastSect, gdt, c, npart, nsec);
|
|
}
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|
|
|