#include "CudaCollimatorPhysics.cuh"
|
|
|
|
//#define M_P 0.93827231e+00
|
|
#define M_P 0.93827204e+00
|
|
#define C 299792458.0
|
|
#define PI 3.14159265358979323846
|
|
#define AVO 6.022e23
|
|
#define R_E 2.81794092e-15
|
|
//#define eM_E 0.51099906e-03
|
|
#define eM_E 0.51099892e-03
|
|
#define Z_P 1
|
|
#define K 4.0*PI*AVO*R_E*R_E*eM_E*1e7
|
|
|
|
#define POSITION 0
|
|
#define ZSIZE 1
|
|
#define RHO_M 2
|
|
#define Z_M 3
|
|
#define A_M 4
|
|
#define A2_C 5
|
|
#define A3_C 6
|
|
#define A4_C 7
|
|
#define A5_C 8
|
|
#define X0_M 9
|
|
#define I_M 10
|
|
#define DT_M 11
|
|
#define LOWENERGY_THR 12
|
|
|
|
#define BLOCK_SIZE 128
|
|
#define NUMPAR 13
|
|
|
|
__device__ inline double dot(double3 &d1, double3 &d2) {
    return (d1.x * d2.x + d1.y * d2.y + d1.z * d2.z);
}

__device__ inline double3 cross(double3 &lhs, double3 &rhs) {
    double3 tmp;
    tmp.x = lhs.y * rhs.z - lhs.z * rhs.y;
    tmp.y = lhs.z * rhs.x - lhs.x * rhs.z;
    tmp.z = lhs.x * rhs.y - lhs.y * rhs.x;
    return tmp;
}

/* rotate Rorg about the axis W by angle Theta (Rodrigues' rotation formula);
   W is normalized in place */
__device__ inline double3 ArbitraryRotation(double3 &W, double3 &Rorg, double Theta) {
    double c = cos(Theta);
    double s = sin(Theta);
    double dotW = sqrt(dot(W, W));
    W.x = W.x / dotW;
    W.y = W.y / dotW;
    W.z = W.z / dotW;

    double dotWR = dot(W, Rorg) * (1.0 - c);
    double3 crossW = cross(W, Rorg);
    double3 tmp;
    tmp.x = Rorg.x * c + crossW.x * s + W.x * dotWR;
    tmp.y = Rorg.y * c + crossW.y * s + W.y * dotWR;
    tmp.z = Rorg.z * c + crossW.z * s + W.z * dotWR;
    return tmp;
}

__device__ inline bool checkHit(double &z, double *par) {
    /* check if particle is in the degrader material */
    return ( (z > par[POSITION]) && ( z <= par[POSITION] + par[ZSIZE]) );
}

__device__ inline void energyLoss(double &Eng, bool &pdead, curandState &state, double *par)
{
    volatile double dEdx = 0.0;

    volatile double gamma = (Eng + M_P) / M_P;
    volatile double gamma2 = gamma * gamma;

    double beta = sqrt(1.0 - 1.0 / gamma2);
    volatile double beta2 = beta * beta;

    double deltas = par[DT_M] * beta * C;
    volatile double deltasrho = deltas * 100 * par[RHO_M];
    volatile double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);

    // low-energy regime (10 keV - 600 keV): empirical stopping-power fit
    // built from the material coefficients A2_C .. A5_C
    if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
        double Ts = (Eng * 1E6) / 1.0073;
        double epsilon_low = par[A2_C] * pow(Ts, 0.45);
        double epsilon_high = (par[A3_C] / Ts) * log( 1 + ( par[A4_C] / Ts) + (par[A5_C] * Ts) );
        double epsilon = (epsilon_low * epsilon_high) / (epsilon_low + epsilon_high);

        dEdx = -epsilon / (1E21 * (par[A_M] / AVO) );

        double delta_E = deltasrho * dEdx + sigma_E * curand_normal_double(&state);
        Eng = Eng + delta_E / 1E3;
    }

    // high-energy regime (>= 600 keV): Bethe-Bloch with Gaussian energy straggling
    if (Eng >= 0.0006) {
        double Tmax = 2.0 * eM_E * 1e9 * beta2 * gamma2 /
            (1.0 + 2.0 * gamma * eM_E / M_P + (eM_E / M_P) * (eM_E / M_P));

        dEdx = -K * Z_P * Z_P * par[Z_M] / (par[A_M] * beta2) *
            (1.0 / 2.0 * log(2 * eM_E * 1e9 * beta2 * gamma2 *
                             Tmax / par[I_M] / par[I_M]) - beta2);

        double delta_E = deltasrho * dEdx + sigma_E * curand_normal_double(&state);

        Eng = Eng + delta_E / 1E3;
    }

    // particle counts as dead if it falls below the energy threshold or if
    // the energy loss came out with the wrong sign
    pdead = ( (Eng < par[LOWENERGY_THR]) || (dEdx > 0) );
}
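
/*
 * Minimal usage sketch for energyLoss() (a hypothetical test kernel, not
 * part of this file's API): d_par must hold the NUMPAR parameters in the
 * layout defined above and d_state must be an initialized curandState.
 *
 *   __global__ void testEnergyLoss(double *d_Eng, bool *d_dead,
 *                                  curandState *d_state, double *d_par) {
 *       bool dead = false;
 *       double E = *d_Eng;                    // kinetic energy [GeV]
 *       energyLoss(E, dead, *d_state, d_par);
 *       *d_Eng  = E;
 *       *d_dead = dead;
 *   }
 */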

__device__ inline void Rot(double &px, double &pz, double &x, double &z, double &xplane,
                           double &normP, double &thetacou, double &deltas, int coord,
                           double *par)
{
    double Psixz;
    double pxz;

    /*
    if (px>=0 && pz>=0)
        Psixz = atan(px/pz);
    else if (px>0 && pz<0)
        Psixz = atan(px/pz) + PI;
    else if (px<0 && pz>0)
        Psixz = atan(px/pz) + 2*PI;
    else
        Psixz = atan(px/pz) + PI;
    */
    Psixz = atan2(px, pz);
    pxz = sqrt(px*px + pz*pz);

    if (coord == 1) {
        x = x + deltas * px/normP + xplane*cos(Psixz);
        z = z - xplane * sin(Psixz);
    }

    if (coord == 2) {
        x = x + deltas * px/normP + xplane*cos(Psixz);
        z = z - xplane * sin(Psixz) + deltas * pz / normP;
    }

    px = pxz*cos(Psixz)*sin(thetacou) + pxz*sin(Psixz)*cos(thetacou);
    pz = -pxz*sin(Psixz)*sin(thetacou) + pxz*cos(Psixz)*cos(thetacou);
}
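
/*
 * Rot() rotates the momentum components (px, pz) by the scattering angle
 * thetacou in the plane spanned by the chosen transverse axis and z
 * (Psixz = atan2(px, pz) is the current flight angle, so the new angle is
 * Psixz + thetacou), and displaces the position by the transverse offset
 * xplane measured perpendicular to the flight direction. coord selects
 * whether the longitudinal part of the drift (deltas * pz / normP) is also
 * applied here (coord == 2, used for the y plane) or not (coord == 1).
 */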

__device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, double* par,
                                   bool enableRutherfordScattering)
{
    double Eng = sqrt(dot(P, P) + 1.0) * M_P - M_P;
    double gamma = (Eng + M_P) / M_P;
    double normP = sqrt(dot(P, P));
    double beta = sqrt(1.0 - 1.0 / (gamma * gamma));
    double deltas = par[DT_M] * beta * C;

    double theta0 = 13.6e6 / (beta * normP * M_P * 1e9) *
        Z_P * sqrt(deltas / par[X0_M]) * (1.0 + 0.038 * log(deltas / par[X0_M]));

    // x-direction: See Physical Review, "Multiple Scattering"
    double z1 = curand_normal_double(&state);  //gsl_ran_gaussian(rGen_m,1.0);
    double z2 = curand_normal_double(&state);  //gsl_ran_gaussian(rGen_m,1.0);
    double thetacou = z2 * theta0;

    while (fabs(thetacou) > 3.5 * theta0) {
        z1 = curand_normal_double(&state);     //gsl_ran_gaussian(rGen_m,1.0);
        z2 = curand_normal_double(&state);     //gsl_ran_gaussian(rGen_m,1.0);
        thetacou = z2 * theta0;
    }

    //__syncthreads();
    double xplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
    Rot(P.x, P.z, R.x, R.z, xplane, normP, thetacou, deltas, 1, par);

    // y-direction: See Physical Review, "Multiple Scattering"
    z1 = curand_normal_double(&state);         //gsl_ran_gaussian(rGen_m,1.0);
    z2 = curand_normal_double(&state);         //gsl_ran_gaussian(rGen_m,1.0);
    thetacou = z2 * theta0;

    while (fabs(thetacou) > 3.5 * theta0) {
        z1 = curand_normal_double(&state);     //gsl_ran_gaussian(rGen_m,1.0);
        z2 = curand_normal_double(&state);     //gsl_ran_gaussian(rGen_m,1.0);
        thetacou = z2 * theta0;
    }

    //__syncthreads();

    double yplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
    Rot(P.y, P.z, R.y, R.z, yplane, normP, thetacou, deltas, 2, par);

    double P2 = curand_uniform_double(&state); //gsl_rng_uniform(rGen_m);
    if ( (P2 < 0.0047) && enableRutherfordScattering) {
        double P3 = curand_uniform_double(&state); //gsl_rng_uniform(rGen_m);
        //double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
        double thetaru = 2.5 * sqrt(1 / P3) * 2.0 * theta0;
        double phiru = 2.0 * M_PI * curand_uniform_double(&state);
        double th0 = atan2(sqrt(P.x*P.x + P.y*P.y), fabs(P.z));
        double3 W, X;

        double dotP = sqrt(dot(P, P));
        X.x = cos(phiru)*sin(thetaru) * dotP;
        X.y = sin(phiru)*sin(thetaru) * dotP;
        X.z = cos(thetaru) * dotP;
        W.x = -P.y;
        W.y = P.x;
        W.z = 0.0;
        P = ArbitraryRotation(W, X, th0);
    }
}
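
/*
 * Note on theta0 above: this is the Highland/PDG estimate for the rms
 * multiple-Coulomb-scattering angle,
 *
 *   theta0 = 13.6 MeV / (beta * c * p) * z * sqrt(s/X0) * (1 + 0.038 * ln(s/X0)),
 *
 * with s = deltas the step length and X0 = par[X0_M] the radiation length;
 * normP * M_P * 1e9 converts the unitless momentum to eV/c so the prefactor
 * can be written as 13.6e6. Angles are redrawn while |thetacou| > 3.5 * theta0,
 * since the large-angle tail is handled separately by the single
 * Rutherford-scattering branch (probability 0.0047 per step when enabled).
 */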

template <typename T>
__global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state,
                                        int numparticles, bool enableRutherfordScattering)
{
    //get global id and thread id
    volatile int tid = threadIdx.x;
    volatile int idx = blockIdx.x * blockDim.x + tid;

    //transfer params to shared memory
    extern __shared__ double smem[];
    double *p = (double*)smem;
    double3 *R = (double3*)&smem[NUMPAR];

    curandState s;
    double3 P;

    for (int tt = tid; tt < NUMPAR; tt += blockDim.x)
        p[tt] = par[tt];

    __syncthreads();

    if (idx < numparticles) {
        s = state[idx];
        R[tid] = data[idx].Rincol;
        P = data[idx].Pincol;

        bool pdead = false;
        volatile double sq = sqrt(1.0 + dot(P, P));

        double Eng;

        if (checkHit(R[tid].z, p)) {

            Eng = (sq - 1) * M_P;
            energyLoss(Eng, pdead, s, p);

            if (!pdead) {
                double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
                sq = sqrt(dot(P, P));

                P.x = P.x * ptot / sq;
                P.y = P.y * ptot / sq;
                P.z = P.z * ptot / sq;
                coulombScat(R[tid], P, s, p, enableRutherfordScattering);

                data[idx].Pincol = P;
            } else {
                data[idx].label = -1;
            }

            state[idx] = s;
        } else {

            R[tid].x = R[tid].x + p[DT_M] * C * P.x / sq;
            R[tid].y = R[tid].y + p[DT_M] * C * P.y / sq;
            R[tid].z = R[tid].z + p[DT_M] * C * P.z / sq;
            data[idx].label = -2;

        }

        data[idx].Rincol = R[tid];
    }

}
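
/*
 * Launch sketch for the templated kernel (this mirrors what
 * CudaCollimatorPhysics::CollimatorPhysics() further below does; the d_*
 * names are hypothetical device pointers prepared by the caller):
 *
 *   int threads = BLOCK_SIZE;
 *   int blocks  = numparticles / threads + 1;
 *   int smem    = sizeof(double) * NUMPAR + sizeof(double3) * BLOCK_SIZE;
 *   kernelCollimatorPhysics<<<blocks, threads, smem>>>(d_particles, d_par,
 *                                                      d_states, numparticles,
 *                                                      true);
 */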

__global__ void kernelCollimatorPhysics2(CUDA_PART2_SMALL data, double *par,
                                         curandState *state, int numparticles,
                                         bool enableRutherfordScattering)
{
    //get global id and thread id
    volatile int tid = threadIdx.x;
    volatile int idx = blockIdx.x * blockDim.x + tid;

    //transfer params to shared memory
    __shared__ double p[NUMPAR];
    __shared__ double3 R[BLOCK_SIZE];

    if (tid < NUMPAR)
        p[tid] = par[tid];

    __syncthreads();

    curandState s;
    double3 P;
    if (idx < numparticles) {
        R[tid] = data.Rincol[idx];
        P = data.Pincol[idx];
        s = state[idx];

        double sq = sqrt(1.0 + dot(P, P));
        bool pdead = false;

        if (checkHit(R[tid].z, p)) {

            double Eng = (sq - 1) * M_P;
            energyLoss(Eng, pdead, s, p);

            if (!pdead) {

                double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
                sq = sqrt(dot(P, P));
                P.x = P.x * ptot / sq;
                P.y = P.y * ptot / sq;
                P.z = P.z * ptot / sq;
                coulombScat(R[tid], P, s, p, enableRutherfordScattering);

                data.Pincol[idx] = P;
            } else {
                data.label[idx] = -1;
            }

        } else {
            R[tid].x = R[tid].x + p[DT_M] * C * P.x / sq;
            R[tid].y = R[tid].y + p[DT_M] * C * P.y / sq;
            R[tid].z = R[tid].z + p[DT_M] * C * P.z / sq;

            data.label[idx] = -2;
        }

        data.Rincol[idx] = R[tid];
        state[idx] = s;
    }

}
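
/*
 * kernelCollimatorPhysics2 implements the same physics as the templated
 * kernel above, but takes a structure-of-arrays argument (CUDA_PART2_SMALL
 * passed by value) and uses statically sized shared-memory buffers, so no
 * dynamic shared-memory size has to be supplied at launch time.
 */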

inline __device__ void unitlessOff(double3 &a, const double &c) {
    a.x *= c;
    a.y *= c;
    a.z *= c;
}

inline __device__ void unitlessOn(double3 &a, const double &c) {
    a.x /= c;
    a.y /= c;
    a.z /= c;
}
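
/*
 * unitlessOn(a, c) divides a position by c (typically dt*c) to make it
 * unitless for the drift step; unitlessOff(a, c) multiplies it back.
 * A round trip
 *
 *   unitlessOn(R, dtc);
 *   unitlessOff(R, dtc);
 *
 * restores R up to floating-point rounding.
 */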

//switch to unitless positions with dtc
__global__ void kernelSwitchToUnitlessPositions(double3 *gR, double3 *gX, double dtc, int npart) {

    volatile int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < npart) {
        double3 R = gR[idx];
        double3 X = gX[idx];

        unitlessOn(R, dtc);
        unitlessOn(X, dtc);

        gR[idx] = R;
        gX[idx] = X;
    }

}
//switch to unitless positions with dt*c
__global__ void kernelSwitchToUnitlessPositions(double3 *gR, double3 *gX, double *gdt, double c, int npart) {

    volatile int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < npart) {
        double3 R = gR[idx];
        double3 X = gX[idx];
        double dt = gdt[idx];

        // switching *to* unitless coordinates divides by dt*c (unitlessOn),
        // matching the fixed-dtc overload above
        unitlessOn(R, dt*c);
        unitlessOn(X, dt*c);

        gR[idx] = R;
        gX[idx] = X;
    }
}
//switch off unitless positions with dtc
__global__ void kernelSwitchOffUnitlessPositions(double3 *gR, double3 *gX, double dtc, int npart) {

    volatile int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < npart) {
        double3 R = gR[idx];
        double3 X = gX[idx];

        unitlessOff(R, dtc);
        unitlessOff(X, dtc);

        gR[idx] = R;
        gX[idx] = X;
    }

}

//switch off unitless positions with dt*c
__global__ void kernelSwitchOffUnitlessPositions(double3 *gR, double3 *gX, double *gdt, double c, int npart) {

    volatile int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < npart) {
        double3 R = gR[idx];
        double3 X = gX[idx];
        double dt = gdt[idx];

        unitlessOff(R, dt*c);
        unitlessOff(X, dt*c);

        gR[idx] = R;
        gX[idx] = X;
    }
}

__global__ void kernelPush(double3 *gR, double3 *gP, int npart, double dtc) {

    //get global id and thread id
    volatile int tid = threadIdx.x;
    volatile int idx = blockIdx.x * blockDim.x + tid;

    if (idx < npart) {

        double3 R = gR[idx];
        double3 P = gP[idx];

        //switch to unitless positions
        unitlessOn(R, dtc);

        //push
        double tmp = sqrt(1.0 + dot(P, P));
        R.x += 0.5 * P.x / tmp;
        R.y += 0.5 * P.y / tmp;
        R.z += 0.5 * P.z / tmp;

        //switch off unitless positions with dt*c
        unitlessOff(R, dtc);

        gR[idx] = R;
    }
}
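
/*
 * The push above is a half drift: with gamma = sqrt(1 + |P|^2) and P the
 * unitless momentum (beta*gamma), the unitless position advances by
 *
 *   R += 0.5 * P / gamma        (i.e. x += 0.5 * dt * c * beta_x, etc.)
 *
 * which is why positions are first converted to units of dt*c.
 */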

__global__ void kernelPush(double3 *gR, double3 *gP, double *gdt, int npart, double c) {

    //get global id and thread id
    volatile int tid = threadIdx.x;
    volatile int idx = blockIdx.x * blockDim.x + tid;

    if (idx < npart) {

        double3 R = gR[idx];
        double3 P = gP[idx];
        double dt = gdt[idx];

        //switch to unitless positions with dt*c
        unitlessOn(R, dt*c);

        R.x += 0.5 * P.x / sqrt(1.0 + dot(P, P));
        R.y += 0.5 * P.y / sqrt(1.0 + dot(P, P));
        R.z += 0.5 * P.z / sqrt(1.0 + dot(P, P));

        //switch off unitless positions with dt*c
        unitlessOff(R, dt*c);

        gR[idx] = R;
    }
}

__global__ void kernelKick(double3 *gR, double3 *gP, double3 *gEf,
                           double3 *gBf, double *gdt, double charge,
                           double mass, int npart, double c)
{
    volatile int tid = threadIdx.x;
    volatile int idx = blockIdx.x * blockDim.x + tid;

    if (idx < npart) {
        double3 R = gR[idx];
        double3 P = gP[idx];
        double3 Ef = gEf[idx];
        double3 Bf = gBf[idx];
        double dt = gdt[idx];

        P.x += 0.5 * dt * charge * c / mass * Ef.x;
        P.y += 0.5 * dt * charge * c / mass * Ef.y;
        P.z += 0.5 * dt * charge * c / mass * Ef.z;

        double gamma = sqrt(1.0 + dot(P, P));
        double3 t, w, s;
        t.x = 0.5 * dt * charge * c * c / (gamma * mass) * Bf.x;
        t.y = 0.5 * dt * charge * c * c / (gamma * mass) * Bf.y;
        t.z = 0.5 * dt * charge * c * c / (gamma * mass) * Bf.z;

        double3 crossPt = cross(P, t);
        w.x = P.x + crossPt.x;
        w.y = P.y + crossPt.y;
        w.z = P.z + crossPt.z;

        s.x = 2.0 / (1.0 + dot(t, t)) * t.x;
        s.y = 2.0 / (1.0 + dot(t, t)) * t.y;
        s.z = 2.0 / (1.0 + dot(t, t)) * t.z;

        double3 crossws = cross(w, s);
        P.x += crossws.x;
        P.y += crossws.y;
        P.z += crossws.z;

        P.x += 0.5 * dt * charge * c / mass * Ef.x;
        P.y += 0.5 * dt * charge * c / mass * Ef.y;
        P.z += 0.5 * dt * charge * c / mass * Ef.z;

        gP[idx] = P;
    }
}
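
/*
 * kernelKick is a Boris-type kick: a half electric-field kick, a rotation
 * in the magnetic field via the auxiliary vectors
 *
 *   t = (q dt c^2 / (2 gamma m)) * B,   s = 2 t / (1 + |t|^2),
 *   P' = P + (P + P x t) x s,
 *
 * followed by the second half electric-field kick. The factors of c stem
 * from the unitless-momentum convention used throughout this file.
 */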

__device__ double3 deviceTransformTo(const double3 &vec, const double3 &ori) {

    const double sina = sin(ori.x);
    const double cosa = cos(ori.x);
    const double sinb = sin(ori.y);
    const double cosb = cos(ori.y);
    const double sinc = sin(ori.z);
    const double cosc = cos(ori.z);

    double3 temp;
    temp.x = 0.0;
    temp.y = 0.0;
    temp.z = 0.0;

    temp.x = (cosa * cosc) * vec.x + (cosa * sinc) * vec.y - sina * vec.z;
    temp.y = (-cosb * sinc - sina * sinb * cosc) * vec.x +
             (cosb * cosc - sina * sinb * sinc) * vec.y - cosa * sinb * vec.z;
    temp.z = (-sinb * sinc + sina * cosb * cosc) * vec.x +
             (sinb * cosc + sina * cosb * sinc) * vec.y + cosa * cosb * vec.z;

    return temp;
}
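
/*
 * deviceTransformTo applies the rotation matrix built from the three
 * orientation angles (ori.x, ori.y, ori.z) of a beam-line section,
 * presumably mapping the momentum from the global frame into the local
 * section frame. The zero initialization of temp is redundant, since
 * every component is overwritten immediately afterwards.
 */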

__global__ void kernelPushTransform(double3 *gX, double3 *gP, long *gLastSection, double3* gOrient,
                                    int npart, int nsect, double dtc)
{
    //get global id and thread id
    volatile int tid = threadIdx.x;
    volatile int idx = blockIdx.x * blockDim.x + tid;

    if (idx < npart) {

        double3 X = gX[idx];
        double3 P = gP[idx];
        long lLastSection = gLastSection[idx];

        double3 ori;
        if (lLastSection > -1 && lLastSection < nsect) {
            ori = gOrient[lLastSection];
        } else {
            ori.x = 0.0;
            ori.y = 0.0;
            ori.z = 0.0;
        }

        double3 tmp = deviceTransformTo(P, ori);

        unitlessOn(X, dtc);

        X.x += 0.5 * tmp.x / sqrt(1.0 + dot(tmp, tmp));
        X.y += 0.5 * tmp.y / sqrt(1.0 + dot(tmp, tmp));
        X.z += 0.5 * tmp.z / sqrt(1.0 + dot(tmp, tmp));

        unitlessOff(X, dtc);

        gX[idx] = X;
    }

}

__global__ void kernelPushTransform(double3 *gX, double3 *gP, long *gLastSection, double3* gOrient,
                                    int npart, int nsect, double *gdt, double c)
{
    //get global id and thread id
    volatile int tid = threadIdx.x;
    volatile int idx = blockIdx.x * blockDim.x + tid;

    if (idx < npart) {

        double3 X = gX[idx];
        double3 P = gP[idx];
        long lLastSection = gLastSection[idx];
        double dt = gdt[idx];

        double3 ori;
        if (lLastSection > -1 && lLastSection < nsect) {
            ori = gOrient[lLastSection];
        } else {
            ori.x = 0.0;
            ori.y = 0.0;
            ori.z = 0.0;
        }

        double3 tmp = deviceTransformTo(P, ori);

        unitlessOn(X, dt*c);

        X.x += 0.5 * tmp.x / sqrt(1.0 + dot(tmp, tmp));
        X.y += 0.5 * tmp.y / sqrt(1.0 + dot(tmp, tmp));
        X.z += 0.5 * tmp.z / sqrt(1.0 + dot(tmp, tmp));

        unitlessOff(X, dt*c);

        gX[idx] = X;
    }

}

/* comparison functor for thrust: the binary operator sorts particles by
   label in descending order (alive particles first), the unary operator
   is used with thrust::count_if to count labels below the threshold */
struct compare_particle
{
    int threshold;

    compare_particle() {
        threshold = 0;
    }

    void set_threshold(int t) {
        threshold = t;
    }

    __host__ __device__
    bool operator()(CUDA_PART p1, CUDA_PART p2) {
        return p1.label > p2.label;
    }

    __host__ __device__
    bool operator()(CUDA_PART p1) {
        return p1.label < threshold;
    }
};

/* same functor for the small particle struct */
struct compare_particle_small
{
    int threshold;

    compare_particle_small() {
        threshold = 0;
    }

    void set_threshold(int t) {
        threshold = t;
    }

    __host__ __device__
    bool operator()(CUDA_PART_SMALL p1, CUDA_PART_SMALL p2) {
        return p1.label > p2.label;
    }

    __host__ __device__
    bool operator()(CUDA_PART_SMALL p1) {
        return p1.label < threshold;
    }
};

/* unary predicate: true for values below zero */
struct less_then
{
    __host__ __device__
    bool operator()(int x)
    {
        return x < 0;
    }
};

int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
                                             bool enableRutherfordScattering)
{
    int threads = BLOCK_SIZE;
    int blocks = numparticles / threads + 1;

    //calc shared memory size
    int smem_size = sizeof(double)*NUMPAR + sizeof(double3)*BLOCK_SIZE;

    //call kernel
    kernelCollimatorPhysics<<<blocks, threads, smem_size>>>((CUDA_PART_SMALL*)mem_ptr,
                                                            (double*)par_ptr,
                                                            m_base->cuda_getCurandStates(),
                                                            numparticles,
                                                            enableRutherfordScattering);

    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        std::cout << "Err2: " << cudaGetErrorString(err) << std::endl;

    return DKS_SUCCESS;
}
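
/*
 * Typical call sequence (sketch; `phys` is a hypothetical CudaCollimatorPhysics
 * instance, and mem_ptr/par_ptr are device allocations set up elsewhere through
 * the DKS base class that also owns the curand states):
 *
 *   phys.CollimatorPhysics(mem_ptr, par_ptr, numparticles, true);
 *   phys.CollimatorPhysicsSort(mem_ptr, numparticles, numaddback);
 *
 * The sort step below moves dead (label -1) and drifted-out (label -2)
 * particles to the back of the array and reports their count in numaddback.
 */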

int CudaCollimatorPhysics::CollimatorPhysicsSort(void *mem_ptr, int numparticles,
                                                 int &numaddback)
{
    //wrap mem_ptr with thrust device ptr
    thrust::device_ptr<CUDA_PART_SMALL> dev_ptr( (CUDA_PART_SMALL*)mem_ptr);

    //count -2 and -1 particles
    compare_particle_small comp;
    comp.set_threshold(0);
    numaddback = thrust::count_if(dev_ptr, dev_ptr + numparticles, comp);

    //sort particles
    if (numaddback > 0)
        thrust::sort(dev_ptr, dev_ptr + numparticles, comp);

    return DKS_SUCCESS;
}

int CudaCollimatorPhysics::ParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart,
                                                void *dt_ptr, double dt, double c, bool usedt,
                                                int streamId)
{
    int threads = BLOCK_SIZE;
    int blocks = npart / threads + 1;

    //call kernel
    if (!usedt) {
        if (streamId == -1) {
            kernelPush<<<blocks, threads >>>((double3*)r_ptr, (double3*)p_ptr, npart, dt*c);
        } else {
            cudaStream_t cs = m_base->cuda_getStream(streamId);
            kernelPush<<<blocks, threads, 0, cs >>>((double3*)r_ptr, (double3*)p_ptr, npart, dt*c);
        }
    } else {
        if (streamId == -1) {
            kernelPush<<<blocks, threads>>>((double3*)r_ptr, (double3*)p_ptr,
                                            (double*)dt_ptr, npart, c);
        } else {
            cudaStream_t cs = m_base->cuda_getStream(streamId);
            kernelPush<<<blocks, threads, 0, cs >>>((double3*)r_ptr, (double3*)p_ptr,
                                                    (double*)dt_ptr, npart, c);
        }
    }

    return DKS_SUCCESS;
}

int CudaCollimatorPhysics::ParallelTTrackerKick(void *r_ptr, void *p_ptr, void *ef_ptr,
                                                void *bf_ptr, void *dt_ptr, double charge,
                                                double mass, int npart,
                                                double c, int streamId)
{
    int threads = BLOCK_SIZE;
    int blocks = npart / threads + 1;

    //call kernel
    if (streamId == -1) {
        kernelKick<<<blocks, threads>>>((double3*)r_ptr, (double3*)p_ptr, (double3*)ef_ptr,
                                        (double3*)bf_ptr, (double*)dt_ptr, charge, mass, npart, c);
    } else {
        cudaStream_t cs = m_base->cuda_getStream(streamId);
        kernelKick<<<blocks, threads, 0, cs >>>((double3*)r_ptr, (double3*)p_ptr,
                                                (double3*)ef_ptr, (double3*)bf_ptr,
                                                (double*)dt_ptr, charge, mass, npart, c);
    }

    return DKS_SUCCESS;
}

int CudaCollimatorPhysics::ParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
                                                         void *lastSec_ptr, void *orient_ptr,
                                                         int npart, int nsec,
                                                         void *dt_ptr, double dt,
                                                         double c, bool usedt,
                                                         int streamId)
{
    int threads = BLOCK_SIZE;
    int blocks = npart / threads + 1;
    int smem = sizeof(double3) * nsec;

    //call kernel
    if (!usedt) {
        if (streamId == -1) {
            kernelPushTransform<<<blocks, threads, smem>>>((double3*)x_ptr, (double3*)p_ptr,
                                                           (long*)lastSec_ptr, (double3*)orient_ptr,
                                                           npart, nsec, dt*c);
        } else {
            cudaStream_t cs = m_base->cuda_getStream(streamId);
            kernelPushTransform<<<blocks, threads, smem, cs>>>((double3*)x_ptr, (double3*)p_ptr,
                                                               (long*)lastSec_ptr, (double3*)orient_ptr,
                                                               npart, nsec, dt*c);
        }
    } else {
        if (streamId == -1) {
            kernelPushTransform<<<blocks, threads, smem>>>((double3*)x_ptr, (double3*)p_ptr,
                                                           (long*)lastSec_ptr, (double3*)orient_ptr,
                                                           npart, nsec, (double*)dt_ptr, c);
        } else {
            cudaStream_t cs = m_base->cuda_getStream(streamId);
            kernelPushTransform<<<blocks, threads, smem, cs>>>((double3*)x_ptr, (double3*)p_ptr,
                                                               (long*)lastSec_ptr, (double3*)orient_ptr,
                                                               npart, nsec, (double*)dt_ptr, c);
        }
    }

    return DKS_SUCCESS;
}