push and kick for OPALs BorisPusher

2017-03-15 09:59:33 +01:00
parent 24f9c9dbf4
commit 9734157ad8
5 changed files with 129 additions and 9 deletions
--- a/src/CUDA/CudaCollimatorPhysics.cu
+++ b/src/CUDA/CudaCollimatorPhysics.cu
@@ -33,6 +33,14 @@ __device__ inline double dot(double3 &d1, double3 &d2) {

 }

+__device__ inline double3 cross(double3 &lhs, double3 &rhs) {
+  double3 tmp;
+  tmp.x = lhs.y * rhs.z - lhs.z * rhs.y;
+  tmp.y = lhs.z * rhs.x - lhs.x * rhs.z;
+  tmp.z = lhs.x * rhs.y - lhs.y * rhs.x;
+  return tmp;
+}
+
 __device__ inline bool checkHit(double &z, double *par) {

  /* check if particle is in the degrader material */
@@ -423,7 +431,7 @@ __global__ void kernelPush(double3 *gR, double3 *gP, int npart, double dtc) {
 }


-__global__ void kernelPush(double3 *gR, double3 *gP, int npart, double *gdt, double c) {
+__global__ void kernelPush(double3 *gR, double3 *gP, double *gdt, int npart, double c) {

  //get global id and thread id
  volatile int tid = threadIdx.x;
@@ -449,7 +457,51 @@ __global__ void kernelPush(double3 *gR, double3 *gP, int npart, double *gdt, dou
  }
 }

-//TODO: kernel for push with switch off unitless positions with dt[i]*c
+__global__ void kernelKick(double3 *gR, double3 *gP, double3 *gEf, 
+			   double3 *gBf, double *gdt, double charge, 
+			   double mass, int npart, double c)
+{
+  volatile int tid = threadIdx.x;
+  volatile int idx = blockIdx.x * blockDim.x + tid;
+
+  if (idx < npart) {
+    double3 R = gR[idx];
+    double3 P = gP[idx];
+    double3 Ef = gEf[idx];
+    double3 Bf = gBf[idx];
+    double dt = gdt[idx];
+
+    P.x += 0.5 * dt * charge * c / mass * Ef.x;
+    P.y += 0.5 * dt * charge * c / mass * Ef.y;
+    P.z += 0.5 * dt * charge * c / mass * Ef.z;
+
+    double gamma = sqrt(1.0 + dot(P, P));
+    double3 t, w, s;
+    t.x = 0.5 * dt * charge * c * c / (gamma * mass) * Bf.x;
+    t.y = 0.5 * dt * charge * c * c / (gamma * mass) * Bf.y;
+    t.z = 0.5 * dt * charge * c * c / (gamma * mass) * Bf.z;
+    
+    double3 crossPt = cross(P, t);
+    w.x = P.x + crossPt.x;
+    w.y = P.y + crossPt.y;
+    w.z = P.z + crossPt.z;
+
+    s.x = 2.0 / (1.0 + dot(t, t)) * t.x;
+    s.y = 2.0 / (1.0 + dot(t, t)) * t.y;
+    s.z = 2.0 / (1.0 + dot(t, t)) * t.z;
+
+    double3 crossws = cross(w, s);
+    P.x += crossws.x;
+    P.y += crossws.y;
+    P.z += crossws.z;
+
+    P.x += 0.5 * dt * charge * c / mass * Ef.x;
+    P.y += 0.5 * dt * charge * c / mass * Ef.y;
+    P.z += 0.5 * dt * charge * c / mass * Ef.z;
+
+    gP[idx] = P;
+  }
+}

 __device__ double3 deviceTransformTo(const double3 &vec, const double3 &ori) {

@@ -671,12 +723,12 @@ int CudaCollimatorPhysics::ParallelTTrackerPush(void *r_ptr, void *p_ptr, int np
    }
  } else {
    if (streamId == -1) {
-      kernelPush<<<blocks, threads>>>((double3*)r_ptr, (double3*)p_ptr, npart, 
-				      (double*)dt_ptr, c);
+      kernelPush<<<blocks, threads>>>((double3*)r_ptr, (double3*)p_ptr, 
+				      (double*)dt_ptr, npart, c);
    } else {
      cudaStream_t cs = m_base->cuda_getStream(streamId);
-      kernelPush<<<blocks, threads, 0, cs >>>((double3*)r_ptr, (double3*)p_ptr, npart, 
-					      (double*)dt_ptr, c);
+      kernelPush<<<blocks, threads, 0, cs >>>((double3*)r_ptr, (double3*)p_ptr, 
+					      (double*)dt_ptr, npart, c);
    }
  }

@@ -684,6 +736,29 @@ int CudaCollimatorPhysics::ParallelTTrackerPush(void *r_ptr, void *p_ptr, int np
  return DKS_SUCCESS;
 }

+int CudaCollimatorPhysics::ParallelTTrackerKick(void *r_ptr, void *p_ptr, void *ef_ptr,
+						void *bf_ptr, void *dt_ptr, double charge,
+						double mass, int npart,
+						double c, int streamId) 
+{
+
+  int threads = BLOCK_SIZE;
+  int blocks = npart / threads + 1;
+
+  //call kernel
+  if (streamId == -1) {
+    kernelKick<<<blocks, threads>>>((double3*)r_ptr, (double3*)p_ptr, (double3*)ef_ptr,
+				    (double3*)bf_ptr, (double*)dt_ptr, charge, mass, npart, c);
+  } else {
+    cudaStream_t cs = m_base->cuda_getStream(streamId);
+    kernelKick<<<blocks, threads, 0, cs >>>((double3*)r_ptr, (double3*)p_ptr, 
+					    (double3*)ef_ptr, (double3*)bf_ptr, 
+					    (double*)dt_ptr, charge, mass,  npart, c);
+  }
+
+  return DKS_SUCCESS;
+}
+
 int CudaCollimatorPhysics::ParallelTTrackerPushTransform(void *x_ptr, void *p_ptr, 
 							 void *lastSec_ptr, void *orient_ptr, 
 							 int npart, int nsec,