Collimator physics for MIC fix

This commit is contained in:
Uldis Locans
2016-11-16 18:12:17 +01:00
parent 027bdc01f5
commit 87cdf52f07
8 changed files with 75 additions and 53 deletions

View File

@ -30,7 +30,7 @@ public:
/* destructor */ /* destructor */
~CudaGreensFunction(); ~CudaGreensFunction();
/* /**
Info: calc itegral on device memory (taken from OPAL src code) Info: calc itegral on device memory (taken from OPAL src code)
Return: success or error code Return: success or error code
*/ */
@ -38,20 +38,20 @@ public:
double hr_m0, double hr_m1, double hr_m2, double hr_m0, double hr_m1, double hr_m2,
int streamId = -1); int streamId = -1);
/* /**
Info: integration of rho2_m field (taken from OPAL src code) Info: integration of rho2_m field (taken from OPAL src code)
Return: success or error code Return: success or error code
*/ */
int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K, int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1); int streamId = -1);
/* /**
Info: mirror rho field (taken from OPAL src code) Info: mirror rho field (taken from OPAL src code)
Return: succes or error code Return: succes or error code
*/ */
int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1); int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
/* /**
Info: multiply complex fields already on the GPU memory, result will be put in ptr1 Info: multiply complex fields already on the GPU memory, result will be put in ptr1
Return: success or error code Return: success or error code
*/ */

View File

@ -18,30 +18,28 @@ int MICBase::mic_createRandStreams(int size) {
int seed = time(NULL); int seed = time(NULL);
#pragma offload target(mic:m_device_id) inout(defaultRndSet) in(seed) int numThreads = 0;
#pragma offload target(mic:m_device_id) inout(numThreads)
{ {
//get the number of threads
int numThreads;
#pragma omp parallel #pragma omp parallel
numThreads = omp_get_num_threads(); numThreads = omp_get_num_threads();
}
//if default rnd stream already allocated delete the array defaultRndStream = mic_allocateMemory<VSLStreamStatePtr>(numThreads);
if (defaultRndSet == 1) VSLStreamStatePtr *tmpRndStream = (VSLStreamStatePtr*) defaultRndStream;
delete[] defaultRndStream; maxThreads = numThreads;
//allocate defaultRndStream array
defaultRndStream = new VSLStreamStatePtr[numThreads];
#pragma offload target(mic:m_device_id) \
in(tmpRndStream:length(0) DKS_REUSE DKS_RETAIN) \
in(seed)
{
//create stream states for each thread //create stream states for each thread
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < omp_get_num_threads(); i++) for (int i = 0; i < omp_get_num_threads(); i++)
vslNewStream(&defaultRndStream[i], VSL_BRNG_MT2203, seed + i); vslNewStream(&tmpRndStream[i], VSL_BRNG_MT2203, seed + i);
defaultRndSet = 1;
} }
defaultRndSet = 1;
return DKS_SUCCESS; return DKS_SUCCESS;
} }
@ -49,15 +47,8 @@ int MICBase::mic_createRandStreams(int size) {
//delete default rand streams //delete default rand streams
int MICBase::mic_deleteRandStreams() { int MICBase::mic_deleteRandStreams() {
#pragma offload target(mic:m_device_id) inout(defaultRndSet) //mic_freeMemory<VSLStreamStatePtr>(defaultRndStream, 236);
{ return DKS_SUCCESS;
if (defaultRndSet == 1) {
delete[] defaultRndStream;
defaultRndSet = -1;
}
}
return DKS_ERROR;
} }
//create a new signal for the mic //create a new signal for the mic

View File

@ -30,14 +30,19 @@ class MICBase {
private: private:
std::vector<int> micStreams; std::vector<int> micStreams;
int maxThreads;
protected: protected:
int defaultRndSet; int defaultRndSet;
public: public:
VSLStreamStatePtr *defaultRndStream;
//#pragma offload_attribute(push,target(mic))
void *defaultRndStream; //VSLSStreamStatePtr
void *testPtr;
//#pragma offload_attribute(pop)
int m_device_id; int m_device_id;
/* constructor */ /* constructor */
@ -202,7 +207,6 @@ public:
#pragma offload_transfer target(mic:m_device_id) nocopy(tmp_ptr:length(totalsize) DKS_REUSE DKS_FREE) #pragma offload_transfer target(mic:m_device_id) nocopy(tmp_ptr:length(totalsize) DKS_REUSE DKS_FREE)
{ {
} }
return DKS_SUCCESS; return DKS_SUCCESS;
} }

View File

@ -292,7 +292,7 @@ void energyLoss(double &Eng, int &pdead, double *par, VSLStreamStatePtr &stream)
const double deltas = par[DT_M] * beta * C; const double deltas = par[DT_M] * beta * C;
const double deltasrho = deltas * 100 * par[RHO_M]; const double deltasrho = deltas * 100 * par[RHO_M];
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5); const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
if ( (Eng > 0.00001) && (Eng < 0.0006) ) { if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
const double Ts = (Eng * 1E6) / 1.0073; const double Ts = (Eng * 1E6) / 1.0073;
@ -338,7 +338,7 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {
const double deltas = par[DT_M] * beta * C; const double deltas = par[DT_M] * beta * C;
const double deltasrho = deltas * 100 * par[RHO_M]; const double deltasrho = deltas * 100 * par[RHO_M];
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5); const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
if ( (Eng > 0.00001) && (Eng < 0.0006) ) { if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
const double Ts = (Eng * 1E6) / 1.0073; const double Ts = (Eng * 1E6) / 1.0073;
@ -373,16 +373,18 @@ int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int nu
//cast device memory pointers to appropriate types //cast device memory pointers to appropriate types
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr; MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
double *par = (double*) par_ptr; double *par = (double*) par_ptr;
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
#pragma offload target(mic:m_micbase->m_device_id) \ #pragma offload target(mic:m_micbase->m_device_id) \
inout(data:length(0) DKS_RETAIN DKS_REUSE) \ inout(data:length(0) DKS_RETAIN DKS_REUSE) \
in(par:length(0) DKS_RETAIN DKS_REUSE) \ in(par:length(0) DKS_RETAIN DKS_REUSE) \
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
in(numparticles) in(numparticles)
{ {
#pragma omp parallel #pragma omp parallel
{ {
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()]; VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
//for loop trough particles if not checkhit set label to -2 and update R.x //for loop trough particles if not checkhit set label to -2 and update R.x
@ -459,6 +461,8 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
int padding = numparticles % MIC_WIDTH; int padding = numparticles % MIC_WIDTH;
int totalpart = numparticles + padding; int totalpart = numparticles + padding;
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
#pragma offload target (mic:0) \ #pragma offload target (mic:0) \
in(label:length(0) DKS_REUSE DKS_RETAIN) \ in(label:length(0) DKS_REUSE DKS_RETAIN) \
in(localID:length(0) DKS_REUSE DKS_RETAIN) \ in(localID:length(0) DKS_REUSE DKS_RETAIN) \
@ -469,14 +473,16 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
in(py:length(0) DKS_REUSE DKS_RETAIN) \ in(py:length(0) DKS_REUSE DKS_RETAIN) \
in(pz:length(0) DKS_REUSE DKS_RETAIN) \ in(pz:length(0) DKS_REUSE DKS_RETAIN) \
in(par:length(0) DKS_RETAIN DKS_REUSE) \ in(par:length(0) DKS_RETAIN DKS_REUSE) \
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
in(totalpart) in(totalpart)
{ {
#pragma omp parallel #pragma omp parallel
{ {
//every thread gets its own rnd stream state //every thread gets its own rnd stream state
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()]; //VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
#pragma omp for nowait #pragma omp for nowait
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) { for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
@ -512,10 +518,12 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
double Eng = (sq - 1) * M_P; double Eng = (sq - 1) * M_P;
double dEdx = 0; double dEdx = 0;
if (label[i] == 0) { if (label[i] == 0) {
energyLoss(Eng, dEdx, par, randv, i - ii); energyLoss(Eng, dEdx, par, randv, i - ii);
} }
if (Eng > 1e-4 && dEdx < 0) { if (Eng > 1e-4 && dEdx < 0) {
double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P; double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
sq = sqrt(dot(px[i], py[i], pz[i])); sq = sqrt(dot(px[i], py[i], pz[i]));
@ -531,6 +539,7 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
} //end outer energy loss loop } //end outer energy loss loop
//vectorize coulomb scattering as much as possible //vectorize coulomb scattering as much as possible
#pragma omp for nowait #pragma omp for nowait
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) { for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {

View File

@ -26,7 +26,7 @@ typedef struct {
} MIC_PART_SMALL; } MIC_PART_SMALL;
class MICCollimatorPhysics : DKSAlogorithms{ class MICCollimatorPhysics : public DKSCollimatorPhysics {
private: private:

View File

@ -6,15 +6,18 @@
MICFFT::MICFFT(MICBase *base) { MICFFT::MICFFT(MICBase *base) {
m_micbase = base; m_micbase = base;
m_fftsetup = false;
} }
MICFFT::~MICFFT() { MICFFT::~MICFFT() {
if (m_fftsetup) {
#pragma offload target(mic:0) #pragma offload target(mic:0)
{ {
DftiFreeDescriptor(&FFTHandle_m); DftiFreeDescriptor(&FFTHandle_m);
DftiFreeDescriptor(&handle); DftiFreeDescriptor(&handle);
} }
} }
}
//setup fft //setup fft
int MICFFT::setupFFT(int ndim, int N[3]) { int MICFFT::setupFFT(int ndim, int N[3]) {
@ -35,7 +38,7 @@ int MICFFT::setupFFT(int ndim, int N[3]) {
} }
m_fftsetup = true;
return DKS_SUCCESS; return DKS_SUCCESS;
} }
//BENI: //BENI:
@ -122,8 +125,8 @@ int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool for
} }
//execute iFFT //execute iFFT
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3]) { int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
return mic_executeFFT(mem_ptr, ndim, N, -1, false); return executeFFT(mem_ptr, ndim, N, -1, false);
} }
//execute REAL->COMPLEX FFT //execute REAL->COMPLEX FFT

View File

@ -7,13 +7,14 @@
#include <offload.h> #include <offload.h>
#include <mkl_dfti.h> #include <mkl_dfti.h>
#include "../Algorithm/DKSFFT.h" #include "../Algorithms/FFT.h"
#include "MICBase.h" #include "MICBase.h"
class MICFFT : public DKSFFT { class MICFFT : public DKSFFT {
private: private:
bool m_fftsetup;
MICBase *m_micbase; MICBase *m_micbase;
/// Internal FFT object for performing serial FFTs. /// Internal FFT object for performing serial FFTs.
@ -74,6 +75,18 @@ public:
/* normalize IFFT on MIC */ /* normalize IFFT on MIC */
int normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId = -1); int normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId = -1);
/**
* Info: destroy default FFT plans
* Return: success or error code
*/
int destroyFFT() { return DKS_SUCCESS; }
/*
Info: execute normalize for complex to real iFFT
Return: success or error code
*/
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) { return DKS_SUCCESS; }
}; };
#endif #endif

View File

@ -129,7 +129,9 @@ int main(int argc, char *argv[]) {
//init random //init random
base.callInitRandoms(numpart); base.callInitRandoms(numpart);
//**test collimator physics and sort***// //**test collimator physics and sort***//
void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr; void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr;
//allocate memory for particles //allocate memory for particles