From 87cdf52f07c4ad4e9506277e0749a03db5493343 Mon Sep 17 00:00:00 2001 From: Uldis Locans Date: Wed, 16 Nov 2016 18:12:17 +0100 Subject: [PATCH] Collimator physics for MIC fix --- src/CUDA/CudaGreensFunction.cuh | 8 +++--- src/MIC/MICBase.cpp | 41 ++++++++++++------------------- src/MIC/MICBase.h | 12 ++++++--- src/MIC/MICCollimatorPhysics.cpp | 27 +++++++++++++------- src/MIC/MICCollimatorPhysics.h | 4 +-- src/MIC/MICFFT.cpp | 15 ++++++----- src/MIC/MICFFT.h | 15 ++++++++++- test/testCollimatorPhysicsSoA.cpp | 6 +++-- 8 files changed, 75 insertions(+), 53 deletions(-) diff --git a/src/CUDA/CudaGreensFunction.cuh b/src/CUDA/CudaGreensFunction.cuh index 5095e7a..ea8d8ce 100644 --- a/src/CUDA/CudaGreensFunction.cuh +++ b/src/CUDA/CudaGreensFunction.cuh @@ -30,7 +30,7 @@ public: /* destructor */ ~CudaGreensFunction(); - /* + /** Info: calc itegral on device memory (taken from OPAL src code) Return: success or error code */ @@ -38,20 +38,20 @@ public: double hr_m0, double hr_m1, double hr_m2, int streamId = -1); - /* + /** Info: integration of rho2_m field (taken from OPAL src code) Return: success or error code */ int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K, int streamId = -1); - /* + /** Info: mirror rho field (taken from OPAL src code) Return: succes or error code */ int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1); - /* + /** Info: multiply complex fields already on the GPU memory, result will be put in ptr1 Return: success or error code */ diff --git a/src/MIC/MICBase.cpp b/src/MIC/MICBase.cpp index 43c15c4..d328e52 100644 --- a/src/MIC/MICBase.cpp +++ b/src/MIC/MICBase.cpp @@ -18,30 +18,28 @@ int MICBase::mic_createRandStreams(int size) { int seed = time(NULL); -#pragma offload target(mic:m_device_id) inout(defaultRndSet) in(seed) + int numThreads = 0; +#pragma offload target(mic:m_device_id) inout(numThreads) { - - //get the number of threads - int numThreads; - #pragma omp parallel numThreads = omp_get_num_threads(); + } - //if default rnd stream already allocated delete the array - if (defaultRndSet == 1) - delete[] defaultRndStream; - - //allocate defaultRndStream array - defaultRndStream = new VSLStreamStatePtr[numThreads]; - + defaultRndStream = mic_allocateMemory(numThreads); + VSLStreamStatePtr *tmpRndStream = (VSLStreamStatePtr*) defaultRndStream; + maxThreads = numThreads; + +#pragma offload target(mic:m_device_id) \ + in(tmpRndStream:length(0) DKS_REUSE DKS_RETAIN) \ + in(seed) + { //create stream states for each thread #pragma omp parallel for for (int i = 0; i < omp_get_num_threads(); i++) - vslNewStream(&defaultRndStream[i], VSL_BRNG_MT2203, seed + i); - - defaultRndSet = 1; + vslNewStream(&tmpRndStream[i], VSL_BRNG_MT2203, seed + i); } - + + defaultRndSet = 1; return DKS_SUCCESS; } @@ -49,15 +47,8 @@ int MICBase::mic_createRandStreams(int size) { //delete default rand streams int MICBase::mic_deleteRandStreams() { -#pragma offload target(mic:m_device_id) inout(defaultRndSet) - { - if (defaultRndSet == 1) { - delete[] defaultRndStream; - defaultRndSet = -1; - } - } - - return DKS_ERROR; + //mic_freeMemory(defaultRndStream, 236); + return DKS_SUCCESS; } //create a new signal for the mic diff --git a/src/MIC/MICBase.h b/src/MIC/MICBase.h index 92b4fe9..0087778 100644 --- a/src/MIC/MICBase.h +++ b/src/MIC/MICBase.h @@ -30,14 +30,19 @@ class MICBase { private: std::vector micStreams; + int maxThreads; protected: - int defaultRndSet; - public: - VSLStreamStatePtr *defaultRndStream; + +//#pragma offload_attribute(push,target(mic)) + void *defaultRndStream; //VSLSStreamStatePtr + void *testPtr; + +//#pragma offload_attribute(pop) + int m_device_id; /* constructor */ @@ -202,7 +207,6 @@ public: #pragma offload_transfer target(mic:m_device_id) nocopy(tmp_ptr:length(totalsize) DKS_REUSE DKS_FREE) { } - return DKS_SUCCESS; } diff --git a/src/MIC/MICCollimatorPhysics.cpp b/src/MIC/MICCollimatorPhysics.cpp index 6a1b937..e55b623 100644 --- a/src/MIC/MICCollimatorPhysics.cpp +++ b/src/MIC/MICCollimatorPhysics.cpp @@ -292,7 +292,7 @@ void energyLoss(double &Eng, int &pdead, double *par, VSLStreamStatePtr &stream) const double deltas = par[DT_M] * beta * C; const double deltasrho = deltas * 100 * par[RHO_M]; - const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5); + const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5); if ( (Eng > 0.00001) && (Eng < 0.0006) ) { const double Ts = (Eng * 1E6) / 1.0073; @@ -338,7 +338,7 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) { const double deltas = par[DT_M] * beta * C; const double deltasrho = deltas * 100 * par[RHO_M]; - const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5); + const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5); if ( (Eng > 0.00001) && (Eng < 0.0006) ) { const double Ts = (Eng * 1E6) / 1.0073; @@ -373,16 +373,18 @@ int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int nu //cast device memory pointers to appropriate types MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr; double *par = (double*) par_ptr; + VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream; #pragma offload target(mic:m_micbase->m_device_id) \ inout(data:length(0) DKS_RETAIN DKS_REUSE) \ in(par:length(0) DKS_RETAIN DKS_REUSE) \ + in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \ in(numparticles) { #pragma omp parallel { - VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()]; + VSLStreamStatePtr stream = streamArr[omp_get_thread_num()]; //for loop trough particles if not checkhit set label to -2 and update R.x @@ -459,6 +461,8 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt int padding = numparticles % MIC_WIDTH; int totalpart = numparticles + padding; + VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream; + #pragma offload target (mic:0) \ in(label:length(0) DKS_REUSE DKS_RETAIN) \ in(localID:length(0) DKS_REUSE DKS_RETAIN) \ @@ -469,14 +473,16 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt in(py:length(0) DKS_REUSE DKS_RETAIN) \ in(pz:length(0) DKS_REUSE DKS_RETAIN) \ in(par:length(0) DKS_RETAIN DKS_REUSE) \ + in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \ in(totalpart) { + #pragma omp parallel { //every thread gets its own rnd stream state - VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()]; - + //VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()]; + VSLStreamStatePtr stream = streamArr[omp_get_thread_num()]; #pragma omp for nowait for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) { @@ -512,9 +518,11 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt double Eng = (sq - 1) * M_P; double dEdx = 0; + if (label[i] == 0) { energyLoss(Eng, dEdx, par, randv, i - ii); } + if (Eng > 1e-4 && dEdx < 0) { double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P; @@ -526,11 +534,12 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt if (Eng < 1e-4 || dEdx > 0) label[i] = -1; - + } //end inner energy loss loop - - } //end outer energy loss loop + } //end outer energy loss loop + + //vectorize coulomb scattering as much as possible #pragma omp for nowait for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) { @@ -540,7 +549,7 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt } //end omp parallel } //end offload - + return DKS_SUCCESS; } diff --git a/src/MIC/MICCollimatorPhysics.h b/src/MIC/MICCollimatorPhysics.h index 0795779..c10bd85 100644 --- a/src/MIC/MICCollimatorPhysics.h +++ b/src/MIC/MICCollimatorPhysics.h @@ -26,7 +26,7 @@ typedef struct { } MIC_PART_SMALL; -class MICCollimatorPhysics : DKSAlogorithms{ +class MICCollimatorPhysics : public DKSCollimatorPhysics { private: @@ -38,7 +38,7 @@ public: m_micbase = base; }; - ~MICCollimatorPhysics() { }; + ~MICCollimatorPhysics() { }; int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles); diff --git a/src/MIC/MICFFT.cpp b/src/MIC/MICFFT.cpp index ab82c83..67c47b3 100644 --- a/src/MIC/MICFFT.cpp +++ b/src/MIC/MICFFT.cpp @@ -6,13 +6,16 @@ MICFFT::MICFFT(MICBase *base) { m_micbase = base; + m_fftsetup = false; } MICFFT::~MICFFT() { + if (m_fftsetup) { #pragma offload target(mic:0) - { - DftiFreeDescriptor(&FFTHandle_m); - DftiFreeDescriptor(&handle); + { + DftiFreeDescriptor(&FFTHandle_m); + DftiFreeDescriptor(&handle); + } } } @@ -35,7 +38,7 @@ int MICFFT::setupFFT(int ndim, int N[3]) { } - + m_fftsetup = true; return DKS_SUCCESS; } //BENI: @@ -122,8 +125,8 @@ int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool for } //execute iFFT -int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3]) { - return mic_executeFFT(mem_ptr, ndim, N, -1, false); +int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3], int streamId) { + return executeFFT(mem_ptr, ndim, N, -1, false); } //execute REAL->COMPLEX FFT diff --git a/src/MIC/MICFFT.h b/src/MIC/MICFFT.h index 626fc19..ade95c7 100644 --- a/src/MIC/MICFFT.h +++ b/src/MIC/MICFFT.h @@ -7,13 +7,14 @@ #include #include -#include "../Algorithm/DKSFFT.h" +#include "../Algorithms/FFT.h" #include "MICBase.h" class MICFFT : public DKSFFT { private: + bool m_fftsetup; MICBase *m_micbase; /// Internal FFT object for performing serial FFTs. @@ -74,6 +75,18 @@ public: /* normalize IFFT on MIC */ int normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId = -1); + /** + * Info: destroy default FFT plans + * Return: success or error code + */ + int destroyFFT() { return DKS_SUCCESS; } + + /* + Info: execute normalize for complex to real iFFT + Return: success or error code + */ + int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) { return DKS_SUCCESS; } + }; #endif diff --git a/test/testCollimatorPhysicsSoA.cpp b/test/testCollimatorPhysicsSoA.cpp index bc4bf0b..7150c14 100644 --- a/test/testCollimatorPhysicsSoA.cpp +++ b/test/testCollimatorPhysicsSoA.cpp @@ -129,7 +129,9 @@ int main(int argc, char *argv[]) { //init random base.callInitRandoms(numpart); + //**test collimator physics and sort***// + void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr; //allocate memory for particles @@ -210,8 +212,8 @@ int main(int argc, char *argv[]) { base.freeMemory(pz_ptr, numpart); base.freeMemory(param_ptr, 12); - - /* + + /* std::cout << std::fixed << std::setprecision(4); for (int i = 0; i < 10; i++) { std::cout << p.label[i] << "\t" << p.rx[i]