Collimator physics for MIC fix

This commit is contained in:
Uldis Locans
2016-11-16 18:12:17 +01:00
parent 027bdc01f5
commit 87cdf52f07
8 changed files with 75 additions and 53 deletions

View File

@ -30,7 +30,7 @@ public:
/* destructor */
~CudaGreensFunction();
/*
/**
Info: calc itegral on device memory (taken from OPAL src code)
Return: success or error code
*/
@ -38,20 +38,20 @@ public:
double hr_m0, double hr_m1, double hr_m2,
int streamId = -1);
/*
/**
Info: integration of rho2_m field (taken from OPAL src code)
Return: success or error code
*/
int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1);
/*
/**
Info: mirror rho field (taken from OPAL src code)
Return: succes or error code
*/
int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
/*
/**
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
Return: success or error code
*/

View File

@ -18,30 +18,28 @@ int MICBase::mic_createRandStreams(int size) {
int seed = time(NULL);
#pragma offload target(mic:m_device_id) inout(defaultRndSet) in(seed)
int numThreads = 0;
#pragma offload target(mic:m_device_id) inout(numThreads)
{
//get the number of threads
int numThreads;
#pragma omp parallel
numThreads = omp_get_num_threads();
}
//if default rnd stream already allocated delete the array
if (defaultRndSet == 1)
delete[] defaultRndStream;
//allocate defaultRndStream array
defaultRndStream = new VSLStreamStatePtr[numThreads];
defaultRndStream = mic_allocateMemory<VSLStreamStatePtr>(numThreads);
VSLStreamStatePtr *tmpRndStream = (VSLStreamStatePtr*) defaultRndStream;
maxThreads = numThreads;
#pragma offload target(mic:m_device_id) \
in(tmpRndStream:length(0) DKS_REUSE DKS_RETAIN) \
in(seed)
{
//create stream states for each thread
#pragma omp parallel for
for (int i = 0; i < omp_get_num_threads(); i++)
vslNewStream(&defaultRndStream[i], VSL_BRNG_MT2203, seed + i);
defaultRndSet = 1;
vslNewStream(&tmpRndStream[i], VSL_BRNG_MT2203, seed + i);
}
defaultRndSet = 1;
return DKS_SUCCESS;
}
@ -49,15 +47,8 @@ int MICBase::mic_createRandStreams(int size) {
//delete default rand streams
int MICBase::mic_deleteRandStreams() {
#pragma offload target(mic:m_device_id) inout(defaultRndSet)
{
if (defaultRndSet == 1) {
delete[] defaultRndStream;
defaultRndSet = -1;
}
}
return DKS_ERROR;
//mic_freeMemory<VSLStreamStatePtr>(defaultRndStream, 236);
return DKS_SUCCESS;
}
//create a new signal for the mic

View File

@ -30,14 +30,19 @@ class MICBase {
private:
std::vector<int> micStreams;
int maxThreads;
protected:
int defaultRndSet;
public:
VSLStreamStatePtr *defaultRndStream;
//#pragma offload_attribute(push,target(mic))
void *defaultRndStream; //VSLSStreamStatePtr
void *testPtr;
//#pragma offload_attribute(pop)
int m_device_id;
/* constructor */
@ -202,7 +207,6 @@ public:
#pragma offload_transfer target(mic:m_device_id) nocopy(tmp_ptr:length(totalsize) DKS_REUSE DKS_FREE)
{
}
return DKS_SUCCESS;
}

View File

@ -292,7 +292,7 @@ void energyLoss(double &Eng, int &pdead, double *par, VSLStreamStatePtr &stream)
const double deltas = par[DT_M] * beta * C;
const double deltasrho = deltas * 100 * par[RHO_M];
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5);
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
const double Ts = (Eng * 1E6) / 1.0073;
@ -338,7 +338,7 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {
const double deltas = par[DT_M] * beta * C;
const double deltasrho = deltas * 100 * par[RHO_M];
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5);
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
const double Ts = (Eng * 1E6) / 1.0073;
@ -373,16 +373,18 @@ int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int nu
//cast device memory pointers to appropriate types
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
double *par = (double*) par_ptr;
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
#pragma offload target(mic:m_micbase->m_device_id) \
inout(data:length(0) DKS_RETAIN DKS_REUSE) \
in(par:length(0) DKS_RETAIN DKS_REUSE) \
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
in(numparticles)
{
#pragma omp parallel
{
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
//for loop trough particles if not checkhit set label to -2 and update R.x
@ -459,6 +461,8 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
int padding = numparticles % MIC_WIDTH;
int totalpart = numparticles + padding;
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
#pragma offload target (mic:0) \
in(label:length(0) DKS_REUSE DKS_RETAIN) \
in(localID:length(0) DKS_REUSE DKS_RETAIN) \
@ -469,14 +473,16 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
in(py:length(0) DKS_REUSE DKS_RETAIN) \
in(pz:length(0) DKS_REUSE DKS_RETAIN) \
in(par:length(0) DKS_RETAIN DKS_REUSE) \
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
in(totalpart)
{
#pragma omp parallel
{
//every thread gets its own rnd stream state
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
//VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
#pragma omp for nowait
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
@ -512,10 +518,12 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
double Eng = (sq - 1) * M_P;
double dEdx = 0;
if (label[i] == 0) {
energyLoss(Eng, dEdx, par, randv, i - ii);
}
if (Eng > 1e-4 && dEdx < 0) {
double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
sq = sqrt(dot(px[i], py[i], pz[i]));
@ -531,6 +539,7 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
} //end outer energy loss loop
//vectorize coulomb scattering as much as possible
#pragma omp for nowait
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {

View File

@ -26,7 +26,7 @@ typedef struct {
} MIC_PART_SMALL;
class MICCollimatorPhysics : DKSAlogorithms{
class MICCollimatorPhysics : public DKSCollimatorPhysics {
private:
@ -38,7 +38,7 @@ public:
m_micbase = base;
};
~MICCollimatorPhysics() { };
~MICCollimatorPhysics() { };
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);

View File

@ -6,13 +6,16 @@
MICFFT::MICFFT(MICBase *base) {
m_micbase = base;
m_fftsetup = false;
}
MICFFT::~MICFFT() {
if (m_fftsetup) {
#pragma offload target(mic:0)
{
DftiFreeDescriptor(&FFTHandle_m);
DftiFreeDescriptor(&handle);
{
DftiFreeDescriptor(&FFTHandle_m);
DftiFreeDescriptor(&handle);
}
}
}
@ -35,7 +38,7 @@ int MICFFT::setupFFT(int ndim, int N[3]) {
}
m_fftsetup = true;
return DKS_SUCCESS;
}
//BENI:
@ -122,8 +125,8 @@ int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool for
}
//execute iFFT
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3]) {
return mic_executeFFT(mem_ptr, ndim, N, -1, false);
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
return executeFFT(mem_ptr, ndim, N, -1, false);
}
//execute REAL->COMPLEX FFT

View File

@ -7,13 +7,14 @@
#include <offload.h>
#include <mkl_dfti.h>
#include "../Algorithm/DKSFFT.h"
#include "../Algorithms/FFT.h"
#include "MICBase.h"
class MICFFT : public DKSFFT {
private:
bool m_fftsetup;
MICBase *m_micbase;
/// Internal FFT object for performing serial FFTs.
@ -74,6 +75,18 @@ public:
/* normalize IFFT on MIC */
int normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId = -1);
/**
* Info: destroy default FFT plans
* Return: success or error code
*/
int destroyFFT() { return DKS_SUCCESS; }
/*
Info: execute normalize for complex to real iFFT
Return: success or error code
*/
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) { return DKS_SUCCESS; }
};
#endif

View File

@ -129,7 +129,9 @@ int main(int argc, char *argv[]) {
//init random
base.callInitRandoms(numpart);
//**test collimator physics and sort***//
void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr;
//allocate memory for particles
@ -211,7 +213,7 @@ int main(int argc, char *argv[]) {
base.freeMemory<double>(param_ptr, 12);
/*
/*
std::cout << std::fixed << std::setprecision(4);
for (int i = 0; i < 10; i++) {
std::cout << p.label[i] << "\t" << p.rx[i]