Collimator physics for MIC fix
This commit is contained in:
@ -30,7 +30,7 @@ public:
|
|||||||
/* destructor */
|
/* destructor */
|
||||||
~CudaGreensFunction();
|
~CudaGreensFunction();
|
||||||
|
|
||||||
/*
|
/**
|
||||||
Info: calc itegral on device memory (taken from OPAL src code)
|
Info: calc itegral on device memory (taken from OPAL src code)
|
||||||
Return: success or error code
|
Return: success or error code
|
||||||
*/
|
*/
|
||||||
@ -38,20 +38,20 @@ public:
|
|||||||
double hr_m0, double hr_m1, double hr_m2,
|
double hr_m0, double hr_m1, double hr_m2,
|
||||||
int streamId = -1);
|
int streamId = -1);
|
||||||
|
|
||||||
/*
|
/**
|
||||||
Info: integration of rho2_m field (taken from OPAL src code)
|
Info: integration of rho2_m field (taken from OPAL src code)
|
||||||
Return: success or error code
|
Return: success or error code
|
||||||
*/
|
*/
|
||||||
int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
||||||
int streamId = -1);
|
int streamId = -1);
|
||||||
|
|
||||||
/*
|
/**
|
||||||
Info: mirror rho field (taken from OPAL src code)
|
Info: mirror rho field (taken from OPAL src code)
|
||||||
Return: succes or error code
|
Return: succes or error code
|
||||||
*/
|
*/
|
||||||
int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
|
int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
|
||||||
|
|
||||||
/*
|
/**
|
||||||
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
|
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
|
||||||
Return: success or error code
|
Return: success or error code
|
||||||
*/
|
*/
|
||||||
|
@ -18,30 +18,28 @@ int MICBase::mic_createRandStreams(int size) {
|
|||||||
|
|
||||||
int seed = time(NULL);
|
int seed = time(NULL);
|
||||||
|
|
||||||
#pragma offload target(mic:m_device_id) inout(defaultRndSet) in(seed)
|
int numThreads = 0;
|
||||||
|
#pragma offload target(mic:m_device_id) inout(numThreads)
|
||||||
{
|
{
|
||||||
|
|
||||||
//get the number of threads
|
|
||||||
int numThreads;
|
|
||||||
|
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
numThreads = omp_get_num_threads();
|
numThreads = omp_get_num_threads();
|
||||||
|
}
|
||||||
|
|
||||||
//if default rnd stream already allocated delete the array
|
defaultRndStream = mic_allocateMemory<VSLStreamStatePtr>(numThreads);
|
||||||
if (defaultRndSet == 1)
|
VSLStreamStatePtr *tmpRndStream = (VSLStreamStatePtr*) defaultRndStream;
|
||||||
delete[] defaultRndStream;
|
maxThreads = numThreads;
|
||||||
|
|
||||||
//allocate defaultRndStream array
|
|
||||||
defaultRndStream = new VSLStreamStatePtr[numThreads];
|
|
||||||
|
|
||||||
|
#pragma offload target(mic:m_device_id) \
|
||||||
|
in(tmpRndStream:length(0) DKS_REUSE DKS_RETAIN) \
|
||||||
|
in(seed)
|
||||||
|
{
|
||||||
//create stream states for each thread
|
//create stream states for each thread
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i = 0; i < omp_get_num_threads(); i++)
|
for (int i = 0; i < omp_get_num_threads(); i++)
|
||||||
vslNewStream(&defaultRndStream[i], VSL_BRNG_MT2203, seed + i);
|
vslNewStream(&tmpRndStream[i], VSL_BRNG_MT2203, seed + i);
|
||||||
|
|
||||||
defaultRndSet = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
defaultRndSet = 1;
|
||||||
return DKS_SUCCESS;
|
return DKS_SUCCESS;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -49,15 +47,8 @@ int MICBase::mic_createRandStreams(int size) {
|
|||||||
//delete default rand streams
|
//delete default rand streams
|
||||||
int MICBase::mic_deleteRandStreams() {
|
int MICBase::mic_deleteRandStreams() {
|
||||||
|
|
||||||
#pragma offload target(mic:m_device_id) inout(defaultRndSet)
|
//mic_freeMemory<VSLStreamStatePtr>(defaultRndStream, 236);
|
||||||
{
|
return DKS_SUCCESS;
|
||||||
if (defaultRndSet == 1) {
|
|
||||||
delete[] defaultRndStream;
|
|
||||||
defaultRndSet = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return DKS_ERROR;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//create a new signal for the mic
|
//create a new signal for the mic
|
||||||
|
@ -30,14 +30,19 @@ class MICBase {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<int> micStreams;
|
std::vector<int> micStreams;
|
||||||
|
int maxThreads;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
|
||||||
int defaultRndSet;
|
int defaultRndSet;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
VSLStreamStatePtr *defaultRndStream;
|
|
||||||
|
//#pragma offload_attribute(push,target(mic))
|
||||||
|
void *defaultRndStream; //VSLSStreamStatePtr
|
||||||
|
void *testPtr;
|
||||||
|
|
||||||
|
//#pragma offload_attribute(pop)
|
||||||
|
|
||||||
int m_device_id;
|
int m_device_id;
|
||||||
|
|
||||||
/* constructor */
|
/* constructor */
|
||||||
@ -202,7 +207,6 @@ public:
|
|||||||
#pragma offload_transfer target(mic:m_device_id) nocopy(tmp_ptr:length(totalsize) DKS_REUSE DKS_FREE)
|
#pragma offload_transfer target(mic:m_device_id) nocopy(tmp_ptr:length(totalsize) DKS_REUSE DKS_FREE)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
return DKS_SUCCESS;
|
return DKS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,7 +292,7 @@ void energyLoss(double &Eng, int &pdead, double *par, VSLStreamStatePtr &stream)
|
|||||||
|
|
||||||
const double deltas = par[DT_M] * beta * C;
|
const double deltas = par[DT_M] * beta * C;
|
||||||
const double deltasrho = deltas * 100 * par[RHO_M];
|
const double deltasrho = deltas * 100 * par[RHO_M];
|
||||||
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5);
|
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
|
||||||
|
|
||||||
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
|
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
|
||||||
const double Ts = (Eng * 1E6) / 1.0073;
|
const double Ts = (Eng * 1E6) / 1.0073;
|
||||||
@ -338,7 +338,7 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {
|
|||||||
|
|
||||||
const double deltas = par[DT_M] * beta * C;
|
const double deltas = par[DT_M] * beta * C;
|
||||||
const double deltasrho = deltas * 100 * par[RHO_M];
|
const double deltasrho = deltas * 100 * par[RHO_M];
|
||||||
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5);
|
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
|
||||||
|
|
||||||
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
|
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
|
||||||
const double Ts = (Eng * 1E6) / 1.0073;
|
const double Ts = (Eng * 1E6) / 1.0073;
|
||||||
@ -373,16 +373,18 @@ int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int nu
|
|||||||
//cast device memory pointers to appropriate types
|
//cast device memory pointers to appropriate types
|
||||||
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
|
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
|
||||||
double *par = (double*) par_ptr;
|
double *par = (double*) par_ptr;
|
||||||
|
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
|
||||||
|
|
||||||
#pragma offload target(mic:m_micbase->m_device_id) \
|
#pragma offload target(mic:m_micbase->m_device_id) \
|
||||||
inout(data:length(0) DKS_RETAIN DKS_REUSE) \
|
inout(data:length(0) DKS_RETAIN DKS_REUSE) \
|
||||||
in(par:length(0) DKS_RETAIN DKS_REUSE) \
|
in(par:length(0) DKS_RETAIN DKS_REUSE) \
|
||||||
|
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
|
||||||
in(numparticles)
|
in(numparticles)
|
||||||
{
|
{
|
||||||
|
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
|
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
|
||||||
|
|
||||||
//for loop trough particles if not checkhit set label to -2 and update R.x
|
//for loop trough particles if not checkhit set label to -2 and update R.x
|
||||||
|
|
||||||
@ -459,6 +461,8 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
|
|||||||
int padding = numparticles % MIC_WIDTH;
|
int padding = numparticles % MIC_WIDTH;
|
||||||
int totalpart = numparticles + padding;
|
int totalpart = numparticles + padding;
|
||||||
|
|
||||||
|
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
|
||||||
|
|
||||||
#pragma offload target (mic:0) \
|
#pragma offload target (mic:0) \
|
||||||
in(label:length(0) DKS_REUSE DKS_RETAIN) \
|
in(label:length(0) DKS_REUSE DKS_RETAIN) \
|
||||||
in(localID:length(0) DKS_REUSE DKS_RETAIN) \
|
in(localID:length(0) DKS_REUSE DKS_RETAIN) \
|
||||||
@ -469,14 +473,16 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
|
|||||||
in(py:length(0) DKS_REUSE DKS_RETAIN) \
|
in(py:length(0) DKS_REUSE DKS_RETAIN) \
|
||||||
in(pz:length(0) DKS_REUSE DKS_RETAIN) \
|
in(pz:length(0) DKS_REUSE DKS_RETAIN) \
|
||||||
in(par:length(0) DKS_RETAIN DKS_REUSE) \
|
in(par:length(0) DKS_RETAIN DKS_REUSE) \
|
||||||
|
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
|
||||||
in(totalpart)
|
in(totalpart)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
//every thread gets its own rnd stream state
|
//every thread gets its own rnd stream state
|
||||||
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
|
//VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
|
||||||
|
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
|
||||||
|
|
||||||
#pragma omp for nowait
|
#pragma omp for nowait
|
||||||
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
|
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
|
||||||
@ -512,10 +518,12 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
|
|||||||
double Eng = (sq - 1) * M_P;
|
double Eng = (sq - 1) * M_P;
|
||||||
double dEdx = 0;
|
double dEdx = 0;
|
||||||
|
|
||||||
|
|
||||||
if (label[i] == 0) {
|
if (label[i] == 0) {
|
||||||
energyLoss(Eng, dEdx, par, randv, i - ii);
|
energyLoss(Eng, dEdx, par, randv, i - ii);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (Eng > 1e-4 && dEdx < 0) {
|
if (Eng > 1e-4 && dEdx < 0) {
|
||||||
double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
|
double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
|
||||||
sq = sqrt(dot(px[i], py[i], pz[i]));
|
sq = sqrt(dot(px[i], py[i], pz[i]));
|
||||||
@ -531,6 +539,7 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
|
|||||||
|
|
||||||
} //end outer energy loss loop
|
} //end outer energy loss loop
|
||||||
|
|
||||||
|
|
||||||
//vectorize coulomb scattering as much as possible
|
//vectorize coulomb scattering as much as possible
|
||||||
#pragma omp for nowait
|
#pragma omp for nowait
|
||||||
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
|
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
|
||||||
|
@ -26,7 +26,7 @@ typedef struct {
|
|||||||
} MIC_PART_SMALL;
|
} MIC_PART_SMALL;
|
||||||
|
|
||||||
|
|
||||||
class MICCollimatorPhysics : DKSAlogorithms{
|
class MICCollimatorPhysics : public DKSCollimatorPhysics {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
@ -6,15 +6,18 @@
|
|||||||
|
|
||||||
MICFFT::MICFFT(MICBase *base) {
|
MICFFT::MICFFT(MICBase *base) {
|
||||||
m_micbase = base;
|
m_micbase = base;
|
||||||
|
m_fftsetup = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
MICFFT::~MICFFT() {
|
MICFFT::~MICFFT() {
|
||||||
|
if (m_fftsetup) {
|
||||||
#pragma offload target(mic:0)
|
#pragma offload target(mic:0)
|
||||||
{
|
{
|
||||||
DftiFreeDescriptor(&FFTHandle_m);
|
DftiFreeDescriptor(&FFTHandle_m);
|
||||||
DftiFreeDescriptor(&handle);
|
DftiFreeDescriptor(&handle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//setup fft
|
//setup fft
|
||||||
int MICFFT::setupFFT(int ndim, int N[3]) {
|
int MICFFT::setupFFT(int ndim, int N[3]) {
|
||||||
@ -35,7 +38,7 @@ int MICFFT::setupFFT(int ndim, int N[3]) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_fftsetup = true;
|
||||||
return DKS_SUCCESS;
|
return DKS_SUCCESS;
|
||||||
}
|
}
|
||||||
//BENI:
|
//BENI:
|
||||||
@ -122,8 +125,8 @@ int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool for
|
|||||||
}
|
}
|
||||||
|
|
||||||
//execute iFFT
|
//execute iFFT
|
||||||
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3]) {
|
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
|
||||||
return mic_executeFFT(mem_ptr, ndim, N, -1, false);
|
return executeFFT(mem_ptr, ndim, N, -1, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
//execute REAL->COMPLEX FFT
|
//execute REAL->COMPLEX FFT
|
||||||
|
@ -7,13 +7,14 @@
|
|||||||
#include <offload.h>
|
#include <offload.h>
|
||||||
#include <mkl_dfti.h>
|
#include <mkl_dfti.h>
|
||||||
|
|
||||||
#include "../Algorithm/DKSFFT.h"
|
#include "../Algorithms/FFT.h"
|
||||||
#include "MICBase.h"
|
#include "MICBase.h"
|
||||||
|
|
||||||
class MICFFT : public DKSFFT {
|
class MICFFT : public DKSFFT {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
bool m_fftsetup;
|
||||||
MICBase *m_micbase;
|
MICBase *m_micbase;
|
||||||
|
|
||||||
/// Internal FFT object for performing serial FFTs.
|
/// Internal FFT object for performing serial FFTs.
|
||||||
@ -74,6 +75,18 @@ public:
|
|||||||
/* normalize IFFT on MIC */
|
/* normalize IFFT on MIC */
|
||||||
int normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId = -1);
|
int normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId = -1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Info: destroy default FFT plans
|
||||||
|
* Return: success or error code
|
||||||
|
*/
|
||||||
|
int destroyFFT() { return DKS_SUCCESS; }
|
||||||
|
|
||||||
|
/*
|
||||||
|
Info: execute normalize for complex to real iFFT
|
||||||
|
Return: success or error code
|
||||||
|
*/
|
||||||
|
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) { return DKS_SUCCESS; }
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -129,7 +129,9 @@ int main(int argc, char *argv[]) {
|
|||||||
//init random
|
//init random
|
||||||
base.callInitRandoms(numpart);
|
base.callInitRandoms(numpart);
|
||||||
|
|
||||||
|
|
||||||
//**test collimator physics and sort***//
|
//**test collimator physics and sort***//
|
||||||
|
|
||||||
void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr;
|
void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr;
|
||||||
|
|
||||||
//allocate memory for particles
|
//allocate memory for particles
|
||||||
|
Reference in New Issue
Block a user