7 Commits

15 changed files with 82 additions and 37 deletions

View File

@ -1,15 +1,16 @@
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
PROJECT (DKS)
SET (DKS_VERSION_MAJOR 1)
SET (DKS_VERSION_MINOR 0.1)
SET (DKS_VERSION_MINOR 0)
SET (DKS_VERSION_PATCH 2)
SET (PACKAGE \"dks\")
SET (PACKAGE_BUGREPORT \"locagoons.uldis@psi.ch\")
SET (PACKAGE_NAME \"DKS\")
SET (PACKAGE_STRING \"DKS\ 1.0.1\")
SET (PACKAGE_TARNAME \"dks\")
SET (PACKAGE_VERSION \"1.0.1\")
SET (VERSION \"1.0.1\")
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
SET (PACKAGE \"dks\")
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
SET (PACKAGE_NAME \"DKS\")
SET (PACKAGE_TARNAME \"dks\")
SET (DKS_VERSION_STR "\"${DKS_VERSION}\"")
SET (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
#get compiler name
@ -91,14 +92,16 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
SET (USE_CUDA ON)
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs)
MESSAGE (STATUS "cuda include: ${CUDA_INCLUDE_DIRS}")
MESSAGE (STATUS "cuda libs: ${CUDA_TOOLKIT_ROOT_DIR}/lib64")
MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lcudart -lcufft -lcublas -lnvToolsExt -DDKS_CUDA")
SET (CUDA_NVCC_FLAGS "-arch=sm_35 -DDEBUG -lcufft -lcublas -lcudart -fmad=false")
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DDEBUG -std=c++11 -D__wsu")
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${OPENCL_KERNELS}")
#if cuda version >= 7.0 add runtime commpilation flags
@ -123,6 +126,7 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
MESSAGE(STATUS "OpenCL version : ${OpenCL_VERSION_STRING}")
MESSAGE(STATUS "OpenCL include dir: ${OpenCL_INCLUDE_DIR}")
MESSAGE(STATUS "OpenCL library dir: ${OpenCL_LIBRARY}")
SET(CMAKE_SKIP_RPATH TRUE)
INCLUDE_DIRECTORIES(${OpenCL_INCLUDE_DIR})
LINK_DIRECTORIES(${OpenCL_LIBRARY})
ENDIF (OpenCL_FOUND)
@ -166,9 +170,19 @@ ADD_SUBDIRECTORY (auto-tuning)
CONFIGURE_FILE ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config_install.cmake )
CONFIGURE_FILE (${CMAKE_CURRENT_SOURCE_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion_install.cmake @ONLY)
### install files ###
INSTALL (
FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config_install.cmake
DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/cmake/${PROJECT_NAME}"
RENAME ${PROJECT_NAME}Config.cmake
)
INSTALL (
FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion_install.cmake
DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/cmake/${PROJECT_NAME}"
RENAME ${PROJECT_NAME}ConfigVersion.cmake
)

View File

@ -1,4 +1,7 @@
SET(${PROJECT_NAME}_CMAKE_CXX_FLAGS "${${PROJECT_NAME}_CXX_FLAGS}")
SET(${PROJECT_NAME}_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
SET(${PROJECT_NAME}_LIBRARY "dks")
SET(${PROJECT_NAME}_LIBRARY "dks")
SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
SET(DKS_VERSION ${DKS_VERSION})
SET(DKS_VERSION_STR ${DKS_VERSION_STR})

View File

@ -0,0 +1,13 @@
set(PACKAGE_VERSION @DKS_VERSION@)
if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "@DKS_VERSION_MAJOR@" AND "${PACKAGE_FIND_VERSION_MINOR}" EQUAL "@DKS_VERSION_MINOR@")
if ("${PACKAGE_FIND_VERSION_PATCH}" EQUAL "@DKS_VERSION_PATCH@")
set(PACKAGE_VERSION_EXACT TRUE)
elseif("${PACKAGE_FIND_VERSION_PATCH}" LESS "@DKS_VERSION_PATCH@")
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()
else()
set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()

View File

@ -19,7 +19,8 @@ public:
virtual ~DKSCollimatorPhysics() { }
virtual int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numpartices) = 0;
virtual int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numpartices,
bool enableRutherfordScattering = true) = 0;
virtual int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
void *rx_ptr, void *ry_ptr, void *rz_ptr,

View File

@ -8,7 +8,7 @@ __global__ void initcuRandState(curandState *state, int size, int seed = 0) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < size) {
curand_init(seed + idx, 0, 0, &state[idx]);
curand_init(seed, idx, 0, &state[idx]);
}
}
@ -41,14 +41,15 @@ CudaBase::~CudaBase() {
/*
create curandStates
*/
int CudaBase::cuda_createCurandStates(int size) {
int CudaBase::cuda_createCurandStates(int size, int seed) {
if (defaultRndSet == 1)
cuda_deleteCurandStates();
int threads = 128;
int blocks = size / threads + 1;
int seed = time(NULL);
if (seed == -1)
seed = time(NULL);
//std::cout << "sizeof: " << sizeof(curandState) << std::endl;
cudaMalloc(&defaultRndState, sizeof(curandState)*size);

View File

@ -41,7 +41,7 @@ public:
* and create a curandState with different seed for each array entry.
* Return success or error code
*/
int cuda_createCurandStates(int size);
int cuda_createCurandStates(int size, int seed = -1);
/**
* Delete curandState.

View File

@ -23,6 +23,7 @@
#define X0_M 9
#define I_M 10
#define DT_M 11
#define LOWENERGY_THR 12
#define BLOCK_SIZE 128
#define NUMPAR 12
@ -81,7 +82,7 @@ __device__ inline void energyLoss(double &Eng, bool &pdead, curandState &state,
Eng = Eng + delta_E / 1E3;
}
pdead = ((Eng<1E-4) || (dEdx>0));
pdead = ( (Eng < par[LOWENERGY_THR]) || (dEdx > 0) );
}
@ -117,7 +118,9 @@ __device__ inline void Rot(double &px, double &pz, double &x, double &z, double
pz = -pxz*sin(Psixz)*sin(thetacou) + pxz*cos(Psixz)*cos(thetacou);
}
__device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, double* par) {
__device__ inline void coulombScat(double3 &R, double3 &P, curandState &state,
double* par, bool enableRutherfordScattering)
{
double Eng = sqrt(dot(P, P) + 1.0) * M_P - M_P;
double gamma = (Eng + M_P) / M_P;
@ -145,7 +148,7 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
Rot(P.x, P.z, R.x, R.z, xplane, normP, thetacou, deltas, 1, par);
double P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
if(P2 < 0.0047) {
if( (P2 < 0.0047) && enableRutherfordScattering) {
double P3 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
double P4 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
@ -171,7 +174,7 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
Rot(P.y,P.z,R.y,R.z, yplane, normP, thetacou, deltas, 2, par);
P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
if(P2 < 0.0047) {
if( (P2 < 0.0047) && enableRutherfordScattering) {
double P3 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
double P4 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
@ -185,7 +188,7 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
template <typename T>
__global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state,
int numparticles)
int numparticles, bool enableRutherfordScattering)
{
//get global id and thread id
@ -227,7 +230,7 @@ __global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state
P.x = P.x * ptot / sq;
P.y = P.y * ptot / sq;
P.z = P.z * ptot / sq;
coulombScat(R[tid], P, s, p);
coulombScat(R[tid], P, s, p, enableRutherfordScattering);
data[idx].Pincol = P;
} else {
@ -250,7 +253,8 @@ __global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state
}
__global__ void kernelCollimatorPhysics2(CUDA_PART2_SMALL data, double *par,
curandState *state, int numparticles)
curandState *state, int numparticles,
bool enableRutherfordScattering)
{
//get global id and thread id
@ -288,7 +292,7 @@ __global__ void kernelCollimatorPhysics2(CUDA_PART2_SMALL data, double *par,
P.x = P.x * ptot / sq;
P.y = P.y * ptot / sq;
P.z = P.z * ptot / sq;
coulombScat(R[tid], P, s, p);
coulombScat(R[tid], P, s, p, enableRutherfordScattering);
data.Pincol[idx] = P;
} else {
@ -611,7 +615,8 @@ struct less_then
}
};
int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles)
int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
bool enableRutherfordScattering)
{
int threads = BLOCK_SIZE;
@ -624,7 +629,8 @@ int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int n
kernelCollimatorPhysics<<<blocks, threads, smem_size>>>((CUDA_PART_SMALL*)mem_ptr,
(double*)par_ptr,
m_base->cuda_getCurandStates(),
numparticles);
numparticles,
enableRutherfordScattering);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)

View File

@ -110,7 +110,7 @@ public:
*
*/
int CollimatorPhysics(void *mem_ptr, void *par_ptr,
int numpartices);
int numpartices, bool enableRutherfordScattering = true);
int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
void *rx_ptr, void *ry_ptr, void *rz_ptr,

View File

@ -746,11 +746,13 @@ int DKSBase::callCollimatorPhysics(void *mem_ptr, void *par_ptr,
}
int DKSBase::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles)
int DKSBase::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles,
bool enableRutherfordScattering)
{
if (apiCuda())
return CUDA_SAFECALL( ccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles) );
return CUDA_SAFECALL( ccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles,
enableRutherfordScattering) );
else if (apiOpenMP())
return MIC_SAFECALL( miccol->CollimatorPhysics(mem_ptr, par_ptr, numparticles) );
@ -807,9 +809,9 @@ int DKSBase::callCollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
}
int DKSBase::callInitRandoms(int size) {
int DKSBase::callInitRandoms(int size, int seed) {
if (apiCuda())
return CUDA_SAFECALL(cbase->cuda_createCurandStates(size));
return CUDA_SAFECALL(cbase->cuda_createCurandStates(size, seed));
else if (apiOpenCL())
return OPENCL_SAFECALL(oclbase->ocl_createRndStates(size));
else if (apiOpenMP())

View File

@ -1041,7 +1041,8 @@ public:
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
* TODO: opencl and mic implementations.
*/
int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles);
int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles,
bool enableRutherfordScattering = true);
/**
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
@ -1074,7 +1075,7 @@ public:
* Init random number states and save for reuse on device.
* TODO: opencl and mic implementations.
*/
int callInitRandoms(int size);
int callInitRandoms(int size, int seed = -1);
/**
* Integration code from ParallelTTracker from OPAL.

View File

@ -368,7 +368,9 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {
}
int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles) {
int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr,
int numparticles, boll enableRutherfordScattering)
{
//cast device memory pointers to appropriate types
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;

View File

@ -40,7 +40,8 @@ public:
~MICCollimatorPhysics() { };
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
bool enableRutherfordScattering = true);
int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
void *rx_ptr, void *ry_ptr, void *rz_ptr,

View File

@ -34,7 +34,7 @@ TODO:
2. boost.compute sort for user defined structure crashes
*/
int OpenCLCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr,
int numparticles)
int numparticles, bool enableRutherfordScattering)
{
/*
//set number of total threads, and number threads per block

View File

@ -52,7 +52,8 @@ public:
}
/* execute degrader code on device */
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
bool enableRutherfordScattering = true);
int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
void *rx_ptr, void *ry_ptr, void *rz_ptr,

View File

@ -53,7 +53,7 @@ ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
#TARGET_LINK_LIBRARIES(testGather dks)
#TARGET_LINK_LIBRARIES(testGatherAsync dks)
#TARGET_LINK_LIBRARIES(testTranspose dks)
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks)
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES})
#TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks)
#TARGET_LINK_LIBRARIES(testPush dks)
#TARGET_LINK_LIBRARIES(testFFTSolverMIC dks)