Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
9d21fc5400 | |||
21b4c591b9 | |||
efa3311b45 | |||
ee33aacdd6 | |||
48f6f9c25e | |||
db79798da5 |
@ -1,8 +1,8 @@
|
||||
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||
PROJECT (DKS)
|
||||
SET (DKS_VERSION_MAJOR 1)
|
||||
SET (DKS_VERSION_MINOR 1)
|
||||
SET (DKS_VERSION_PATCH 4)
|
||||
SET (DKS_VERSION_MINOR 2)
|
||||
SET (DKS_VERSION_PATCH 0)
|
||||
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
|
||||
SET (PACKAGE \"dks\")
|
||||
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
|
||||
@ -148,7 +148,8 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
|
||||
MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
|
||||
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||
|
||||
SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
|
||||
#as35: Tesla K40c SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
|
||||
# nvcc flags for the sm_70 target. CUDA_NVCC_FLAGS is a ';'-separated list:
# every option has to be its own list element, so no spaces are allowed
# between options (a space would glue two options into one element).
SET (CUDA_NVCC_FLAGS "-arch=sm_70;-gencode=arch=compute_70,code=sm_70;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
|
||||
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${OPENCL_KERNELS}")
|
||||
|
||||
IF (NOT STATIC_CUDA)
|
||||
|
@ -1,7 +1,7 @@
|
||||
SET(${PROJECT_NAME}_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
|
||||
SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
|
||||
SET(${PROJECT_NAME}_LIBRARY "dks")
|
||||
SET(${PROJECT_NAME}_LIBRARY "dksshared")
|
||||
SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
|
||||
SET(DKS_CUDA_STATIC ${STATIC_CUDA})
|
||||
SET(DKS_CUDA_LIBS "${DKS_CUDA_LIBS}")
|
||||
|
@ -1,4 +1,4 @@
|
||||
CMAKE_MINIMUM_REQUIRED (VERSION 2.8)
|
||||
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||
|
||||
SET (DKS_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
MACRO (ADD_SOURCES )
|
||||
|
@ -1,8 +1,9 @@
|
||||
CMAKE_MINIMUM_REQUIRED (VERSION 2.8)
|
||||
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||
|
||||
FIND_PACKAGE(CUDA REQUIRED)
|
||||
|
||||
SET (CUDA_NVCC_FLAGS "-arch=sm_30")
|
||||
#as35: Tesla K40c SET (CUDA_NVCC_FLAGS "-arch=sm_30")
|
||||
SET (CUDA_NVCC_FLAGS "-arch=sm_70")
|
||||
|
||||
SET(LIB_TYPE STATIC)
|
||||
|
||||
@ -22,4 +23,4 @@ INCLUDE_DIRECTORIES (
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
CUDA_ADD_LIBRARY(cudadks ${DKS_CUDA_SRCS})
|
||||
CUDA_ADD_LIBRARY(cudadks ${DKS_CUDA_SRCS})
|
||||
|
@ -245,7 +245,7 @@ int CudaBase::cuda_setDevice(int device) {
|
||||
std::cout << "Init: " << device << "\t" << ndev << std::endl;
|
||||
|
||||
if (device < ndev) {
|
||||
std::cout << "set device to: " << ndev << std::endl;
|
||||
std::cout << "set device to: " << device << std::endl;
|
||||
cudaSetDevice(device);
|
||||
} else {
|
||||
if (ndev > 0)
|
||||
|
@ -94,7 +94,8 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
||||
}
|
||||
|
||||
//compile program
|
||||
const char *opts[] = {"-arch=compute_35", "-fmad=false", ""};
|
||||
//as35: for Tesla K40c const char *opts[] = {"-arch=compute_35", "-fmad=false", ""};
|
||||
const char *opts[] = {"-arch=compute_70", "-fmad=false", ""};
|
||||
int numopts = 2;
|
||||
if (mlh) {
|
||||
opts[2] = "-DMLH";
|
||||
|
@ -36,6 +36,25 @@ __device__ double sekt(double t, double lambda) {
|
||||
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
|
||||
}
|
||||
|
||||
/** Evaluates exp(-sqrt(4*(sigma/hopp)^2 * (exp(-hopp*t) - 1 + hopp*t))).
 *
 * @param t     time argument
 * @param sigma numerator of the sigma/hopp ratio
 * @param hopp  rate multiplying t; also the divisor of sigma, so hopp == 0
 *              yields NaN (unchanged from the previous behavior)
 * @return function value; 1.0 at t == 0
 */
__device__ double dglktfzf(double t, double sigma, double hopp) {
  double nut = hopp*t;

  // exp(-x) - 1 + x is mathematically >= 0, but the subtraction cancels for
  // small |x| and may round to a tiny negative number. Clamp it (as dglktflf
  // does for its Gamma_t) so that sqrt() never produces NaN.
  double arg = 4.0*pow(sigma/hopp,2.0)*(exp(-nut)-1.0+nut);
  if (arg < 0.0)
    arg = 0.0;

  return exp(-sqrt(arg));
}
|
||||
|
||||
/** Evaluates exp(-sqrt(4*sigma^2*Gamma(t))) with
 *
 *   Gamma(t) = [ (w0^2+hopp^2)*hopp*t
 *              + (w0^2-hopp^2)*(1 - exp(-hopp*t)*cos(w0*t))
 *              - 2*hopp*w0*exp(-hopp*t)*sin(w0*t) ] / (w0^2+hopp^2)^2,
 *   w0 = TWO_PI*nu0.
 *
 * For nu0 == 0, Gamma(t) reduces to (exp(-hopp*t) - 1 + hopp*t)/hopp^2, so the
 * result equals dglktfzf(t, sigma, hopp). The prefactor therefore has to be
 * sigma*sigma; the previous sigma*hopp broke that limit.
 * NOTE(review): confirm against the reference CPU implementation.
 *
 * @param t     time argument
 * @param nu0   frequency; multiplied by TWO_PI to obtain w0
 * @param sigma prefactor of Gamma(t), enters quadratically
 * @param hopp  rate parameter
 * @return function value
 */
__device__ double dglktflf(double t, double nu0, double sigma, double hopp) {
  double w0 = TWO_PI*nu0;
  double w0_2 = w0*w0;
  double w0_t = w0*t;
  double nu_2 = hopp*hopp;
  double nu_t = hopp*t;
  double Gamma_t = ((w0_2+nu_2)*nu_t+(w0_2-nu_2)*(1.0-exp(-nu_t)*cos(w0_t))-2.0*hopp*w0*exp(-nu_t)*sin(w0_t))/pow(w0_2+nu_2,2.0);

  // guard against small negative values so that sqrt() stays real
  if (Gamma_t < 0.0)
    Gamma_t = 0.0;

  return exp(-sqrt(4.0*sigma*sigma*Gamma_t));
}
|
||||
|
||||
__device__ double lgkt(double t, double lambda, double sigma) {
|
||||
double lambdat = lambda*t;
|
||||
double sigmatsq = pow(sigma*t, 2.0);
|
||||
@ -69,6 +88,12 @@ __device__ double rahf(double t, double nu, double lambda) {
|
||||
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
|
||||
}
|
||||
|
||||
/** Evaluates exp(-(sigma/gamma)^2 * (exp(-gamma*t) - 1 + gamma*t)).
 *
 * @param t     time argument
 * @param sigma numerator of the squared ratio
 * @param gamma rate multiplying t and divisor of sigma
 * @return function value; 1.0 at t == 0
 */
__device__ double ab(double t, double sigma, double gamma) {
  const double x = gamma*t;
  const double ratio_sq = pow(sigma/gamma, 2.0);
  const double growth = exp(-x) - 1.0 + x;

  return exp(-ratio_sq*growth);
}
|
||||
|
||||
__device__ double tf(double t, double phi, double nu) {
|
||||
double tmp_nu = TWO_PI*nu*t;
|
||||
double tmp_phi = DEG_TO_RAD*phi;
|
||||
@ -144,12 +169,6 @@ __device__ double ib(double t, double alpha, double phi, double nu, double lambd
|
||||
return alpha*j0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||
}
|
||||
|
||||
/** Evaluates exp(-(sigma/gamma)^2 * (exp(-gamma*t) - 1 + gamma*t)).
 *
 * @param t     time argument
 * @param sigma numerator of the squared ratio
 * @param gamma rate multiplying t and divisor of sigma
 * @return function value; 1.0 at t == 0
 */
__device__ double ab(double t, double sigma, double gamma) {
  const double x = gamma*t;
  const double ratio_sq = pow(sigma/gamma, 2.0);
  const double growth = exp(-x) - 1.0 + x;

  return exp(-ratio_sq*growth);
}
|
||||
|
||||
__device__ double snkzf(double t, double Delta0, double Rb) {
|
||||
double D0t2 = pow(Delta0*t, 2.0);
|
||||
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
||||
@ -184,6 +203,13 @@ __device__ double dnktf(double t, double phi, double nu, double Delta0, double R
|
||||
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
|
||||
}
|
||||
|
||||
/** Evaluates
 *   [3 + cos(sqrt(3)*wd*t)
 *      + (1 - 1/sqrt(3))*cos((3 - sqrt(3))/2 * wd*t)
 *      + (1 + 1/sqrt(3))*cos((3 + sqrt(3))/2 * wd*t)] / 6.
 *
 * @param t  time argument
 * @param wd angular rate multiplying t
 * @return function value; 1 at t == 0 (up to rounding)
 */
__device__ double fmuf(double t, double wd) {
  const double root3 = sqrt(3.0);
  const double phase = wd*t;
  const double inv_root3 = 1.0/root3;

  // the three oscillating contributions, summed in the original order
  const double term_mid  = cos(root3*phase);
  const double term_low  = (1.0 - inv_root3)*cos(((3.0 - root3)/2.0)*phase);
  const double term_high = (1.0 + inv_root3)*cos(((3.0 + root3)/2.0)*phase);

  return (3.0 + term_mid + term_low + term_high)/6.0;
}
|
||||
|
||||
/** Theory and chisquare functions.
|
||||
* Based on the compiler flags set theory is calculated either in single hist mode or asymetric.
|
||||
* Based on the compiler flags calculate either chisq or MLE
|
||||
|
@ -146,10 +146,10 @@ int DKSBaseMuSR::initChiSquare(int size_data, int size_param, int size_func, int
|
||||
|
||||
if (apiCuda()) {
|
||||
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
||||
chiSq = CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
|
||||
chiSq = (ChiSquareRuntime*) CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
|
||||
} else {
|
||||
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
||||
chiSq = OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
|
||||
chiSq = (ChiSquareRuntime*) OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
|
||||
}
|
||||
|
||||
if (ierr == DKS_SUCCESS) {
|
||||
|
@ -59,6 +59,25 @@ double sekt(double t, double lambda) {
|
||||
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
|
||||
}
|
||||
|
||||
/** Evaluates exp(-sqrt(4*(sigma/hopp)^2 * (exp(-hopp*t) - 1 + hopp*t))).
 *
 * @param t     time argument
 * @param sigma numerator of the sigma/hopp ratio
 * @param hopp  rate multiplying t; also the divisor of sigma, so hopp == 0
 *              yields NaN (unchanged from the previous behavior)
 * @return function value; 1.0 at t == 0
 */
double dglktfzf(double t, double sigma, double hopp) {
  double nut = hopp*t;

  // exp(-x) - 1 + x is mathematically >= 0, but the subtraction cancels for
  // small |x| and may round to a tiny negative number. Clamp it (as dglktflf
  // does for its Gamma_t) so that sqrt() never produces NaN.
  double arg = 4.0*pow(sigma/hopp,2.0)*(exp(-nut)-1.0+nut);
  if (arg < 0.0)
    arg = 0.0;

  return exp(-sqrt(arg));
}
|
||||
|
||||
/** Evaluates exp(-sqrt(4*sigma^2*Gamma(t))) with
 *
 *   Gamma(t) = [ (w0^2+hopp^2)*hopp*t
 *              + (w0^2-hopp^2)*(1 - exp(-hopp*t)*cos(w0*t))
 *              - 2*hopp*w0*exp(-hopp*t)*sin(w0*t) ] / (w0^2+hopp^2)^2,
 *   w0 = TWO_PI*nu0.
 *
 * For nu0 == 0, Gamma(t) reduces to (exp(-hopp*t) - 1 + hopp*t)/hopp^2, so the
 * result equals dglktfzf(t, sigma, hopp). The prefactor therefore has to be
 * sigma*sigma; the previous sigma*hopp broke that limit.
 * NOTE(review): confirm against the reference CPU implementation.
 *
 * @param t     time argument
 * @param nu0   frequency; multiplied by TWO_PI to obtain w0
 * @param sigma prefactor of Gamma(t), enters quadratically
 * @param hopp  rate parameter
 * @return function value
 */
double dglktflf(double t, double nu0, double sigma, double hopp) {
  double w0 = TWO_PI*nu0;
  double w0_2 = w0*w0;
  double w0_t = w0*t;
  double nu_2 = hopp*hopp;
  double nu_t = hopp*t;
  double Gamma_t = ((w0_2+nu_2)*nu_t+(w0_2-nu_2)*(1.0-exp(-nu_t)*cos(w0_t))-2.0*hopp*w0*exp(-nu_t)*sin(w0_t))/pow(w0_2+nu_2,2.0);

  // guard against small negative values so that sqrt() stays real
  if (Gamma_t < 0.0)
    Gamma_t = 0.0;

  return exp(-sqrt(4.0*sigma*sigma*Gamma_t));
}
|
||||
|
||||
double lgkt(double t, double lambda, double sigma) {
|
||||
double lambdat = lambda*t;
|
||||
double sigmatsq = pow(sigma*t, 2.0);
|
||||
@ -92,6 +111,12 @@ double rahf(double t, double nu, double lambda) {
|
||||
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
|
||||
}
|
||||
|
||||
/** Evaluates exp(-(sigma/gamma)^2 * (exp(-gamma*t) - 1 + gamma*t)).
 *
 * @param t     time argument
 * @param sigma numerator of the squared ratio
 * @param gamma rate multiplying t and divisor of sigma
 * @return function value; 1.0 at t == 0
 */
double ab(double t, double sigma, double gamma) {
  const double x = gamma*t;
  const double ratio_sq = pow(sigma/gamma, 2.0);
  const double growth = exp(-x) - 1.0 + x;

  return exp(-ratio_sq*growth);
}
|
||||
|
||||
double tf(double t, double phi, double nu) {
|
||||
double tmp_nu = TWO_PI*nu*t;
|
||||
double tmp_phi = DEG_TO_RAD * phi;
|
||||
@ -167,12 +192,6 @@ double ib(double t, double alpha, double phi, double nu, double lambdaT, double
|
||||
return alpha*bessj0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||
}
|
||||
|
||||
/** Evaluates exp(-(sigma/gamma)^2 * (exp(-gamma*t) - 1 + gamma*t)).
 *
 * @param t     time argument
 * @param sigma numerator of the squared ratio
 * @param gamma rate multiplying t and divisor of sigma
 * @return function value; 1.0 at t == 0
 */
double ab(double t, double sigma, double gamma) {
  const double x = gamma*t;
  const double ratio_sq = pow(sigma/gamma, 2.0);
  const double growth = exp(-x) - 1.0 + x;

  return exp(-ratio_sq*growth);
}
|
||||
|
||||
double snkzf(double t, double Delta0, double Rb) {
|
||||
double D0t2 = pow(Delta0*t, 2.0);
|
||||
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
||||
@ -207,6 +226,14 @@ double dnktf(double t, double phi, double nu, double Delta0, double Rb, double n
|
||||
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
|
||||
}
|
||||
|
||||
/** Evaluates
 *   [3 + cos(sqrt(3)*wd*t)
 *      + (1 - 1/sqrt(3))*cos((3 - sqrt(3))/2 * wd*t)
 *      + (1 + 1/sqrt(3))*cos((3 + sqrt(3))/2 * wd*t)] / 6.
 *
 * @param t  time argument
 * @param wd angular rate multiplying t
 * @return function value; 1 at t == 0 (up to rounding)
 */
double fmuf(double t, double wd) {
  const double root3 = sqrt(3.0);
  const double phase = wd*t;
  const double inv_root3 = 1.0/root3;

  // the three oscillating contributions, summed in the original order
  const double term_mid  = cos(root3*phase);
  const double term_low  = (1.0 - inv_root3)*cos(((3.0 - root3)/2.0)*phase);
  const double term_high = (1.0 + inv_root3)*cos(((3.0 + root3)/2.0)*phase);

  return (3.0 + term_mid + term_low + term_high)/6.0;
}
|
||||
|
||||
|
||||
__kernel void kernelChiSquareSingleHisto(__global double *data, __global double *err,
|
||||
__global double *par, __global double *chisq, __global int *map, __global double *funcv,
|
||||
int length, int numpar, int numfunc, int nummap,
|
||||
|
Reference in New Issue
Block a user