Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
9d21fc5400 | |||
21b4c591b9 | |||
efa3311b45 | |||
ee33aacdd6 | |||
48f6f9c25e | |||
db79798da5 | |||
9381b14b87 | |||
43cb9020c4 | |||
3d946f666b | |||
e6021eb6e3 |
@ -1,8 +1,8 @@
|
|||||||
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||||
PROJECT (DKS)
|
PROJECT (DKS)
|
||||||
SET (DKS_VERSION_MAJOR 1)
|
SET (DKS_VERSION_MAJOR 1)
|
||||||
SET (DKS_VERSION_MINOR 1)
|
SET (DKS_VERSION_MINOR 2)
|
||||||
SET (DKS_VERSION_PATCH 2)
|
SET (DKS_VERSION_PATCH 0)
|
||||||
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
|
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
|
||||||
SET (PACKAGE \"dks\")
|
SET (PACKAGE \"dks\")
|
||||||
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
|
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
|
||||||
@ -148,7 +148,8 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
|
|||||||
MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
|
MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
|
||||||
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||||
|
|
||||||
SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
|
#as35: Tesla K40c SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
|
||||||
|
# Build for compute capability 7.0 (sm_70).
# CUDA_NVCC_FLAGS is a semicolon-separated list: every nvcc option has to be
# its own list element, without surrounding whitespace. The previous value
# fused "-gencode=..." and "-DDEBUG" into a single element (missing ';' and a
# leading space), so they were handed to nvcc as one argument.
SET (CUDA_NVCC_FLAGS "-arch=sm_70;-gencode=arch=compute_70,code=sm_70;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
|
||||||
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${OPENCL_KERNELS}")
|
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${OPENCL_KERNELS}")
|
||||||
|
|
||||||
IF (NOT STATIC_CUDA)
|
IF (NOT STATIC_CUDA)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
##################################################################
|
##################################################################
|
||||||
#
|
#
|
||||||
# Name: Dynamic Kernel Scheduler
|
# Name: Dynamic Kernel Scheduler
|
||||||
# Version: 1.0
|
# Version: 1.1
|
||||||
# Author: Uldis Locans
|
# Author: Uldis Locans
|
||||||
# Contacts: locans.uldis@psi.ch
|
# Contacts: locans.uldis@psi.ch
|
||||||
#
|
#
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
SET(${PROJECT_NAME}_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
SET(${PROJECT_NAME}_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||||
SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
|
SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
|
||||||
SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
|
SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
|
||||||
SET(${PROJECT_NAME}_LIBRARY "dks")
|
SET(${PROJECT_NAME}_LIBRARY "dksshared")
|
||||||
SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
|
SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
|
||||||
SET(DKS_CUDA_STATIC ${STATIC_CUDA})
|
SET(DKS_CUDA_STATIC ${STATIC_CUDA})
|
||||||
SET(DKS_CUDA_LIBS "${DKS_CUDA_LIBS}")
|
SET(DKS_CUDA_LIBS "${DKS_CUDA_LIBS}")
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
CMAKE_MINIMUM_REQUIRED (VERSION 2.8)
|
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||||
|
|
||||||
SET (DKS_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
SET (DKS_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
MACRO (ADD_SOURCES )
|
MACRO (ADD_SOURCES )
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
CMAKE_MINIMUM_REQUIRED (VERSION 2.8)
|
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||||
|
|
||||||
FIND_PACKAGE(CUDA REQUIRED)
|
FIND_PACKAGE(CUDA REQUIRED)
|
||||||
|
|
||||||
SET (CUDA_NVCC_FLAGS "-arch=sm_30")
|
#as35: Tesla K40c SET (CUDA_NVCC_FLAGS "-arch=sm_30")
|
||||||
|
SET (CUDA_NVCC_FLAGS "-arch=sm_70")
|
||||||
|
|
||||||
SET(LIB_TYPE STATIC)
|
SET(LIB_TYPE STATIC)
|
||||||
|
|
||||||
|
@ -245,7 +245,7 @@ int CudaBase::cuda_setDevice(int device) {
|
|||||||
std::cout << "Init: " << device << "\t" << ndev << std::endl;
|
std::cout << "Init: " << device << "\t" << ndev << std::endl;
|
||||||
|
|
||||||
if (device < ndev) {
|
if (device < ndev) {
|
||||||
std::cout << "set device to: " << ndev << std::endl;
|
std::cout << "set device to: " << device << std::endl;
|
||||||
cudaSetDevice(device);
|
cudaSetDevice(device);
|
||||||
} else {
|
} else {
|
||||||
if (ndev > 0)
|
if (ndev > 0)
|
||||||
|
@ -87,14 +87,19 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
|||||||
//create program
|
//create program
|
||||||
nvrtcProgram prog;
|
nvrtcProgram prog;
|
||||||
// std::cout << cudaProg.c_str() << std::endl;
|
// std::cout << cudaProg.c_str() << std::endl;
|
||||||
nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
|
nvrtcResult createResult = nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
|
||||||
|
if (createResult != NVRTC_SUCCESS) {
|
||||||
|
DEBUG_MSG("Program creation failed!");
|
||||||
|
return DKS_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
//compile program
|
//compile program
|
||||||
const char *opts[] = {"-fmad=false", ""};
|
//as35: for Tesla K40c const char *opts[] = {"-arch=compute_35", "-fmad=false", ""};
|
||||||
int numopts = 1;
|
const char *opts[] = {"-arch=compute_70", "-fmad=false", ""};
|
||||||
|
int numopts = 2;
|
||||||
if (mlh) {
|
if (mlh) {
|
||||||
opts[1] = "-DMLH";
|
opts[2] = "-DMLH";
|
||||||
numopts = 2;
|
numopts = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
nvrtcResult compileResults = nvrtcCompileProgram(prog, numopts, opts);
|
nvrtcResult compileResults = nvrtcCompileProgram(prog, numopts, opts);
|
||||||
@ -118,7 +123,11 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
|||||||
if (ptx_m != NULL)
|
if (ptx_m != NULL)
|
||||||
delete[] ptx_m;
|
delete[] ptx_m;
|
||||||
size_t ptxSize;
|
size_t ptxSize;
|
||||||
nvrtcGetPTXSize(prog, &ptxSize);
|
nvrtcResult ptxSizeResult = nvrtcGetPTXSize(prog, &ptxSize);
|
||||||
|
if (ptxSizeResult != NVRTC_SUCCESS) {
|
||||||
|
DEBUG_MSG("PTX get size error!");
|
||||||
|
return DKS_ERROR;
|
||||||
|
}
|
||||||
ptx_m = new char[ptxSize];
|
ptx_m = new char[ptxSize];
|
||||||
nvrtcResult nvrtcPTXResult = nvrtcGetPTX(prog, ptx_m);
|
nvrtcResult nvrtcPTXResult = nvrtcGetPTX(prog, ptx_m);
|
||||||
|
|
||||||
@ -127,10 +136,26 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
|||||||
return DKS_ERROR;
|
return DKS_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add some additional diagnostics
|
||||||
|
const int buffer_size = 8192;
|
||||||
|
CUjit_option options[3];
|
||||||
|
void* values[3];
|
||||||
|
char error_log[buffer_size];
|
||||||
|
int err;
|
||||||
|
options[0] = CU_JIT_ERROR_LOG_BUFFER;
|
||||||
|
values[0] = (void*)error_log;
|
||||||
|
options[1] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
|
||||||
|
values[1] = (void*)buffer_size;
|
||||||
|
options[2] = CU_JIT_TARGET_FROM_CUCONTEXT;
|
||||||
|
values[2] = 0;
|
||||||
//load module from ptx
|
//load module from ptx
|
||||||
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 0, 0, 0);
|
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 3, options, values);
|
||||||
if (loadResult != CUDA_SUCCESS) {
|
if (loadResult != CUDA_SUCCESS) {
|
||||||
DEBUG_MSG("Load module from ptx failed!");
|
const char *err_msg;
|
||||||
|
cuGetErrorString(loadResult, &err_msg);
|
||||||
|
std::string msg = "Load module from ptx failed! (" + std::to_string(loadResult) + ") : " + err_msg;
|
||||||
|
DEBUG_MSG(msg);
|
||||||
|
DEBUG_MSG(error_log);
|
||||||
return DKS_ERROR;
|
return DKS_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,6 +36,25 @@ __device__ double sekt(double t, double lambda) {
|
|||||||
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
|
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Zero-field relaxation function of the form
 *    exp(-sqrt(4*(sigma/hopp)^2 * (exp(-hopp*t) - 1 + hopp*t))).
 *
 *  NOTE(review): presumably the device port of musrfit's fast-fluctuation
 *  dynamic Gauss-Lorentz Kubo-Toyabe ZF function - confirm against the host code.
 *
 *  @param t      time at which the function is evaluated
 *  @param sigma  width parameter (enters only through sigma/hopp)
 *  @param hopp   hopping rate; must be non-zero because sigma/hopp is formed directly
 *  @return function value; NaN inputs propagate to the result
 */
__device__ double dglktfzf(double t, double sigma, double hopp) {
  double nut = hopp*t;

  // exp(-x) - 1 + x is non-negative analytically, but cancellation for small x
  // can leave a tiny negative value and sqrt() would then return NaN.
  // Clamp it the same way dglktflf clamps Gamma_t.
  double decay = exp(-nut) - 1.0 + nut;
  if (decay < 0.0)
    decay = 0.0;

  return exp(-sqrt(4.0*pow(sigma/hopp,2.0)*decay));
}
|
||||||
|
|
||||||
|
/** Longitudinal-field relaxation function exp(-sqrt(4*sigma^2*Gamma(t))) with
 *    Gamma(t) = [ (w0^2+nu^2)*nu*t + (w0^2-nu^2)*(1 - exp(-nu*t)*cos(w0*t))
 *                 - 2*nu*w0*exp(-nu*t)*sin(w0*t) ] / (w0^2+nu^2)^2,
 *  where w0 = TWO_PI*nu0 and nu = hopp.
 *
 *  For nu0 == 0, Gamma(t) becomes (exp(-nu*t) - 1 + nu*t)/nu^2, so the result
 *  equals dglktfzf(t, sigma, hopp).
 *
 *  @param t      time at which the function is evaluated
 *  @param nu0    frequency, converted to an angular frequency via TWO_PI
 *  @param sigma  width parameter
 *  @param hopp   hopping rate
 *  @return function value; NaN when nu0 and hopp are both zero (0/0 in Gamma)
 */
__device__ double dglktflf(double t, double nu0, double sigma, double hopp) {
  double w0 = TWO_PI*nu0;
  double w0_2 = w0*w0;
  double w0_t = w0*t;
  double nu_2 = hopp*hopp;
  double nu_t = hopp*t;
  double Gamma_t = ((w0_2+nu_2)*nu_t+(w0_2-nu_2)*(1.0-exp(-nu_t)*cos(w0_t))-2.0*hopp*w0*exp(-nu_t)*sin(w0_t))/pow(w0_2+nu_2,2.0);

  // rounding may push Gamma_t marginally below zero; keep the sqrt argument valid
  if (Gamma_t < 0.0)
    Gamma_t = 0.0;

  // The prefactor is sigma^2. The earlier sigma*hopp did not reproduce the
  // zero-field expression 4*(sigma/hopp)^2*(...) in the limit nu0 -> 0.
  return exp(-sqrt(4.0*sigma*sigma*Gamma_t));
}
|
||||||
|
|
||||||
__device__ double lgkt(double t, double lambda, double sigma) {
|
__device__ double lgkt(double t, double lambda, double sigma) {
|
||||||
double lambdat = lambda*t;
|
double lambdat = lambda*t;
|
||||||
double sigmatsq = pow(sigma*t, 2.0);
|
double sigmatsq = pow(sigma*t, 2.0);
|
||||||
@ -69,6 +88,12 @@ __device__ double rahf(double t, double nu, double lambda) {
|
|||||||
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
|
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Evaluates exp(-(sigma/gamma)^2 * (exp(-gamma*t) - 1 + gamma*t)).
 *
 *  @param t      time at which the function is evaluated
 *  @param sigma  width parameter (numerator of the squared ratio)
 *  @param gamma  rate parameter; used as a divisor
 *  @return function value
 */
__device__ double ab(double t, double sigma, double gamma) {
  const double rateTime = gamma*t;
  const double ratioSquared = pow(sigma/gamma,2.0);
  const double decay = exp(-rateTime) - 1.0 + rateTime;

  return exp(-ratioSquared*decay);
}
|
||||||
|
|
||||||
__device__ double tf(double t, double phi, double nu) {
|
__device__ double tf(double t, double phi, double nu) {
|
||||||
double tmp_nu = TWO_PI*nu*t;
|
double tmp_nu = TWO_PI*nu*t;
|
||||||
double tmp_phi = DEG_TO_RAD*phi;
|
double tmp_phi = DEG_TO_RAD*phi;
|
||||||
@ -83,6 +108,56 @@ __device__ double ifld(double t, double alpha, double phi, double nu, double lam
|
|||||||
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Two-component function: a weight (1-alpha) "rateL" term plus a weight alpha
 *  oscillating term with the envelope exp(-0.5*sigma^2*t^2).
 *
 *    rateL = 1.0                    if beta < 0.001
 *          = (lambda*t)^beta        otherwise
 *    nu <  0.01: (1-alpha)*exp(-rateL) + alpha*(1 - sigma^2 t^2)*exp(-0.5 sigma^2 t^2)
 *    nu >= 0.01: (1-alpha)*exp(-rateL)
 *                + alpha*(cos(wt) - sigma^2 t^2/(wt)*sin(wt))*exp(-0.5 sigma^2 t^2)
 *  with wt = TWO_PI*nu*t.
 *
 *  @param t       time at which the function is evaluated
 *  @param alpha   weight of the oscillating term
 *  @param nu      frequency; values below 0.01 select the non-oscillating form
 *  @param sigma   width of the exponential envelope
 *  @param lambda  rate of the (1-alpha) term; negative values make the function return 0
 *  @param beta    exponent applied to lambda*t
 *  @return function value
 */
__device__ double ifgk(double t, double alpha, double nu, double sigma, double lambda, double beta) {
  double wt = TWO_PI*nu*t;
  double rate2 = sigma*sigma*t*t;
  double rateL = 0.0;
  double result = 0.0;

  // a negative lambda is rejected (lambda == 0 is accepted)
  if (lambda < 0.0)
    return 0.0;

  if (beta < 0.001) {
    rateL = 1.0;
  } else {
    rateL = pow(lambda*t, beta);
  }

  if (nu < 0.01) {
    result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-rate2)*exp(-0.5*rate2);
  } else {
    // rate2/wt is 0/0 at t == 0 and used to poison the result with NaN.
    // The term sigma^2*t/(TWO_PI*nu)*sin(wt) tends to 0 there, so use that limit.
    // For every wt != 0 the value is computed exactly as before.
    double damp = 0.0;
    if (wt != 0.0)
      damp = rate2/wt*sin(wt);
    result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-damp)*exp(-0.5*rate2);
  }

  return result;
}
|
||||||
|
|
||||||
|
/** Two-component function: a weight (1-alpha) "rateL" term plus a weight alpha
 *  term damped by exp(-a*t).
 *
 *    rateL = 1.0 if beta < 0.001, otherwise (lambda*t)^beta
 *    nu <  0.01: (1-alpha)*exp(-rateL) + alpha*(1 - a*t)*exp(-a*t)
 *    nu >= 0.01: (1-alpha)*exp(-rateL)
 *                + alpha*(cos(wt) - a/(TWO_PI*nu)*sin(wt))*exp(-a*t),  wt = TWO_PI*nu*t
 *
 *  Returns 0 when lambda is negative.
 */
__device__ double ifll(double t, double alpha, double nu, double a, double lambda, double beta) {
  // negative lambda: nothing to evaluate
  if (lambda < 0.0)
    return 0.0;

  const double rateL = (beta < 0.001) ? 1.0 : pow(lambda*t, beta);
  const double at = a*t;
  const double slowPart = (1.0-alpha)*exp(-rateL);

  // low-frequency form without the oscillation
  if (nu < 0.01)
    return slowPart + alpha*(1.0-at)*exp(-at);

  const double wt = TWO_PI*nu*t;
  return slowPart + alpha*(cos(wt)-a/(TWO_PI*nu)*sin(wt))*exp(-at);
}
|
||||||
|
|
||||||
__device__ double b(double t, double phi, double nu) {
|
__device__ double b(double t, double phi, double nu) {
|
||||||
return j0(TWO_PI*nu*t + DEG_TO_RAD*phi);
|
return j0(TWO_PI*nu*t + DEG_TO_RAD*phi);
|
||||||
}
|
}
|
||||||
@ -94,12 +169,6 @@ __device__ double ib(double t, double alpha, double phi, double nu, double lambd
|
|||||||
return alpha*j0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
return alpha*j0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ double ab(double t, double sigma, double gamma) {
|
|
||||||
double gt = gamma*t;
|
|
||||||
|
|
||||||
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ double snkzf(double t, double Delta0, double Rb) {
|
__device__ double snkzf(double t, double Delta0, double Rb) {
|
||||||
double D0t2 = pow(Delta0*t, 2.0);
|
double D0t2 = pow(Delta0*t, 2.0);
|
||||||
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
||||||
@ -134,6 +203,13 @@ __device__ double dnktf(double t, double phi, double nu, double Delta0, double R
|
|||||||
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
|
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Sum of three cosines plus a constant, normalised by 6:
 *    ( 3 + cos(sqrt(3)*wd*t)
 *        + (1 - 1/sqrt(3))*cos((3 - sqrt(3))/2 * wd*t)
 *        + (1 + 1/sqrt(3))*cos((3 + sqrt(3))/2 * wd*t) ) / 6
 *
 *  @param t   time at which the function is evaluated
 *  @param wd  angular frequency scale multiplying t
 *  @return function value (1 at t == 0 up to rounding)
 */
__device__ double fmuf(double t, double wd) {
  const double root3 = sqrt(3.0);
  const double phase = wd*t;

  const double centre = cos(root3*phase);
  const double lower = (1.0-1.0/root3)*cos(((3.0-root3)/2.0)*phase);
  const double upper = (1.0+1.0/root3)*cos(((3.0 + root3)/2.0)*phase);

  return (3.0+centre+lower+upper)/6.0;
}
|
||||||
|
|
||||||
/** Theory and chisquare functions.
|
/** Theory and chisquare functions.
|
||||||
* Based on the compiler flags set theory is calculated either in single hist mode or asymetric.
|
* Based on the compiler flags set theory is calculated either in single hist mode or asymetric.
|
||||||
* Based on the compiler flags calculate either chisq or MLE
|
* Based on the compiler flags calculate either chisq or MLE
|
||||||
|
@ -146,10 +146,10 @@ int DKSBaseMuSR::initChiSquare(int size_data, int size_param, int size_func, int
|
|||||||
|
|
||||||
if (apiCuda()) {
|
if (apiCuda()) {
|
||||||
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
||||||
chiSq = CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
|
chiSq = (ChiSquareRuntime*) CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
|
||||||
} else {
|
} else {
|
||||||
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
||||||
chiSq = OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
|
chiSq = (ChiSquareRuntime*) OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ierr == DKS_SUCCESS) {
|
if (ierr == DKS_SUCCESS) {
|
||||||
|
@ -76,7 +76,6 @@ int OpenCLChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
|||||||
|
|
||||||
double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
|
double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
|
||||||
|
|
||||||
|
|
||||||
int ierr;
|
int ierr;
|
||||||
//calc number of threads per workgroup and nr of work groups
|
//calc number of threads per workgroup and nr of work groups
|
||||||
size_t work_size_sum = (size_t)blockSize_m;
|
size_t work_size_sum = (size_t)blockSize_m;
|
||||||
@ -105,7 +104,7 @@ double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
|
|||||||
m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
|
m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
|
||||||
m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum);
|
m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum);
|
||||||
|
|
||||||
//read partial sums and free temp mempry
|
//read partial sums and free temp memory
|
||||||
m_oclbase->ocl_readData(tmp_ptr, partial_sums, sizeof(double)*work_groups);
|
m_oclbase->ocl_readData(tmp_ptr, partial_sums, sizeof(double)*work_groups);
|
||||||
m_oclbase->ocl_freeMemory(tmp_ptr);
|
m_oclbase->ocl_freeMemory(tmp_ptr);
|
||||||
|
|
||||||
@ -157,6 +156,7 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
|
|||||||
return ierr;
|
return ierr;
|
||||||
|
|
||||||
//set kernel args
|
//set kernel args
|
||||||
|
size_t num=1;
|
||||||
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
|
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
|
||||||
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
|
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
|
||||||
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
|
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
|
||||||
@ -172,20 +172,23 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
|
|||||||
m_oclbase->ocl_setKernelArg(12, sizeof(double), &tau_m);
|
m_oclbase->ocl_setKernelArg(12, sizeof(double), &tau_m);
|
||||||
m_oclbase->ocl_setKernelArg(13, sizeof(double), &N0_m);
|
m_oclbase->ocl_setKernelArg(13, sizeof(double), &N0_m);
|
||||||
m_oclbase->ocl_setKernelArg(14, sizeof(double), &bkg_m);
|
m_oclbase->ocl_setKernelArg(14, sizeof(double), &bkg_m);
|
||||||
m_oclbase->ocl_setKernelArg(15, sizeof(double)*numpar, NULL);
|
num = numpar; if (num == 0) num = 1;
|
||||||
m_oclbase->ocl_setKernelArg(16, sizeof(double)*numfunc, NULL);
|
m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL);
|
||||||
m_oclbase->ocl_setKernelArg(17, sizeof(int)*nummap, NULL);
|
num = numfunc; if (num == 0) num = 1;
|
||||||
|
m_oclbase->ocl_setKernelArg(16, sizeof(double)*num, NULL);
|
||||||
|
num = nummap; if (num == 0) num = 1;
|
||||||
|
m_oclbase->ocl_setKernelArg(17, sizeof(int)*num, NULL);
|
||||||
|
|
||||||
if (ierr != DKS_SUCCESS)
|
if (ierr != DKS_SUCCESS)
|
||||||
return ierr;
|
return ierr;
|
||||||
} else if (fitType == FITTYPE_ASYMMETRY) {
|
} else if (fitType == FITTYPE_ASYMMETRY) {
|
||||||
//create kernel
|
//create kernel
|
||||||
ierr = m_oclbase->ocl_createKernel("kernelChiSquareAsymmetry");
|
ierr = m_oclbase->ocl_createKernel("kernelChiSquareAsymmetry");
|
||||||
|
|
||||||
if (ierr != DKS_SUCCESS)
|
if (ierr != DKS_SUCCESS)
|
||||||
return ierr;
|
return ierr;
|
||||||
|
|
||||||
//set kernel args
|
//set kernel args
|
||||||
|
size_t num=1;
|
||||||
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
|
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
|
||||||
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
|
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
|
||||||
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
|
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
|
||||||
@ -200,9 +203,12 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
|
|||||||
m_oclbase->ocl_setKernelArg(11, sizeof(double), &timeStep);
|
m_oclbase->ocl_setKernelArg(11, sizeof(double), &timeStep);
|
||||||
m_oclbase->ocl_setKernelArg(12, sizeof(double), &alpha_m);
|
m_oclbase->ocl_setKernelArg(12, sizeof(double), &alpha_m);
|
||||||
m_oclbase->ocl_setKernelArg(13, sizeof(double), &beta_m);
|
m_oclbase->ocl_setKernelArg(13, sizeof(double), &beta_m);
|
||||||
m_oclbase->ocl_setKernelArg(14, sizeof(double)*numpar, NULL);
|
num = numpar; if (num == 0) num = 1;
|
||||||
m_oclbase->ocl_setKernelArg(15, sizeof(double)*numfunc, NULL);
|
m_oclbase->ocl_setKernelArg(14, sizeof(double)*num, NULL);
|
||||||
m_oclbase->ocl_setKernelArg(16, sizeof(int)*nummap, NULL);
|
num = numfunc; if (num == 0) num = 1;
|
||||||
|
m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL);
|
||||||
|
num = nummap; if (num == 0) num = 1;
|
||||||
|
m_oclbase->ocl_setKernelArg(16, sizeof(int)*num, NULL);
|
||||||
|
|
||||||
if (ierr != DKS_SUCCESS)
|
if (ierr != DKS_SUCCESS)
|
||||||
return ierr;
|
return ierr;
|
||||||
@ -321,4 +327,3 @@ int OpenCLChiSquareRuntime::checkChiSquareKernels(int fitType, int &threadsPerBl
|
|||||||
return ierr;
|
return ierr;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,6 +59,25 @@ double sekt(double t, double lambda) {
|
|||||||
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
|
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Zero-field relaxation function of the form
 *    exp(-sqrt(4*(sigma/hopp)^2 * (exp(-hopp*t) - 1 + hopp*t))).
 *
 *  NOTE(review): presumably the OpenCL port of musrfit's fast-fluctuation
 *  dynamic Gauss-Lorentz Kubo-Toyabe ZF function - confirm against the host code.
 *
 *  t      time at which the function is evaluated
 *  sigma  width parameter (enters only through sigma/hopp)
 *  hopp   hopping rate; must be non-zero because sigma/hopp is formed directly
 *  Returns the function value; NaN inputs propagate to the result.
 */
double dglktfzf(double t, double sigma, double hopp) {
  double nut = hopp*t;

  // exp(-x) - 1 + x is non-negative analytically, but cancellation for small x
  // can leave a tiny negative value and sqrt() would then return NaN.
  // Clamp it the same way dglktflf clamps Gamma_t.
  double decay = exp(-nut) - 1.0 + nut;
  if (decay < 0.0)
    decay = 0.0;

  return exp(-sqrt(4.0*pow(sigma/hopp,2.0)*decay));
}
|
||||||
|
|
||||||
|
/** Longitudinal-field relaxation function exp(-sqrt(4*sigma^2*Gamma(t))) with
 *    Gamma(t) = [ (w0^2+nu^2)*nu*t + (w0^2-nu^2)*(1 - exp(-nu*t)*cos(w0*t))
 *                 - 2*nu*w0*exp(-nu*t)*sin(w0*t) ] / (w0^2+nu^2)^2,
 *  where w0 = TWO_PI*nu0 and nu = hopp.
 *
 *  For nu0 == 0, Gamma(t) becomes (exp(-nu*t) - 1 + nu*t)/nu^2, so the result
 *  equals dglktfzf(t, sigma, hopp).
 *
 *  t      time at which the function is evaluated
 *  nu0    frequency, converted to an angular frequency via TWO_PI
 *  sigma  width parameter
 *  hopp   hopping rate
 *  Returns the function value; NaN when nu0 and hopp are both zero (0/0 in Gamma).
 */
double dglktflf(double t, double nu0, double sigma, double hopp) {
  double w0 = TWO_PI*nu0;
  double w0_2 = w0*w0;
  double w0_t = w0*t;
  double nu_2 = hopp*hopp;
  double nu_t = hopp*t;
  double Gamma_t = ((w0_2+nu_2)*nu_t+(w0_2-nu_2)*(1.0-exp(-nu_t)*cos(w0_t))-2.0*hopp*w0*exp(-nu_t)*sin(w0_t))/pow(w0_2+nu_2,2.0);

  // rounding may push Gamma_t marginally below zero; keep the sqrt argument valid
  if (Gamma_t < 0.0)
    Gamma_t = 0.0;

  // The prefactor is sigma^2. The earlier sigma*hopp did not reproduce the
  // zero-field expression 4*(sigma/hopp)^2*(...) in the limit nu0 -> 0.
  return exp(-sqrt(4.0*sigma*sigma*Gamma_t));
}
|
||||||
|
|
||||||
double lgkt(double t, double lambda, double sigma) {
|
double lgkt(double t, double lambda, double sigma) {
|
||||||
double lambdat = lambda*t;
|
double lambdat = lambda*t;
|
||||||
double sigmatsq = pow(sigma*t, 2.0);
|
double sigmatsq = pow(sigma*t, 2.0);
|
||||||
@ -92,6 +111,12 @@ double rahf(double t, double nu, double lambda) {
|
|||||||
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
|
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Evaluates exp(-(sigma/gamma)^2 * (exp(-gamma*t) - 1 + gamma*t)).
 *
 *  t      time at which the function is evaluated
 *  sigma  width parameter (numerator of the squared ratio)
 *  gamma  rate parameter; used as a divisor
 */
double ab(double t, double sigma, double gamma) {
  const double rateTime = gamma*t;
  const double ratioSquared = pow(sigma/gamma,2.0);
  const double decay = exp(-rateTime) - 1.0 + rateTime;

  return exp(-ratioSquared*decay);
}
|
||||||
|
|
||||||
double tf(double t, double phi, double nu) {
|
double tf(double t, double phi, double nu) {
|
||||||
double tmp_nu = TWO_PI*nu*t;
|
double tmp_nu = TWO_PI*nu*t;
|
||||||
double tmp_phi = DEG_TO_RAD * phi;
|
double tmp_phi = DEG_TO_RAD * phi;
|
||||||
@ -106,6 +131,56 @@ double ifld(double t, double alpha, double phi, double nu, double lambdaT, doubl
|
|||||||
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Two-component function: a weight (1-alpha) "rateL" term plus a weight alpha
 *  oscillating term with the envelope exp(-0.5*sigma^2*t^2).
 *
 *    rateL = 1.0                    if beta < 0.001
 *          = (lambda*t)^beta        otherwise
 *    nu <  0.01: (1-alpha)*exp(-rateL) + alpha*(1 - sigma^2 t^2)*exp(-0.5 sigma^2 t^2)
 *    nu >= 0.01: (1-alpha)*exp(-rateL)
 *                + alpha*(cos(wt) - sigma^2 t^2/(wt)*sin(wt))*exp(-0.5 sigma^2 t^2)
 *  with wt = TWO_PI*nu*t.
 *
 *  t       time at which the function is evaluated
 *  alpha   weight of the oscillating term
 *  nu      frequency; values below 0.01 select the non-oscillating form
 *  sigma   width of the exponential envelope
 *  lambda  rate of the (1-alpha) term; negative values make the function return 0
 *  beta    exponent applied to lambda*t
 */
double ifgk(double t, double alpha, double nu, double sigma, double lambda, double beta) {
  double wt = TWO_PI*nu*t;
  double rate2 = sigma*sigma*t*t;
  double rateL = 0.0;
  double result = 0.0;

  // a negative lambda is rejected (lambda == 0 is accepted)
  if (lambda < 0.0)
    return 0.0;

  if (beta < 0.001) {
    rateL = 1.0;
  } else {
    rateL = pow(lambda*t, beta);
  }

  if (nu < 0.01) {
    result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-rate2)*exp(-0.5*rate2);
  } else {
    // rate2/wt is 0/0 at t == 0 and used to poison the result with NaN.
    // The term sigma^2*t/(TWO_PI*nu)*sin(wt) tends to 0 there, so use that limit.
    // For every wt != 0 the value is computed exactly as before.
    double damp = 0.0;
    if (wt != 0.0)
      damp = rate2/wt*sin(wt);
    result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-damp)*exp(-0.5*rate2);
  }

  return result;
}
|
||||||
|
|
||||||
|
/** Two-component function: a weight (1-alpha) "rateL" term plus a weight alpha
 *  term damped by exp(-a*t).
 *
 *    rateL = 1.0 if beta < 0.001, otherwise (lambda*t)^beta
 *    nu <  0.01: (1-alpha)*exp(-rateL) + alpha*(1 - a*t)*exp(-a*t)
 *    nu >= 0.01: (1-alpha)*exp(-rateL)
 *                + alpha*(cos(wt) - a/(TWO_PI*nu)*sin(wt))*exp(-a*t),  wt = TWO_PI*nu*t
 *
 *  Returns 0 when lambda is negative.
 */
double ifll(double t, double alpha, double nu, double a, double lambda, double beta) {
  // negative lambda: nothing to evaluate
  if (lambda < 0.0)
    return 0.0;

  const double rateL = (beta < 0.001) ? 1.0 : pow(lambda*t, beta);
  const double at = a*t;
  const double slowPart = (1.0-alpha)*exp(-rateL);

  // low-frequency form without the oscillation
  if (nu < 0.01)
    return slowPart + alpha*(1.0-at)*exp(-at);

  const double wt = TWO_PI*nu*t;
  return slowPart + alpha*(cos(wt)-a/(TWO_PI*nu)*sin(wt))*exp(-at);
}
|
||||||
|
|
||||||
double b(double t, double phi, double nu) {
|
double b(double t, double phi, double nu) {
|
||||||
return bessj0(TWO_PI*nu*t + DEG_TO_RAD*phi);
|
return bessj0(TWO_PI*nu*t + DEG_TO_RAD*phi);
|
||||||
}
|
}
|
||||||
@ -117,12 +192,6 @@ double ib(double t, double alpha, double phi, double nu, double lambdaT, double
|
|||||||
return alpha*bessj0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
return alpha*bessj0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||||
}
|
}
|
||||||
|
|
||||||
double ab(double t, double sigma, double gamma) {
|
|
||||||
double gt = gamma*t;
|
|
||||||
|
|
||||||
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
|
|
||||||
}
|
|
||||||
|
|
||||||
double snkzf(double t, double Delta0, double Rb) {
|
double snkzf(double t, double Delta0, double Rb) {
|
||||||
double D0t2 = pow(Delta0*t, 2.0);
|
double D0t2 = pow(Delta0*t, 2.0);
|
||||||
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
||||||
@ -157,6 +226,14 @@ double dnktf(double t, double phi, double nu, double Delta0, double Rb, double n
|
|||||||
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
|
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Sum of three cosines plus a constant, normalised by 6:
 *    ( 3 + cos(sqrt(3)*wd*t)
 *        + (1 - 1/sqrt(3))*cos((3 - sqrt(3))/2 * wd*t)
 *        + (1 + 1/sqrt(3))*cos((3 + sqrt(3))/2 * wd*t) ) / 6
 *
 *  t   time at which the function is evaluated
 *  wd  angular frequency scale multiplying t
 */
double fmuf(double t, double wd) {
  const double root3 = sqrt(3.0);
  const double phase = wd*t;

  const double centre = cos(root3*phase);
  const double lower = (1.0-1.0/root3)*cos(((3.0-root3)/2.0)*phase);
  const double upper = (1.0+1.0/root3)*cos(((3.0 + root3)/2.0)*phase);

  return (3.0+centre+lower+upper)/6.0;
}
|
||||||
|
|
||||||
|
|
||||||
__kernel void kernelChiSquareSingleHisto(__global double *data, __global double *err,
|
__kernel void kernelChiSquareSingleHisto(__global double *data, __global double *err,
|
||||||
__global double *par, __global double *chisq, __global int *map, __global double *funcv,
|
__global double *par, __global double *chisq, __global int *map, __global double *funcv,
|
||||||
int length, int numpar, int numfunc, int nummap,
|
int length, int numpar, int numfunc, int nummap,
|
||||||
|
Reference in New Issue
Block a user