10 Commits

Author SHA1 Message Date
9d21fc5400 added a couple new theory functions (dglktfzf, dglktflf, fmuf). 2025-06-10 14:20:45 +02:00
21b4c591b9 adopted DKS to Cuda12 and GV100. For Tesla K40c code lines are still present but commented by as35. 2023-01-30 12:38:52 +01:00
efa3311b45 increased minimum required cmake version to 3.2 2023-01-30 12:27:37 +01:00
ee33aacdd6 fix a minor bug on reporting the selected device. 2022-10-31 17:49:20 +01:00
48f6f9c25e make newer compilers happy. 2022-04-08 16:53:34 +02:00
db79798da5 switch to the shared dks lib. 2020-12-28 18:17:41 +01:00
9381b14b87 changed version to 1.1.4 2020-06-09 13:04:48 +02:00
43cb9020c4 adapted for CUDA 11 2020-06-09 12:55:55 +02:00
3d946f666b added the two new muSR functions ifgk and ifll (CUDA/OpenCL). 2019-01-22 14:10:02 +01:00
e6021eb6e3 Set kernel argument size to a value > 0
For the case that map or fun is not used in the msr-file, the corresponding
call to the setKernelArg still needs a 2nd argument > 0, otherwise macOS crashes.
2018-12-11 11:35:32 +01:00
11 changed files with 236 additions and 51 deletions

View File

@ -1,8 +1,8 @@
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
PROJECT (DKS)
SET (DKS_VERSION_MAJOR 1)
SET (DKS_VERSION_MINOR 1)
SET (DKS_VERSION_PATCH 2)
SET (DKS_VERSION_MINOR 2)
SET (DKS_VERSION_PATCH 0)
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
SET (PACKAGE \"dks\")
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
@ -148,7 +148,8 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
#as35: Tesla K40c SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")
SET (CUDA_NVCC_FLAGS "-arch=sm_70; -gencode=arch=compute_70,code=sm_70 -DDEBUG;-std=c++11;-D__wsu;-fmad=false")
SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${OPENCL_KERNELS}")
IF (NOT STATIC_CUDA)

View File

@ -1,7 +1,7 @@
##################################################################
#
# Name: Dynamic Kernel Scheduler
# Version: 1.0
# Version: 1.1
# Author: Uldis Locans
# Contacts: locans.uldis@psi.ch
#

View File

@ -1,7 +1,7 @@
SET(${PROJECT_NAME}_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
SET(${PROJECT_NAME}_LIBRARY "dks")
SET(${PROJECT_NAME}_LIBRARY "dksshared")
SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
SET(DKS_CUDA_STATIC ${STATIC_CUDA})
SET(DKS_CUDA_LIBS "${DKS_CUDA_LIBS}")

View File

@ -1,4 +1,4 @@
CMAKE_MINIMUM_REQUIRED (VERSION 2.8)
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
SET (DKS_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
MACRO (ADD_SOURCES )

View File

@ -1,8 +1,9 @@
CMAKE_MINIMUM_REQUIRED (VERSION 2.8)
CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
FIND_PACKAGE(CUDA REQUIRED)
SET (CUDA_NVCC_FLAGS "-arch=sm_30")
#as35: Tesla K40c SET (CUDA_NVCC_FLAGS "-arch=sm_30")
SET (CUDA_NVCC_FLAGS "-arch=sm_70")
SET(LIB_TYPE STATIC)
@ -22,4 +23,4 @@ INCLUDE_DIRECTORIES (
${CMAKE_CURRENT_SOURCE_DIR}
)
CUDA_ADD_LIBRARY(cudadks ${DKS_CUDA_SRCS})
CUDA_ADD_LIBRARY(cudadks ${DKS_CUDA_SRCS})

View File

@ -245,7 +245,7 @@ int CudaBase::cuda_setDevice(int device) {
std::cout << "Init: " << device << "\t" << ndev << std::endl;
if (device < ndev) {
std::cout << "set device to: " << ndev << std::endl;
std::cout << "set device to: " << device << std::endl;
cudaSetDevice(device);
} else {
if (ndev > 0)

View File

@ -86,15 +86,20 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
//create program
nvrtcProgram prog;
//std::cout << cudaProg.c_str() << std::endl;
nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
// std::cout << cudaProg.c_str() << std::endl;
nvrtcResult createResult = nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
if (createResult != NVRTC_SUCCESS) {
DEBUG_MSG("Program creation failed!");
return DKS_ERROR;
}
//compile program
const char *opts[] = {"-fmad=false", ""};
int numopts = 1;
//as35: for Tesla K40c const char *opts[] = {"-arch=compute_35", "-fmad=false", ""};
const char *opts[] = {"-arch=compute_70", "-fmad=false", ""};
int numopts = 2;
if (mlh) {
opts[1] = "-DMLH";
numopts = 2;
opts[2] = "-DMLH";
numopts = 3;
}
nvrtcResult compileResults = nvrtcCompileProgram(prog, numopts, opts);
@ -118,7 +123,11 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
if (ptx_m != NULL)
delete[] ptx_m;
size_t ptxSize;
nvrtcGetPTXSize(prog, &ptxSize);
nvrtcResult ptxSizeResult = nvrtcGetPTXSize(prog, &ptxSize);
if (ptxSizeResult != NVRTC_SUCCESS) {
DEBUG_MSG("PTX get size error!");
return DKS_ERROR;
}
ptx_m = new char[ptxSize];
nvrtcResult nvrtcPTXResult = nvrtcGetPTX(prog, ptx_m);
@ -127,10 +136,26 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
return DKS_ERROR;
}
// add some additional diagnostics
const int buffer_size = 8192;
CUjit_option options[3];
void* values[3];
char error_log[buffer_size];
int err;
options[0] = CU_JIT_ERROR_LOG_BUFFER;
values[0] = (void*)error_log;
options[1] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
values[1] = (void*)buffer_size;
options[2] = CU_JIT_TARGET_FROM_CUCONTEXT;
values[2] = 0;
//load module from ptx
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 0, 0, 0);
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 3, options, values);
if (loadResult != CUDA_SUCCESS) {
DEBUG_MSG("Load module from ptx failed!");
const char *err_msg;
cuGetErrorString(loadResult, &err_msg);
std::string msg = "Load module from ptx failed! (" + std::to_string(loadResult) + ") : " + err_msg;
DEBUG_MSG(msg);
DEBUG_MSG(error_log);
return DKS_ERROR;
}

View File

@ -36,6 +36,25 @@ __device__ double sekt(double t, double lambda) {
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
}
__device__ double dglktfzf(double t, double sigma, double hopp) {
double nut = hopp*t;
return exp(-sqrt(4.0*pow(sigma/hopp,2.0)*(exp(-nut)-1.0+nut)));
}
__device__ double dglktflf(double t, double nu0, double sigma, double hopp) {
double w0 = TWO_PI*nu0;
double w0_2 = w0*w0;
double w0_t = w0*t;
double nu_2 = hopp*hopp;
double nu_t = hopp*t;
double Gamma_t = ((w0_2+nu_2)*nu_t+(w0_2-nu_2)*(1.0-exp(-nu_t)*cos(w0_t))-2.0*hopp*w0*exp(-nu_t)*sin(w0_t))/pow(w0_2+nu_2,2.0);
if (Gamma_t < 0.0)
Gamma_t = 0.0;
return exp(-sqrt(4.0*sigma*hopp*Gamma_t));
}
__device__ double lgkt(double t, double lambda, double sigma) {
double lambdat = lambda*t;
double sigmatsq = pow(sigma*t, 2.0);
@ -69,6 +88,12 @@ __device__ double rahf(double t, double nu, double lambda) {
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
}
__device__ double ab(double t, double sigma, double gamma) {
double gt = gamma*t;
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
}
__device__ double tf(double t, double phi, double nu) {
double tmp_nu = TWO_PI*nu*t;
double tmp_phi = DEG_TO_RAD*phi;
@ -83,6 +108,56 @@ __device__ double ifld(double t, double alpha, double phi, double nu, double lam
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
}
__device__ double ifgk(double t, double alpha, double nu, double sigma, double lambda, double beta) {
double wt = TWO_PI*nu*t;
double rate2 = sigma*sigma*t*t;
double rateL = 0.0;
double result = 0.0;
// make sure lambda > 0
if (lambda < 0.0)
return 0.0;
if (beta < 0.001) {
rateL = 1.0;
} else {
rateL = pow(lambda*t, beta);
}
if (nu < 0.01) {
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-rate2)*exp(-0.5*rate2);
} else {
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-sigma*sigma*t*t/(wt)*sin(wt))*exp(-0.5*rate2);
}
return result;
}
__device__ double ifll(double t, double alpha, double nu, double a, double lambda, double beta) {
double wt = TWO_PI*nu*t;
double at = a*t;
double rateL = 0.0;
double result = 0.0;
// make sure lambda > 0
if (lambda < 0.0)
return 0.0;
if (beta < 0.001) {
rateL = 1.0;
} else {
rateL = pow(lambda*t, beta);
}
if (nu < 0.01) {
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-at)*exp(-at);
} else {
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-a/(TWO_PI*nu)*sin(wt))*exp(-at);
}
return result;
}
__device__ double b(double t, double phi, double nu) {
return j0(TWO_PI*nu*t + DEG_TO_RAD*phi);
}
@ -94,12 +169,6 @@ __device__ double ib(double t, double alpha, double phi, double nu, double lambd
return alpha*j0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
}
__device__ double ab(double t, double sigma, double gamma) {
double gt = gamma*t;
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
}
__device__ double snkzf(double t, double Delta0, double Rb) {
double D0t2 = pow(Delta0*t, 2.0);
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
@ -134,6 +203,13 @@ __device__ double dnktf(double t, double phi, double nu, double Delta0, double R
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
}
__device__ double fmuf(double t, double wd) {
double sqrt3 = sqrt(3.0);
double wd_t = wd*t;
return (3.0+cos(sqrt3*wd_t)+(1.0-1.0/sqrt3)*cos(((3.0-sqrt3)/2.0)*wd_t)+(1.0+1.0/sqrt3)*cos(((3.0 + sqrt3)/2.0)*wd_t))/6.0;
}
/** Theory and chisquare functions.
* Based on the compiler flags set theory is calculated either in single hist mode or asymetric.
* Based on the compiler flags calculate either chisq or MLE

View File

@ -146,10 +146,10 @@ int DKSBaseMuSR::initChiSquare(int size_data, int size_param, int size_func, int
if (apiCuda()) {
ierr = CUDA_SAFECALL( DKS_SUCCESS );
chiSq = CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
chiSq = (ChiSquareRuntime*) CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
} else {
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
chiSq = OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
chiSq = (ChiSquareRuntime*) OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
}
if (ierr == DKS_SUCCESS) {

View File

@ -42,7 +42,7 @@ std::string OpenCLChiSquareRuntime::buildProgram(std::string function) {
if (!fp)
DEBUG_MSG("Can't open kernel file" << kernel_file);
//get file size and allocate memory
//get file size and allocate memory
fseek(fp, 0, SEEK_END);
fsize = ftell(fp);
kernel_source = new char[fsize+1];
@ -52,7 +52,7 @@ std::string OpenCLChiSquareRuntime::buildProgram(std::string function) {
fread(kernel_source, 1, sizeof(char)*fsize, fp);
kernel_source[fsize] = '\0';
fclose(fp);
std::string kernel_string (kernel_source);
return kernel_string + openclFunctHeader + "return " + function + ";" + openclFunctFooter;
@ -76,7 +76,6 @@ int OpenCLChiSquareRuntime::compileProgram(std::string function, bool mlh) {
double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
int ierr;
//calc number of threads per workgroup and nr of work groups
size_t work_size_sum = (size_t)blockSize_m;
@ -87,7 +86,7 @@ double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
work_items = (length / work_size_sum + 1) * work_size_sum;
int work_groups = length / work_size_sum + 1;
*/
size_t work_items = 80 * work_size_sum;
int work_groups = 80;
@ -96,19 +95,19 @@ double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
double *partial_sums = new double[work_groups];
tmp_ptr = m_oclbase->ocl_allocateMemory(work_groups * sizeof(double), ierr);
//execute sum kernel
m_oclbase->ocl_createKernel("parallelReductionTwoPhase");
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &tmp_ptr);
m_oclbase->ocl_setKernelArg(2, work_size_sum*sizeof(double), NULL);
m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum);
//read partial sums and free temp mempry
//read partial sums and free temp memory
m_oclbase->ocl_readData(tmp_ptr, partial_sums, sizeof(double)*work_groups);
m_oclbase->ocl_freeMemory(tmp_ptr);
//sumup partial sums on the host
double result = 0;
for (int i = 0; i < work_groups; i++)
@ -157,6 +156,7 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
return ierr;
//set kernel args
size_t num=1;
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
@ -172,20 +172,23 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
m_oclbase->ocl_setKernelArg(12, sizeof(double), &tau_m);
m_oclbase->ocl_setKernelArg(13, sizeof(double), &N0_m);
m_oclbase->ocl_setKernelArg(14, sizeof(double), &bkg_m);
m_oclbase->ocl_setKernelArg(15, sizeof(double)*numpar, NULL);
m_oclbase->ocl_setKernelArg(16, sizeof(double)*numfunc, NULL);
m_oclbase->ocl_setKernelArg(17, sizeof(int)*nummap, NULL);
num = numpar; if (num == 0) num = 1;
m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL);
num = numfunc; if (num == 0) num = 1;
m_oclbase->ocl_setKernelArg(16, sizeof(double)*num, NULL);
num = nummap; if (num == 0) num = 1;
m_oclbase->ocl_setKernelArg(17, sizeof(int)*num, NULL);
if (ierr != DKS_SUCCESS)
return ierr;
} else if (fitType == FITTYPE_ASYMMETRY) {
//create kernel
ierr = m_oclbase->ocl_createKernel("kernelChiSquareAsymmetry");
if (ierr != DKS_SUCCESS)
return ierr;
//set kernel args
size_t num=1;
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
@ -200,9 +203,12 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
m_oclbase->ocl_setKernelArg(11, sizeof(double), &timeStep);
m_oclbase->ocl_setKernelArg(12, sizeof(double), &alpha_m);
m_oclbase->ocl_setKernelArg(13, sizeof(double), &beta_m);
m_oclbase->ocl_setKernelArg(14, sizeof(double)*numpar, NULL);
m_oclbase->ocl_setKernelArg(15, sizeof(double)*numfunc, NULL);
m_oclbase->ocl_setKernelArg(16, sizeof(int)*nummap, NULL);
num = numpar; if (num == 0) num = 1;
m_oclbase->ocl_setKernelArg(14, sizeof(double)*num, NULL);
num = numfunc; if (num == 0) num = 1;
m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL);
num = nummap; if (num == 0) num = 1;
m_oclbase->ocl_setKernelArg(16, sizeof(int)*num, NULL);
if (ierr != DKS_SUCCESS)
return ierr;
@ -250,7 +256,7 @@ int OpenCLChiSquareRuntime::writeMap(const int *map, int nummap) {
return ierr;
}
int OpenCLChiSquareRuntime::initChiSquare(int size_data, int size_param,
int OpenCLChiSquareRuntime::initChiSquare(int size_data, int size_param,
int size_func, int size_map)
{
@ -285,7 +291,7 @@ int OpenCLChiSquareRuntime::freeChiSquare() {
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_param_m);
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_func_m);
ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_map_m);
initDone_m = false;
}
@ -321,4 +327,3 @@ int OpenCLChiSquareRuntime::checkChiSquareKernels(int fitType, int &threadsPerBl
return ierr;
}

View File

@ -59,6 +59,25 @@ double sekt(double t, double lambda) {
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
}
double dglktfzf(double t, double sigma, double hopp) {
double nut = hopp*t;
return exp(-sqrt(4.0*pow(sigma/hopp,2.0)*(exp(-nut)-1.0+nut)));
}
double dglktflf(double t, double nu0, double sigma, double hopp) {
double w0 = TWO_PI*nu0;
double w0_2 = w0*w0;
double w0_t = w0*t;
double nu_2 = hopp*hopp;
double nu_t = hopp*t;
double Gamma_t = ((w0_2+nu_2)*nu_t+(w0_2-nu_2)*(1.0-exp(-nu_t)*cos(w0_t))-2.0*hopp*w0*exp(-nu_t)*sin(w0_t))/pow(w0_2+nu_2,2.0);
if (Gamma_t < 0.0)
Gamma_t = 0.0;
return exp(-sqrt(4.0*sigma*hopp*Gamma_t));
}
double lgkt(double t, double lambda, double sigma) {
double lambdat = lambda*t;
double sigmatsq = pow(sigma*t, 2.0);
@ -92,6 +111,12 @@ double rahf(double t, double nu, double lambda) {
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
}
double ab(double t, double sigma, double gamma) {
double gt = gamma*t;
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
}
double tf(double t, double phi, double nu) {
double tmp_nu = TWO_PI*nu*t;
double tmp_phi = DEG_TO_RAD * phi;
@ -106,6 +131,56 @@ double ifld(double t, double alpha, double phi, double nu, double lambdaT, doubl
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
}
double ifgk(double t, double alpha, double nu, double sigma, double lambda, double beta) {
double wt = TWO_PI*nu*t;
double rate2 = sigma*sigma*t*t;
double rateL = 0.0;
double result = 0.0;
// make sure lambda > 0
if (lambda < 0.0)
return 0.0;
if (beta < 0.001) {
rateL = 1.0;
} else {
rateL = pow(lambda*t, beta);
}
if (nu < 0.01) {
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-rate2)*exp(-0.5*rate2);
} else {
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-sigma*sigma*t*t/(wt)*sin(wt))*exp(-0.5*rate2);
}
return result;
}
double ifll(double t, double alpha, double nu, double a, double lambda, double beta) {
double wt = TWO_PI*nu*t;
double at = a*t;
double rateL = 0.0;
double result = 0.0;
// make sure lambda > 0
if (lambda < 0.0)
return 0.0;
if (beta < 0.001) {
rateL = 1.0;
} else {
rateL = pow(lambda*t, beta);
}
if (nu < 0.01) {
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-at)*exp(-at);
} else {
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-a/(TWO_PI*nu)*sin(wt))*exp(-at);
}
return result;
}
double b(double t, double phi, double nu) {
return bessj0(TWO_PI*nu*t + DEG_TO_RAD*phi);
}
@ -117,12 +192,6 @@ double ib(double t, double alpha, double phi, double nu, double lambdaT, double
return alpha*bessj0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
}
double ab(double t, double sigma, double gamma) {
double gt = gamma*t;
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
}
double snkzf(double t, double Delta0, double Rb) {
double D0t2 = pow(Delta0*t, 2.0);
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
@ -157,6 +226,14 @@ double dnktf(double t, double phi, double nu, double Delta0, double Rb, double n
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
}
double fmuf(double t, double wd) {
double sqrt3 = sqrt(3.0);
double wd_t = wd*t;
return (3.0+cos(sqrt3*wd_t)+(1.0-1.0/sqrt3)*cos(((3.0-sqrt3)/2.0)*wd_t)+(1.0+1.0/sqrt3)*cos(((3.0 + sqrt3)/2.0)*wd_t))/6.0;
}
__kernel void kernelChiSquareSingleHisto(__global double *data, __global double *err,
__global double *par, __global double *chisq, __global int *map, __global double *funcv,
int length, int numpar, int numfunc, int nummap,