Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
9381b14b87 | |||
43cb9020c4 | |||
3d946f666b | |||
e6021eb6e3 |
@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
|
||||
PROJECT (DKS)
|
||||
SET (DKS_VERSION_MAJOR 1)
|
||||
SET (DKS_VERSION_MINOR 1)
|
||||
SET (DKS_VERSION_PATCH 2)
|
||||
SET (DKS_VERSION_PATCH 4)
|
||||
set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
|
||||
SET (PACKAGE \"dks\")
|
||||
SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
|
||||
|
@ -1,7 +1,7 @@
|
||||
##################################################################
|
||||
#
|
||||
# Name: Dynamic Kernel Scheduler
|
||||
# Version: 1.0
|
||||
# Version: 1.1
|
||||
# Author: Uldis Locans
|
||||
# Contacts: locans.uldis@psi.ch
|
||||
#
|
||||
|
@ -86,15 +86,19 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
||||
|
||||
//create program
|
||||
nvrtcProgram prog;
|
||||
//std::cout << cudaProg.c_str() << std::endl;
|
||||
nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
|
||||
// std::cout << cudaProg.c_str() << std::endl;
|
||||
nvrtcResult createResult = nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
|
||||
if (createResult != NVRTC_SUCCESS) {
|
||||
DEBUG_MSG("Program creation failed!");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
//compile program
|
||||
const char *opts[] = {"-fmad=false", ""};
|
||||
int numopts = 1;
|
||||
const char *opts[] = {"-arch=compute_35", "-fmad=false", ""};
|
||||
int numopts = 2;
|
||||
if (mlh) {
|
||||
opts[1] = "-DMLH";
|
||||
numopts = 2;
|
||||
opts[2] = "-DMLH";
|
||||
numopts = 3;
|
||||
}
|
||||
|
||||
nvrtcResult compileResults = nvrtcCompileProgram(prog, numopts, opts);
|
||||
@ -118,7 +122,11 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
||||
if (ptx_m != NULL)
|
||||
delete[] ptx_m;
|
||||
size_t ptxSize;
|
||||
nvrtcGetPTXSize(prog, &ptxSize);
|
||||
nvrtcResult ptxSizeResult = nvrtcGetPTXSize(prog, &ptxSize);
|
||||
if (ptxSizeResult != NVRTC_SUCCESS) {
|
||||
DEBUG_MSG("PTX get size error!");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
ptx_m = new char[ptxSize];
|
||||
nvrtcResult nvrtcPTXResult = nvrtcGetPTX(prog, ptx_m);
|
||||
|
||||
@ -127,10 +135,26 @@ int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
// add some additional diagnostics
|
||||
const int buffer_size = 8192;
|
||||
CUjit_option options[3];
|
||||
void* values[3];
|
||||
char error_log[buffer_size];
|
||||
int err;
|
||||
options[0] = CU_JIT_ERROR_LOG_BUFFER;
|
||||
values[0] = (void*)error_log;
|
||||
options[1] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
|
||||
values[1] = (void*)buffer_size;
|
||||
options[2] = CU_JIT_TARGET_FROM_CUCONTEXT;
|
||||
values[2] = 0;
|
||||
//load module from ptx
|
||||
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 0, 0, 0);
|
||||
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 3, options, values);
|
||||
if (loadResult != CUDA_SUCCESS) {
|
||||
DEBUG_MSG("Load module from ptx failed!");
|
||||
const char *err_msg;
|
||||
cuGetErrorString(loadResult, &err_msg);
|
||||
std::string msg = "Load module from ptx failed! (" + std::to_string(loadResult) + ") : " + err_msg;
|
||||
DEBUG_MSG(msg);
|
||||
DEBUG_MSG(error_log);
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
|
@ -83,6 +83,56 @@ __device__ double ifld(double t, double alpha, double phi, double nu, double lam
|
||||
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||
}
|
||||
|
||||
__device__ double ifgk(double t, double alpha, double nu, double sigma, double lambda, double beta) {
|
||||
double wt = TWO_PI*nu*t;
|
||||
double rate2 = sigma*sigma*t*t;
|
||||
double rateL = 0.0;
|
||||
double result = 0.0;
|
||||
|
||||
// make sure lambda > 0
|
||||
if (lambda < 0.0)
|
||||
return 0.0;
|
||||
|
||||
if (beta < 0.001) {
|
||||
rateL = 1.0;
|
||||
} else {
|
||||
rateL = pow(lambda*t, beta);
|
||||
}
|
||||
|
||||
if (nu < 0.01) {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-rate2)*exp(-0.5*rate2);
|
||||
} else {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-sigma*sigma*t*t/(wt)*sin(wt))*exp(-0.5*rate2);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ double ifll(double t, double alpha, double nu, double a, double lambda, double beta) {
|
||||
double wt = TWO_PI*nu*t;
|
||||
double at = a*t;
|
||||
double rateL = 0.0;
|
||||
double result = 0.0;
|
||||
|
||||
// make sure lambda > 0
|
||||
if (lambda < 0.0)
|
||||
return 0.0;
|
||||
|
||||
if (beta < 0.001) {
|
||||
rateL = 1.0;
|
||||
} else {
|
||||
rateL = pow(lambda*t, beta);
|
||||
}
|
||||
|
||||
if (nu < 0.01) {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-at)*exp(-at);
|
||||
} else {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-a/(TWO_PI*nu)*sin(wt))*exp(-at);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ double b(double t, double phi, double nu) {
|
||||
return j0(TWO_PI*nu*t + DEG_TO_RAD*phi);
|
||||
}
|
||||
|
@ -76,7 +76,6 @@ int OpenCLChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
||||
|
||||
double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
|
||||
|
||||
|
||||
int ierr;
|
||||
//calc number of threads per workgroup and nr of work groups
|
||||
size_t work_size_sum = (size_t)blockSize_m;
|
||||
@ -105,7 +104,7 @@ double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) {
|
||||
m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
|
||||
m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum);
|
||||
|
||||
//read partial sums and free temp mempry
|
||||
//read partial sums and free temp memory
|
||||
m_oclbase->ocl_readData(tmp_ptr, partial_sums, sizeof(double)*work_groups);
|
||||
m_oclbase->ocl_freeMemory(tmp_ptr);
|
||||
|
||||
@ -157,6 +156,7 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
|
||||
return ierr;
|
||||
|
||||
//set kernel args
|
||||
size_t num=1;
|
||||
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
|
||||
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
|
||||
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
|
||||
@ -172,20 +172,23 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
|
||||
m_oclbase->ocl_setKernelArg(12, sizeof(double), &tau_m);
|
||||
m_oclbase->ocl_setKernelArg(13, sizeof(double), &N0_m);
|
||||
m_oclbase->ocl_setKernelArg(14, sizeof(double), &bkg_m);
|
||||
m_oclbase->ocl_setKernelArg(15, sizeof(double)*numpar, NULL);
|
||||
m_oclbase->ocl_setKernelArg(16, sizeof(double)*numfunc, NULL);
|
||||
m_oclbase->ocl_setKernelArg(17, sizeof(int)*nummap, NULL);
|
||||
num = numpar; if (num == 0) num = 1;
|
||||
m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL);
|
||||
num = numfunc; if (num == 0) num = 1;
|
||||
m_oclbase->ocl_setKernelArg(16, sizeof(double)*num, NULL);
|
||||
num = nummap; if (num == 0) num = 1;
|
||||
m_oclbase->ocl_setKernelArg(17, sizeof(int)*num, NULL);
|
||||
|
||||
if (ierr != DKS_SUCCESS)
|
||||
return ierr;
|
||||
} else if (fitType == FITTYPE_ASYMMETRY) {
|
||||
//create kernel
|
||||
ierr = m_oclbase->ocl_createKernel("kernelChiSquareAsymmetry");
|
||||
|
||||
if (ierr != DKS_SUCCESS)
|
||||
return ierr;
|
||||
|
||||
//set kernel args
|
||||
size_t num=1;
|
||||
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data);
|
||||
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err);
|
||||
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param);
|
||||
@ -200,9 +203,12 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType,
|
||||
m_oclbase->ocl_setKernelArg(11, sizeof(double), &timeStep);
|
||||
m_oclbase->ocl_setKernelArg(12, sizeof(double), &alpha_m);
|
||||
m_oclbase->ocl_setKernelArg(13, sizeof(double), &beta_m);
|
||||
m_oclbase->ocl_setKernelArg(14, sizeof(double)*numpar, NULL);
|
||||
m_oclbase->ocl_setKernelArg(15, sizeof(double)*numfunc, NULL);
|
||||
m_oclbase->ocl_setKernelArg(16, sizeof(int)*nummap, NULL);
|
||||
num = numpar; if (num == 0) num = 1;
|
||||
m_oclbase->ocl_setKernelArg(14, sizeof(double)*num, NULL);
|
||||
num = numfunc; if (num == 0) num = 1;
|
||||
m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL);
|
||||
num = nummap; if (num == 0) num = 1;
|
||||
m_oclbase->ocl_setKernelArg(16, sizeof(int)*num, NULL);
|
||||
|
||||
if (ierr != DKS_SUCCESS)
|
||||
return ierr;
|
||||
@ -321,4 +327,3 @@ int OpenCLChiSquareRuntime::checkChiSquareKernels(int fitType, int &threadsPerBl
|
||||
return ierr;
|
||||
|
||||
}
|
||||
|
||||
|
@ -106,6 +106,56 @@ double ifld(double t, double alpha, double phi, double nu, double lambdaT, doubl
|
||||
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
||||
}
|
||||
|
||||
double ifgk(double t, double alpha, double nu, double sigma, double lambda, double beta) {
|
||||
double wt = TWO_PI*nu*t;
|
||||
double rate2 = sigma*sigma*t*t;
|
||||
double rateL = 0.0;
|
||||
double result = 0.0;
|
||||
|
||||
// make sure lambda > 0
|
||||
if (lambda < 0.0)
|
||||
return 0.0;
|
||||
|
||||
if (beta < 0.001) {
|
||||
rateL = 1.0;
|
||||
} else {
|
||||
rateL = pow(lambda*t, beta);
|
||||
}
|
||||
|
||||
if (nu < 0.01) {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-rate2)*exp(-0.5*rate2);
|
||||
} else {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-sigma*sigma*t*t/(wt)*sin(wt))*exp(-0.5*rate2);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
double ifll(double t, double alpha, double nu, double a, double lambda, double beta) {
|
||||
double wt = TWO_PI*nu*t;
|
||||
double at = a*t;
|
||||
double rateL = 0.0;
|
||||
double result = 0.0;
|
||||
|
||||
// make sure lambda > 0
|
||||
if (lambda < 0.0)
|
||||
return 0.0;
|
||||
|
||||
if (beta < 0.001) {
|
||||
rateL = 1.0;
|
||||
} else {
|
||||
rateL = pow(lambda*t, beta);
|
||||
}
|
||||
|
||||
if (nu < 0.01) {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(1.0-at)*exp(-at);
|
||||
} else {
|
||||
result = (1.0-alpha)*exp(-rateL) + alpha*(cos(wt)-a/(TWO_PI*nu)*sin(wt))*exp(-at);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
double b(double t, double phi, double nu) {
|
||||
return bessj0(TWO_PI*nu*t + DEG_TO_RAD*phi);
|
||||
}
|
||||
|
Reference in New Issue
Block a user