314 lines
7.7 KiB
Plaintext
314 lines
7.7 KiB
Plaintext
#include "CudaChiSquareRuntime.cuh"
|
|
|
|
CudaChiSquareRuntime::CudaChiSquareRuntime(CudaBase *base) {
|
|
blockSize_m = BLOCK_SIZE;
|
|
numBlocks_m = -1;
|
|
|
|
ptx_m = NULL;
|
|
|
|
m_base = base;
|
|
base_create = false;
|
|
setUpContext();
|
|
}
|
|
|
|
//constructor, init cuda device and create context
|
|
CudaChiSquareRuntime::CudaChiSquareRuntime() {
|
|
blockSize_m = BLOCK_SIZE;
|
|
numBlocks_m = -1;
|
|
|
|
ptx_m = NULL;
|
|
|
|
m_base = new CudaBase();
|
|
base_create = true;
|
|
setUpContext();
|
|
}
|
|
|
|
//free resources
|
|
CudaChiSquareRuntime::~CudaChiSquareRuntime() {
|
|
delete[] ptx_m;
|
|
cuCtxDestroy(context_m);
|
|
|
|
freeChiSquare();
|
|
|
|
if (base_create)
|
|
delete m_base;
|
|
}
|
|
|
|
void CudaChiSquareRuntime::setUpContext() {
|
|
cuInit(0);
|
|
cuDeviceGet(&cuDevice_m, 0);
|
|
cuCtxCreate(&context_m, 0, cuDevice_m);
|
|
|
|
N0_m = 1.0;
|
|
tau_m = 1.0;
|
|
bkg_m = 1.0;
|
|
|
|
initDone_m = false;
|
|
}
|
|
|
|
//build program string
|
|
std::string CudaChiSquareRuntime::buildProgram(std::string function) {
|
|
|
|
long fsize;
|
|
char *kernel_source;
|
|
|
|
//get kernel source
|
|
char * kernel_file = new char[500];
|
|
kernel_file[0] = '\0';
|
|
strcat(kernel_file, OPENCL_KERNELS);
|
|
strcat(kernel_file, "CUDA/NVRTCKernels/CudaChiSquareKernel.cu");
|
|
|
|
//read kernels from file
|
|
FILE *fp = fopen(kernel_file, "rb");
|
|
if (!fp)
|
|
DEBUG_MSG("Can't open kernel file" << kernel_file);
|
|
|
|
//get file size and allocate memory
|
|
fseek(fp, 0, SEEK_END);
|
|
fsize = ftell(fp);
|
|
kernel_source = new char[fsize+1];
|
|
|
|
//read file and content in kernel source
|
|
rewind(fp);
|
|
fread(kernel_source, 1, sizeof(char)*fsize, fp);
|
|
kernel_source[fsize] = '\0';
|
|
fclose(fp);
|
|
|
|
std::string kernel_string (kernel_source);
|
|
return kernel_string + cudaFunctHeader + "return " + function + ";" + cudaFunctFooter;
|
|
}
|
|
|
|
//
|
|
int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
|
|
|
|
//build program string
|
|
std::string cudaProg = buildProgram(function);
|
|
|
|
//create program
|
|
nvrtcProgram prog;
|
|
//std::cout << cudaProg.c_str() << std::endl;
|
|
nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
|
|
|
|
//compile program
|
|
const char *opts[] = {"-fmad=false", ""};
|
|
int numopts = 1;
|
|
if (mlh) {
|
|
opts[1] = "-DMLH";
|
|
numopts = 2;
|
|
}
|
|
|
|
nvrtcResult compileResults = nvrtcCompileProgram(prog, numopts, opts);
|
|
|
|
if (compileResults != NVRTC_SUCCESS) {
|
|
//obtain compilation log
|
|
size_t logSize;
|
|
nvrtcGetProgramLogSize(prog, &logSize);
|
|
char *log = new char[logSize];
|
|
nvrtcGetProgramLog(prog, log);
|
|
DEBUG_MSG("Compilation failed!");
|
|
DEBUG_MSG(log);
|
|
delete[] log;
|
|
|
|
return DKS_ERROR;
|
|
} else {
|
|
DEBUG_MSG("Compilation successfull!");
|
|
}
|
|
|
|
//obtain PTX from program
|
|
if (ptx_m != NULL)
|
|
delete[] ptx_m;
|
|
size_t ptxSize;
|
|
nvrtcGetPTXSize(prog, &ptxSize);
|
|
ptx_m = new char[ptxSize];
|
|
nvrtcResult nvrtcPTXResult = nvrtcGetPTX(prog, ptx_m);
|
|
|
|
if (nvrtcPTXResult != NVRTC_SUCCESS) {
|
|
DEBUG_MSG("Get PTX failed!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
//load module from ptx
|
|
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 0, 0, 0);
|
|
if (loadResult != CUDA_SUCCESS) {
|
|
DEBUG_MSG("Load module from ptx failed!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
// Destroy the program
|
|
nvrtcDestroyProgram(&prog);
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
int CudaChiSquareRuntime::launchChiSquare(int fitType,
|
|
void *mem_data, void *mem_err, int length,
|
|
int numpar, int numfunc, int nummap,
|
|
double timeStart, double timeStep, double &result)
|
|
{
|
|
|
|
if (!initDone_m) {
|
|
DEBUG_MSG("ChiSquare init needs to be called at some point!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
int blocks;
|
|
int threads = blockSize_m;
|
|
if (numBlocks_m < 0)
|
|
blocks = length / threads + 1;
|
|
else
|
|
blocks = numBlocks_m;
|
|
|
|
CUresult cuStatus;
|
|
void **args = 0;
|
|
|
|
if (fitType == FITTYPE_SINGLE_HISTO) {
|
|
cuStatus = cuModuleGetFunction(&kernel_m, module_m, "kernelChiSquareSingleHisto");
|
|
|
|
if (cuStatus != CUDA_SUCCESS) {
|
|
DEBUG_MSG("Failed to get function from module!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
args = (void**) malloc(15 * sizeof(void*));
|
|
args[0] = &mem_data;
|
|
args[1] = &mem_err;
|
|
args[2] = &mem_param_m;
|
|
args[3] = &mem_chisq_m;
|
|
args[4] = &mem_map_m;
|
|
args[5] = &mem_func_m;
|
|
args[6] = &length;
|
|
args[7] = &numpar;
|
|
args[8] = &numfunc;
|
|
args[9] = &nummap;
|
|
args[10] = &timeStart;
|
|
args[11] = &timeStep;
|
|
args[12] = &tau_m;
|
|
args[13] = &N0_m;
|
|
args[14] = &bkg_m;
|
|
} else if (fitType == FITTYPE_ASYMMETRY) {
|
|
cuStatus = cuModuleGetFunction(&kernel_m, module_m, "kernelChiSquareAsymmetry");
|
|
|
|
if (cuStatus != CUDA_SUCCESS) {
|
|
DEBUG_MSG("Failed to get function from module!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
args = (void**) malloc(14 * sizeof(void*));
|
|
args[0] = &mem_data;
|
|
args[1] = &mem_err;
|
|
args[2] = &mem_param_m;
|
|
args[3] = &mem_chisq_m;
|
|
args[4] = &mem_map_m;
|
|
args[5] = &mem_func_m;
|
|
args[6] = &length;
|
|
args[7] = &numpar;
|
|
args[8] = &numfunc;
|
|
args[9] = &nummap;
|
|
args[10] = &timeStart;
|
|
args[11] = &timeStep;
|
|
args[12] = &alpha_m;
|
|
args[13] = &beta_m;
|
|
} else if (fitType == FITTYPE_MU_MINUS) {
|
|
DEBUG_MSG("Not Yet Implemented!");
|
|
return DKS_ERROR;
|
|
} else {
|
|
DEBUG_MSG("Undefined Fit Type!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
cuStatus = cuLaunchKernel(kernel_m,
|
|
blocks, 1, 1,
|
|
threads, 1, 1,
|
|
(numpar + numfunc)*sizeof(double) + nummap*sizeof(int), NULL,
|
|
args, 0);
|
|
|
|
|
|
|
|
if (cuStatus != CUDA_SUCCESS) {
|
|
std::string msg;
|
|
msg = "Failed to run kernel! (" + std::to_string(blocks) + ", " + std::to_string(threads) + ")";
|
|
DEBUG_MSG(msg);
|
|
const char *desc;
|
|
cuGetErrorString(cuStatus, &desc);
|
|
std::cout << desc << std::endl;
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
cublasStatus_t status;
|
|
status = cublasDasum(defaultCublasRT, length, (double*)mem_chisq_m, 1, &result);
|
|
if (status != CUBLAS_STATUS_SUCCESS) {
|
|
DEBUG_MSG("cublas sum failed!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
// cleanup
|
|
if (args)
|
|
free(args);
|
|
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|
|
int CudaChiSquareRuntime::writeParams(const double *params, int numparams) {
|
|
int ierr = m_base->cuda_writeData( (double*)mem_param_m, params, sizeof(double)*numparams);
|
|
return ierr;
|
|
}
|
|
|
|
int CudaChiSquareRuntime::writeFunc(const double *func, int numfunc) {
|
|
int ierr = m_base->cuda_writeData( (double*)mem_func_m, func, sizeof(double)*numfunc);
|
|
return ierr;
|
|
}
|
|
|
|
int CudaChiSquareRuntime::writeMap(const int *map, int nummap) {
|
|
int ierr = m_base->cuda_writeData( (int*)mem_map_m, map, sizeof(int)*nummap);
|
|
return ierr;
|
|
}
|
|
|
|
int CudaChiSquareRuntime::initChiSquare(int size_data, int size_param, int size_func,
|
|
int size_map) {
|
|
|
|
int ierr = DKS_ERROR;
|
|
if (initDone_m) {
|
|
DEBUG_MSG("Reinitializing ChiSquare");
|
|
freeChiSquare();
|
|
}
|
|
|
|
//init cublas
|
|
cublasStatus_t status = CUBLAS_STATUS_SUCCESS;
|
|
status = cublasCreate(&defaultCublasRT);
|
|
if (status != CUBLAS_STATUS_SUCCESS)
|
|
DEBUG_MSG("CUBLAS create default handle failed!");
|
|
|
|
//allocate temporary memory
|
|
mem_chisq_m = m_base->cuda_allocateMemory(size_data*sizeof(double), ierr);
|
|
mem_param_m = m_base->cuda_allocateMemory(size_param*sizeof(double), ierr);
|
|
mem_func_m = m_base->cuda_allocateMemory(size_func*sizeof(double), ierr);
|
|
mem_map_m = m_base->cuda_allocateMemory(size_map*sizeof(int), ierr);
|
|
initDone_m = true;
|
|
|
|
return ierr;
|
|
}
|
|
|
|
int CudaChiSquareRuntime::freeChiSquare() {
|
|
int ierr = DKS_ERROR;
|
|
if (initDone_m) {
|
|
//delete cublas
|
|
cublasStatus_t status = CUBLAS_STATUS_SUCCESS;
|
|
status = cublasDestroy(defaultCublasRT);
|
|
if (status != CUBLAS_STATUS_SUCCESS) {
|
|
DEBUG_MSG("CUBLAS delete default handle failed!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
//free memory
|
|
ierr = m_base->cuda_freeMemory(mem_chisq_m);
|
|
ierr = m_base->cuda_freeMemory(mem_param_m);
|
|
ierr = m_base->cuda_freeMemory(mem_func_m);
|
|
ierr = m_base->cuda_freeMemory(mem_map_m);
|
|
|
|
initDone_m = false;
|
|
}
|
|
|
|
return ierr;
|
|
}
|