Files
DKS/src/CUDA/CudaChiSquareRuntime.cu
2016-10-10 14:49:32 +02:00

314 lines
7.7 KiB
Plaintext

#include "CudaChiSquareRuntime.cuh"
CudaChiSquareRuntime::CudaChiSquareRuntime(CudaBase *base) {
blockSize_m = BLOCK_SIZE;
numBlocks_m = -1;
ptx_m = NULL;
m_base = base;
base_create = false;
setUpContext();
}
//constructor, init cuda device and create context
CudaChiSquareRuntime::CudaChiSquareRuntime() {
blockSize_m = BLOCK_SIZE;
numBlocks_m = -1;
ptx_m = NULL;
m_base = new CudaBase();
base_create = true;
setUpContext();
}
//free resources
CudaChiSquareRuntime::~CudaChiSquareRuntime() {
delete[] ptx_m;
cuCtxDestroy(context_m);
freeChiSquare();
if (base_create)
delete m_base;
}
void CudaChiSquareRuntime::setUpContext() {
cuInit(0);
cuDeviceGet(&cuDevice_m, 0);
cuCtxCreate(&context_m, 0, cuDevice_m);
N0_m = 1.0;
tau_m = 1.0;
bkg_m = 1.0;
initDone_m = false;
}
//build program string
std::string CudaChiSquareRuntime::buildProgram(std::string function) {
long fsize;
char *kernel_source;
//get kernel source
char * kernel_file = new char[500];
kernel_file[0] = '\0';
strcat(kernel_file, OPENCL_KERNELS);
strcat(kernel_file, "CUDA/NVRTCKernels/CudaChiSquareKernel.cu");
//read kernels from file
FILE *fp = fopen(kernel_file, "rb");
if (!fp)
DEBUG_MSG("Can't open kernel file" << kernel_file);
//get file size and allocate memory
fseek(fp, 0, SEEK_END);
fsize = ftell(fp);
kernel_source = new char[fsize+1];
//read file and content in kernel source
rewind(fp);
fread(kernel_source, 1, sizeof(char)*fsize, fp);
kernel_source[fsize] = '\0';
fclose(fp);
std::string kernel_string (kernel_source);
return kernel_string + cudaFunctHeader + "return " + function + ";" + cudaFunctFooter;
}
//
int CudaChiSquareRuntime::compileProgram(std::string function, bool mlh) {
//build program string
std::string cudaProg = buildProgram(function);
//create program
nvrtcProgram prog;
//std::cout << cudaProg.c_str() << std::endl;
nvrtcCreateProgram(&prog, cudaProg.c_str(), "chiSquareRuntime.cu", 0, NULL, NULL);
//compile program
const char *opts[] = {"-fmad=false", ""};
int numopts = 1;
if (mlh) {
opts[1] = "-DMLH";
numopts = 2;
}
nvrtcResult compileResults = nvrtcCompileProgram(prog, numopts, opts);
if (compileResults != NVRTC_SUCCESS) {
//obtain compilation log
size_t logSize;
nvrtcGetProgramLogSize(prog, &logSize);
char *log = new char[logSize];
nvrtcGetProgramLog(prog, log);
DEBUG_MSG("Compilation failed!");
DEBUG_MSG(log);
delete[] log;
return DKS_ERROR;
} else {
DEBUG_MSG("Compilation successfull!");
}
//obtain PTX from program
if (ptx_m != NULL)
delete[] ptx_m;
size_t ptxSize;
nvrtcGetPTXSize(prog, &ptxSize);
ptx_m = new char[ptxSize];
nvrtcResult nvrtcPTXResult = nvrtcGetPTX(prog, ptx_m);
if (nvrtcPTXResult != NVRTC_SUCCESS) {
DEBUG_MSG("Get PTX failed!");
return DKS_ERROR;
}
//load module from ptx
CUresult loadResult = cuModuleLoadDataEx(&module_m, ptx_m, 0, 0, 0);
if (loadResult != CUDA_SUCCESS) {
DEBUG_MSG("Load module from ptx failed!");
return DKS_ERROR;
}
// Destroy the program
nvrtcDestroyProgram(&prog);
return DKS_SUCCESS;
}
int CudaChiSquareRuntime::launchChiSquare(int fitType,
void *mem_data, void *mem_err, int length,
int numpar, int numfunc, int nummap,
double timeStart, double timeStep, double &result)
{
if (!initDone_m) {
DEBUG_MSG("ChiSquare init needs to be called at some point!");
return DKS_ERROR;
}
int blocks;
int threads = blockSize_m;
if (numBlocks_m < 0)
blocks = length / threads + 1;
else
blocks = numBlocks_m;
CUresult cuStatus;
void **args = 0;
if (fitType == FITTYPE_SINGLE_HISTO) {
cuStatus = cuModuleGetFunction(&kernel_m, module_m, "kernelChiSquareSingleHisto");
if (cuStatus != CUDA_SUCCESS) {
DEBUG_MSG("Failed to get function from module!");
return DKS_ERROR;
}
args = (void**) malloc(15 * sizeof(void*));
args[0] = &mem_data;
args[1] = &mem_err;
args[2] = &mem_param_m;
args[3] = &mem_chisq_m;
args[4] = &mem_map_m;
args[5] = &mem_func_m;
args[6] = &length;
args[7] = &numpar;
args[8] = &numfunc;
args[9] = &nummap;
args[10] = &timeStart;
args[11] = &timeStep;
args[12] = &tau_m;
args[13] = &N0_m;
args[14] = &bkg_m;
} else if (fitType == FITTYPE_ASYMMETRY) {
cuStatus = cuModuleGetFunction(&kernel_m, module_m, "kernelChiSquareAsymmetry");
if (cuStatus != CUDA_SUCCESS) {
DEBUG_MSG("Failed to get function from module!");
return DKS_ERROR;
}
args = (void**) malloc(14 * sizeof(void*));
args[0] = &mem_data;
args[1] = &mem_err;
args[2] = &mem_param_m;
args[3] = &mem_chisq_m;
args[4] = &mem_map_m;
args[5] = &mem_func_m;
args[6] = &length;
args[7] = &numpar;
args[8] = &numfunc;
args[9] = &nummap;
args[10] = &timeStart;
args[11] = &timeStep;
args[12] = &alpha_m;
args[13] = &beta_m;
} else if (fitType == FITTYPE_MU_MINUS) {
DEBUG_MSG("Not Yet Implemented!");
return DKS_ERROR;
} else {
DEBUG_MSG("Undefined Fit Type!");
return DKS_ERROR;
}
cuStatus = cuLaunchKernel(kernel_m,
blocks, 1, 1,
threads, 1, 1,
(numpar + numfunc)*sizeof(double) + nummap*sizeof(int), NULL,
args, 0);
if (cuStatus != CUDA_SUCCESS) {
std::string msg;
msg = "Failed to run kernel! (" + std::to_string(blocks) + ", " + std::to_string(threads) + ")";
DEBUG_MSG(msg);
const char *desc;
cuGetErrorString(cuStatus, &desc);
std::cout << desc << std::endl;
return DKS_ERROR;
}
cublasStatus_t status;
status = cublasDasum(defaultCublasRT, length, (double*)mem_chisq_m, 1, &result);
if (status != CUBLAS_STATUS_SUCCESS) {
DEBUG_MSG("cublas sum failed!");
return DKS_ERROR;
}
// cleanup
if (args)
free(args);
return DKS_SUCCESS;
}
int CudaChiSquareRuntime::writeParams(const double *params, int numparams) {
int ierr = m_base->cuda_writeData( (double*)mem_param_m, params, sizeof(double)*numparams);
return ierr;
}
int CudaChiSquareRuntime::writeFunc(const double *func, int numfunc) {
int ierr = m_base->cuda_writeData( (double*)mem_func_m, func, sizeof(double)*numfunc);
return ierr;
}
int CudaChiSquareRuntime::writeMap(const int *map, int nummap) {
int ierr = m_base->cuda_writeData( (int*)mem_map_m, map, sizeof(int)*nummap);
return ierr;
}
int CudaChiSquareRuntime::initChiSquare(int size_data, int size_param, int size_func,
int size_map) {
int ierr = DKS_ERROR;
if (initDone_m) {
DEBUG_MSG("Reinitializing ChiSquare");
freeChiSquare();
}
//init cublas
cublasStatus_t status = CUBLAS_STATUS_SUCCESS;
status = cublasCreate(&defaultCublasRT);
if (status != CUBLAS_STATUS_SUCCESS)
DEBUG_MSG("CUBLAS create default handle failed!");
//allocate temporary memory
mem_chisq_m = m_base->cuda_allocateMemory(size_data*sizeof(double), ierr);
mem_param_m = m_base->cuda_allocateMemory(size_param*sizeof(double), ierr);
mem_func_m = m_base->cuda_allocateMemory(size_func*sizeof(double), ierr);
mem_map_m = m_base->cuda_allocateMemory(size_map*sizeof(int), ierr);
initDone_m = true;
return ierr;
}
int CudaChiSquareRuntime::freeChiSquare() {
int ierr = DKS_ERROR;
if (initDone_m) {
//delete cublas
cublasStatus_t status = CUBLAS_STATUS_SUCCESS;
status = cublasDestroy(defaultCublasRT);
if (status != CUBLAS_STATUS_SUCCESS) {
DEBUG_MSG("CUBLAS delete default handle failed!");
return DKS_ERROR;
}
//free memory
ierr = m_base->cuda_freeMemory(mem_chisq_m);
ierr = m_base->cuda_freeMemory(mem_param_m);
ierr = m_base->cuda_freeMemory(mem_func_m);
ierr = m_base->cuda_freeMemory(mem_map_m);
initDone_m = false;
}
return ierr;
}