diff --git a/src/OpenCL/OpenCLChiSquareRuntime.cpp b/src/OpenCL/OpenCLChiSquareRuntime.cpp index e3ef6ba..fb50945 100644 --- a/src/OpenCL/OpenCLChiSquareRuntime.cpp +++ b/src/OpenCL/OpenCLChiSquareRuntime.cpp @@ -42,7 +42,7 @@ std::string OpenCLChiSquareRuntime::buildProgram(std::string function) { if (!fp) DEBUG_MSG("Can't open kernel file" << kernel_file); - //get file size and allocate memory + //get file size and allocate memory fseek(fp, 0, SEEK_END); fsize = ftell(fp); kernel_source = new char[fsize+1]; @@ -52,7 +52,7 @@ std::string OpenCLChiSquareRuntime::buildProgram(std::string function) { fread(kernel_source, 1, sizeof(char)*fsize, fp); kernel_source[fsize] = '\0'; fclose(fp); - + std::string kernel_string (kernel_source); return kernel_string + openclFunctHeader + "return " + function + ";" + openclFunctFooter; @@ -76,7 +76,6 @@ int OpenCLChiSquareRuntime::compileProgram(std::string function, bool mlh) { double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) { - int ierr; //calc number of threads per workgroup and nr of work groups size_t work_size_sum = (size_t)blockSize_m; @@ -87,7 +86,7 @@ double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) { work_items = (length / work_size_sum + 1) * work_size_sum; int work_groups = length / work_size_sum + 1; */ - + size_t work_items = 80 * work_size_sum; int work_groups = 80; @@ -96,19 +95,19 @@ double OpenCLChiSquareRuntime::calculateSum(cl_mem data, int length) { double *partial_sums = new double[work_groups]; tmp_ptr = m_oclbase->ocl_allocateMemory(work_groups * sizeof(double), ierr); - + //execute sum kernel m_oclbase->ocl_createKernel("parallelReductionTwoPhase"); m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data); m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &tmp_ptr); m_oclbase->ocl_setKernelArg(2, work_size_sum*sizeof(double), NULL); - m_oclbase->ocl_setKernelArg(3, sizeof(int), &length); + m_oclbase->ocl_setKernelArg(3, sizeof(int), &length); m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum); - //read partial sums and free temp mempry + //read partial sums and free temp memory m_oclbase->ocl_readData(tmp_ptr, partial_sums, sizeof(double)*work_groups); m_oclbase->ocl_freeMemory(tmp_ptr); - + //sumup partial sums on the host double result = 0; for (int i = 0; i < work_groups; i++) @@ -157,6 +156,7 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType, return ierr; //set kernel args + size_t num=1; m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data); m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err); m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param); @@ -172,20 +172,23 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType, m_oclbase->ocl_setKernelArg(12, sizeof(double), &tau_m); m_oclbase->ocl_setKernelArg(13, sizeof(double), &N0_m); m_oclbase->ocl_setKernelArg(14, sizeof(double), &bkg_m); - m_oclbase->ocl_setKernelArg(15, sizeof(double)*numpar, NULL); - m_oclbase->ocl_setKernelArg(16, sizeof(double)*numfunc, NULL); - m_oclbase->ocl_setKernelArg(17, sizeof(int)*nummap, NULL); + num = numpar; if (num == 0) num = 1; + m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL); + num = numfunc; if (num == 0) num = 1; + m_oclbase->ocl_setKernelArg(16, sizeof(double)*num, NULL); + num = nummap; if (num == 0) num = 1; + m_oclbase->ocl_setKernelArg(17, sizeof(int)*num, NULL); if (ierr != DKS_SUCCESS) return ierr; } else if (fitType == FITTYPE_ASYMMETRY) { //create kernel ierr = m_oclbase->ocl_createKernel("kernelChiSquareAsymmetry"); - if (ierr != DKS_SUCCESS) return ierr; //set kernel args + size_t num=1; m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &cl_mem_data); m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &cl_mem_err); m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &cl_param); @@ -200,9 +203,12 @@ int OpenCLChiSquareRuntime::launchChiSquare(int fitType, m_oclbase->ocl_setKernelArg(11, sizeof(double), &timeStep); m_oclbase->ocl_setKernelArg(12, sizeof(double), &alpha_m); m_oclbase->ocl_setKernelArg(13, sizeof(double), &beta_m); - m_oclbase->ocl_setKernelArg(14, sizeof(double)*numpar, NULL); - m_oclbase->ocl_setKernelArg(15, sizeof(double)*numfunc, NULL); - m_oclbase->ocl_setKernelArg(16, sizeof(int)*nummap, NULL); + num = numpar; if (num == 0) num = 1; + m_oclbase->ocl_setKernelArg(14, sizeof(double)*num, NULL); + num = numfunc; if (num == 0) num = 1; + m_oclbase->ocl_setKernelArg(15, sizeof(double)*num, NULL); + num = nummap; if (num == 0) num = 1; + m_oclbase->ocl_setKernelArg(16, sizeof(int)*num, NULL); if (ierr != DKS_SUCCESS) return ierr; @@ -250,7 +256,7 @@ int OpenCLChiSquareRuntime::writeMap(const int *map, int nummap) { return ierr; } -int OpenCLChiSquareRuntime::initChiSquare(int size_data, int size_param, +int OpenCLChiSquareRuntime::initChiSquare(int size_data, int size_param, int size_func, int size_map) { @@ -285,7 +291,7 @@ int OpenCLChiSquareRuntime::freeChiSquare() { ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_param_m); ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_func_m); ierr = m_oclbase->ocl_freeMemory((cl_mem)mem_map_m); - + initDone_m = false; } @@ -321,4 +327,3 @@ int OpenCLChiSquareRuntime::checkChiSquareKernels(int fitType, int &threadsPerBl return ierr; } -