#include "OpenCLChiSquare.h"

#include <vector>

namespace {

// Local (work-group) size used for every kernel launch in this file.
const size_t kWorkGroupSize = 128;

// Returns the smallest multiple of `multiple` that is >= `value`.
// `multiple` must be non-zero.
size_t roundUpToMultiple(size_t value, size_t multiple) {
  const size_t remainder = value % multiple;
  return (remainder == 0) ? value : value + (multiple - remainder);
}

}  // namespace

/**
 * Sum the first `length` elements of the device buffer `data`.
 *
 * Launches the "parallelReductionSum" kernel, which is expected to write one
 * partial sum per work-group into a temporary device buffer, then reads the
 * partial sums back and adds them up on the host.
 *
 * @param data    device buffer to reduce (elements presumably doubles, since
 *                the partial sums and local scratch space are sized in doubles)
 * @param length  number of elements to sum; values <= 0 yield 0.0
 * @return        sum of the partial sums produced by the kernel
 */
double OpenCLChiSquare::ocl_sum(cl_mem data, int length) {
  // Nothing to reduce. This also avoids enqueueing a kernel with a global
  // size of 0 and the size_t wrap-around a negative length would cause.
  if (length <= 0)
    return 0.0;

  // Global size must be a whole number of work-groups.
  size_t work_size_sum = kWorkGroupSize;
  size_t work_items = roundUpToMultiple(static_cast<size_t>(length), work_size_sum);

  // Exactly one partial sum per launched work-group. Deriving the count from
  // the padded global size (rather than length / work_size + 1) avoids reading
  // back a slot that no work-group ever writes when length is already a
  // multiple of the work-group size.
  const size_t work_groups = work_items / work_size_sum;

  // Temporary device buffer and host mirror for the partial sums.
  // NOTE(review): ierr is never inspected, and none of the m_oclbase return
  // values are checked - confirm the error contract and handle failures.
  int ierr = 0;
  std::vector<double> partial_sums(work_groups, 0.0);
  cl_mem tmp_ptr = m_oclbase->ocl_allocateMemory(work_groups * sizeof(double), ierr);

  // Execute the reduction kernel.
  m_oclbase->ocl_createKernel("parallelReductionSum");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &tmp_ptr);
  m_oclbase->ocl_setKernelArg(2, work_size_sum * sizeof(double), NULL);  // local scratch
  m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum);

  // Read the partial sums back and release the temporary device memory.
  m_oclbase->ocl_readData(tmp_ptr, &partial_sums[0], sizeof(double) * work_groups);
  m_oclbase->ocl_freeMemory(tmp_ptr);

  // Finish the reduction on the host.
  double result = 0;
  for (size_t i = 0; i < work_groups; ++i)
    result += partial_sums[i];

  return result;
}

/**
 * Run the "kernelPHistoTFFcn" kernel and sum its output buffer.
 *
 * All pointer arguments are device buffers (cl_mem handles passed as void*).
 * The scalar arguments are forwarded to the kernel unchanged; a local buffer
 * of `numpar` doubles is reserved as the last kernel argument.
 *
 * @param mem_data         device buffer passed as kernel argument 0
 * @param mem_par          device buffer passed as kernel argument 1
 * @param mem_result       device buffer passed as kernel argument 2; summed
 *                         afterwards over sensors * length elements
 * @param fTimeResolution  forwarded as kernel argument 3
 * @param fRebin           forwarded as kernel argument 4
 * @param sensors          forwarded as kernel argument 6
 * @param length           forwarded as kernel argument 5
 * @param numpar           forwarded as kernel argument 7; sizes the local buffer
 * @param result           receives the sum of mem_result (0.0 for empty input)
 * @return DKS_SUCCESS
 */
int OpenCLChiSquare::ocl_PHistoTFFcn(void *mem_data, void *mem_par, void *mem_result,
                                     double fTimeResolution, double fRebin,
                                     int sensors, int length, int numpar,
                                     double &result) {
  // Empty problem: nothing to launch or sum. Also prevents negative values
  // from wrapping around when converted to size_t below.
  if (length <= 0 || sensors <= 0) {
    result = 0.0;
    return DKS_SUCCESS;
  }

  // Round the full length * sensors count up to a whole number of
  // work-groups, so the launch size no longer depends on whether `length`
  // alone happens to be a multiple of the work-group size.
  // NOTE(review): assumes the kernel bounds-checks its global id, so the
  // padding work-items are idle - confirm against the kernel source.
  size_t work_size = kWorkGroupSize;
  size_t work_items = roundUpToMultiple(static_cast<size_t>(length) * sensors, work_size);

  cl_mem data = static_cast<cl_mem>(mem_data);
  cl_mem par = static_cast<cl_mem>(mem_par);
  cl_mem chi = static_cast<cl_mem>(mem_result);

  // Load and execute the PHistoTFFcn kernel.
  m_oclbase->ocl_createKernel("kernelPHistoTFFcn");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &par);
  m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &chi);
  m_oclbase->ocl_setKernelArg(3, sizeof(double), &fTimeResolution);
  m_oclbase->ocl_setKernelArg(4, sizeof(double), &fRebin);
  m_oclbase->ocl_setKernelArg(5, sizeof(int), &length);
  m_oclbase->ocl_setKernelArg(6, sizeof(int), &sensors);
  m_oclbase->ocl_setKernelArg(7, sizeof(int), &numpar);
  m_oclbase->ocl_setKernelArg(8, sizeof(double) * numpar, NULL);  // local buffer
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size);

  // Reduce the per-element output over all sensors.
  result = ocl_sum(chi, sensors * length);

  return DKS_SUCCESS;
}

/**
 * Run the "kernelSingleGaussTF" kernel and sum its output buffer.
 *
 * All pointer arguments are device buffers (cl_mem handles passed as void*).
 * The scalar arguments are forwarded to the kernel unchanged; a local buffer
 * of `numpar` doubles is reserved as the last kernel argument.
 *
 * @param mem_data         device buffer passed as kernel argument 0
 * @param mem_t0           device buffer passed as kernel argument 1
 * @param mem_par          device buffer passed as kernel argument 2
 * @param mem_result       device buffer passed as kernel argument 3; summed
 *                         afterwards over `length` elements
 * @param fTimeResolution  forwarded as kernel argument 4
 * @param fRebin           forwarded as kernel argument 5
 * @param fGoodBinOffset   forwarded as kernel argument 6
 * @param sensors          forwarded as kernel argument 8
 * @param length           forwarded as kernel argument 7
 * @param numpar           forwarded as kernel argument 9; sizes the local buffer
 * @param result           receives the sum of mem_result (0.0 for empty input)
 * @return DKS_SUCCESS
 */
int OpenCLChiSquare::ocl_singleGaussTF(void *mem_data, void *mem_t0, void *mem_par,
                                       void *mem_result, double fTimeResolution,
                                       double fRebin, double fGoodBinOffset,
                                       int sensors, int length, int numpar,
                                       double &result) {
  // Empty problem: nothing to launch or sum. Also prevents negative values
  // from wrapping around when converted to size_t below.
  if (length <= 0 || sensors <= 0) {
    result = 0.0;
    return DKS_SUCCESS;
  }

  // Round the full length * sensors count up to a whole number of
  // work-groups, so the launch size no longer depends on whether `length`
  // alone happens to be a multiple of the work-group size.
  // NOTE(review): assumes the kernel bounds-checks its global id, so the
  // padding work-items are idle - confirm against the kernel source.
  size_t work_size = kWorkGroupSize;
  size_t work_items = roundUpToMultiple(static_cast<size_t>(length) * sensors, work_size);

  cl_mem data = static_cast<cl_mem>(mem_data);
  cl_mem t0 = static_cast<cl_mem>(mem_t0);
  cl_mem par = static_cast<cl_mem>(mem_par);
  cl_mem chi = static_cast<cl_mem>(mem_result);

  // Load and execute the SingleGaussTF kernel.
  m_oclbase->ocl_createKernel("kernelSingleGaussTF");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &t0);
  m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &par);
  m_oclbase->ocl_setKernelArg(3, sizeof(cl_mem), &chi);
  m_oclbase->ocl_setKernelArg(4, sizeof(double), &fTimeResolution);
  m_oclbase->ocl_setKernelArg(5, sizeof(double), &fRebin);
  m_oclbase->ocl_setKernelArg(6, sizeof(double), &fGoodBinOffset);
  m_oclbase->ocl_setKernelArg(7, sizeof(int), &length);
  m_oclbase->ocl_setKernelArg(8, sizeof(int), &sensors);
  m_oclbase->ocl_setKernelArg(9, sizeof(int), &numpar);
  m_oclbase->ocl_setKernelArg(10, sizeof(double) * numpar, NULL);  // local buffer
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size);

  // NOTE(review): only `length` elements are summed here, whereas
  // ocl_PHistoTFFcn sums sensors * length - confirm which one matches the
  // layout the kernel writes to the result buffer.
  result = ocl_sum(chi, length);

  return DKS_SUCCESS;
}

/**
 * Run the "kernelDoubleLorentzTF" kernel and sum its output buffer.
 *
 * All pointer arguments are device buffers (cl_mem handles passed as void*).
 * The scalar arguments are forwarded to the kernel unchanged; a local buffer
 * of `numpar` doubles is reserved as the last kernel argument.
 *
 * @param mem_data         device buffer passed as kernel argument 0
 * @param mem_t0           device buffer passed as kernel argument 1
 * @param mem_par          device buffer passed as kernel argument 2
 * @param mem_result       device buffer passed as kernel argument 3; summed
 *                         afterwards over `length` elements
 * @param fTimeResolution  forwarded as kernel argument 4
 * @param fRebin           forwarded as kernel argument 5
 * @param fGoodBinOffset   forwarded as kernel argument 6
 * @param sensors          forwarded as kernel argument 8
 * @param length           forwarded as kernel argument 7
 * @param numpar           forwarded as kernel argument 9; sizes the local buffer
 * @param result           receives the sum of mem_result (0.0 for empty input)
 * @return DKS_SUCCESS
 */
int OpenCLChiSquare::ocl_doubleLorentzTF(void *mem_data, void *mem_t0, void *mem_par,
                                         void *mem_result, double fTimeResolution,
                                         double fRebin, double fGoodBinOffset,
                                         int sensors, int length, int numpar,
                                         double &result) {
  // Empty problem: nothing to launch or sum. Also prevents negative values
  // from wrapping around when converted to size_t below.
  if (length <= 0 || sensors <= 0) {
    result = 0.0;
    return DKS_SUCCESS;
  }

  // Round the full length * sensors count up to a whole number of
  // work-groups, so the launch size no longer depends on whether `length`
  // alone happens to be a multiple of the work-group size.
  // NOTE(review): assumes the kernel bounds-checks its global id, so the
  // padding work-items are idle - confirm against the kernel source.
  size_t work_size = kWorkGroupSize;
  size_t work_items = roundUpToMultiple(static_cast<size_t>(length) * sensors, work_size);

  cl_mem data = static_cast<cl_mem>(mem_data);
  cl_mem t0 = static_cast<cl_mem>(mem_t0);
  cl_mem par = static_cast<cl_mem>(mem_par);
  cl_mem chi = static_cast<cl_mem>(mem_result);

  // Load and execute the DoubleLorentzTF kernel.
  m_oclbase->ocl_createKernel("kernelDoubleLorentzTF");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &t0);
  m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &par);
  m_oclbase->ocl_setKernelArg(3, sizeof(cl_mem), &chi);
  m_oclbase->ocl_setKernelArg(4, sizeof(double), &fTimeResolution);
  m_oclbase->ocl_setKernelArg(5, sizeof(double), &fRebin);
  m_oclbase->ocl_setKernelArg(6, sizeof(double), &fGoodBinOffset);
  m_oclbase->ocl_setKernelArg(7, sizeof(int), &length);
  m_oclbase->ocl_setKernelArg(8, sizeof(int), &sensors);
  m_oclbase->ocl_setKernelArg(9, sizeof(int), &numpar);
  m_oclbase->ocl_setKernelArg(10, sizeof(double) * numpar, NULL);  // local buffer
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size);

  // NOTE(review): only `length` elements are summed here, whereas
  // ocl_PHistoTFFcn sums sensors * length - confirm which one matches the
  // layout the kernel writes to the result buffer.
  result = ocl_sum(chi, length);

  return DKS_SUCCESS;
}