158 lines
5.4 KiB
C++
158 lines
5.4 KiB
C++
#include "OpenCLChiSquare.h"

#include <vector>
|
|
|
|
/**
 * Add up `length` doubles held in the device buffer `data`.
 *
 * The "parallelReductionSum" kernel is launched with work-groups of 128
 * work-items. A temporary device buffer with one double per launched
 * work-group receives the kernel output (presumably one partial sum per
 * work-group -- confirm against the kernel source); those values are read
 * back and accumulated on the host.
 *
 * @param data   device buffer passed to the kernel as argument 0
 * @param length number of elements to sum, passed to the kernel as argument 3
 * @return sum of the partial results read back from the device, or 0 when
 *         length is not positive
 */
double OpenCLChiSquare::ocl_sum(cl_mem data, int length) {

  // Nothing to reduce; this also avoids a zero-sized (or wrapped-around) launch.
  if (length <= 0)
    return 0.0;

  // Number of work-groups needed to cover `length` elements, rounded up.
  // The partial-sum buffer is sized from the SAME count that is launched, so
  // every slot that is read back corresponds to a work-group that actually ran.
  size_t work_size_sum = 128;
  size_t work_groups = (static_cast<size_t>(length) + work_size_sum - 1) / work_size_sum;
  size_t work_items = work_groups * work_size_sum;

  // Temporary device buffer for the per-work-group results.
  // NOTE(review): ierr is not inspected here; confirm the error convention of
  // ocl_allocateMemory before adding a check.
  int ierr = 0;
  cl_mem tmp_ptr = m_oclbase->ocl_allocateMemory(work_groups * sizeof(double), ierr);

  // Run the reduction kernel. Argument 2 is given a size but no pointer,
  // reserving one double per work-item of a work-group.
  m_oclbase->ocl_createKernel("parallelReductionSum");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &tmp_ptr);
  m_oclbase->ocl_setKernelArg(2, work_size_sum * sizeof(double), NULL);
  m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum);

  // Fetch the partial sums and release the temporary device buffer.
  std::vector<double> partial_sums(work_groups);
  m_oclbase->ocl_readData(tmp_ptr, &partial_sums[0], sizeof(double) * work_groups);
  m_oclbase->ocl_freeMemory(tmp_ptr);

  // Finish the reduction on the host.
  double result = 0;
  for (size_t i = 0; i < work_groups; i++)
    result += partial_sums[i];

  return result;
}
|
|
|
|
int OpenCLChiSquare::ocl_PHistoTFFcn(void *mem_data, void *mem_par, void *mem_result,
|
|
double fTimeResolution, double fRebin,
|
|
int sensors, int length, int numpar,
|
|
double &result)
|
|
{
|
|
|
|
//set number of work items and work group sizes for kernel execution
|
|
size_t work_size = 128;
|
|
|
|
size_t work_items = (size_t)length * sensors;
|
|
if (length % work_size > 0)
|
|
work_items = (length / work_size + 1) * work_size;
|
|
|
|
cl_mem data = (cl_mem)mem_data;
|
|
cl_mem par = (cl_mem)mem_par;
|
|
cl_mem chi = (cl_mem)mem_result;
|
|
|
|
//load and execute PHistotFFcn kernel
|
|
m_oclbase->ocl_createKernel("kernelPHistoTFFcn");
|
|
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
|
|
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &par);
|
|
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &chi);
|
|
m_oclbase->ocl_setKernelArg(3, sizeof(double), &fTimeResolution);
|
|
m_oclbase->ocl_setKernelArg(4, sizeof(double), &fRebin);
|
|
m_oclbase->ocl_setKernelArg(5, sizeof(int), &length);
|
|
m_oclbase->ocl_setKernelArg(6, sizeof(int), &sensors);
|
|
m_oclbase->ocl_setKernelArg(7, sizeof(int), &numpar);
|
|
m_oclbase->ocl_setKernelArg(8, sizeof(double)*numpar, NULL);
|
|
m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
|
|
|
result = ocl_sum(chi, sensors*length);
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
int OpenCLChiSquare::ocl_singleGaussTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
|
double fTimeResolution, double fRebin, double fGoodBinOffset,
|
|
int sensors, int length, int numpar,
|
|
double &result)
|
|
{
|
|
|
|
//set number of work items and work group sizes for kernel execution
|
|
size_t work_size = 128;
|
|
size_t work_items = (size_t)length * sensors;
|
|
if (length % work_size > 0)
|
|
work_items = (length / work_size + 1) * work_size;
|
|
|
|
cl_mem data = (cl_mem)mem_data;
|
|
cl_mem t0 = (cl_mem)mem_t0;
|
|
cl_mem par = (cl_mem)mem_par;
|
|
cl_mem chi = (cl_mem)mem_result;
|
|
|
|
//load and execute PHistotFFcn kernel
|
|
m_oclbase->ocl_createKernel("kernelSingleGaussTF");
|
|
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
|
|
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &t0);
|
|
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &par);
|
|
m_oclbase->ocl_setKernelArg(3, sizeof(cl_mem), &chi);
|
|
m_oclbase->ocl_setKernelArg(4, sizeof(double), &fTimeResolution);
|
|
m_oclbase->ocl_setKernelArg(5, sizeof(double), &fRebin);
|
|
m_oclbase->ocl_setKernelArg(6, sizeof(double), &fGoodBinOffset);
|
|
m_oclbase->ocl_setKernelArg(7, sizeof(int), &length);
|
|
m_oclbase->ocl_setKernelArg(8, sizeof(int), &sensors);
|
|
m_oclbase->ocl_setKernelArg(9, sizeof(int), &numpar);
|
|
m_oclbase->ocl_setKernelArg(10, sizeof(double)*numpar, NULL);
|
|
m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
|
|
|
result = ocl_sum(chi, length);
|
|
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
int OpenCLChiSquare::ocl_doubleLorentzTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
|
|
double fTimeResolution, double fRebin, double fGoodBinOffset,
|
|
int sensors, int length, int numpar,
|
|
double &result)
|
|
{
|
|
|
|
//set number of work items and work group sizes for kernel execution
|
|
size_t work_size = 128;
|
|
size_t work_items = (size_t)length * sensors;
|
|
if (length % work_size > 0)
|
|
work_items = (length / work_size + 1) * work_size;
|
|
|
|
cl_mem data = (cl_mem)mem_data;
|
|
cl_mem t0 = (cl_mem)mem_t0;
|
|
cl_mem par = (cl_mem)mem_par;
|
|
cl_mem chi = (cl_mem)mem_result;
|
|
|
|
//load and execute PHistotFFcn kernel
|
|
m_oclbase->ocl_createKernel("kernelDoubleLorentzTF");
|
|
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
|
|
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &t0);
|
|
m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &par);
|
|
m_oclbase->ocl_setKernelArg(3, sizeof(cl_mem), &chi);
|
|
m_oclbase->ocl_setKernelArg(4, sizeof(double), &fTimeResolution);
|
|
m_oclbase->ocl_setKernelArg(5, sizeof(double), &fRebin);
|
|
m_oclbase->ocl_setKernelArg(6, sizeof(double), &fGoodBinOffset);
|
|
m_oclbase->ocl_setKernelArg(7, sizeof(int), &length);
|
|
m_oclbase->ocl_setKernelArg(8, sizeof(int), &sensors);
|
|
m_oclbase->ocl_setKernelArg(9, sizeof(int), &numpar);
|
|
m_oclbase->ocl_setKernelArg(10, sizeof(double)*numpar, NULL);
|
|
m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
|
|
|
result = ocl_sum(chi, length);
|
|
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|