Files
DKS/src/OpenCL/OpenCLChiSquare.cpp
2016-10-10 14:49:32 +02:00

158 lines
5.4 KiB
C++

#include "OpenCLChiSquare.h"

#include <vector>
/**
 * Sum the first `length` doubles of the device buffer `data`.
 *
 * Runs the "parallelReductionSum" kernel with work-groups of 128 work-items,
 * reads one partial sum per work-group back to the host and adds them up there.
 *
 * @param data    device buffer handed to the kernel as argument 0
 * @param length  number of elements to sum; values <= 0 yield 0.0
 * @return the total of all partial sums read back from the device
 *
 * NOTE(review): the status written to ierr by ocl_allocateMemory and any
 * failure of the other wrapper calls are not checked here.
 */
double OpenCLChiSquare::ocl_sum(cl_mem data, int length) {

  // Nothing to reduce: skip the zero-sized buffer and the empty kernel launch.
  if (length <= 0)
    return 0.0;

  int ierr;

  // Round the global size up to a whole number of work-groups.
  size_t work_size_sum = 128;
  size_t work_groups = (static_cast<size_t>(length) + work_size_sum - 1) / work_size_sum;
  size_t work_items = work_groups * work_size_sum;

  // One partial-sum slot per launched work-group. The count must match the
  // launch exactly: the previous "length / 128 + 1" produced one slot too many
  // whenever length was a multiple of 128, and that slot (presumably never
  // written by the kernel) was still added to the result.
  std::vector<double> partial_sums(work_groups);
  cl_mem tmp_ptr = m_oclbase->ocl_allocateMemory(work_groups * sizeof(double), ierr);

  // Execute the sum kernel; argument 2 is passed with a size and a NULL
  // pointer (work_size_sum doubles), argument 3 is the element count.
  m_oclbase->ocl_createKernel("parallelReductionSum");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &tmp_ptr);
  m_oclbase->ocl_setKernelArg(2, work_size_sum*sizeof(double), NULL);
  m_oclbase->ocl_setKernelArg(3, sizeof(int), &length);
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size_sum);

  // Read the partial sums and free the temporary device memory.
  m_oclbase->ocl_readData(tmp_ptr, &partial_sums[0], sizeof(double)*work_groups);
  m_oclbase->ocl_freeMemory(tmp_ptr);

  // Sum up the partial sums on the host.
  double result = 0;
  for (size_t i = 0; i < work_groups; i++)
    result += partial_sums[i];

  return result;
}
/**
 * Run the "kernelPHistoTFFcn" kernel and reduce its output buffer to one value.
 *
 * @param mem_data         device buffer (a cl_mem passed as void*), kernel argument 0
 * @param mem_par          device buffer (a cl_mem passed as void*), kernel argument 1
 * @param mem_result       device buffer (a cl_mem passed as void*), kernel argument 2;
 *                         it is summed after the kernel has run
 * @param fTimeResolution  forwarded unchanged as kernel argument 3
 * @param fRebin           forwarded unchanged as kernel argument 4
 * @param sensors          forwarded as kernel argument 6; also scales the launch size
 * @param length           forwarded as kernel argument 5; also scales the launch size
 * @param numpar           forwarded as kernel argument 7; sizes kernel argument 8
 * @param result           receives ocl_sum over the first sensors*length entries
 *                         of mem_result
 * @return DKS_SUCCESS; failures of the wrapper calls are not checked here
 */
int OpenCLChiSquare::ocl_PHistoTFFcn(void *mem_data, void *mem_par, void *mem_result,
                                     double fTimeResolution, double fRebin,
                                     int sensors, int length, int numpar,
                                     double &result)
{
  // Set the number of work items and the work-group size for kernel execution.
  // The global size covers length*sensors items, rounded up to a whole number
  // of work-groups. The rounding is applied to the full product: previously it
  // was derived from length alone, which dropped the sensors factor whenever
  // length was not a multiple of the work-group size.
  size_t work_size = 128;
  size_t total_items = static_cast<size_t>(length) * sensors;
  size_t work_items = total_items;
  if (total_items % work_size > 0)
    work_items = (total_items / work_size + 1) * work_size;

  cl_mem data = static_cast<cl_mem>(mem_data);
  cl_mem par = static_cast<cl_mem>(mem_par);
  cl_mem chi = static_cast<cl_mem>(mem_result);

  // Load and execute the PHistoTFFcn kernel. Argument 8 is passed with a size
  // (numpar doubles) and a NULL pointer.
  m_oclbase->ocl_createKernel("kernelPHistoTFFcn");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &par);
  m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &chi);
  m_oclbase->ocl_setKernelArg(3, sizeof(double), &fTimeResolution);
  m_oclbase->ocl_setKernelArg(4, sizeof(double), &fRebin);
  m_oclbase->ocl_setKernelArg(5, sizeof(int), &length);
  m_oclbase->ocl_setKernelArg(6, sizeof(int), &sensors);
  m_oclbase->ocl_setKernelArg(7, sizeof(int), &numpar);
  m_oclbase->ocl_setKernelArg(8, sizeof(double)*numpar, NULL);
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size);

  // Reduce the per-element output to a single value.
  result = ocl_sum(chi, sensors*length);

  return DKS_SUCCESS;
}
/**
 * Run the "kernelSingleGaussTF" kernel and reduce its output buffer to one value.
 *
 * @param mem_data         device buffer (a cl_mem passed as void*), kernel argument 0
 * @param mem_t0           device buffer (a cl_mem passed as void*), kernel argument 1
 * @param mem_par          device buffer (a cl_mem passed as void*), kernel argument 2
 * @param mem_result       device buffer (a cl_mem passed as void*), kernel argument 3;
 *                         it is summed after the kernel has run
 * @param fTimeResolution  forwarded unchanged as kernel argument 4
 * @param fRebin           forwarded unchanged as kernel argument 5
 * @param fGoodBinOffset   forwarded unchanged as kernel argument 6
 * @param sensors          forwarded as kernel argument 8; also scales the launch size
 * @param length           forwarded as kernel argument 7; also scales the launch size
 * @param numpar           forwarded as kernel argument 9; sizes kernel argument 10
 * @param result           receives ocl_sum over the first `length` entries of mem_result
 * @return DKS_SUCCESS; failures of the wrapper calls are not checked here
 */
int OpenCLChiSquare::ocl_singleGaussTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
                                       double fTimeResolution, double fRebin, double fGoodBinOffset,
                                       int sensors, int length, int numpar,
                                       double &result)
{
  // Set the number of work items and the work-group size for kernel execution.
  // The global size covers length*sensors items, rounded up to a whole number
  // of work-groups. The rounding is applied to the full product: previously it
  // was derived from length alone, which dropped the sensors factor whenever
  // length was not a multiple of the work-group size.
  size_t work_size = 128;
  size_t total_items = static_cast<size_t>(length) * sensors;
  size_t work_items = total_items;
  if (total_items % work_size > 0)
    work_items = (total_items / work_size + 1) * work_size;

  cl_mem data = static_cast<cl_mem>(mem_data);
  cl_mem t0 = static_cast<cl_mem>(mem_t0);
  cl_mem par = static_cast<cl_mem>(mem_par);
  cl_mem chi = static_cast<cl_mem>(mem_result);

  // Load and execute the SingleGaussTF kernel. Argument 10 is passed with a
  // size (numpar doubles) and a NULL pointer.
  m_oclbase->ocl_createKernel("kernelSingleGaussTF");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &t0);
  m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &par);
  m_oclbase->ocl_setKernelArg(3, sizeof(cl_mem), &chi);
  m_oclbase->ocl_setKernelArg(4, sizeof(double), &fTimeResolution);
  m_oclbase->ocl_setKernelArg(5, sizeof(double), &fRebin);
  m_oclbase->ocl_setKernelArg(6, sizeof(double), &fGoodBinOffset);
  m_oclbase->ocl_setKernelArg(7, sizeof(int), &length);
  m_oclbase->ocl_setKernelArg(8, sizeof(int), &sensors);
  m_oclbase->ocl_setKernelArg(9, sizeof(int), &numpar);
  m_oclbase->ocl_setKernelArg(10, sizeof(double)*numpar, NULL);
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size);

  // NOTE(review): only `length` entries are summed here, whereas
  // ocl_PHistoTFFcn sums sensors*length — confirm against the kernel's
  // output layout.
  result = ocl_sum(chi, length);

  return DKS_SUCCESS;
}
/**
 * Run the "kernelDoubleLorentzTF" kernel and reduce its output buffer to one value.
 *
 * @param mem_data         device buffer (a cl_mem passed as void*), kernel argument 0
 * @param mem_t0           device buffer (a cl_mem passed as void*), kernel argument 1
 * @param mem_par          device buffer (a cl_mem passed as void*), kernel argument 2
 * @param mem_result       device buffer (a cl_mem passed as void*), kernel argument 3;
 *                         it is summed after the kernel has run
 * @param fTimeResolution  forwarded unchanged as kernel argument 4
 * @param fRebin           forwarded unchanged as kernel argument 5
 * @param fGoodBinOffset   forwarded unchanged as kernel argument 6
 * @param sensors          forwarded as kernel argument 8; also scales the launch size
 * @param length           forwarded as kernel argument 7; also scales the launch size
 * @param numpar           forwarded as kernel argument 9; sizes kernel argument 10
 * @param result           receives ocl_sum over the first `length` entries of mem_result
 * @return DKS_SUCCESS; failures of the wrapper calls are not checked here
 */
int OpenCLChiSquare::ocl_doubleLorentzTF(void *mem_data, void *mem_t0, void *mem_par, void *mem_result,
                                         double fTimeResolution, double fRebin, double fGoodBinOffset,
                                         int sensors, int length, int numpar,
                                         double &result)
{
  // Set the number of work items and the work-group size for kernel execution.
  // The global size covers length*sensors items, rounded up to a whole number
  // of work-groups. The rounding is applied to the full product: previously it
  // was derived from length alone, which dropped the sensors factor whenever
  // length was not a multiple of the work-group size.
  size_t work_size = 128;
  size_t total_items = static_cast<size_t>(length) * sensors;
  size_t work_items = total_items;
  if (total_items % work_size > 0)
    work_items = (total_items / work_size + 1) * work_size;

  cl_mem data = static_cast<cl_mem>(mem_data);
  cl_mem t0 = static_cast<cl_mem>(mem_t0);
  cl_mem par = static_cast<cl_mem>(mem_par);
  cl_mem chi = static_cast<cl_mem>(mem_result);

  // Load and execute the DoubleLorentzTF kernel. Argument 10 is passed with a
  // size (numpar doubles) and a NULL pointer.
  m_oclbase->ocl_createKernel("kernelDoubleLorentzTF");
  m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &data);
  m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &t0);
  m_oclbase->ocl_setKernelArg(2, sizeof(cl_mem), &par);
  m_oclbase->ocl_setKernelArg(3, sizeof(cl_mem), &chi);
  m_oclbase->ocl_setKernelArg(4, sizeof(double), &fTimeResolution);
  m_oclbase->ocl_setKernelArg(5, sizeof(double), &fRebin);
  m_oclbase->ocl_setKernelArg(6, sizeof(double), &fGoodBinOffset);
  m_oclbase->ocl_setKernelArg(7, sizeof(int), &length);
  m_oclbase->ocl_setKernelArg(8, sizeof(int), &sensors);
  m_oclbase->ocl_setKernelArg(9, sizeof(int), &numpar);
  m_oclbase->ocl_setKernelArg(10, sizeof(double)*numpar, NULL);
  m_oclbase->ocl_executeKernel(1, &work_items, &work_size);

  // NOTE(review): only `length` entries are summed here, whereas
  // ocl_PHistoTFFcn sums sensors*length — confirm against the kernel's
  // output layout.
  result = ocl_sum(chi, length);

  return DKS_SUCCESS;
}