OpenCL greens function calculation for OPAL

This commit is contained in:
Uldis Locans
2016-11-17 18:03:28 +01:00
parent 63a008d111
commit 4432d32480
3 changed files with 373 additions and 0 deletions

View File

@ -0,0 +1,144 @@
#include "OpenCLGreensFunction.h"
#define GREENS_KERNEL "OpenCLKernels/OpenCLGreensFunction.cl"
OpenCLGreensFunction::OpenCLGreensFunction(OpenCLBase *base) {
m_base = base;
base_create = false;
}
OpenCLGreensFunction::OpenCLGreensFunction() {
m_base = new OpenCLBase();
base_create = true;
}
OpenCLGreensFunction::~OpenCLGreensFunction() {
if (base_create)
delete m_base;
}
int OpenCLGreensFunction::buildProgram() {
char *kernel_file = new char[500];
kernel_file[0] = '\0';
strcat(kernel_file, OPENCL_KERNELS);
strcat(kernel_file, GREENS_KERNEL);
return m_base->ocl_loadKernel(kernel_file);
}
int OpenCLGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2,
int streamId)
{
//compile opencl program from source
buildProgram();
//cast the input data ptr to cl_mem
cl_mem tmpgreen_ptr = (cl_mem)tmpgreen;
//set the work item size
size_t work_size = 128;
size_t work_items = I * J * K;
if (work_items % work_size > 0)
work_items = (work_items / work_size + 1) * work_size;
//create kernel
ierr = m_oclbase->ocl_createKernel("kernelTmpgreen");
//set kernel parameters
m_base->setKernelArg(0, sizeof(cl_mem), &tmpgreen_ptr);
m_base->setKernelArg(1, sizeof(double), &hr_m0);
m_base->setKernelArg(2, sizeof(double), &hr_m1);
m_base->setKernelArg(3, sizeof(double), &hr_m2);
m_base->setKernelArg(4, sizeof(int), &I);
m_base->setKernelArg(5, sizeof(int), &J);
m_base->setKernelArg(6, sizeof(int), &K);
//execute kernel
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
return ierr;
}
int OpenCLGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId)
{
//compile opencl program from source
buildProgram();
//cast the input data ptr to cl_mem
cl_mem rho2_ptr = (cl_mem)rho2_m;
cl_mem tmpgreen_ptr = (cl_mem)tmpgreen;
int NI = 2*(I - 1);
int NJ = 2*(J - 1);
int NK = 2*(K - 1);
//set the work item size
size_t work_size = 128;
size_t work_items = I * J * K;
if (work_items % work_size > 0)
work_items = (work_items / work_size + 1) * work_size;
//create kernel
ierr = m_oclbase->ocl_createKernel("kernelIntegration");
//set kernel parameters
m_base->setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
m_base->setKernelArg(1, sizeof(cl_mem), &tmpgreen_ptr);
m_base->setKernelArg(2, sizeof(int), &I);
m_base->setKernelArg(3, sizeof(int), &J);
m_base->setKernelArg(4, sizeof(int), &K);
m_base->setKernelArg(5, sizeof(int), &NI);
m_base->setKernelArg(6, sizeof(int), &NJ);
m_base->setKernelArg(7, sizeof(int), &NK);
//execute kernel
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
return ierr;
}
int OpenCLGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId)
{
//compile opencl program from source
buildProgram();
//cast the input data ptr to cl_mem
cl_mem rho2_ptr = (cl_mem)rho2_m;
int NI = I + 1;
int NJ = J + 1;
int NK = K + 1;
int I2 = 2*I;
int J2 = 2*J;
int K2 = 2*K;
//set the work item size
size_t work_size = 128;
size_t work_items = NI * NJ * NK;
if (work_items % work_size > 0)
work_items = (work_items / work_size + 1) * work_size;
//create kernel
ierr = m_oclbase->ocl_createKernel("kernelMirroredRhoField");
//set kernel parameters
m_base->setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
m_base->setKernelArg(1, sizeof(int), &I2);
m_base->setKernelArg(2, sizeof(int), &J2);
m_base->setKernelArg(3, sizeof(int), &K2);
m_base->setKernelArg(4, sizeof(int), &NI);
m_base->setKernelArg(5, sizeof(int), &NJ);
m_base->setKernelArg(6, sizeof(int), &NK);
//execute kernel
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
return ierr;
}
int OpenCLGreensFunction::multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId)
{
}

View File

@ -0,0 +1,63 @@
#ifndef H_OPENCL_GREENSFUNCTION
#define H_OPENCL_GREENSFUNCTION
#include <iostream>
#include <cmath>
#include "../Algorithms/GreensFunction.h"
#include "OpenCLBase.h"
class OpenCLGreensFunction : public GreensFunction {
private:
bool base_create;
OpenCLBase *m_base;
public:
/** Constructor with OpenCLBase argument */
OpenCLGreensFunction(OpenCLBase *base);
/** Default constructor */
OpenCLGreensFunction();
/** Destructor */
~OpenCLGreensFunction();
/** Load OpenCL kernel file containing greens function kernels.
* m_base takes the kernel file and compiles the OpenCL programm.
*/
int buildProgram();
/**
Info: calc itegral on device memory (taken from OPAL src code)
Return: success or error code
*/
int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2,
int streamId = -1);
/**
Info: integration of rho2_m field (taken from OPAL src code)
Return: success or error code
*/
int integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1);
/**
Info: mirror rho field (taken from OPAL src code)
Return: succes or error code
*/
int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1);
/**
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
Return: success or error code
*/
int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);
};
#endif H_OPENCL_GREENSFUNCTION

View File

@ -0,0 +1,166 @@
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
/** compute the greens integral analytically */
__kernel void kernelTmpgreen(__global double *tmpgreen, double hr_m0, double hr_m1, double hr_m2,
int NI, int NJ, int NK)
{
int tid = get_local_size(0);
int id = get_global_id(0);
if (id < NI * NJ * NK) {
int i = id % NI;
int k = id / (NI * NJ);
int j = (id - k * NI * NJ) / NI;
double cellVolume = hr_m0 * hr_m1 * hr_m2;
double vv0 = i * hr_m0 - hr_m0 / 2;
double vv1 = j * hr_m1 - hr_m1 / 2;
double vv2 = k * hr_m2 - hr_m2 / 2;
double r = sqrt(vv0 * vv0 + vv1 * vv1 + vv2 * vv2);
double tmpgrn = -vv2*vv2 * atan(vv0 * vv1 / (vv2 * r) );
tmpgrn += -vv1*vv1 * atan(vv0 * vv2 / (vv1 * r) );
tmpgrn += -vv0*vv0 * atan(vv1 * vv2 / (vv0 * r) );
tmpgrn = tmpgrn / 2;
tmpgrn += vv1 * vv2 * log(vv0 + r);
tmpgrn += vv0 * vv2 * log(vv1 + r);
tmpgrn += vv0 * vv1 * log(vv2 + r);
tmpgreen[id] = tmpgrn / cellVolume;
}
}
/** perform the actual integration */
__kernel void kernelIntegration(__global double *rho2_m, __global double *tmpgreen,
int NI, int NJ, int NI_tmp, int NJ_tmp, int NK_tmp)
{
int tid = get_local_id(0);
int id = get_global_id(0);
int ni = NI;
int nj = NJ;
double tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
if (id < NI_tmp * NJ_tmp * NK_tmp) {
int i = id % NI_tmp;
int k = id / (NI_tmp * NJ_tmp);
int j = (id - k * NI_tmp * NJ_tmp) / NI_tmp;
tmp0 = 0; tmp1 = 0; tmp2 = 0; tmp3 = 0;
tmp4 = 0; tmp5 = 0; tmp6 = 0; tmp7 = 0;
if (i+1 < NI_tmp && j+1 < NJ_tmp && k+1 < NK_tmp)
tmp0 = tmpgreen[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
if (i+1 < NI_tmp)
tmp1 = tmpgreen[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp];
if (j+1 < NJ_tmp)
tmp2 = tmpgreen[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
if (k+1 < NK_tmp)
tmp3 = tmpgreen[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
if (i+1 < NI_tmp && j+1 < NJ_tmp)
tmp4 = tmpgreen[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
if (i+1 < NI_tmp && k+1 < NK_tmp)
tmp5 = tmpgreen[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
if (j+1 < NJ_tmp && k+1 < NK_tmp)
tmp6 = tmpgreen[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
tmp7 = tmpgreen[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];
double tmp_rho = tmp0 + tmp1 + tmp2 + tmp3 - tmp4 - tmp5 - tmp6 - tmp7;
rho2_m[i + j*ni + k*ni*nj] = tmp_rho;
}
}
/** miror rho-field */
__kernel void mirroredRhoField0(__global double *rho2_m, int NI, int NJ) {
rho2_m[0] = rho2_m[NI*NJ];
}
__kernel void mirroredRhoField(__global double *rho2_m,
int NI, int NJ, int NK,
int NI_tmp, int NJ_tmp, int NK_tmp) {
int tid = get_local_id(0);
int id = get_global_id(0);
if (id == 0)
rho2_m[0] = rho2_m[NI * NJ];
barrier(CLK_GLOBAL_MEM_FENCE);
int id1, id2, id3, id4, id5, id6, id7, id8;
if (id < NI_tmp * NJ_tmp * NK_tmp) {
int i = id % NI_tmp;
int k = id / (NI_tmp * NJ_tmp);
int j = (id - k * NI_tmp * NJ_tmp) / NI_tmp;
int ri = NI - i;
int rj = NJ - j;
int rk = NK - k;
id1 = k * NI * NJ + j * NI + i;
id2 = k * NI * NJ + j * NI + ri;
id3 = k * NI * NJ + rj * NI + i;
id4 = k * NI * NJ + rj * NI + ri;
id5 = rk * NI * NJ + j * NI + i;
id6 = rk * NI * NJ + j * NI + ri;
id7 = rk * NI * NJ + rj * NI + i;
id8 = rk * NI * NJ + rj * NI + ri;
double data = rho2_m[id1];
if (i != 0) rho2_m[id2] = data;
if (j != 0) rho2_m[id3] = data;
if (i != 0 && j != 0) rho2_m[id4] = data;
if (k != 0) rho2_m[id5] = data;
if (k != 0 && i != 0) rho2_m[id6] = data;
if (k!= 0 && j != 0) rho2_m[id7] = data;
if (k != 0 && j != 0 & i != 0) rho2_m[id8] = data;
}
}
/** multiply complex fields */
double2 CompelxMul(double2 a, double2 b) {
double2 c;
c.x = a.x * b.x - a.y * b.y;
c.y = a.x * b.y + a.y * b.x;
return c;
}
__kernel void multiplyComplexFields_2(__global double2 *ptr1, __global double2 *ptr2,
int size)
{
int idx = get_global_id(0);
if (idx < size)
ptr1[idx] = ComplexMul(ptr1[idx], ptr2[idx]);
}