OpenCL greens function calculation for OPAL
This commit is contained in:
144
src/OpenCL/OpenCLGreensFunction.cpp
Normal file
144
src/OpenCL/OpenCLGreensFunction.cpp
Normal file
@ -0,0 +1,144 @@
|
||||
#include "OpenCLGreensFunction.h"
|
||||
#define GREENS_KERNEL "OpenCLKernels/OpenCLGreensFunction.cl"
|
||||
|
||||
OpenCLGreensFunction::OpenCLGreensFunction(OpenCLBase *base) {
|
||||
m_base = base;
|
||||
base_create = false;
|
||||
}
|
||||
|
||||
OpenCLGreensFunction::OpenCLGreensFunction() {
|
||||
m_base = new OpenCLBase();
|
||||
base_create = true;
|
||||
}
|
||||
|
||||
OpenCLGreensFunction::~OpenCLGreensFunction() {
|
||||
if (base_create)
|
||||
delete m_base;
|
||||
}
|
||||
|
||||
int OpenCLGreensFunction::buildProgram() {
|
||||
char *kernel_file = new char[500];
|
||||
kernel_file[0] = '\0';
|
||||
strcat(kernel_file, OPENCL_KERNELS);
|
||||
strcat(kernel_file, GREENS_KERNEL);
|
||||
|
||||
return m_base->ocl_loadKernel(kernel_file);
|
||||
}
|
||||
|
||||
int OpenCLGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
|
||||
double hr_m0, double hr_m1, double hr_m2,
|
||||
int streamId)
|
||||
{
|
||||
//compile opencl program from source
|
||||
buildProgram();
|
||||
|
||||
//cast the input data ptr to cl_mem
|
||||
cl_mem tmpgreen_ptr = (cl_mem)tmpgreen;
|
||||
|
||||
//set the work item size
|
||||
size_t work_size = 128;
|
||||
size_t work_items = I * J * K;
|
||||
if (work_items % work_size > 0)
|
||||
work_items = (work_items / work_size + 1) * work_size;
|
||||
|
||||
//create kernel
|
||||
ierr = m_oclbase->ocl_createKernel("kernelTmpgreen");
|
||||
|
||||
//set kernel parameters
|
||||
m_base->setKernelArg(0, sizeof(cl_mem), &tmpgreen_ptr);
|
||||
m_base->setKernelArg(1, sizeof(double), &hr_m0);
|
||||
m_base->setKernelArg(2, sizeof(double), &hr_m1);
|
||||
m_base->setKernelArg(3, sizeof(double), &hr_m2);
|
||||
m_base->setKernelArg(4, sizeof(int), &I);
|
||||
m_base->setKernelArg(5, sizeof(int), &J);
|
||||
m_base->setKernelArg(6, sizeof(int), &K);
|
||||
|
||||
//execute kernel
|
||||
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
int OpenCLGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int streamId)
|
||||
{
|
||||
//compile opencl program from source
|
||||
buildProgram();
|
||||
|
||||
//cast the input data ptr to cl_mem
|
||||
cl_mem rho2_ptr = (cl_mem)rho2_m;
|
||||
cl_mem tmpgreen_ptr = (cl_mem)tmpgreen;
|
||||
int NI = 2*(I - 1);
|
||||
int NJ = 2*(J - 1);
|
||||
int NK = 2*(K - 1);
|
||||
|
||||
|
||||
//set the work item size
|
||||
size_t work_size = 128;
|
||||
size_t work_items = I * J * K;
|
||||
if (work_items % work_size > 0)
|
||||
work_items = (work_items / work_size + 1) * work_size;
|
||||
|
||||
//create kernel
|
||||
ierr = m_oclbase->ocl_createKernel("kernelIntegration");
|
||||
|
||||
//set kernel parameters
|
||||
m_base->setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
|
||||
m_base->setKernelArg(1, sizeof(cl_mem), &tmpgreen_ptr);
|
||||
m_base->setKernelArg(2, sizeof(int), &I);
|
||||
m_base->setKernelArg(3, sizeof(int), &J);
|
||||
m_base->setKernelArg(4, sizeof(int), &K);
|
||||
m_base->setKernelArg(5, sizeof(int), &NI);
|
||||
m_base->setKernelArg(6, sizeof(int), &NJ);
|
||||
m_base->setKernelArg(7, sizeof(int), &NK);
|
||||
|
||||
//execute kernel
|
||||
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return ierr;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int OpenCLGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId)
|
||||
{
|
||||
//compile opencl program from source
|
||||
buildProgram();
|
||||
|
||||
//cast the input data ptr to cl_mem
|
||||
cl_mem rho2_ptr = (cl_mem)rho2_m;
|
||||
int NI = I + 1;
|
||||
int NJ = J + 1;
|
||||
int NK = K + 1;
|
||||
int I2 = 2*I;
|
||||
int J2 = 2*J;
|
||||
int K2 = 2*K;
|
||||
|
||||
//set the work item size
|
||||
size_t work_size = 128;
|
||||
size_t work_items = NI * NJ * NK;
|
||||
if (work_items % work_size > 0)
|
||||
work_items = (work_items / work_size + 1) * work_size;
|
||||
|
||||
//create kernel
|
||||
ierr = m_oclbase->ocl_createKernel("kernelMirroredRhoField");
|
||||
|
||||
//set kernel parameters
|
||||
m_base->setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
|
||||
m_base->setKernelArg(1, sizeof(int), &I2);
|
||||
m_base->setKernelArg(2, sizeof(int), &J2);
|
||||
m_base->setKernelArg(3, sizeof(int), &K2);
|
||||
m_base->setKernelArg(4, sizeof(int), &NI);
|
||||
m_base->setKernelArg(5, sizeof(int), &NJ);
|
||||
m_base->setKernelArg(6, sizeof(int), &NK);
|
||||
|
||||
//execute kernel
|
||||
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
|
||||
int OpenCLGreensFunction::multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId)
|
||||
{
|
||||
}
|
63
src/OpenCL/OpenCLGreensFunction.h
Normal file
63
src/OpenCL/OpenCLGreensFunction.h
Normal file
@ -0,0 +1,63 @@
|
||||
#ifndef H_OPENCL_GREENSFUNCTION
|
||||
#define H_OPENCL_GREENSFUNCTION
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
#include "../Algorithms/GreensFunction.h"
|
||||
#include "OpenCLBase.h"
|
||||
|
||||
class OpenCLGreensFunction : public GreensFunction {
|
||||
|
||||
private:
|
||||
|
||||
bool base_create;
|
||||
OpenCLBase *m_base;
|
||||
|
||||
public:
|
||||
|
||||
/** Constructor with OpenCLBase argument */
|
||||
OpenCLGreensFunction(OpenCLBase *base);
|
||||
|
||||
/** Default constructor */
|
||||
OpenCLGreensFunction();
|
||||
|
||||
/** Destructor */
|
||||
~OpenCLGreensFunction();
|
||||
|
||||
/** Load OpenCL kernel file containing greens function kernels.
|
||||
* m_base takes the kernel file and compiles the OpenCL programm.
|
||||
*/
|
||||
int buildProgram();
|
||||
|
||||
/**
|
||||
Info: calc itegral on device memory (taken from OPAL src code)
|
||||
Return: success or error code
|
||||
*/
|
||||
int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
|
||||
double hr_m0, double hr_m1, double hr_m2,
|
||||
int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: integration of rho2_m field (taken from OPAL src code)
|
||||
Return: success or error code
|
||||
*/
|
||||
int integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: mirror rho field (taken from OPAL src code)
|
||||
Return: succes or error code
|
||||
*/
|
||||
int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
|
||||
Return: success or error code
|
||||
*/
|
||||
int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif H_OPENCL_GREENSFUNCTION
|
166
src/OpenCL/OpenCLKernels/OpenCLGreensFunction.cl
Normal file
166
src/OpenCL/OpenCLKernels/OpenCLGreensFunction.cl
Normal file
@ -0,0 +1,166 @@
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
/** compute the greens integral analytically */
|
||||
__kernel void kernelTmpgreen(__global double *tmpgreen, double hr_m0, double hr_m1, double hr_m2,
|
||||
int NI, int NJ, int NK)
|
||||
{
|
||||
|
||||
int tid = get_local_size(0);
|
||||
int id = get_global_id(0);
|
||||
|
||||
if (id < NI * NJ * NK) {
|
||||
int i = id % NI;
|
||||
int k = id / (NI * NJ);
|
||||
int j = (id - k * NI * NJ) / NI;
|
||||
|
||||
|
||||
double cellVolume = hr_m0 * hr_m1 * hr_m2;
|
||||
|
||||
double vv0 = i * hr_m0 - hr_m0 / 2;
|
||||
double vv1 = j * hr_m1 - hr_m1 / 2;
|
||||
double vv2 = k * hr_m2 - hr_m2 / 2;
|
||||
|
||||
double r = sqrt(vv0 * vv0 + vv1 * vv1 + vv2 * vv2);
|
||||
|
||||
double tmpgrn = -vv2*vv2 * atan(vv0 * vv1 / (vv2 * r) );
|
||||
tmpgrn += -vv1*vv1 * atan(vv0 * vv2 / (vv1 * r) );
|
||||
tmpgrn += -vv0*vv0 * atan(vv1 * vv2 / (vv0 * r) );
|
||||
|
||||
tmpgrn = tmpgrn / 2;
|
||||
|
||||
tmpgrn += vv1 * vv2 * log(vv0 + r);
|
||||
tmpgrn += vv0 * vv2 * log(vv1 + r);
|
||||
tmpgrn += vv0 * vv1 * log(vv2 + r);
|
||||
|
||||
tmpgreen[id] = tmpgrn / cellVolume;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** perform the actual integration */
|
||||
__kernel void kernelIntegration(__global double *rho2_m, __global double *tmpgreen,
|
||||
int NI, int NJ, int NI_tmp, int NJ_tmp, int NK_tmp)
|
||||
{
|
||||
|
||||
int tid = get_local_id(0);
|
||||
int id = get_global_id(0);
|
||||
|
||||
int ni = NI;
|
||||
int nj = NJ;
|
||||
|
||||
double tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
|
||||
if (id < NI_tmp * NJ_tmp * NK_tmp) {
|
||||
int i = id % NI_tmp;
|
||||
int k = id / (NI_tmp * NJ_tmp);
|
||||
int j = (id - k * NI_tmp * NJ_tmp) / NI_tmp;
|
||||
|
||||
tmp0 = 0; tmp1 = 0; tmp2 = 0; tmp3 = 0;
|
||||
tmp4 = 0; tmp5 = 0; tmp6 = 0; tmp7 = 0;
|
||||
|
||||
if (i+1 < NI_tmp && j+1 < NJ_tmp && k+1 < NK_tmp)
|
||||
tmp0 = tmpgreen[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
if (i+1 < NI_tmp)
|
||||
tmp1 = tmpgreen[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
if (j+1 < NJ_tmp)
|
||||
tmp2 = tmpgreen[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
if (k+1 < NK_tmp)
|
||||
tmp3 = tmpgreen[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
if (i+1 < NI_tmp && j+1 < NJ_tmp)
|
||||
tmp4 = tmpgreen[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
if (i+1 < NI_tmp && k+1 < NK_tmp)
|
||||
tmp5 = tmpgreen[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
if (j+1 < NJ_tmp && k+1 < NK_tmp)
|
||||
tmp6 = tmpgreen[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
tmp7 = tmpgreen[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
double tmp_rho = tmp0 + tmp1 + tmp2 + tmp3 - tmp4 - tmp5 - tmp6 - tmp7;
|
||||
|
||||
rho2_m[i + j*ni + k*ni*nj] = tmp_rho;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** miror rho-field */
|
||||
__kernel void mirroredRhoField0(__global double *rho2_m, int NI, int NJ) {
|
||||
rho2_m[0] = rho2_m[NI*NJ];
|
||||
}
|
||||
|
||||
__kernel void mirroredRhoField(__global double *rho2_m,
|
||||
int NI, int NJ, int NK,
|
||||
int NI_tmp, int NJ_tmp, int NK_tmp) {
|
||||
|
||||
int tid = get_local_id(0);
|
||||
int id = get_global_id(0);
|
||||
|
||||
if (id == 0)
|
||||
rho2_m[0] = rho2_m[NI * NJ];
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
int id1, id2, id3, id4, id5, id6, id7, id8;
|
||||
|
||||
if (id < NI_tmp * NJ_tmp * NK_tmp) {
|
||||
int i = id % NI_tmp;
|
||||
int k = id / (NI_tmp * NJ_tmp);
|
||||
int j = (id - k * NI_tmp * NJ_tmp) / NI_tmp;
|
||||
|
||||
int ri = NI - i;
|
||||
int rj = NJ - j;
|
||||
int rk = NK - k;
|
||||
|
||||
id1 = k * NI * NJ + j * NI + i;
|
||||
id2 = k * NI * NJ + j * NI + ri;
|
||||
id3 = k * NI * NJ + rj * NI + i;
|
||||
id4 = k * NI * NJ + rj * NI + ri;
|
||||
|
||||
id5 = rk * NI * NJ + j * NI + i;
|
||||
id6 = rk * NI * NJ + j * NI + ri;
|
||||
id7 = rk * NI * NJ + rj * NI + i;
|
||||
id8 = rk * NI * NJ + rj * NI + ri;
|
||||
|
||||
|
||||
double data = rho2_m[id1];
|
||||
if (i != 0) rho2_m[id2] = data;
|
||||
|
||||
if (j != 0) rho2_m[id3] = data;
|
||||
|
||||
if (i != 0 && j != 0) rho2_m[id4] = data;
|
||||
|
||||
if (k != 0) rho2_m[id5] = data;
|
||||
|
||||
if (k != 0 && i != 0) rho2_m[id6] = data;
|
||||
|
||||
if (k!= 0 && j != 0) rho2_m[id7] = data;
|
||||
|
||||
if (k != 0 && j != 0 & i != 0) rho2_m[id8] = data;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** multiply complex fields */
|
||||
double2 CompelxMul(double2 a, double2 b) {
|
||||
double2 c;
|
||||
c.x = a.x * b.x - a.y * b.y;
|
||||
c.y = a.x * b.y + a.y * b.x;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
__kernel void multiplyComplexFields_2(__global double2 *ptr1, __global double2 *ptr2,
|
||||
int size)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
|
||||
if (idx < size)
|
||||
ptr1[idx] = ComplexMul(ptr1[idx], ptr2[idx]);
|
||||
}
|
Reference in New Issue
Block a user