OpenCL FFT using clfft and tests
This commit is contained in:
@ -4,6 +4,7 @@ SET (_SRCS
|
||||
OpenCLChiSquare.cpp
|
||||
OpenCLCollimatorPhysics.cpp
|
||||
OpenCLChiSquareRuntime.cpp
|
||||
OpenCLGreensFunction.cpp
|
||||
)
|
||||
|
||||
SET (_HDRS
|
||||
@ -12,6 +13,7 @@ SET (_HDRS
|
||||
OpenCLChiSquare.h
|
||||
OpenCLCollimatorPhysics.h
|
||||
OpenCLChiSquareRuntime.h
|
||||
OpenCLGreensFunction.h
|
||||
)
|
||||
|
||||
#INCLUDE_DIRECTORIES (
|
||||
@ -25,6 +27,7 @@ SET (_KERNELS
|
||||
OpenCLKernels/OpenCLTranspose.cl
|
||||
OpenCLKernels/OpenCLCollimatorPhysics.cl
|
||||
OpenCLKernels/OpenCLChiSquareRuntime.cl
|
||||
OpenCLKernels/OpenCLGreensFunction.cl
|
||||
)
|
||||
|
||||
ADD_SOURCES (${_SRCS})
|
||||
|
@ -428,7 +428,8 @@ int OpenCLBase::ocl_compileProgram(const char* kernel_source, const char* opts)
|
||||
int ierr;
|
||||
|
||||
//create program from kernel
|
||||
m_program = clCreateProgramWithSource(m_context, 1, (const char **)&kernel_source, NULL, &ierr);
|
||||
m_program = clCreateProgramWithSource(m_context, 1, (const char **)&kernel_source,
|
||||
NULL, &ierr);
|
||||
if (ierr != CL_SUCCESS) {
|
||||
DEBUG_MSG("Error creating program from source, OpenCL error: " << ierr);
|
||||
return DKS_ERROR;
|
||||
@ -438,7 +439,7 @@ int OpenCLBase::ocl_compileProgram(const char* kernel_source, const char* opts)
|
||||
ierr = clBuildProgram(m_program, 0, NULL, opts, NULL, NULL);
|
||||
|
||||
/*
|
||||
check if compileng kernel source succeded, if failed return error code
|
||||
check if compiling the kernel source succeeded; if it failed, return an error code
|
||||
if in debug mode, get the compilation info and print the program build log, which
|
||||
will give indication what made the compilation fail
|
||||
*/
|
||||
@ -447,7 +448,8 @@ int OpenCLBase::ocl_compileProgram(const char* kernel_source, const char* opts)
|
||||
|
||||
//get build status
|
||||
cl_build_status status;
|
||||
clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
|
||||
clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_STATUS,
|
||||
sizeof(cl_build_status), &status, NULL);
|
||||
|
||||
//get log size
|
||||
size_t log_size;
|
||||
|
@ -30,24 +30,11 @@
|
||||
#include <CL/cl_ext.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include "clRNG/clRNG.h"
|
||||
#include "clRNG/mrg31k3p.h"
|
||||
|
||||
#include "../DKSDefinitions.h"
|
||||
|
||||
/*
  Plain aggregate holding the state of a random number generator:
  six doubles (s10..s12, s20..s22), one extra double (z) and a bool flag (gen).
  NOTE(review): the s1x / s2x naming suggests two three-word component states,
  and z / gen look like a cached value plus its validity flag - confirm against
  the code that consumes this struct before relying on that reading.
  NOTE(review): if this struct is mirrored on the device side, field order and
  types must stay in sync - TODO confirm.
*/
|
||||
typedef struct {
|
||||
|
||||
double s10;
|
||||
double s11;
|
||||
double s12;
|
||||
double s20;
|
||||
double s21;
|
||||
double s22;
|
||||
double z;
|
||||
bool gen;
|
||||
|
||||
} RNDState;
|
||||
|
||||
class OpenCLBase {
|
||||
|
||||
private:
|
||||
@ -195,7 +182,7 @@ public:
|
||||
Return: return pointer to memory
|
||||
*/
|
||||
cl_mem ocl_allocateMemory(size_t size, int &ierr);
|
||||
|
||||
|
||||
/*
|
||||
Name: allocateMemory
|
||||
Info: allocate memory on device
|
||||
@ -203,6 +190,20 @@ public:
|
||||
*/
|
||||
cl_mem ocl_allocateMemory(size_t size, int type, int &ierr);
|
||||
|
||||
/** Zero OpenCL memory buffer
|
||||
* Set all the elemetns in the device array to zero
|
||||
*/
|
||||
template <typename T>
|
||||
int ocl_fillMemory(cl_mem mem_ptr, size_t size, T value, int offset = 0) {
|
||||
|
||||
cl_int ierr;
|
||||
ierr = clEnqueueFillBuffer(m_command_queue, mem_ptr, &value, sizeof(T), offset,
|
||||
sizeof(T)*size, 0, nullptr, nullptr);
|
||||
if (ierr != CL_SUCCESS)
|
||||
return DKS_ERROR;
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
Name: writeData
|
||||
Info: write data to device memory (needs ptr to mem object)
|
||||
|
@ -117,15 +117,13 @@ int OpenCLFFT::executeFFT(void *data, int ndim, int N[3], int streamId, bool for
|
||||
*/
|
||||
int OpenCLFFT::executeRCFFT(void *real_ptr, void *comp_ptr, int ndim, int N[3], int streamId) {
|
||||
|
||||
std::cout << "execute RCFFT" << std::endl;
|
||||
|
||||
int dkserr = DKS_SUCCESS;
|
||||
cl_int ierr;
|
||||
cl_mem real_in = (cl_mem)real_ptr;
|
||||
cl_mem comp_out = (cl_mem)comp_ptr;
|
||||
|
||||
ierr = clfftEnqueueTransform(planHandleD2Z, CLFFT_FORWARD, 1, &m_oclbase->m_command_queue,
|
||||
0, NULL, NULL, &real_in, &comp_out, NULL);
|
||||
0, NULL, NULL, &real_in, &comp_out, NULL);
|
||||
|
||||
if (ierr != OCL_SUCCESS) {
|
||||
dkserr = DKS_ERROR;
|
||||
@ -144,8 +142,6 @@ int OpenCLFFT::executeRCFFT(void *real_ptr, void *comp_ptr, int ndim, int N[3],
|
||||
*/
|
||||
int OpenCLFFT::executeCRFFT(void *real_ptr, void *comp_ptr, int ndim, int N[3], int streamId) {
|
||||
|
||||
std::cout << "execute CRFFT" << std::endl;
|
||||
|
||||
int dkserr = DKS_SUCCESS;
|
||||
cl_int ierr;
|
||||
cl_mem real_in = (cl_mem)real_ptr;
|
||||
@ -214,7 +210,13 @@ int OpenCLFFT::setupFFT(int ndim, int N[3]) {
|
||||
|
||||
cl_int err;
|
||||
|
||||
clfftDim dim = CLFFT_3D;
|
||||
clfftDim dim;
|
||||
if (ndim == 1)
|
||||
dim = CLFFT_1D;
|
||||
else if (ndim == 2)
|
||||
dim = CLFFT_2D;
|
||||
else
|
||||
dim = CLFFT_3D;
|
||||
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
|
||||
|
||||
/* Create the FFT plan (1D, 2D or 3D, depending on ndim) */
|
||||
@ -244,9 +246,20 @@ int OpenCLFFT::setupFFT(int ndim, int N[3]) {
|
||||
int OpenCLFFT::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
cl_int err;
|
||||
|
||||
clfftDim dim = CLFFT_3D;
|
||||
clfftDim dim;
|
||||
if (ndim == 1)
|
||||
dim = CLFFT_1D;
|
||||
else if (ndim == 2)
|
||||
dim = CLFFT_2D;
|
||||
else
|
||||
dim = CLFFT_3D;
|
||||
|
||||
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
|
||||
|
||||
size_t half = (size_t)N[0] / 2 + 1;
|
||||
size_t clInStride[3] = {1, (size_t)N[0], (size_t)N[0]*N[1]};
|
||||
size_t clOutStride[3] = {1, half, half * N[1]};
|
||||
|
||||
/* Create the real-to-complex FFT plan (1D, 2D or 3D, depending on ndim) */
|
||||
err = clfftCreateDefaultPlan(&planHandleD2Z, m_oclbase->m_context, dim, clLength);
|
||||
|
||||
@ -254,6 +267,8 @@ int OpenCLFFT::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
err = clfftSetPlanPrecision(planHandleD2Z, CLFFT_DOUBLE);
|
||||
err = clfftSetLayout(planHandleD2Z, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);
|
||||
err = clfftSetResultLocation(planHandleD2Z, CLFFT_OUTOFPLACE);
|
||||
err = clfftSetPlanInStride(planHandleD2Z, dim, clInStride);
|
||||
err = clfftSetPlanOutStride(planHandleD2Z, dim, clOutStride);
|
||||
|
||||
/* Bake the plan */
|
||||
err = clfftBakePlan(planHandleD2Z, 1, &m_oclbase->m_command_queue, NULL, NULL);
|
||||
@ -269,9 +284,20 @@ int OpenCLFFT::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
int OpenCLFFT::setupFFTCR(int ndim, int N[3], double scale) {
|
||||
cl_int err;
|
||||
|
||||
clfftDim dim = CLFFT_3D;
|
||||
clfftDim dim;
|
||||
if (ndim == 1)
|
||||
dim = CLFFT_1D;
|
||||
else if (ndim == 2)
|
||||
dim = CLFFT_2D;
|
||||
else
|
||||
dim = CLFFT_3D;
|
||||
|
||||
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
|
||||
|
||||
size_t half = (size_t)N[0] / 2 + 1;
|
||||
size_t clInStride[3] = {1, half, half * N[1]};
|
||||
size_t clOutStride[3] = {1, (size_t)N[0], (size_t)N[0]*N[1]};
|
||||
|
||||
/* Create the complex-to-real FFT plan (1D, 2D or 3D, depending on ndim) */
|
||||
err = clfftCreateDefaultPlan(&planHandleZ2D, m_oclbase->m_context, dim, clLength);
|
||||
|
||||
@ -279,6 +305,8 @@ int OpenCLFFT::setupFFTCR(int ndim, int N[3], double scale) {
|
||||
err = clfftSetPlanPrecision(planHandleZ2D, CLFFT_DOUBLE);
|
||||
err = clfftSetLayout(planHandleZ2D, CLFFT_HERMITIAN_INTERLEAVED, CLFFT_REAL);
|
||||
err = clfftSetResultLocation(planHandleZ2D, CLFFT_OUTOFPLACE);
|
||||
err = clfftSetPlanInStride(planHandleZ2D, dim, clInStride);
|
||||
err = clfftSetPlanOutStride(planHandleZ2D, dim, clOutStride);
|
||||
|
||||
/* Bake the plan */
|
||||
err = clfftBakePlan(planHandleZ2D, 1, &m_oclbase->m_command_queue, NULL, NULL);
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include "OpenCLGreensFunction.h"
|
||||
#define GREENS_KERNEL "OpenCLKernels/OpenCLGreensFunction.cl"
|
||||
#define GREENS_KERNEL "OpenCL/OpenCLKernels/OpenCLGreensFunction.cl"
|
||||
|
||||
OpenCLGreensFunction::OpenCLGreensFunction(OpenCLBase *base) {
|
||||
m_base = base;
|
||||
@ -29,6 +29,8 @@ int OpenCLGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, in
|
||||
double hr_m0, double hr_m1, double hr_m2,
|
||||
int streamId)
|
||||
{
|
||||
int ierr = DKS_SUCCESS;
|
||||
|
||||
//compile opencl program from source
|
||||
buildProgram();
|
||||
|
||||
@ -42,26 +44,28 @@ int OpenCLGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, in
|
||||
work_items = (work_items / work_size + 1) * work_size;
|
||||
|
||||
//create kernel
|
||||
ierr = m_oclbase->ocl_createKernel("kernelTmpgreen");
|
||||
ierr = m_base->ocl_createKernel("kernelTmpgreen");
|
||||
|
||||
//set kernel parameters
|
||||
m_base->setKernelArg(0, sizeof(cl_mem), &tmpgreen_ptr);
|
||||
m_base->setKernelArg(1, sizeof(double), &hr_m0);
|
||||
m_base->setKernelArg(2, sizeof(double), &hr_m1);
|
||||
m_base->setKernelArg(3, sizeof(double), &hr_m2);
|
||||
m_base->setKernelArg(4, sizeof(int), &I);
|
||||
m_base->setKernelArg(5, sizeof(int), &J);
|
||||
m_base->setKernelArg(6, sizeof(int), &K);
|
||||
m_base->ocl_setKernelArg(0, sizeof(cl_mem), &tmpgreen_ptr);
|
||||
m_base->ocl_setKernelArg(1, sizeof(double), &hr_m0);
|
||||
m_base->ocl_setKernelArg(2, sizeof(double), &hr_m1);
|
||||
m_base->ocl_setKernelArg(3, sizeof(double), &hr_m2);
|
||||
m_base->ocl_setKernelArg(4, sizeof(int), &I);
|
||||
m_base->ocl_setKernelArg(5, sizeof(int), &J);
|
||||
m_base->ocl_setKernelArg(6, sizeof(int), &K);
|
||||
|
||||
//execute kernel
|
||||
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
||||
ierr = m_base->ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
int OpenCLGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int streamId)
|
||||
int OpenCLGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J,
|
||||
int K, int streamId)
|
||||
{
|
||||
int ierr = DKS_SUCCESS;
|
||||
|
||||
//compile opencl program from source
|
||||
buildProgram();
|
||||
|
||||
@ -70,8 +74,6 @@ int OpenCLGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen
|
||||
cl_mem tmpgreen_ptr = (cl_mem)tmpgreen;
|
||||
int NI = 2*(I - 1);
|
||||
int NJ = 2*(J - 1);
|
||||
int NK = 2*(K - 1);
|
||||
|
||||
|
||||
//set the work item size
|
||||
size_t work_size = 128;
|
||||
@ -80,20 +82,22 @@ int OpenCLGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen
|
||||
work_items = (work_items / work_size + 1) * work_size;
|
||||
|
||||
//create kernel
|
||||
ierr = m_oclbase->ocl_createKernel("kernelIntegration");
|
||||
ierr = m_base->ocl_createKernel("kernelIntegration");
|
||||
|
||||
//set kernel parameters
|
||||
m_base->setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
|
||||
m_base->setKernelArg(1, sizeof(cl_mem), &tmpgreen_ptr);
|
||||
m_base->setKernelArg(2, sizeof(int), &I);
|
||||
m_base->setKernelArg(3, sizeof(int), &J);
|
||||
m_base->setKernelArg(4, sizeof(int), &K);
|
||||
m_base->setKernelArg(5, sizeof(int), &NI);
|
||||
m_base->setKernelArg(6, sizeof(int), &NJ);
|
||||
m_base->setKernelArg(7, sizeof(int), &NK);
|
||||
m_base->ocl_setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
|
||||
m_base->ocl_setKernelArg(1, sizeof(cl_mem), &tmpgreen_ptr);
|
||||
m_base->ocl_setKernelArg(2, sizeof(int), &NI);
|
||||
m_base->ocl_setKernelArg(3, sizeof(int), &NJ);
|
||||
m_base->ocl_setKernelArg(4, sizeof(int), &I);
|
||||
m_base->ocl_setKernelArg(5, sizeof(int), &J);
|
||||
m_base->ocl_setKernelArg(6, sizeof(int), &K);
|
||||
|
||||
//execute kernel
|
||||
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
||||
double zero = 0.0;
|
||||
int sizerho = 2*(I - 1) * 2*(J - 1) * 2*(K - 1);
|
||||
m_base->ocl_fillMemory(rho2_ptr, sizerho, zero, 0);
|
||||
ierr = m_base->ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return ierr;
|
||||
|
||||
@ -102,6 +106,8 @@ int OpenCLGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen
|
||||
|
||||
int OpenCLGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId)
|
||||
{
|
||||
int ierr = DKS_SUCCESS;
|
||||
|
||||
//compile opencl program from source
|
||||
buildProgram();
|
||||
|
||||
@ -114,6 +120,8 @@ int OpenCLGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int
|
||||
int J2 = 2*J;
|
||||
int K2 = 2*K;
|
||||
|
||||
int rhosize = ( (I - 1) * 2 ) * ( (J - 1) * 2 ) * ( (K - 1) * 2 );
|
||||
|
||||
//set the work item size
|
||||
size_t work_size = 128;
|
||||
size_t work_items = NI * NJ * NK;
|
||||
@ -121,19 +129,20 @@ int OpenCLGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int
|
||||
work_items = (work_items / work_size + 1) * work_size;
|
||||
|
||||
//create kernel
|
||||
ierr = m_oclbase->ocl_createKernel("kernelMirroredRhoField");
|
||||
ierr = m_base->ocl_createKernel("kernelMirroredRhoField");
|
||||
|
||||
//set kernel parameters
|
||||
m_base->setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
|
||||
m_base->setKernelArg(1, sizeof(int), &I2);
|
||||
m_base->setKernelArg(2, sizeof(int), &J2);
|
||||
m_base->setKernelArg(3, sizeof(int), &K2);
|
||||
m_base->setKernelArg(4, sizeof(int), &NI);
|
||||
m_base->setKernelArg(5, sizeof(int), &NJ);
|
||||
m_base->setKernelArg(6, sizeof(int), &NK);
|
||||
m_base->ocl_setKernelArg(0, sizeof(cl_mem), &rho2_ptr);
|
||||
m_base->ocl_setKernelArg(1, sizeof(int), &I2);
|
||||
m_base->ocl_setKernelArg(2, sizeof(int), &J2);
|
||||
m_base->ocl_setKernelArg(3, sizeof(int), &K2);
|
||||
m_base->ocl_setKernelArg(4, sizeof(int), &NI);
|
||||
m_base->ocl_setKernelArg(5, sizeof(int), &NJ);
|
||||
m_base->ocl_setKernelArg(6, sizeof(int), &NK);
|
||||
m_base->ocl_setKernelArg(7, sizeof(int), &rhosize);
|
||||
|
||||
//execute kernel
|
||||
ierr = m_oclbase->ocl_executeKernel(1, &work_items, &work_size);
|
||||
ierr = m_base->ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return ierr;
|
||||
}
|
||||
@ -141,4 +150,32 @@ int OpenCLGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int
|
||||
|
||||
int OpenCLGreensFunction::multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId)
|
||||
{
|
||||
int ierr = DKS_SUCCESS;
|
||||
|
||||
//compile opencl program from source
|
||||
buildProgram();
|
||||
|
||||
//cast the input data ptr to cl_mem
|
||||
cl_mem mem_ptr1 = (cl_mem) ptr1;
|
||||
cl_mem mem_ptr2 = (cl_mem) ptr2;
|
||||
|
||||
//set the work item size
|
||||
size_t work_size = 128;
|
||||
size_t work_items = size;
|
||||
if (work_items % work_size > 0)
|
||||
work_items = (work_items / work_size + 1) * work_size;
|
||||
|
||||
//create kernel
|
||||
ierr = m_base->ocl_createKernel("multiplyComplexFields");
|
||||
|
||||
//set kernel parameters
|
||||
m_base->ocl_setKernelArg(0, sizeof(cl_mem), &mem_ptr1);
|
||||
m_base->ocl_setKernelArg(1, sizeof(cl_mem), &mem_ptr2);
|
||||
m_base->ocl_setKernelArg(2, sizeof(int), &size);
|
||||
|
||||
//execute kernel
|
||||
ierr = m_base->ocl_executeKernel(1, &work_items, &work_size);
|
||||
|
||||
return ierr;
|
||||
|
||||
}
|
||||
|
@ -60,4 +60,4 @@ public:
|
||||
};
|
||||
|
||||
|
||||
#endif H_OPENCL_GREENSFUNCTION
|
||||
#endif
|
||||
|
@ -81,28 +81,29 @@ __kernel void kernelIntegration(__global double *rho2_m, __global double *tmpgre
|
||||
tmp6 = tmpgreen[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
tmp7 = tmpgreen[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
|
||||
double tmp_rho = tmp0 + tmp1 + tmp2 + tmp3 - tmp4 - tmp5 - tmp6 - tmp7;
|
||||
|
||||
|
||||
rho2_m[i + j*ni + k*ni*nj] = tmp_rho;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** mirror rho-field */
|
||||
__kernel void mirroredRhoField0(__global double *rho2_m, int NI, int NJ) {
|
||||
__kernel void kernelMirroredRhoField0(__global double *rho2_m, int NI, int NJ) {
|
||||
rho2_m[0] = rho2_m[NI*NJ];
|
||||
}
|
||||
|
||||
__kernel void mirroredRhoField(__global double *rho2_m,
|
||||
int NI, int NJ, int NK,
|
||||
int NI_tmp, int NJ_tmp, int NK_tmp) {
|
||||
__kernel void kernelMirroredRhoField(__global double *rho2_m,
|
||||
int NI, int NJ, int NK,
|
||||
int NI_tmp, int NJ_tmp, int NK_tmp,
|
||||
int size)
|
||||
{
|
||||
|
||||
int tid = get_local_id(0);
|
||||
int id = get_global_id(0);
|
||||
|
||||
if (id == 0)
|
||||
rho2_m[0] = rho2_m[NI * NJ];
|
||||
rho2_m[0] = rho2_m[NI * NJ];
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
@ -127,27 +128,29 @@ __kernel void mirroredRhoField(__global double *rho2_m,
|
||||
id7 = rk * NI * NJ + rj * NI + i;
|
||||
id8 = rk * NI * NJ + rj * NI + ri;
|
||||
|
||||
double data = 0.0;
|
||||
if (id1 < size)
|
||||
data = rho2_m[id1];
|
||||
|
||||
double data = rho2_m[id1];
|
||||
if (i != 0) rho2_m[id2] = data;
|
||||
if (i != 0 && id2 < size) rho2_m[id2] = data;
|
||||
|
||||
if (j != 0) rho2_m[id3] = data;
|
||||
if (j != 0 && id3 < size) rho2_m[id3] = data;
|
||||
|
||||
if (i != 0 && j != 0) rho2_m[id4] = data;
|
||||
if (i != 0 && j != 0 && id4 < size) rho2_m[id4] = data;
|
||||
|
||||
if (k != 0) rho2_m[id5] = data;
|
||||
if (k != 0 && id5 < size) rho2_m[id5] = data;
|
||||
|
||||
if (k != 0 && i != 0) rho2_m[id6] = data;
|
||||
if (k != 0 && i != 0 && id6 < size) rho2_m[id6] = data;
|
||||
|
||||
if (k!= 0 && j != 0) rho2_m[id7] = data;
|
||||
if (k!= 0 && j != 0 && id7 < size) rho2_m[id7] = data;
|
||||
|
||||
if (k != 0 && j != 0 & i != 0) rho2_m[id8] = data;
|
||||
if (k != 0 && j != 0 & i != 0 && id8 < size) rho2_m[id8] = data;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** multiply complex fields */
|
||||
double2 CompelxMul(double2 a, double2 b) {
|
||||
double2 ComplexMul(double2 a, double2 b) {
|
||||
double2 c;
|
||||
c.x = a.x * b.x - a.y * b.y;
|
||||
c.y = a.x * b.y + a.y * b.x;
|
||||
@ -155,12 +158,13 @@ double2 CompelxMul(double2 a, double2 b) {
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
__kernel void multiplyComplexFields_2(__global double2 *ptr1, __global double2 *ptr2,
|
||||
int size)
|
||||
__kernel void multiplyComplexFields(__global double2 *ptr1, __global double2 *ptr2,
|
||||
int size)
|
||||
{
|
||||
|
||||
int idx = get_global_id(0);
|
||||
|
||||
if (idx < size)
|
||||
ptr1[idx] = ComplexMul(ptr1[idx], ptr2[idx]);
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user