FFT for OpenCL using clFFT library
This commit is contained in:
@ -52,9 +52,6 @@ class OpenCLBase {
|
||||
|
||||
private:
|
||||
|
||||
static cl_context m_context;
|
||||
static cl_command_queue m_command_queue;
|
||||
|
||||
static cl_platform_id m_platform_id;
|
||||
static cl_device_id m_device_id;
|
||||
|
||||
@ -118,6 +115,9 @@ protected:
|
||||
|
||||
|
||||
public:
|
||||
|
||||
static cl_context m_context;
|
||||
static cl_command_queue m_command_queue;
|
||||
|
||||
/*
|
||||
constructor
|
||||
|
@ -89,26 +89,82 @@ int OpenCLFFT::ocl_callBitReverseKernel(cl_mem &data, int cdim, int ndim, int N)
|
||||
call fft execution on device for every dimension
|
||||
*/
|
||||
int OpenCLFFT::executeFFT(void *data, int ndim, int N[3], int streamId, bool forward) {
|
||||
int ierr;
|
||||
|
||||
|
||||
int dkserr = DKS_SUCCESS;
|
||||
cl_int ierr;
|
||||
cl_mem inout = (cl_mem)data;
|
||||
int n = N[0];
|
||||
|
||||
for (int dim = 0; dim < ndim; dim++) {
|
||||
ierr = ocl_callBitReverseKernel(inout, dim, ndim, n);
|
||||
if (ierr != OCL_SUCCESS) {
|
||||
DEBUG_MSG("Error executing bit reverse");
|
||||
return OCL_ERROR;
|
||||
}
|
||||
if (forward)
|
||||
ierr = clfftEnqueueTransform(planHandleZ2Z, CLFFT_FORWARD, 1, &m_oclbase->m_command_queue,
|
||||
0, NULL, NULL, &inout, NULL, NULL);
|
||||
else
|
||||
ierr = clfftEnqueueTransform(planHandleZ2Z, CLFFT_BACKWARD, 1, &m_oclbase->m_command_queue,
|
||||
0, NULL, NULL, &inout, NULL, NULL);
|
||||
|
||||
ierr = ocl_callFFTKernel(inout, dim, ndim, n, forward);
|
||||
if (ierr != OCL_SUCCESS) {
|
||||
DEBUG_MSG("Error executing fft reverse");
|
||||
return OCL_ERROR;
|
||||
}
|
||||
if (ierr != OCL_SUCCESS) {
|
||||
dkserr = DKS_ERROR;
|
||||
DEBUG_MSG("Error executing cfFFT\n");
|
||||
if (ierr == CLFFT_INVALID_PLAN)
|
||||
std::cout << "Invlalid plan" << std::endl;
|
||||
else
|
||||
std::cout << "CLFFT error" << std::endl;
|
||||
}
|
||||
|
||||
return OCL_SUCCESS;
|
||||
return dkserr;
|
||||
}
|
||||
|
||||
/*
|
||||
call rcfft execution on device for every dimension
|
||||
*/
|
||||
int OpenCLFFT::executeRCFFT(void *real_ptr, void *comp_ptr, int ndim, int N[3], int streamId) {
|
||||
|
||||
std::cout << "execute RCFFT" << std::endl;
|
||||
|
||||
int dkserr = DKS_SUCCESS;
|
||||
cl_int ierr;
|
||||
cl_mem real_in = (cl_mem)real_ptr;
|
||||
cl_mem comp_out = (cl_mem)comp_ptr;
|
||||
|
||||
ierr = clfftEnqueueTransform(planHandleD2Z, CLFFT_FORWARD, 1, &m_oclbase->m_command_queue,
|
||||
0, NULL, NULL, &real_in, &comp_out, NULL);
|
||||
|
||||
if (ierr != OCL_SUCCESS) {
|
||||
dkserr = DKS_ERROR;
|
||||
DEBUG_MSG("Error executing cfFFT\n");
|
||||
if (ierr == CLFFT_INVALID_PLAN)
|
||||
std::cout << "Invlalid plan" << std::endl;
|
||||
else
|
||||
std::cout << "CLFFT error" << std::endl;
|
||||
}
|
||||
|
||||
return dkserr;
|
||||
}
|
||||
|
||||
/*
|
||||
call rcfft execution on device for every dimension
|
||||
*/
|
||||
int OpenCLFFT::executeCRFFT(void *real_ptr, void *comp_ptr, int ndim, int N[3], int streamId) {
|
||||
|
||||
std::cout << "execute CRFFT" << std::endl;
|
||||
|
||||
int dkserr = DKS_SUCCESS;
|
||||
cl_int ierr;
|
||||
cl_mem real_in = (cl_mem)real_ptr;
|
||||
cl_mem comp_out = (cl_mem)comp_ptr;
|
||||
|
||||
ierr = clfftEnqueueTransform(planHandleZ2D, CLFFT_BACKWARD, 1, &m_oclbase->m_command_queue,
|
||||
0, NULL, NULL, &comp_out, &real_in, NULL);
|
||||
|
||||
if (ierr != OCL_SUCCESS) {
|
||||
dkserr = DKS_ERROR;
|
||||
DEBUG_MSG("Error executing cfFFT\n");
|
||||
if (ierr == CLFFT_INVALID_PLAN)
|
||||
std::cout << "Invlalid plan" << std::endl;
|
||||
else
|
||||
std::cout << "CLFFT error" << std::endl;
|
||||
}
|
||||
|
||||
return dkserr;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -120,10 +176,11 @@ int OpenCLFFT::executeIFFT(void *data, int ndim, int N[3], int streamId) {
|
||||
}
|
||||
|
||||
/*
|
||||
call kernel to normalize fft
|
||||
call kernel to normalize fft. clFFT inverse already includes the scaling so this is disabled.
|
||||
*/
|
||||
int OpenCLFFT::normalizeFFT(void *data, int ndim, int N[3], int streamId) {
|
||||
|
||||
/*
|
||||
cl_mem inout = (cl_mem)data;
|
||||
|
||||
int n = N[0];
|
||||
@ -150,132 +207,143 @@ int OpenCLFFT::normalizeFFT(void *data, int ndim, int N[3], int streamId) {
|
||||
DEBUG_MSG("Error executing kernel");
|
||||
return OCL_ERROR;
|
||||
}
|
||||
|
||||
*/
|
||||
return OCL_SUCCESS;
|
||||
}
|
||||
|
||||
int OpenCLFFT::ocl_executeFFTStockham(void* &src, int ndim, int N, bool forward) {
|
||||
|
||||
int ierr;
|
||||
int size = sizeof(cl_double2)*pow(N,ndim);
|
||||
|
||||
cl_mem mem_tmp;
|
||||
cl_mem mem_src = (cl_mem)src;
|
||||
cl_mem mem_dst = (cl_mem)m_oclbase->ocl_allocateMemory(size, ierr);
|
||||
int OpenCLFFT::setupFFT(int ndim, int N[3]) {
|
||||
|
||||
//set the number of work items in each dimension
|
||||
size_t work_items[3];
|
||||
int p = 1;
|
||||
int threads = N / 2;
|
||||
int f = (forward) ? -1 : 1;
|
||||
|
||||
//execute kernel
|
||||
int n = (int)log2(N);
|
||||
for (int i = 0; i < ndim; i++) {
|
||||
cl_int err;
|
||||
|
||||
int dim = i+1;
|
||||
p = 1;
|
||||
work_items[0] = (dim == 1) ? N/2 : N;
|
||||
work_items[1] = (dim == 2) ? N/2 : N;
|
||||
work_items[2] = (dim == 3) ? N/2 : N;
|
||||
|
||||
//transpose array if calculating dimension larger than 1
|
||||
//if (dim > 1)
|
||||
// ocl_executeTranspose(mem_src, N, ndim, dim);
|
||||
|
||||
//create kernel and set kernel arguments
|
||||
if (m_oclbase->ocl_createKernel("fft3d_radix2") != OCL_SUCCESS)
|
||||
return OCL_ERROR;
|
||||
|
||||
for (int t = 1; t <= log2(N); t++) {
|
||||
|
||||
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &mem_src);
|
||||
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &mem_dst);
|
||||
m_oclbase->ocl_setKernelArg(2, sizeof(int), &p);
|
||||
m_oclbase->ocl_setKernelArg(3, sizeof(int), &threads);
|
||||
m_oclbase->ocl_setKernelArg(4, sizeof(int), &dim);
|
||||
m_oclbase->ocl_setKernelArg(5, sizeof(int), &f);
|
||||
|
||||
if (m_oclbase->ocl_executeKernel(ndim, work_items) != OCL_SUCCESS)
|
||||
return OCL_ERROR;
|
||||
clfftDim dim = CLFFT_3D;
|
||||
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
|
||||
|
||||
mem_tmp = mem_src;
|
||||
mem_src = mem_dst;
|
||||
mem_dst = mem_tmp;
|
||||
|
||||
p = 2*p;
|
||||
/* Create 3D fft plan*/
|
||||
err = clfftCreateDefaultPlan(&planHandleZ2Z, m_oclbase->m_context, dim, clLength);
|
||||
|
||||
/* Set plan parameters */
|
||||
err = clfftSetPlanPrecision(planHandleZ2Z, CLFFT_DOUBLE);
|
||||
if (err != CL_SUCCESS)
|
||||
std::cout << "Error setting precision" << std::endl;
|
||||
err = clfftSetLayout(planHandleZ2Z, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
|
||||
if (err != CL_SUCCESS)
|
||||
std::cout << "Error setting layout" << std::endl;
|
||||
err = clfftSetResultLocation(planHandleZ2Z, CLFFT_INPLACE);
|
||||
if (err != CL_SUCCESS)
|
||||
std::cout << "Error setting result location" << std::endl;
|
||||
/* Bake the plan */
|
||||
err = clfftBakePlan(planHandleZ2Z, 1, &m_oclbase->m_command_queue, NULL, NULL);
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
DEBUG_MSG("Error creating Complex-to-complex plan");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
int OpenCLFFT::setupFFTRC(int ndim, int N[3], double scale) {
|
||||
cl_int err;
|
||||
|
||||
clfftDim dim = CLFFT_3D;
|
||||
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
|
||||
|
||||
/* Create 3D fft plan*/
|
||||
err = clfftCreateDefaultPlan(&planHandleD2Z, m_oclbase->m_context, dim, clLength);
|
||||
|
||||
/* Set plan parameters */
|
||||
err = clfftSetPlanPrecision(planHandleD2Z, CLFFT_DOUBLE);
|
||||
err = clfftSetLayout(planHandleD2Z, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);
|
||||
err = clfftSetResultLocation(planHandleD2Z, CLFFT_OUTOFPLACE);
|
||||
|
||||
/* Bake the plan */
|
||||
err = clfftBakePlan(planHandleD2Z, 1, &m_oclbase->m_command_queue, NULL, NULL);
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
DEBUG_MSG("Error creating Real-to-complex plan");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
int OpenCLFFT::setupFFTCR(int ndim, int N[3], double scale) {
|
||||
cl_int err;
|
||||
|
||||
clfftDim dim = CLFFT_3D;
|
||||
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
|
||||
|
||||
/* Create 3D fft plan*/
|
||||
err = clfftCreateDefaultPlan(&planHandleZ2D, m_oclbase->m_context, dim, clLength);
|
||||
|
||||
/* Set plan parameters */
|
||||
err = clfftSetPlanPrecision(planHandleZ2D, CLFFT_DOUBLE);
|
||||
err = clfftSetLayout(planHandleZ2D, CLFFT_HERMITIAN_INTERLEAVED, CLFFT_REAL);
|
||||
err = clfftSetResultLocation(planHandleZ2D, CLFFT_OUTOFPLACE);
|
||||
|
||||
/* Bake the plan */
|
||||
err = clfftBakePlan(planHandleZ2D, 1, &m_oclbase->m_command_queue, NULL, NULL);
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
DEBUG_MSG("Error creating Complex-to-real plan");
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
int OpenCLFFT::destroyFFT() {
|
||||
clfftDestroyPlan(&planHandleZ2Z);
|
||||
clfftDestroyPlan(&planHandleD2Z);
|
||||
clfftDestroyPlan(&planHandleZ2D);
|
||||
|
||||
clfftTeardown();
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
void OpenCLFFT::printError(clfftStatus err) {
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
std::cout << "Error creating default plan " << err << std::endl;
|
||||
switch(err) {
|
||||
case CLFFT_BUGCHECK:
|
||||
std::cout << "bugcheck" << std::endl;
|
||||
break;
|
||||
case CLFFT_NOTIMPLEMENTED:
|
||||
std::cout << "not implemented" << std::endl;
|
||||
break;
|
||||
case CLFFT_TRANSPOSED_NOTIMPLEMENTED:
|
||||
std::cout << "transposed not implemented" << std::endl;
|
||||
break;
|
||||
case CLFFT_FILE_NOT_FOUND:
|
||||
std::cout << "file not found" << std::endl;
|
||||
break;
|
||||
case CLFFT_FILE_CREATE_FAILURE:
|
||||
std::cout << "file create failure" << std::endl;
|
||||
break;
|
||||
case CLFFT_VERSION_MISMATCH:
|
||||
std::cout << "version missmatch" << std::endl;
|
||||
break;
|
||||
case CLFFT_INVALID_PLAN:
|
||||
std::cout << "invalid plan" << std::endl;
|
||||
break;
|
||||
case CLFFT_DEVICE_NO_DOUBLE:
|
||||
std::cout << "no double" << std::endl;
|
||||
break;
|
||||
case CLFFT_DEVICE_MISMATCH:
|
||||
std::cout << "device missmatch" << std::endl;
|
||||
break;
|
||||
case CLFFT_ENDSTATUS:
|
||||
std::cout << "end status" << std::endl;
|
||||
break;
|
||||
default:
|
||||
std::cout << "other: " << err << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
//transpose array back if calculating dimension larger than 1
|
||||
//if (dim > 1)
|
||||
// ocl_executeTranspose(mem_src, N, ndim, dim);
|
||||
}
|
||||
|
||||
if (ndim*n % 2 == 1) {
|
||||
m_oclbase->ocl_copyData(mem_src, mem_dst, size);
|
||||
mem_tmp = mem_src;
|
||||
mem_src = mem_dst;
|
||||
mem_dst = mem_tmp;
|
||||
}
|
||||
|
||||
m_oclbase->ocl_freeMemory(mem_dst);
|
||||
|
||||
return OCL_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
int OpenCLFFT::ocl_executeFFTStockham2(void* &src, int ndim, int N, bool forward) {
|
||||
|
||||
cl_mem mem_src = (cl_mem)src;
|
||||
|
||||
size_t work_items[3] = { (size_t)N/2, (size_t)N, (size_t)N};
|
||||
size_t work_group_size[3] = {(size_t)N/2, 1, 1};
|
||||
|
||||
m_oclbase->ocl_createKernel("fft_batch3D");
|
||||
|
||||
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &mem_src);
|
||||
m_oclbase->ocl_setKernelArg(1, sizeof(cl_double2)*N, NULL);
|
||||
m_oclbase->ocl_setKernelArg(2, sizeof(cl_double2)*N, NULL);
|
||||
m_oclbase->ocl_setKernelArg(3, sizeof(cl_double2), NULL);
|
||||
m_oclbase->ocl_setKernelArg(4, sizeof(int), &N);
|
||||
|
||||
|
||||
for (int dim = 1; dim < ndim+1; dim++) {
|
||||
m_oclbase->ocl_setKernelArg(5, sizeof(int), &dim);
|
||||
m_oclbase->ocl_executeKernel(3, work_items, work_group_size);
|
||||
}
|
||||
|
||||
return OCL_SUCCESS;
|
||||
}
|
||||
|
||||
int OpenCLFFT::ocl_executeTranspose(void *src, int N[3], int ndim, int dim) {
|
||||
|
||||
cl_mem mem_src = (cl_mem)src;
|
||||
|
||||
if (ndim == 1)
|
||||
return OCL_SUCCESS;
|
||||
|
||||
size_t work_items[3];
|
||||
work_items[0] = N[0];
|
||||
work_items[1] = N[1];
|
||||
work_items[2] = 1;
|
||||
|
||||
size_t work_group_size[3];
|
||||
work_group_size[0] = N[0];
|
||||
work_group_size[1] = N[1];
|
||||
work_group_size[2] = 1;
|
||||
|
||||
size_t local_size = work_group_size[0] * work_group_size[1] * work_group_size[2];
|
||||
|
||||
m_oclbase->ocl_createKernel("transpose");
|
||||
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &mem_src);
|
||||
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &mem_src);
|
||||
m_oclbase->ocl_setKernelArg(2, sizeof(int), &N[0]);
|
||||
m_oclbase->ocl_setKernelArg(3, sizeof(int), &N[1]);
|
||||
m_oclbase->ocl_setKernelArg(4, sizeof(cl_double2)*local_size, NULL);
|
||||
m_oclbase->ocl_executeKernel(ndim, work_items, work_group_size);
|
||||
|
||||
return OCL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -20,12 +20,19 @@
|
||||
#include "../Algorithms/FFT.h"
|
||||
#include "OpenCLBase.h"
|
||||
|
||||
#include "clFFT.h"
|
||||
|
||||
class OpenCLFFT : public DKSFFT {
|
||||
|
||||
private:
|
||||
|
||||
OpenCLBase *m_oclbase;
|
||||
|
||||
clfftSetupData fftSetup;
|
||||
clfftPlanHandle planHandleZ2Z;
|
||||
clfftPlanHandle planHandleD2Z;
|
||||
clfftPlanHandle planHandleZ2D;
|
||||
|
||||
/*
|
||||
Info: call fft kernels to execute FFT of the given domain,
|
||||
data - device memory ptr, cdim - current dim to transform,
|
||||
@ -42,15 +49,31 @@ private:
|
||||
*/
|
||||
int ocl_callBitReverseKernel(cl_mem &data, int cdim, int ndim, int N);
|
||||
|
||||
/** Get clfftStatus and print the corresponding error message.
|
||||
* clfftStatus is returned from all clFFT library functions, print error displays the
|
||||
* corresponding error message. If "other" is printed then error code corresponds to
|
||||
* OpenCL error code and not specifically to clFFT library, then OpenCL error codes should
|
||||
* be checked to determine the reason for the error.
|
||||
*/
|
||||
void printError(clfftStatus err);
|
||||
|
||||
public:
|
||||
|
||||
/* constructor - stores the OpenCL base object and initializes the clFFT library */
|
||||
OpenCLFFT(OpenCLBase *base) {
|
||||
m_oclbase = base;
|
||||
|
||||
/* Set up fft */
|
||||
cl_int err;
|
||||
err = clfftInitSetupData(&fftSetup);
|
||||
err = clfftSetup(&fftSetup);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
DEBUG_MSG("Error seting up clFFT");
|
||||
}
|
||||
|
||||
/* destructor - destroys the clFFT plans and tears down clFFT via destroyFFT() */
|
||||
~OpenCLFFT() { }
|
||||
~OpenCLFFT() { destroyFFT(); }
|
||||
|
||||
/*
|
||||
Info: execute forward fft function with data set on device
|
||||
@ -77,35 +100,23 @@ public:
|
||||
Info: set FFT size
|
||||
Return: success or error code
|
||||
*/
|
||||
int setupFFT(int ndim, int N[3]) { return DKS_SUCCESS; }
|
||||
int setupFFT(int ndim, int N[3]);
|
||||
|
||||
int setupFFTRC(int ndim, int N[3], double scale = 1.0) { return DKS_SUCCESS; }
|
||||
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
|
||||
|
||||
int setupFFTCR(int ndim, int N[3], double scale = 1.0) { return DKS_SUCCESS; }
|
||||
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
|
||||
|
||||
int destroyFFT() { return DKS_SUCCESS; }
|
||||
int destroyFFT();
|
||||
|
||||
int executeRCFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3],
|
||||
int streamId = -1)
|
||||
{
|
||||
return DKS_ERROR;
|
||||
}
|
||||
int streamId = -1);
|
||||
int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3],
|
||||
int streamId = -1)
|
||||
{
|
||||
return DKS_ERROR;
|
||||
}
|
||||
int streamId = -1);
|
||||
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1)
|
||||
{
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
int ocl_executeFFTStockham(void* &src, int ndim, int N, bool forward = true);
|
||||
|
||||
int ocl_executeFFTStockham2(void* &src, int ndim, int N, bool forward = true);
|
||||
|
||||
int ocl_executeTranspose(void *src, int N[3], int ndim, int dim);
|
||||
|
||||
//void printData3DN4(cl_double2* &data, int N);
|
||||
|
||||
};
|
||||
|
Reference in New Issue
Block a user