diff --git a/src/DKSFFT.cpp b/src/DKSFFT.cpp new file mode 100644 index 0000000..9942fc2 --- /dev/null +++ b/src/DKSFFT.cpp @@ -0,0 +1,144 @@ +#include "DKSFFT.h" + +DKSFFT::DKSFFT() { + dksfft = nullptr; +} + +~DKSFFT::DKSFFT() { + delete dksfft; +} + +/* setup fft plans to reuse if multiple ffts of same size are needed */ +int DKSFFT::setupFFT(int ndim, int N[3]) { + + if (apiCuda()) { + return dksfft->setupFFT(ndim, N); + } else if (apiOpenCL()) { + int ierr1 = dksfft->setupFFT(ndim, N); + int ierr2 = dksfft->setupFFTRC(ndim, N); + int ierr3 = dksfft->setupFFTCR(ndim, N); + if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS) + return DKS_ERROR; + + return DKS_SUCCESS; + } else if (apiOpenMP()) { + //micbase.mic_setupFFT(ndim, N); + //BENI: setting up RC and CR transformations on MIC + int ierr1 = dksfft->setupFFTRC(ndim, N, 1.); + int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2])); + if (ierr1 != DKS_SUCCESS) + return ierr1; + if (ierr2 != DKS_SUCCESS) + return ierr2; + return DKS_SUCCESS; + } + + return DKS_ERROR; + +} +//BENI: +int DKSFFT::setupFFTRC(int ndim, int N[3], double scale) { + + if (apiCuda()) + return dksfft->setupFFT(ndim, N); + if (apiOpenCL()) + return dksfft->setupFFTRC(ndim, N); + else if (apiOpenMP()) + return dksfft->setupFFTRC(ndim, N, scale); + + return DKS_ERROR; + +} + +//BENI: +int DKSFFT::setupFFTCR(int ndim, int N[3], double scale) { + + if (apiCuda()) + return dksfft->setupFFT(ndim, N); + if (apiOpenCL()) + return dksfft->setupFFTCR(ndim, N); + else if (apiOpenMP()) + return dksfft->setupFFTCR(ndim, N, scale); + + return DKS_ERROR; + +} + +/* call OpenCL FFT function for selected platform */ +int DKSFFT::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { + + if (apiOpenCL() || apiOpenMP()) + return dksfft->executeFFT(data_ptr, ndim, dimsize); + else if (apiCuda()) + return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId); + + DEBUG_MSG("No implementation for selected platform"); + return DKS_ERROR; +} + +/* call OpenCL IFFT function for selected platform */ +int DKSFFT::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { + if (apiOpenCL() || apiOpenMP()) + return dksfft->executeIFFT(data_ptr, ndim, dimsize); + else if (apiCuda()) + return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId); + + DEBUG_MSG("No implementation for selected platform"); + return DKS_ERROR; +} + +/* call normalize FFT function for selected platform */ +int DKSFFT::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { + + if (apiOpenCL()) { + if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) + return dksfft->normalizeFFT(data_ptr, ndim, dimsize); + else + return DKS_ERROR; + } else if (apiCuda()) { + return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId); + } else if (apiOpenMP()) { + return dksfft->normalizeFFT(data_ptr, ndim, dimsize); + } + + DEBUG_MSG("No implementation for selected platform"); + return DKS_ERROR; +} + +/* call real to complex FFT */ +int DKSFFT::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { + + if (apiCuda()) + return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId); + else if (apiOpenCL() || apiOpenMP()) + return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize); + + DEBUG_MSG("No implementation for selected platform"); + return DKS_ERROR; +} + +/* call complex to real FFT */ +int DKSFFT::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { + if (apiCuda()) + return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId); + else if (apiOpenCL() || apiOpenMP()) + return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize); + + DEBUG_MSG("No implementation for selected platform"); + return DKS_ERROR; +} + +/* normalize complex to real iFFT */ +int DKSFFT::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) { + if (apiCuda()) + return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId); + else if (apiOpenCL()) + return DKS_ERROR; + else if (apiOpenMP()) + return DKS_ERROR; + + DEBUG_MSG("No implementation for selected platform"); + return DKS_ERROR; +} + + diff --git a/src/DKSFFT.h b/src/DKSFFT.h new file mode 100644 index 0000000..c13fff5 --- /dev/null +++ b/src/DKSFFT.h @@ -0,0 +1,107 @@ +#ifndef H_DKS_FFT +#define H_DKS_FFT + +#include +#include "AutoTuning/DKSAutoTuning.h" + +#include "DKSBase.h" + +#include "DKSDefinitions.h" + +#include "Algorithms/GreensFunction.h" +#include "Algorithms/CollimatorPhysics.h" +#include "Algorithms/FFT.h" + +#ifdef DKS_AMD +#include "OpenCL/OpenCLFFT.h" +#endif + +#ifdef DKS_CUDA +#include "CUDA/CudaFFT.cuh" + +#endif + +#ifdef DKS_MIC +#include "MIC/MICFFT.h" +#endif + +class DKSFFT : public DKSBase { + +private: + + DKSFFT *dksfft; + + int initFFT(); + +public: + + DKSFFT(); + ~DKSFFT(); + + /** + * Setup FFT function. + * Initializes parameters for fft executuin. If ndim > 0 initializes handles for fft calls. + * If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case + * each fft will do its own setup according to fft size and dimensions. + * TODO: opencl and mic implementations + */ + int setupFFT(int ndim, int N[3]); + //BENI: + int setupFFTRC(int ndim, int N[3], double scale = 1.0); + //BENI: + int setupFFTCR(int ndim, int N[3], double scale = 1.0); + + /** + * Call complex-to-complex fft. + * Executes in place complex to compelx fft on the device on data pointed by data_ptr. + * stream id can be specified to use other streams than default. + * TODO: mic implementation + */ + int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1); + + /** + * Call complex-to-complex ifft. + * Executes in place complex to compelx ifft on the device on data pointed by data_ptr. + * stream id can be specified to use other streams than default. + * TODO: mic implementation. + */ + int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1); + + /** + * Normalize complex to complex ifft. + * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by + * fft size + * TODO: mic implementation. + */ + int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1); + + /** + * Call real to complex FFT. + * Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points + * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size + * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast + * (dimsize[0]/2+1)*dimsize[1]*dimsize[2] + * TODO: opencl and mic implementations + */ + int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1); + + /** + * Call complex to real iFFT. + * Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points + * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size + * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast + * (dimsize[0]/2+1)*dimsize[1]*dimsize[2] + * TODO: opencl and mic implementations. + */ + int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1); + + /** + * Normalize compelx to real ifft. + * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by + * fft size. + * TODO: opencl and mic implementations. + */ + int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1); + + +}; diff --git a/src/DKSOPAL.cpp b/src/DKSOPAL.cpp index 914f977..1b8ca9e 100644 --- a/src/DKSOPAL.cpp +++ b/src/DKSOPAL.cpp @@ -1,7 +1,6 @@ #include "DKSOPAL.h" DKSOPAL::DKSOPAL() { - dksfft = nullptr; dkscol = nullptr; dksgreens = nullptr; } @@ -50,139 +49,6 @@ int DKSOPAL::initDevice() { } -/* setup fft plans to reuse if multiple ffts of same size are needed */ -int DKSOPAL::setupFFT(int ndim, int N[3]) { - - if (apiCuda()) { - return dksfft->setupFFT(ndim, N); - } else if (apiOpenCL()) { - int ierr1 = dksfft->setupFFT(ndim, N); - int ierr2 = dksfft->setupFFTRC(ndim, N); - int ierr3 = dksfft->setupFFTCR(ndim, N); - if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS) - return DKS_ERROR; - - return DKS_SUCCESS; - } else if (apiOpenMP()) { - //micbase.mic_setupFFT(ndim, N); - //BENI: setting up RC and CR transformations on MIC - int ierr1 = dksfft->setupFFTRC(ndim, N, 1.); - int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2])); - if (ierr1 != DKS_SUCCESS) - return ierr1; - if (ierr2 != DKS_SUCCESS) - return ierr2; - return DKS_SUCCESS; - } - - return DKS_ERROR; - -} -//BENI: -int DKSOPAL::setupFFTRC(int ndim, int N[3], double scale) { - - if (apiCuda()) - return dksfft->setupFFT(ndim, N); - if (apiOpenCL()) - return dksfft->setupFFTRC(ndim, N); - else if (apiOpenMP()) - return dksfft->setupFFTRC(ndim, N, scale); - - return DKS_ERROR; - -} - -//BENI: -int DKSOPAL::setupFFTCR(int ndim, int N[3], double scale) { - - if (apiCuda()) - return dksfft->setupFFT(ndim, N); - if (apiOpenCL()) - return dksfft->setupFFTCR(ndim, N); - else if (apiOpenMP()) - return dksfft->setupFFTCR(ndim, N, scale); - - return DKS_ERROR; - -} - -/* call OpenCL FFT function for selected platform */ -int DKSOPAL::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { - - if (apiOpenCL() || apiOpenMP()) - return dksfft->executeFFT(data_ptr, ndim, dimsize); - else if (apiCuda()) - return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId); - - DEBUG_MSG("No implementation for selected platform"); - return DKS_ERROR; -} - -/* call OpenCL IFFT function for selected platform */ -int DKSOPAL::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { - if (apiOpenCL() || apiOpenMP()) - return dksfft->executeIFFT(data_ptr, ndim, dimsize); - else if (apiCuda()) - return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId); - - DEBUG_MSG("No implementation for selected platform"); - return DKS_ERROR; -} - -/* call normalize FFT function for selected platform */ -int DKSOPAL::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) { - - if (apiOpenCL()) { - if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS ) - return dksfft->normalizeFFT(data_ptr, ndim, dimsize); - else - return DKS_ERROR; - } else if (apiCuda()) { - return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId); - } else if (apiOpenMP()) { - return dksfft->normalizeFFT(data_ptr, ndim, dimsize); - } - - DEBUG_MSG("No implementation for selected platform"); - return DKS_ERROR; -} - -/* call real to complex FFT */ -int DKSOPAL::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { - - if (apiCuda()) - return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId); - else if (apiOpenCL() || apiOpenMP()) - return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize); - - DEBUG_MSG("No implementation for selected platform"); - return DKS_ERROR; -} - -/* call complex to real FFT */ -int DKSOPAL::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) { - if (apiCuda()) - return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId); - else if (apiOpenCL() || apiOpenMP()) - return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize); - - DEBUG_MSG("No implementation for selected platform"); - return DKS_ERROR; -} - -/* normalize complex to real iFFT */ -int DKSOPAL::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) { - if (apiCuda()) - return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId); - else if (apiOpenCL()) - return DKS_ERROR; - else if (apiOpenMP()) - return DKS_ERROR; - - DEBUG_MSG("No implementation for selected platform"); - return DKS_ERROR; -} - int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ, double hz_m0, double hz_m1, double hz_m2, int streamId) { diff --git a/src/DKSOPAL.h b/src/DKSOPAL.h index add9ac6..feee92f 100644 --- a/src/DKSOPAL.h +++ b/src/DKSOPAL.h @@ -56,71 +56,6 @@ public: ///////Function library part of dksbase//////// /////////////////////////////////////////////// - /** - * Setup FFT function. - * Initializes parameters for fft executuin. If ndim > 0 initializes handles for fft calls. - * If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case - * each fft will do its own setup according to fft size and dimensions. - * TODO: opencl and mic implementations - */ - int setupFFT(int ndim, int N[3]); - //BENI: - int setupFFTRC(int ndim, int N[3], double scale = 1.0); - //BENI: - int setupFFTCR(int ndim, int N[3], double scale = 1.0); - -/** - * Call complex-to-complex fft. - * Executes in place complex to compelx fft on the device on data pointed by data_ptr. - * stream id can be specified to use other streams than default. - * TODO: mic implementation - */ - int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1); - - /** - * Call complex-to-complex ifft. - * Executes in place complex to compelx ifft on the device on data pointed by data_ptr. - * stream id can be specified to use other streams than default. - * TODO: mic implementation. - */ - int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1); - - /** - * Normalize complex to complex ifft. - * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by - * fft size - * TODO: mic implementation. - */ - int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1); - - /** - * Call real to complex FFT. - * Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points - * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size - * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast - * (dimsize[0]/2+1)*dimsize[1]*dimsize[2] - * TODO: opencl and mic implementations - */ - int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1); - - /** - * Call complex to real iFFT. - * Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points - * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size - * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast - * (dimsize[0]/2+1)*dimsize[1]*dimsize[2] - * TODO: opencl and mic implementations. - */ - int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1); - - /** - * Normalize compelx to real ifft. - * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by - * fft size. - * TODO: opencl and mic implementations. - */ - int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1); - /** * Integrated greens function from OPAL FFTPoissonsolver.cpp put on device. * For specifics check OPAL docs.