separate FFT from DKSOPAL

This commit is contained in:
Uldis Locans
2017-05-29 09:39:25 +02:00
parent cadd258668
commit f3527969cb
4 changed files with 251 additions and 199 deletions

144
src/DKSFFT.cpp Normal file
View File

@ -0,0 +1,144 @@
#include "DKSFFT.h"
/* Construct the wrapper with no FFT backend attached: dksfft starts out as nullptr. */
DKSFFT::DKSFFT() : dksfft(nullptr) {}
~DKSFFT::DKSFFT() {
delete dksfft;
}
/**
 * Set up FFT plans so they can be reused when several FFTs of the same size are needed.
 *
 * CUDA:   forwards to the backend's setupFFT.
 * OpenCL: sets up the complex-to-complex, real-to-complex and complex-to-real plans;
 *         all three are attempted and any failure is reported as DKS_ERROR.
 * OpenMP: sets up the real-to-complex plan with scale 1 and the complex-to-real plan
 *         with scale 1/(N[0]*N[1]*N[2]); the first failing backend code is returned.
 *
 * @param ndim number of dimensions, passed through to the backend
 * @param N    size of each dimension; all three entries are read on the OpenMP path
 * @return DKS_SUCCESS on success, otherwise an error code
 *         (DKS_ERROR when none of the three APIs is selected)
 */
int DKSFFT::setupFFT(int ndim, int N[3]) {
  if (apiCuda()) {
    return dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    // every plan is attempted even if an earlier setup failed
    int ierr1 = dksfft->setupFFT(ndim, N);
    int ierr2 = dksfft->setupFFTRC(ndim, N);
    int ierr3 = dksfft->setupFFTCR(ndim, N);
    if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
      return DKS_ERROR;
    return DKS_SUCCESS;
  } else if (apiOpenMP()) {
    // Form the element count in double precision: the int product
    // N[0]*N[1]*N[2] overflows (undefined behaviour) once it exceeds INT_MAX.
    const double gridSize = static_cast<double>(N[0]) * N[1] * N[2];
    //BENI: setting up RC and CR transformations on MIC
    int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
    int ierr2 = dksfft->setupFFTCR(ndim, N, 1. / gridSize);
    if (ierr1 != DKS_SUCCESS)
      return ierr1;
    if (ierr2 != DKS_SUCCESS)
      return ierr2;
    return DKS_SUCCESS;
  }
  return DKS_ERROR;
}
/*
 * Set up the real-to-complex FFT plan for the selected platform.
 * The CUDA path calls the backend's generic setupFFT, the OpenCL call omits
 * the scale argument, and only the OpenMP call passes `scale` on.
 * Returns the backend's result, or DKS_ERROR when no API matches.
 */
int DKSFFT::setupFFTRC(int ndim, int N[3], double scale) {
  int ierr = DKS_ERROR;
  if (apiCuda()) {
    ierr = dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    ierr = dksfft->setupFFTRC(ndim, N);
  } else if (apiOpenMP()) {
    ierr = dksfft->setupFFTRC(ndim, N, scale);
  }
  return ierr;
}
/*
 * Set up the complex-to-real FFT plan for the selected platform.
 * The CUDA path calls the backend's generic setupFFT, the OpenCL call omits
 * the scale argument, and only the OpenMP call passes `scale` on.
 * Returns the backend's result, or DKS_ERROR when no API matches.
 */
int DKSFFT::setupFFTCR(int ndim, int N[3], double scale) {
  int ierr = DKS_ERROR;
  if (apiCuda()) {
    ierr = dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    ierr = dksfft->setupFFTCR(ndim, N);
  } else if (apiOpenMP()) {
    ierr = dksfft->setupFFTCR(ndim, N, scale);
  }
  return ierr;
}
/*
 * Run the forward complex-to-complex FFT on data_ptr for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSFFT::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (withoutStream) {
    return dksfft->executeFFT(data_ptr, ndim, dimsize);
  }
  if (!apiCuda()) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
}
/*
 * Run the inverse complex-to-complex FFT on data_ptr for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSFFT::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (withoutStream) {
    return dksfft->executeIFFT(data_ptr, ndim, dimsize);
  }
  if (!apiCuda()) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
}
/*
 * Call the backend's normalizeFFT on data_ptr for the selected platform.
 * On OpenCL the FFT kernel source is loaded first, and a failed load yields DKS_ERROR
 * without calling the backend. streamId is forwarded only on the CUDA path.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSFFT::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiOpenCL()) {
    if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") != DKS_SUCCESS) {
      return DKS_ERROR;
    }
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
  }
  if (apiCuda()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
  }
  if (apiOpenMP()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
  }
  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/*
 * Run the real-to-complex FFT (real_ptr -> comp_ptr) for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSFFT::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiCuda()) {
    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  }
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (!withoutStream) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
}
/*
 * Run the complex-to-real FFT (comp_ptr -> real_ptr) for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSFFT::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiCuda()) {
    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  }
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (!withoutStream) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
}
/*
 * Call the backend's normalizeCRFFT on real_ptr.
 * Only the CUDA path reaches the backend. OpenCL and OpenMP return DKS_ERROR
 * without a debug message; any other platform logs a message and returns DKS_ERROR.
 */
int DKSFFT::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiCuda()) {
    return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
  }
  // no backend call for these two platforms: fail silently
  if (apiOpenCL() || apiOpenMP()) {
    return DKS_ERROR;
  }
  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}

107
src/DKSFFT.h Normal file
View File

@ -0,0 +1,107 @@
#ifndef H_DKS_FFT
#define H_DKS_FFT
#include <iostream>
#include "AutoTuning/DKSAutoTuning.h"
#include "DKSBase.h"
#include "DKSDefinitions.h"
#include "Algorithms/GreensFunction.h"
#include "Algorithms/CollimatorPhysics.h"
#include "Algorithms/FFT.h"
#ifdef DKS_AMD
#include "OpenCL/OpenCLFFT.h"
#endif
#ifdef DKS_CUDA
#include "CUDA/CudaFFT.cuh"
#endif
#ifdef DKS_MIC
#include "MIC/MICFFT.h"
#endif

/**
 * FFT front end split out of DKSOPAL.
 * Each call checks which API is selected (CUDA, OpenCL or OpenMP) and forwards
 * to the object held in dksfft.
 */
class DKSFFT : public DKSBase {

private:

  /*
   * Object the FFT calls are forwarded to; set to nullptr by the constructor
   * and deleted by the destructor.
   * NOTE(review): DKSFFT.cpp calls executeFFT / executeIFFT / normalizeFFT /
   * executeRCFFT / executeCRFFT / normalizeCRFFT through this pointer, none of
   * which DKSFFT declares. The pointee is presumably meant to be the backend
   * FFT interface from Algorithms/FFT.h; confirm and correct the type.
   */
  DKSFFT *dksfft;

  /* NOTE(review): declared here, but no definition appears in DKSFFT.cpp; confirm where dksfft gets created. */
  int initFFT();

public:

  DKSFFT();
  ~DKSFFT();

  /**
   * Setup FFT function.
   * Initializes parameters for fft execution. If ndim > 0 initializes handles for fft calls.
   * If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
   * each fft will do its own setup according to fft size and dimensions.
   * TODO: opencl and mic implementations
   */
  int setupFFT(int ndim, int N[3]);

  /** Setup real-to-complex fft; scale is passed to the backend only on the OpenMP path. */
  //BENI:
  int setupFFTRC(int ndim, int N[3], double scale = 1.0);

  /** Setup complex-to-real fft; scale is passed to the backend only on the OpenMP path. */
  //BENI:
  int setupFFTCR(int ndim, int N[3], double scale = 1.0);

  /**
   * Call complex-to-complex fft.
   * Executes in place complex to complex fft on the device on data pointed by data_ptr.
   * stream id can be specified to use other streams than default.
   * TODO: mic implementation
   */
  int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);

  /**
   * Call complex-to-complex ifft.
   * Executes in place complex to complex ifft on the device on data pointed by data_ptr.
   * stream id can be specified to use other streams than default.
   * TODO: mic implementation.
   */
  int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);

  /**
   * Normalize complex to complex ifft.
   * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
   * fft size
   * TODO: mic implementation.
   */
  int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);

  /**
   * Call real to complex FFT.
   * Executes out of place real to complex fft, real_ptr points to real data, comp_ptr - points
   * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
   * should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
   * (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
   * TODO: opencl and mic implementations
   */
  int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);

  /**
   * Call complex to real iFFT.
   * Executes out of place complex to real ifft, real_ptr points to real data, comp_ptr - points
   * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
   * should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
   * (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
   * TODO: opencl and mic implementations.
   */
  int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);

  /**
   * Normalize complex to real ifft.
   * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
   * fft size.
   * TODO: opencl and mic implementations.
   */
  int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);

};

#endif // H_DKS_FFT

View File

@ -1,7 +1,6 @@
#include "DKSOPAL.h" #include "DKSOPAL.h"
DKSOPAL::DKSOPAL() { DKSOPAL::DKSOPAL() {
dksfft = nullptr;
dkscol = nullptr; dkscol = nullptr;
dksgreens = nullptr; dksgreens = nullptr;
} }
@ -50,139 +49,6 @@ int DKSOPAL::initDevice() {
} }
/**
 * Set up FFT plans so they can be reused when several FFTs of the same size are needed.
 *
 * CUDA:   forwards to the backend's setupFFT.
 * OpenCL: sets up the complex-to-complex, real-to-complex and complex-to-real plans;
 *         all three are attempted and any failure is reported as DKS_ERROR.
 * OpenMP: sets up the real-to-complex plan with scale 1 and the complex-to-real plan
 *         with scale 1/(N[0]*N[1]*N[2]); the first failing backend code is returned.
 *
 * @param ndim number of dimensions, passed through to the backend
 * @param N    size of each dimension; all three entries are read on the OpenMP path
 * @return DKS_SUCCESS on success, otherwise an error code
 *         (DKS_ERROR when none of the three APIs is selected)
 */
int DKSOPAL::setupFFT(int ndim, int N[3]) {
  if (apiCuda()) {
    return dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    // every plan is attempted even if an earlier setup failed
    int ierr1 = dksfft->setupFFT(ndim, N);
    int ierr2 = dksfft->setupFFTRC(ndim, N);
    int ierr3 = dksfft->setupFFTCR(ndim, N);
    if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
      return DKS_ERROR;
    return DKS_SUCCESS;
  } else if (apiOpenMP()) {
    // Form the element count in double precision: the int product
    // N[0]*N[1]*N[2] overflows (undefined behaviour) once it exceeds INT_MAX.
    const double gridSize = static_cast<double>(N[0]) * N[1] * N[2];
    //BENI: setting up RC and CR transformations on MIC
    int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
    int ierr2 = dksfft->setupFFTCR(ndim, N, 1. / gridSize);
    if (ierr1 != DKS_SUCCESS)
      return ierr1;
    if (ierr2 != DKS_SUCCESS)
      return ierr2;
    return DKS_SUCCESS;
  }
  return DKS_ERROR;
}
/*
 * Set up the real-to-complex FFT plan for the selected platform.
 * The CUDA path calls the backend's generic setupFFT, the OpenCL call omits
 * the scale argument, and only the OpenMP call passes `scale` on.
 * Returns the backend's result, or DKS_ERROR when no API matches.
 */
int DKSOPAL::setupFFTRC(int ndim, int N[3], double scale) {
  int ierr = DKS_ERROR;
  if (apiCuda()) {
    ierr = dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    ierr = dksfft->setupFFTRC(ndim, N);
  } else if (apiOpenMP()) {
    ierr = dksfft->setupFFTRC(ndim, N, scale);
  }
  return ierr;
}
/*
 * Set up the complex-to-real FFT plan for the selected platform.
 * The CUDA path calls the backend's generic setupFFT, the OpenCL call omits
 * the scale argument, and only the OpenMP call passes `scale` on.
 * Returns the backend's result, or DKS_ERROR when no API matches.
 */
int DKSOPAL::setupFFTCR(int ndim, int N[3], double scale) {
  int ierr = DKS_ERROR;
  if (apiCuda()) {
    ierr = dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    ierr = dksfft->setupFFTCR(ndim, N);
  } else if (apiOpenMP()) {
    ierr = dksfft->setupFFTCR(ndim, N, scale);
  }
  return ierr;
}
/*
 * Run the forward complex-to-complex FFT on data_ptr for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSOPAL::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (withoutStream) {
    return dksfft->executeFFT(data_ptr, ndim, dimsize);
  }
  if (!apiCuda()) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
}
/*
 * Run the inverse complex-to-complex FFT on data_ptr for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSOPAL::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (withoutStream) {
    return dksfft->executeIFFT(data_ptr, ndim, dimsize);
  }
  if (!apiCuda()) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
}
/*
 * Call the backend's normalizeFFT on data_ptr for the selected platform.
 * On OpenCL the FFT kernel source is loaded first, and a failed load yields DKS_ERROR
 * without calling the backend. streamId is forwarded only on the CUDA path.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSOPAL::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiOpenCL()) {
    if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") != DKS_SUCCESS) {
      return DKS_ERROR;
    }
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
  }
  if (apiCuda()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
  }
  if (apiOpenMP()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
  }
  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/*
 * Run the real-to-complex FFT (real_ptr -> comp_ptr) for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSOPAL::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiCuda()) {
    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  }
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (!withoutStream) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
}
/*
 * Run the complex-to-real FFT (comp_ptr -> real_ptr) for the selected platform.
 * streamId is forwarded only on the CUDA path; the OpenCL/OpenMP call does not take it.
 * Returns the backend's result, or DKS_ERROR (after a debug message) when no API matches.
 */
int DKSOPAL::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiCuda()) {
    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  }
  const bool withoutStream = apiOpenCL() || apiOpenMP();
  if (!withoutStream) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }
  return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
}
/*
 * Call the backend's normalizeCRFFT on real_ptr.
 * Only the CUDA path reaches the backend. OpenCL and OpenMP return DKS_ERROR
 * without a debug message; any other platform logs a message and returns DKS_ERROR.
 */
int DKSOPAL::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
  if (apiCuda()) {
    return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
  }
  // no backend call for these two platforms: fail silently
  if (apiOpenCL() || apiOpenMP()) {
    return DKS_ERROR;
  }
  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ, int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
double hz_m0, double hz_m1, double hz_m2, int streamId) { double hz_m0, double hz_m1, double hz_m2, int streamId) {

View File

@ -56,71 +56,6 @@ public:
///////Function library part of dksbase//////// ///////Function library part of dksbase////////
/////////////////////////////////////////////// ///////////////////////////////////////////////
/**
* Setup FFT function.
* Initializes parameters for fft execution. If ndim > 0 initializes handles for fft calls.
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
* each fft will do its own setup according to fft size and dimensions.
* TODO: opencl and mic implementations
*/
int setupFFT(int ndim, int N[3]);
//BENI:
int setupFFTRC(int ndim, int N[3], double scale = 1.0);
//BENI:
int setupFFTCR(int ndim, int N[3], double scale = 1.0);
/**
* Call complex-to-complex fft.
* Executes in place complex to complex fft on the device on data pointed by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation
*/
int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Call complex-to-complex ifft.
* Executes in place complex to complex ifft on the device on data pointed by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation.
*/
int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Normalize complex to complex ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size
* TODO: mic implementation.
*/
int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Call real to complex FFT.
* Executes out of place real to complex fft, real_ptr points to real data, comp_ptr - points
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
* TODO: opencl and mic implementations
*/
int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Call complex to real iFFT.
* Executes out of place complex to real ifft, real_ptr points to real data, comp_ptr - points
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
* TODO: opencl and mic implementations.
*/
int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Normalize complex to real ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size.
* TODO: opencl and mic implementations.
*/
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
/** /**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device. * Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs. * For specifics check OPAL docs.