Files
DKS/src/MIC/MICFFT.cpp
2016-11-16 18:12:17 +01:00

214 lines
7.8 KiB
C++

#include "MICFFT.h"
#include<stdio.h>
#include<complex>
#include <time.h>
#include <sys/time.h>
MICFFT::MICFFT(MICBase *base) {
m_micbase = base;
m_fftsetup = false;
}
MICFFT::~MICFFT() {
if (m_fftsetup) {
#pragma offload target(mic:0)
{
DftiFreeDescriptor(&FFTHandle_m);
DftiFreeDescriptor(&handle);
}
}
}
//setup fft
int MICFFT::setupFFT(int ndim, int N[3]) {
//set up FFT engine
#pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE)
{
MKL_LONG sizes[3], strides[4];
sizes[0] = N[0]; sizes[1] = N[1]; sizes[2] = N[2];
//strides[0] = 0; strides[1] = sizes[1]; strides[2] = 1; strides[3] = sizes[0]*sizes[1];
strides[0] = 0; strides[1] = sizes[0]*sizes[1]; strides[2] = sizes[0]; strides[3] = 1;
MKL_LONG dims = 3;
DftiCreateDescriptor(&(this->getHandle()), DFTI_DOUBLE, DFTI_COMPLEX, dims, sizes);
DftiSetValue(this->getHandle(), DFTI_INPUT_STRIDES, strides);
DftiSetValue(this->getHandle(), DFTI_COMPLEX_STORAGE, DFTI_COMPLEX_COMPLEX);
DftiCommitDescriptor(this->getHandle());
}
m_fftsetup = true;
return DKS_SUCCESS;
}
//BENI:
//setup fft
int MICFFT::setupFFTRC(int ndim, int N[3], double scale) {
//set up FFT engine for REAL->COMPLEX
#pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE)
{
MKL_LONG sizes[3], real_strides[4], complex_strides[4];
sizes[0] = N[2]; sizes[1] = N[1]; sizes[2] = N[0];
//real_strides[0] = 0; real_strides[1] = 2*sizes[1]*(sizes[0]/2+1); real_strides[2] = 2*(sizes[0]/2+1); real_strides[3] = 1;
real_strides[0] = 0; real_strides[1] = sizes[2]*sizes[1]; real_strides[2] = sizes[2]; real_strides[3] = 1;
//real_strides[0] = 0; real_strides[1] = 1; real_strides[2] = sizes[0]; real_strides[3] = sizes[0]*sizes[1];
//complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[0]/2+1); complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = 1;
complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[2]/2+1); complex_strides[2] = (sizes[2]/2+1); complex_strides[3] = 1;
//complex_strides[0] = 0; complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = sizes[1]*(sizes[0]/2+1); complex_strides[1] = 1;
MKL_LONG dims = 3;
DftiCreateDescriptor(&(this->getHandleRC()), DFTI_DOUBLE, DFTI_REAL, dims, sizes);
DftiSetValue(this->getHandleRC(),DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
DftiSetValue(this->getHandleRC(), DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT);
DftiSetValue(this->getHandleRC(), DFTI_PLACEMENT, DFTI_NOT_INPLACE);
DftiSetValue(this->getHandleRC(), DFTI_INPUT_STRIDES, real_strides);
DftiSetValue(this->getHandleRC(), DFTI_OUTPUT_STRIDES, complex_strides);
DftiSetValue(this->getHandleRC(), DFTI_FORWARD_SCALE, scale);
DftiCommitDescriptor(this->getHandleRC());
}
return DKS_SUCCESS;
}
//BENI:
//setup fft
int MICFFT::setupFFTCR(int ndim, int N[3], double scale) {
//set up FFT engine for COMPLEX->REAL
#pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE)
{
MKL_LONG sizes[3], real_strides[4], complex_strides[4];
sizes[0] = N[2]; sizes[1] = N[1]; sizes[2] = N[0];
//real_strides[0] = 0; real_strides[1] = 2*sizes[1]*(sizes[0]/2+1); real_strides[2] = 2*(sizes[0]/2+1); real_strides[3] = 1;
real_strides[0] = 0; real_strides[1] = sizes[2]*sizes[1]; real_strides[2] = sizes[2]; real_strides[3] = 1;
//real_strides[0] = 0; real_strides[1] = 1; real_strides[2] = sizes[0]; real_strides[3] = sizes[0]*sizes[1];
//complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[0]/2+1); complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = 1;
complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[2]/2+1); complex_strides[2] = (sizes[2]/2+1); complex_strides[3] = 1;
//complex_strides[0] = 0; complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = sizes[1]*(sizes[0]/2+1); complex_strides[1] = 1;
MKL_LONG dims = 3;
DftiCreateDescriptor(&(this->getHandleCR()), DFTI_DOUBLE, DFTI_REAL, dims, sizes);
DftiSetValue(this->getHandleCR(),DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
DftiSetValue(this->getHandleCR(), DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT);
DftiSetValue(this->getHandleCR(), DFTI_PLACEMENT, DFTI_NOT_INPLACE);
DftiSetValue(this->getHandleCR(), DFTI_INPUT_STRIDES, complex_strides);
DftiSetValue(this->getHandleCR(), DFTI_OUTPUT_STRIDES, real_strides);
DftiSetValue(this->getHandleCR(), DFTI_BACKWARD_SCALE, scale);
DftiCommitDescriptor(this->getHandleCR());
}
return DKS_SUCCESS;
}
//execute COMPLEX->COMPLEX FFT
int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool forward) {
_Complex double *ptr = (_Complex double*) mem_ptr;
#pragma offload target(mic:0) in(ptr:length(0) DKS_RETAIN DKS_REUSE) in(forward)
{
if (forward)
DftiComputeForward(this->getHandle(), ptr);
else
DftiComputeBackward(this->getHandle(), ptr);
}
return DKS_SUCCESS;
}
//execute iFFT
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
return executeFFT(mem_ptr, ndim, N, -1, false);
}
//execute REAL->COMPLEX FFT
int MICFFT::executeRCFFT(void *in_ptr, void *out_ptr, int ndim, int N[3], int streamId) {
double *real_ptr = (double*) in_ptr;
//std::complex<double> *compl_ptr = (std::complex<double> *) out_ptr;
_Complex double *compl_ptr = (_Complex double *) out_ptr;
int sizereal = N[0]*N[1]*N[2];
int sizecompl = (N[0]/2+1)*N[1]*N[2];
//std::cout << "start real-compl fft on mic " << std::endl;
//std::cout << "real_ptr = " << real_ptr << std::endl;
//std::cout << "compl_ptr = " << compl_ptr << std::endl;
//std::cout << "EXECUTE AVERAGING OVER 10 LOOPS OF FFT" << std::endl;
#pragma offload target(mic:0) in(real_ptr:length(0) DKS_RETAIN DKS_REUSE) in(compl_ptr:length(0) DKS_RETAIN DKS_REUSE)
//#pragma offload target(mic:0) nocopy(real_ptr:length(sizereal) RETAIN REUSE) nocopy(compl_ptr:length(sizecompl) RETAIN REUSE)
{
//for (int i=0;i<10;++i){ //loop 10 times for benchmarking
DftiComputeForward(this->getHandleRC(), real_ptr, compl_ptr);
//}
}
//std::cout << "end real-compl fft on mic " << std::endl;
return DKS_SUCCESS;
}
//execute COMPLEX->REAL FFT
int MICFFT::executeCRFFT(void *in_ptr, void *out_ptr, int ndim, int N[3], int streamId) {
//_Complex double *ptr = (_Complex double*) mem_ptr;
double *real_ptr = (double*) out_ptr;
_Complex double *compl_ptr = (_Complex double *) in_ptr;
//std::cout << "real_ptr = " << real_ptr << std::endl;
//std::cout << "compl_ptr = " << compl_ptr << std::endl;
int sizereal = N[0]*N[1]*N[2];
int sizecompl = (N[0]/2+1)*N[1]*N[2];
//std::cout << "offload to perform backward fft ... " << std::endl;
//struct timeval start, end;
//gettimeofday(&start,NULL);
#pragma offload target(mic:0) in(real_ptr:length(0) DKS_RETAIN DKS_REUSE) in(compl_ptr:length(0) DKS_RETAIN DKS_REUSE)
//#pragma offload target(mic:0) nocopy(real_ptr:length(sizereal) RETAIN REUSE) nocopy(compl_ptr:length(sizecompl) RETAIN REUSE)
{
//for (int i=0;i<10;++i){ //loop 10 times for benchmarking
DftiComputeBackward(this->getHandleCR(), compl_ptr, real_ptr);
//}
}
// End timing offloaded FFT.
//gettimeofday(&end,NULL);
// Print execution time of offloaded computational loop.
//printf ("Total time for IFFT spent = %f seconds\n",
//(double) (end.tv_usec-start.tv_usec) /1000000+(double) (end.tv_sec-start.tv_sec));
//std::cout << "IFFT DONE!" << std::endl;
return DKS_SUCCESS;
}
//normalize IFFT
int MICFFT::normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
int size = N[0] * N[1] * N[2];
_Complex double *ptr = (_Complex double*) mem_ptr;
#pragma offload target(mic:0) in(ptr:length(0) DKS_RETAIN DKS_REUSE) in(size)
{
#pragma omp parallel for
for (int i = 0; i < size; i++) {
__real__ ptr[i] = __real__ ptr[i] / size;
__imag__ ptr[i] = __imag__ ptr[i] / size;
}
}
return DKS_SUCCESS;
}