214 lines
7.8 KiB
C++
214 lines
7.8 KiB
C++
#include "MICFFT.h"
|
|
#include<stdio.h>
|
|
#include<complex>
|
|
#include <time.h>
|
|
#include <sys/time.h>
|
|
|
|
MICFFT::MICFFT(MICBase *base) {
|
|
m_micbase = base;
|
|
m_fftsetup = false;
|
|
}
|
|
|
|
MICFFT::~MICFFT() {
|
|
if (m_fftsetup) {
|
|
#pragma offload target(mic:0)
|
|
{
|
|
DftiFreeDescriptor(&FFTHandle_m);
|
|
DftiFreeDescriptor(&handle);
|
|
}
|
|
}
|
|
}
|
|
|
|
//setup fft
|
|
int MICFFT::setupFFT(int ndim, int N[3]) {
|
|
//set up FFT engine
|
|
#pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE)
|
|
{
|
|
|
|
MKL_LONG sizes[3], strides[4];
|
|
sizes[0] = N[0]; sizes[1] = N[1]; sizes[2] = N[2];
|
|
//strides[0] = 0; strides[1] = sizes[1]; strides[2] = 1; strides[3] = sizes[0]*sizes[1];
|
|
strides[0] = 0; strides[1] = sizes[0]*sizes[1]; strides[2] = sizes[0]; strides[3] = 1;
|
|
|
|
MKL_LONG dims = 3;
|
|
DftiCreateDescriptor(&(this->getHandle()), DFTI_DOUBLE, DFTI_COMPLEX, dims, sizes);
|
|
DftiSetValue(this->getHandle(), DFTI_INPUT_STRIDES, strides);
|
|
DftiSetValue(this->getHandle(), DFTI_COMPLEX_STORAGE, DFTI_COMPLEX_COMPLEX);
|
|
DftiCommitDescriptor(this->getHandle());
|
|
|
|
}
|
|
|
|
m_fftsetup = true;
|
|
return DKS_SUCCESS;
|
|
}
|
|
//BENI:
|
|
//setup fft
|
|
int MICFFT::setupFFTRC(int ndim, int N[3], double scale) {
|
|
|
|
//set up FFT engine for REAL->COMPLEX
|
|
|
|
#pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE)
|
|
{
|
|
|
|
MKL_LONG sizes[3], real_strides[4], complex_strides[4];
|
|
sizes[0] = N[2]; sizes[1] = N[1]; sizes[2] = N[0];
|
|
//real_strides[0] = 0; real_strides[1] = 2*sizes[1]*(sizes[0]/2+1); real_strides[2] = 2*(sizes[0]/2+1); real_strides[3] = 1;
|
|
real_strides[0] = 0; real_strides[1] = sizes[2]*sizes[1]; real_strides[2] = sizes[2]; real_strides[3] = 1;
|
|
//real_strides[0] = 0; real_strides[1] = 1; real_strides[2] = sizes[0]; real_strides[3] = sizes[0]*sizes[1];
|
|
//complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[0]/2+1); complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = 1;
|
|
complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[2]/2+1); complex_strides[2] = (sizes[2]/2+1); complex_strides[3] = 1;
|
|
//complex_strides[0] = 0; complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = sizes[1]*(sizes[0]/2+1); complex_strides[1] = 1;
|
|
|
|
MKL_LONG dims = 3;
|
|
DftiCreateDescriptor(&(this->getHandleRC()), DFTI_DOUBLE, DFTI_REAL, dims, sizes);
|
|
DftiSetValue(this->getHandleRC(),DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
|
|
DftiSetValue(this->getHandleRC(), DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT);
|
|
DftiSetValue(this->getHandleRC(), DFTI_PLACEMENT, DFTI_NOT_INPLACE);
|
|
DftiSetValue(this->getHandleRC(), DFTI_INPUT_STRIDES, real_strides);
|
|
DftiSetValue(this->getHandleRC(), DFTI_OUTPUT_STRIDES, complex_strides);
|
|
DftiSetValue(this->getHandleRC(), DFTI_FORWARD_SCALE, scale);
|
|
DftiCommitDescriptor(this->getHandleRC());
|
|
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
//BENI:
|
|
//setup fft
|
|
int MICFFT::setupFFTCR(int ndim, int N[3], double scale) {
|
|
|
|
//set up FFT engine for COMPLEX->REAL
|
|
|
|
#pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE)
|
|
{
|
|
MKL_LONG sizes[3], real_strides[4], complex_strides[4];
|
|
sizes[0] = N[2]; sizes[1] = N[1]; sizes[2] = N[0];
|
|
//real_strides[0] = 0; real_strides[1] = 2*sizes[1]*(sizes[0]/2+1); real_strides[2] = 2*(sizes[0]/2+1); real_strides[3] = 1;
|
|
real_strides[0] = 0; real_strides[1] = sizes[2]*sizes[1]; real_strides[2] = sizes[2]; real_strides[3] = 1;
|
|
//real_strides[0] = 0; real_strides[1] = 1; real_strides[2] = sizes[0]; real_strides[3] = sizes[0]*sizes[1];
|
|
//complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[0]/2+1); complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = 1;
|
|
complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[2]/2+1); complex_strides[2] = (sizes[2]/2+1); complex_strides[3] = 1;
|
|
//complex_strides[0] = 0; complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = sizes[1]*(sizes[0]/2+1); complex_strides[1] = 1;
|
|
|
|
MKL_LONG dims = 3;
|
|
DftiCreateDescriptor(&(this->getHandleCR()), DFTI_DOUBLE, DFTI_REAL, dims, sizes);
|
|
DftiSetValue(this->getHandleCR(),DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
|
|
DftiSetValue(this->getHandleCR(), DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT);
|
|
DftiSetValue(this->getHandleCR(), DFTI_PLACEMENT, DFTI_NOT_INPLACE);
|
|
DftiSetValue(this->getHandleCR(), DFTI_INPUT_STRIDES, complex_strides);
|
|
DftiSetValue(this->getHandleCR(), DFTI_OUTPUT_STRIDES, real_strides);
|
|
DftiSetValue(this->getHandleCR(), DFTI_BACKWARD_SCALE, scale);
|
|
DftiCommitDescriptor(this->getHandleCR());
|
|
|
|
|
|
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
//execute COMPLEX->COMPLEX FFT
|
|
int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool forward) {
|
|
|
|
_Complex double *ptr = (_Complex double*) mem_ptr;
|
|
|
|
#pragma offload target(mic:0) in(ptr:length(0) DKS_RETAIN DKS_REUSE) in(forward)
|
|
{
|
|
if (forward)
|
|
DftiComputeForward(this->getHandle(), ptr);
|
|
else
|
|
DftiComputeBackward(this->getHandle(), ptr);
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
//execute iFFT
|
|
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
|
|
return executeFFT(mem_ptr, ndim, N, -1, false);
|
|
}
|
|
|
|
//execute REAL->COMPLEX FFT
|
|
int MICFFT::executeRCFFT(void *in_ptr, void *out_ptr, int ndim, int N[3], int streamId) {
|
|
|
|
double *real_ptr = (double*) in_ptr;
|
|
//std::complex<double> *compl_ptr = (std::complex<double> *) out_ptr;
|
|
_Complex double *compl_ptr = (_Complex double *) out_ptr;
|
|
int sizereal = N[0]*N[1]*N[2];
|
|
int sizecompl = (N[0]/2+1)*N[1]*N[2];
|
|
|
|
//std::cout << "start real-compl fft on mic " << std::endl;
|
|
|
|
//std::cout << "real_ptr = " << real_ptr << std::endl;
|
|
//std::cout << "compl_ptr = " << compl_ptr << std::endl;
|
|
//std::cout << "EXECUTE AVERAGING OVER 10 LOOPS OF FFT" << std::endl;
|
|
|
|
#pragma offload target(mic:0) in(real_ptr:length(0) DKS_RETAIN DKS_REUSE) in(compl_ptr:length(0) DKS_RETAIN DKS_REUSE)
|
|
//#pragma offload target(mic:0) nocopy(real_ptr:length(sizereal) RETAIN REUSE) nocopy(compl_ptr:length(sizecompl) RETAIN REUSE)
|
|
{
|
|
//for (int i=0;i<10;++i){ //loop 10 times for benchmarking
|
|
DftiComputeForward(this->getHandleRC(), real_ptr, compl_ptr);
|
|
//}
|
|
}
|
|
|
|
//std::cout << "end real-compl fft on mic " << std::endl;
|
|
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
//execute COMPLEX->REAL FFT
|
|
int MICFFT::executeCRFFT(void *in_ptr, void *out_ptr, int ndim, int N[3], int streamId) {
|
|
|
|
//_Complex double *ptr = (_Complex double*) mem_ptr;
|
|
|
|
double *real_ptr = (double*) out_ptr;
|
|
_Complex double *compl_ptr = (_Complex double *) in_ptr;
|
|
|
|
//std::cout << "real_ptr = " << real_ptr << std::endl;
|
|
//std::cout << "compl_ptr = " << compl_ptr << std::endl;
|
|
int sizereal = N[0]*N[1]*N[2];
|
|
int sizecompl = (N[0]/2+1)*N[1]*N[2];
|
|
|
|
//std::cout << "offload to perform backward fft ... " << std::endl;
|
|
//struct timeval start, end;
|
|
//gettimeofday(&start,NULL);
|
|
#pragma offload target(mic:0) in(real_ptr:length(0) DKS_RETAIN DKS_REUSE) in(compl_ptr:length(0) DKS_RETAIN DKS_REUSE)
|
|
//#pragma offload target(mic:0) nocopy(real_ptr:length(sizereal) RETAIN REUSE) nocopy(compl_ptr:length(sizecompl) RETAIN REUSE)
|
|
{
|
|
//for (int i=0;i<10;++i){ //loop 10 times for benchmarking
|
|
DftiComputeBackward(this->getHandleCR(), compl_ptr, real_ptr);
|
|
//}
|
|
}
|
|
|
|
// End timing offloaded FFT.
|
|
//gettimeofday(&end,NULL);
|
|
// Print execution time of offloaded computational loop.
|
|
//printf ("Total time for IFFT spent = %f seconds\n",
|
|
//(double) (end.tv_usec-start.tv_usec) /1000000+(double) (end.tv_sec-start.tv_sec));
|
|
//std::cout << "IFFT DONE!" << std::endl;
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
|
|
//normalize IFFT
|
|
int MICFFT::normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
|
|
|
|
int size = N[0] * N[1] * N[2];
|
|
|
|
_Complex double *ptr = (_Complex double*) mem_ptr;
|
|
#pragma offload target(mic:0) in(ptr:length(0) DKS_RETAIN DKS_REUSE) in(size)
|
|
{
|
|
#pragma omp parallel for
|
|
for (int i = 0; i < size; i++) {
|
|
__real__ ptr[i] = __real__ ptr[i] / size;
|
|
__imag__ ptr[i] = __imag__ ptr[i] / size;
|
|
}
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|