#include "MICFFT.h" #include #include #include #include MICFFT::MICFFT(MICBase *base) { m_micbase = base; } MICFFT::~MICFFT() { #pragma offload target(mic:0) { DftiFreeDescriptor(&FFTHandle_m); DftiFreeDescriptor(&handle); } } //setup fft int MICFFT::setupFFT(int ndim, int N[3]) { //set up FFT engine #pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE) { MKL_LONG sizes[3], strides[4]; sizes[0] = N[0]; sizes[1] = N[1]; sizes[2] = N[2]; //strides[0] = 0; strides[1] = sizes[1]; strides[2] = 1; strides[3] = sizes[0]*sizes[1]; strides[0] = 0; strides[1] = sizes[0]*sizes[1]; strides[2] = sizes[0]; strides[3] = 1; MKL_LONG dims = 3; DftiCreateDescriptor(&(this->getHandle()), DFTI_DOUBLE, DFTI_COMPLEX, dims, sizes); DftiSetValue(this->getHandle(), DFTI_INPUT_STRIDES, strides); DftiSetValue(this->getHandle(), DFTI_COMPLEX_STORAGE, DFTI_COMPLEX_COMPLEX); DftiCommitDescriptor(this->getHandle()); } return DKS_SUCCESS; } //BENI: //setup fft int MICFFT::setupFFTRC(int ndim, int N[3], double scale) { //set up FFT engine for REAL->COMPLEX #pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE) { MKL_LONG sizes[3], real_strides[4], complex_strides[4]; sizes[0] = N[2]; sizes[1] = N[1]; sizes[2] = N[0]; //real_strides[0] = 0; real_strides[1] = 2*sizes[1]*(sizes[0]/2+1); real_strides[2] = 2*(sizes[0]/2+1); real_strides[3] = 1; real_strides[0] = 0; real_strides[1] = sizes[2]*sizes[1]; real_strides[2] = sizes[2]; real_strides[3] = 1; //real_strides[0] = 0; real_strides[1] = 1; real_strides[2] = sizes[0]; real_strides[3] = sizes[0]*sizes[1]; //complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[0]/2+1); complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = 1; complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[2]/2+1); complex_strides[2] = (sizes[2]/2+1); complex_strides[3] = 1; //complex_strides[0] = 0; complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = sizes[1]*(sizes[0]/2+1); complex_strides[1] = 1; MKL_LONG dims = 3; DftiCreateDescriptor(&(this->getHandleRC()), DFTI_DOUBLE, DFTI_REAL, dims, sizes); DftiSetValue(this->getHandleRC(),DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); DftiSetValue(this->getHandleRC(), DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT); DftiSetValue(this->getHandleRC(), DFTI_PLACEMENT, DFTI_NOT_INPLACE); DftiSetValue(this->getHandleRC(), DFTI_INPUT_STRIDES, real_strides); DftiSetValue(this->getHandleRC(), DFTI_OUTPUT_STRIDES, complex_strides); DftiSetValue(this->getHandleRC(), DFTI_FORWARD_SCALE, scale); DftiCommitDescriptor(this->getHandleRC()); } return DKS_SUCCESS; } //BENI: //setup fft int MICFFT::setupFFTCR(int ndim, int N[3], double scale) { //set up FFT engine for COMPLEX->REAL #pragma offload target(mic:0) in(N:length(3) DKS_ALLOC DKS_FREE) { MKL_LONG sizes[3], real_strides[4], complex_strides[4]; sizes[0] = N[2]; sizes[1] = N[1]; sizes[2] = N[0]; //real_strides[0] = 0; real_strides[1] = 2*sizes[1]*(sizes[0]/2+1); real_strides[2] = 2*(sizes[0]/2+1); real_strides[3] = 1; real_strides[0] = 0; real_strides[1] = sizes[2]*sizes[1]; real_strides[2] = sizes[2]; real_strides[3] = 1; //real_strides[0] = 0; real_strides[1] = 1; real_strides[2] = sizes[0]; real_strides[3] = sizes[0]*sizes[1]; //complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[0]/2+1); complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = 1; complex_strides[0] = 0; complex_strides[1] = sizes[1]*(sizes[2]/2+1); complex_strides[2] = (sizes[2]/2+1); complex_strides[3] = 1; //complex_strides[0] = 0; complex_strides[2] = (sizes[0]/2+1); complex_strides[3] = sizes[1]*(sizes[0]/2+1); complex_strides[1] = 1; MKL_LONG dims = 3; DftiCreateDescriptor(&(this->getHandleCR()), DFTI_DOUBLE, DFTI_REAL, dims, sizes); DftiSetValue(this->getHandleCR(),DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); DftiSetValue(this->getHandleCR(), DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT); DftiSetValue(this->getHandleCR(), DFTI_PLACEMENT, DFTI_NOT_INPLACE); DftiSetValue(this->getHandleCR(), DFTI_INPUT_STRIDES, complex_strides); DftiSetValue(this->getHandleCR(), DFTI_OUTPUT_STRIDES, real_strides); DftiSetValue(this->getHandleCR(), DFTI_BACKWARD_SCALE, scale); DftiCommitDescriptor(this->getHandleCR()); } return DKS_SUCCESS; } //execute COMPLEX->COMPLEX FFT int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool forward) { _Complex double *ptr = (_Complex double*) mem_ptr; #pragma offload target(mic:0) in(ptr:length(0) DKS_RETAIN DKS_REUSE) in(forward) { if (forward) DftiComputeForward(this->getHandle(), ptr); else DftiComputeBackward(this->getHandle(), ptr); } return DKS_SUCCESS; } //execute iFFT int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3]) { return mic_executeFFT(mem_ptr, ndim, N, -1, false); } //execute REAL->COMPLEX FFT int MICFFT::executeRCFFT(void *in_ptr, void *out_ptr, int ndim, int N[3], int streamId) { double *real_ptr = (double*) in_ptr; //std::complex *compl_ptr = (std::complex *) out_ptr; _Complex double *compl_ptr = (_Complex double *) out_ptr; int sizereal = N[0]*N[1]*N[2]; int sizecompl = (N[0]/2+1)*N[1]*N[2]; //std::cout << "start real-compl fft on mic " << std::endl; //std::cout << "real_ptr = " << real_ptr << std::endl; //std::cout << "compl_ptr = " << compl_ptr << std::endl; //std::cout << "EXECUTE AVERAGING OVER 10 LOOPS OF FFT" << std::endl; #pragma offload target(mic:0) in(real_ptr:length(0) DKS_RETAIN DKS_REUSE) in(compl_ptr:length(0) DKS_RETAIN DKS_REUSE) //#pragma offload target(mic:0) nocopy(real_ptr:length(sizereal) RETAIN REUSE) nocopy(compl_ptr:length(sizecompl) RETAIN REUSE) { //for (int i=0;i<10;++i){ //loop 10 times for benchmarking DftiComputeForward(this->getHandleRC(), real_ptr, compl_ptr); //} } //std::cout << "end real-compl fft on mic " << std::endl; return DKS_SUCCESS; } //execute COMPLEX->REAL FFT int MICFFT::executeCRFFT(void *in_ptr, void *out_ptr, int ndim, int N[3], int streamId) { //_Complex double *ptr = (_Complex double*) mem_ptr; double *real_ptr = (double*) out_ptr; _Complex double *compl_ptr = (_Complex double *) in_ptr; //std::cout << "real_ptr = " << real_ptr << std::endl; //std::cout << "compl_ptr = " << compl_ptr << std::endl; int sizereal = N[0]*N[1]*N[2]; int sizecompl = (N[0]/2+1)*N[1]*N[2]; //std::cout << "offload to perform backward fft ... " << std::endl; //struct timeval start, end; //gettimeofday(&start,NULL); #pragma offload target(mic:0) in(real_ptr:length(0) DKS_RETAIN DKS_REUSE) in(compl_ptr:length(0) DKS_RETAIN DKS_REUSE) //#pragma offload target(mic:0) nocopy(real_ptr:length(sizereal) RETAIN REUSE) nocopy(compl_ptr:length(sizecompl) RETAIN REUSE) { //for (int i=0;i<10;++i){ //loop 10 times for benchmarking DftiComputeBackward(this->getHandleCR(), compl_ptr, real_ptr); //} } // End timing offloaded FFT. //gettimeofday(&end,NULL); // Print execution time of offloaded computational loop. //printf ("Total time for IFFT spent = %f seconds\n", //(double) (end.tv_usec-start.tv_usec) /1000000+(double) (end.tv_sec-start.tv_sec)); //std::cout << "IFFT DONE!" << std::endl; return DKS_SUCCESS; } //normalize IFFT int MICFFT::normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId) { int size = N[0] * N[1] * N[2]; _Complex double *ptr = (_Complex double*) mem_ptr; #pragma offload target(mic:0) in(ptr:length(0) DKS_RETAIN DKS_REUSE) in(size) { #pragma omp parallel for for (int i = 0; i < size; i++) { __real__ ptr[i] = __real__ ptr[i] / size; __imag__ ptr[i] = __imag__ ptr[i] / size; } } return DKS_SUCCESS; }