118 lines
3.1 KiB
C++
118 lines
3.1 KiB
C++
#include <climits>
#include <complex>
#include <cstdlib>
#include <iostream>

#include <cufft.h>
#include <cuda_runtime.h>

#include "Utility/TimeStamp.h"
#include "DKSBase.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
/**
 * Fill a dense 3D array so that every element holds its own index along the
 * fastest-varying dimension.
 *
 * @param data     output buffer with at least
 *                 dimsize[0] * dimsize[1] * dimsize[2] elements
 * @param dimsize  extents of the array; dimsize[0] is the contiguous
 *                 (innermost) dimension, dimsize[2] the outermost
 */
void initData(double *data, int dimsize[3]) {
  const int nx = dimsize[0];
  const int ny = dimsize[1];
  const int nz = dimsize[2];

  for (int z = 0; z < nz; ++z) {
    for (int y = 0; y < ny; ++y) {
      // start of the contiguous run of nx values for this (z, y) pair
      double *row = data + (z * ny + y) * nx;
      for (int x = 0; x < nx; ++x) {
        row[x] = x;
      }
    }
  }
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
int N = 8;
|
|
if (argc == 2)
|
|
N = atoi(argv[1]);
|
|
|
|
int N1 = N;
|
|
int N2 = N;
|
|
int N3 = N;
|
|
int dim = 3;
|
|
|
|
int dimsize[3] = {N3, N2, N1};
|
|
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
|
|
int sizecomp = dimsize[0] * dimsize[1] * (dimsize[2]/2+1);
|
|
|
|
double *data1 = new double[sizereal];
|
|
double *data2 = new double[sizereal];
|
|
|
|
initData(data1, dimsize);
|
|
initData(data2, dimsize);
|
|
|
|
/* init DKSBase */
|
|
cout << "Init device and set function" << endl;
|
|
|
|
DKSBase base;
|
|
base.setAPI("Cuda", 4);
|
|
base.setDevice("-gpu", 4);
|
|
base.initDevice();
|
|
base.setupFFT(3, dimsize);
|
|
|
|
/* pagelock data */
|
|
base.allocateHostMemory(data1, sizereal);
|
|
base.allocateHostMemory(data2, sizereal);
|
|
|
|
/* create streams */
|
|
int fft1, fft2;
|
|
base.createStream(fft1);
|
|
base.createStream(fft2);
|
|
|
|
int ierr;
|
|
void *real_ptr1, *real_ptr2, *comp_ptr1, *comp_ptr2;
|
|
|
|
cout << "allocating memory ..." << endl;
|
|
/* allocate memory on device */;
|
|
real_ptr1 = base.allocateMemory<double>(sizereal, ierr);
|
|
real_ptr2 = base.allocateMemory<double>(sizereal, ierr);
|
|
comp_ptr1 = base.allocateMemory< complex<double> >(sizecomp*2, ierr);
|
|
comp_ptr2 = base.allocateMemory< complex<double> >(sizecomp*2, ierr);
|
|
|
|
cufftHandle defaultPlan;
|
|
cudaStream_t cfft1, cfft2;
|
|
cufftPlan3d(&defaultPlan, N1, N2, N3, CUFFT_D2Z);
|
|
cudaStreamCreate(&cfft1);
|
|
cudaStreamCreate(&cfft2);
|
|
|
|
|
|
for (int i = 0; i < 5; i++) {
|
|
|
|
cufftHandle plan = defaultPlan;
|
|
|
|
cout << "Iteration: " << i << endl;
|
|
/* write data to device */
|
|
base.writeDataAsync<double>(real_ptr1, data1, sizereal, fft1);
|
|
//cudaMemcpyAsync( (double*)real_ptr1,data1,sizeof(double)*sizereal,cudaMemcpyHostToDevice,cfft1);
|
|
|
|
/* execute rcfft */
|
|
base.callR2CFFT(real_ptr1, comp_ptr1, dim, dimsize, fft1);
|
|
//cufftSetStream(plan, cfft1);
|
|
//cufftExecD2Z(plan, (cufftDoubleReal*)real_ptr1, (cufftDoubleComplex*)comp_ptr2);
|
|
|
|
/* write data to device */
|
|
base.writeDataAsync<double>(real_ptr2, data2, sizereal, fft2);
|
|
//cudaMemcpyAsync( (double*)real_ptr2,data2,sizeof(double)*sizereal,cudaMemcpyHostToDevice,cfft2);
|
|
|
|
/* execute rcfft */
|
|
base.callR2CFFT(real_ptr2, comp_ptr2, dim, dimsize, fft2);
|
|
//cufftSetStream(plan, cfft2);
|
|
//cufftExecD2Z(plan, (cufftDoubleReal*)real_ptr2, (cufftDoubleComplex*)comp_ptr2);
|
|
|
|
}
|
|
|
|
base.freeMemory<double>(real_ptr1, sizereal);
|
|
base.freeMemory<double>(real_ptr2, sizereal);
|
|
base.freeMemory< complex<double> >(comp_ptr1, sizereal);
|
|
base.freeMemory< complex<double> >(comp_ptr2, sizereal);
|
|
|
|
/* free pagelock data */
|
|
base.freeHostMemory(data1, sizereal);
|
|
base.freeHostMemory(data2, sizereal);
|
|
|
|
return 0;
|
|
|
|
}
|