snapshot of svn
This commit is contained in:
117
test/testFFTAsync.cpp
Normal file
117
test/testFFTAsync.cpp
Normal file
@ -0,0 +1,117 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <complex>
|
||||
|
||||
#include <cufft.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "Utility/TimeStamp.h"
|
||||
#include "DKSBase.h"
|
||||
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Fill a 3D grid, stored as one contiguous array, with a ramp along axis 0:
 * every element receives its own index along dimsize[0].
 *
 * data    - output buffer; must hold at least
 *           dimsize[0] * dimsize[1] * dimsize[2] doubles
 * dimsize - grid extents; dimsize[0] is the fastest-varying axis in storage,
 *           dimsize[2] the slowest
 */
void initData(double *data, int dimsize[3]) {
  const int nFast = dimsize[0];
  const int nMid = dimsize[1];
  const int nSlow = dimsize[2];

  /* The nested loops visit the elements in storage order, so a running
     offset equals slow*nMid*nFast + mid*nFast + fast at every step. */
  int offset = 0;
  for (int slow = 0; slow < nSlow; ++slow) {
    for (int mid = 0; mid < nMid; ++mid) {
      for (int fast = 0; fast < nFast; ++fast) {
        data[offset++] = fast;
      }
    }
  }
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
int N = 8;
|
||||
if (argc == 2)
|
||||
N = atoi(argv[1]);
|
||||
|
||||
int N1 = N;
|
||||
int N2 = N;
|
||||
int N3 = N;
|
||||
int dim = 3;
|
||||
|
||||
int dimsize[3] = {N3, N2, N1};
|
||||
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
|
||||
int sizecomp = dimsize[0] * dimsize[1] * (dimsize[2]/2+1);
|
||||
|
||||
double *data1 = new double[sizereal];
|
||||
double *data2 = new double[sizereal];
|
||||
|
||||
initData(data1, dimsize);
|
||||
initData(data2, dimsize);
|
||||
|
||||
/* init DKSBase */
|
||||
cout << "Init device and set function" << endl;
|
||||
|
||||
DKSBase base;
|
||||
base.setAPI("Cuda", 4);
|
||||
base.setDevice("-gpu", 4);
|
||||
base.initDevice();
|
||||
base.setupFFT(3, dimsize);
|
||||
|
||||
/* pagelock data */
|
||||
base.allocateHostMemory(data1, sizereal);
|
||||
base.allocateHostMemory(data2, sizereal);
|
||||
|
||||
/* create streams */
|
||||
int fft1, fft2;
|
||||
base.createStream(fft1);
|
||||
base.createStream(fft2);
|
||||
|
||||
int ierr;
|
||||
void *real_ptr1, *real_ptr2, *comp_ptr1, *comp_ptr2;
|
||||
|
||||
cout << "allocating memory ..." << endl;
|
||||
/* allocate memory on device */;
|
||||
real_ptr1 = base.allocateMemory<double>(sizereal, ierr);
|
||||
real_ptr2 = base.allocateMemory<double>(sizereal, ierr);
|
||||
comp_ptr1 = base.allocateMemory< complex<double> >(sizecomp*2, ierr);
|
||||
comp_ptr2 = base.allocateMemory< complex<double> >(sizecomp*2, ierr);
|
||||
|
||||
cufftHandle defaultPlan;
|
||||
cudaStream_t cfft1, cfft2;
|
||||
cufftPlan3d(&defaultPlan, N1, N2, N3, CUFFT_D2Z);
|
||||
cudaStreamCreate(&cfft1);
|
||||
cudaStreamCreate(&cfft2);
|
||||
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
|
||||
cufftHandle plan = defaultPlan;
|
||||
|
||||
cout << "Iteration: " << i << endl;
|
||||
/* write data to device */
|
||||
base.writeDataAsync<double>(real_ptr1, data1, sizereal, fft1);
|
||||
//cudaMemcpyAsync( (double*)real_ptr1,data1,sizeof(double)*sizereal,cudaMemcpyHostToDevice,cfft1);
|
||||
|
||||
/* execute rcfft */
|
||||
base.callR2CFFT(real_ptr1, comp_ptr1, dim, dimsize, fft1);
|
||||
//cufftSetStream(plan, cfft1);
|
||||
//cufftExecD2Z(plan, (cufftDoubleReal*)real_ptr1, (cufftDoubleComplex*)comp_ptr2);
|
||||
|
||||
/* write data to device */
|
||||
base.writeDataAsync<double>(real_ptr2, data2, sizereal, fft2);
|
||||
//cudaMemcpyAsync( (double*)real_ptr2,data2,sizeof(double)*sizereal,cudaMemcpyHostToDevice,cfft2);
|
||||
|
||||
/* execute rcfft */
|
||||
base.callR2CFFT(real_ptr2, comp_ptr2, dim, dimsize, fft2);
|
||||
//cufftSetStream(plan, cfft2);
|
||||
//cufftExecD2Z(plan, (cufftDoubleReal*)real_ptr2, (cufftDoubleComplex*)comp_ptr2);
|
||||
|
||||
}
|
||||
|
||||
base.freeMemory<double>(real_ptr1, sizereal);
|
||||
base.freeMemory<double>(real_ptr2, sizereal);
|
||||
base.freeMemory< complex<double> >(comp_ptr1, sizereal);
|
||||
base.freeMemory< complex<double> >(comp_ptr2, sizereal);
|
||||
|
||||
/* free pagelock data */
|
||||
base.freeHostMemory(data1, sizereal);
|
||||
base.freeHostMemory(data2, sizereal);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
Reference in New Issue
Block a user