#include #include #include #include "Utility/TimeStamp.h" #include "DKSBase.h" using namespace std; void initData(complex *d, int N, int dim) { int size = N; if (dim == 2) size = N*N; if (dim == 3) size = N*N*N; for (int i = 0; i < size; i++) d[i] = complex(i, 0); } void printData(complex *d, int N, int dim) { int NZ = N; int NY = (dim > 1) ? N : 1; int NX = (dim > 2) ? N : 1; for (int i = 0; i < NX; i++) { for (int j = 0; j < NY; j++) { for (int k = 0; k < NZ; k++) { std::cout << d[i*N*N + j*N + k].real() << "\t"; } std::cout << std::endl; } std::cout << std::endl; } std::cout << std::endl; } int main(int argc, char *argv[]) { int N = (argc > 1) ? atoi(argv[1]) : 4; int dimN[3] = {N, N, 1}; int dim = 2; int ndim = 1; int size = dimN[0] * dimN[1] * dimN[2]; std::complex *hd_in = new std::complex[size]; std::complex *hd_out = new std::complex[size]; initData(hd_in, N, dim); printData(hd_in, N, dim); DKSBase base; base.setAPI("OpenCL", 6); base.setDevice("-gpu", 4); base.initDevice(); int ierr; void *mem_ptr; mem_ptr = base.allocateMemory< std::complex >(size, ierr); base.writeData< std::complex >(mem_ptr, hd_in, size); base.callTranspose(mem_ptr, dimN, dim, ndim); base.readData< std::complex >(mem_ptr, hd_out, size); base.freeMemory< std::complex >(mem_ptr, size); printData(hd_out, N, 2); delete[] hd_in; delete[] hd_out; return 0; }