diff --git a/src/Algorithms/GreensFunction.h b/src/Algorithms/GreensFunction.h new file mode 100644 index 0000000..45674ec --- /dev/null +++ b/src/Algorithms/GreensFunction.h @@ -0,0 +1,29 @@ +#ifndef H_GREENSFUNCTION +#define H_GREENSFUNCTION + +#include +#include + +class GreensFunction { + +public: + + virtual ~GreensFunction() { } + + /** calc greens integral, as defined in OPAL */ + virtual int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ, + double hr_m0, double hr_m1, double hr_m2, int streamId = -1) = 0; + + /** integration if rho2_m, see OPAL for more details */ + virtual int integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K, + int streamId = -1) = 0; + + /** mirror rho2_m field */ + virtual int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1) = 0; + + /** multiply two complex fields from device memory */ + virtual int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1) = 0; + +}; + +#endif diff --git a/src/CUDA/CudaGreensFunction.cu b/src/CUDA/CudaGreensFunction.cu index 140954b..36e7463 100644 --- a/src/CUDA/CudaGreensFunction.cu +++ b/src/CUDA/CudaGreensFunction.cu @@ -275,26 +275,19 @@ __global__ void mirroredRhoField(double *rho2_m, double data = rho2_m[id1]; - if (i != 0) - rho2_m[id2] = data; + if (i != 0) rho2_m[id2] = data; - if (j != 0) - rho2_m[id3] = data; + if (j != 0) rho2_m[id3] = data; - if (i != 0 && j != 0) - rho2_m[id4] = data; + if (i != 0 && j != 0) rho2_m[id4] = data; - if (k != 0) - rho2_m[id5] = data; + if (k != 0) rho2_m[id5] = data; - if (k != 0 && i != 0) - rho2_m[id6] = data; + if (k != 0 && i != 0) rho2_m[id6] = data; - if (k!= 0 && j != 0) - rho2_m[id7] = data; + if (k!= 0 && j != 0) rho2_m[id7] = data; - if (k != 0 && j != 0 & i != 0) - rho2_m[id8] = data; + if (k != 0 && j != 0 & i != 0) rho2_m[id8] = data; } @@ -363,9 +356,9 @@ CudaGreensFunction::~CudaGreensFunction() { delete m_base; } -int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, int NI, int NJ, - double hr_m0, double hr_m1, double hr_m2, - int streamId) +int CudaGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ, + double hr_m0, double hr_m1, double hr_m2, + int streamId) { int thread = 128; @@ -373,7 +366,7 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i //if no stream specified use default stream if (streamId == -1) { - kernelTmpgreen_2<<< block, thread >>>((double*)tmpptr, hr_m0, hr_m1, hr_m2, I, J, K); + kernelTmpgreen_2<<< block, thread >>>((double*)tmpgreen, hr_m0, hr_m1, hr_m2, I, J, K); return DKS_SUCCESS; } @@ -381,7 +374,7 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i if (streamId < m_base->cuda_numberOfStreams()) { cudaStream_t cs = m_base->cuda_getStream(streamId); - kernelTmpgreen_2<<< block, thread, 0, cs>>>((double*)tmpptr, hr_m0, hr_m1, hr_m2, I, J, K); + kernelTmpgreen_2<<< block, thread, 0, cs>>>((double*)tmpgreen, hr_m0, hr_m1, hr_m2, I, J, K); return DKS_SUCCESS; } @@ -389,9 +382,9 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i } -int CudaGreensFunction::cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, - int I, int J, int K, - int streamId) +int CudaGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen, + int I, int J, int K, + int streamId) { int thread = 128; @@ -415,22 +408,22 @@ int CudaGreensFunction::cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgr return DKS_ERROR; } -int CudaGreensFunction::cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) { +int CudaGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId) { int thread = 128; int block = ( (I + 1) * (J + 1) * (K + 1) / thread) + 1; if (streamId == -1) { - mirroredRhoField0<<< 1, 1>>>( (double *)mem_ptr, 2*I, 2*J); - mirroredRhoField<<< block, thread >>>( (double *) mem_ptr, 2*I, 2*J, 2*K, I + 1, J + 1, K + 1); + mirroredRhoField0<<< 1, 1>>>( (double *)rho2_m, 2*I, 2*J); + mirroredRhoField<<< block, thread >>>( (double *) rho2_m, 2*I, 2*J, 2*K, I + 1, J + 1, K + 1); return DKS_SUCCESS; } if (streamId < m_base->cuda_numberOfStreams()) { cudaStream_t cs = m_base->cuda_getStream(streamId); - mirroredRhoField0<<< 1, 1, 0, cs>>>( (double *)mem_ptr, 2*I, 2*J); - mirroredRhoField<<< block, thread, 0, cs>>>( (double *) mem_ptr, 2*I, 2*J, 2*K, I+1, J+1, K+1); + mirroredRhoField0<<< 1, 1, 0, cs>>>( (double *)rho2_m, 2*I, 2*J); + mirroredRhoField<<< block, thread, 0, cs>>>( (double *) rho2_m, 2*I, 2*J, 2*K, I+1, J+1, K+1); return DKS_SUCCESS; } @@ -440,13 +433,13 @@ int CudaGreensFunction::cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, return DKS_ERROR; } -int CudaGreensFunction::cuda_MultiplyCompelxFields(void *ptr1, void *ptr2, - int size, int streamId) { +int CudaGreensFunction::multiplyCompelxFields(void *ptr1, void *ptr2, + int size, int streamId) { int threads = 128; int blocks = size / threads + 1; int datasize = 2 * threads * sizeof(cuDoubleComplex); - + if (streamId == -1) { multiplyComplexFields_2<<>> ( (cuDoubleComplex*)ptr1, (cuDoubleComplex*)ptr2, diff --git a/src/CUDA/CudaGreensFunction.cuh b/src/CUDA/CudaGreensFunction.cuh index ea8d8ce..69f29de 100644 --- a/src/CUDA/CudaGreensFunction.cuh +++ b/src/CUDA/CudaGreensFunction.cuh @@ -2,17 +2,17 @@ #define H_CUDA_GREENSFUNCTION #include -#include +#include #include #include #include #include "cublas_v2.h" - +#include "../Algorithms/GreensFunction.h" #include "CudaBase.cuh" -class CudaGreensFunction { +class CudaGreensFunction : public GreensFunction{ private: @@ -34,28 +34,28 @@ public: Info: calc itegral on device memory (taken from OPAL src code) Return: success or error code */ - int cuda_GreensIntegral(void *tmpptr, int I, int J, int K, int NI, int NJ, - double hr_m0, double hr_m1, double hr_m2, - int streamId = -1); + int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ, + double hr_m0, double hr_m1, double hr_m2, + int streamId = -1); /** Info: integration of rho2_m field (taken from OPAL src code) Return: success or error code */ - int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K, - int streamId = -1); + int integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K, + int streamId = -1); /** Info: mirror rho field (taken from OPAL src code) Return: succes or error code */ - int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1); + int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1); /** Info: multiply complex fields already on the GPU memory, result will be put in ptr1 Return: success or error code */ - int cuda_MultiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1); + int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1); }; diff --git a/src/DKSBase.cpp b/src/DKSBase.cpp index 7194971..290cb38 100644 --- a/src/DKSBase.cpp +++ b/src/DKSBase.cpp @@ -611,11 +611,11 @@ int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int double hz_m0, double hz_m1, double hz_m2, int streamId) { if (apiCuda()) { - return CUDA_SAFECALL(cgreens->cuda_GreensIntegral(tmp_ptr, I, J, K, NI, NJ, - hz_m0, hz_m1, hz_m2, streamId) ); + return CUDA_SAFECALL(cgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ, + hz_m0, hz_m1, hz_m2, streamId) ); } else if (apiOpenMP()) { //BENI: - return MIC_SAFECALL(micgreens->mic_GreensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2)); + return MIC_SAFECALL(micgreens->greensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2)); } DEBUG_MSG("No implementation for selceted platform"); @@ -626,9 +626,9 @@ int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr, int I, int J, int K, int streamId) { if (apiCuda()) - return CUDA_SAFECALL(cgreens->cuda_IntegrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId)); + return CUDA_SAFECALL(cgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId)); else if (apiOpenMP()) - return MIC_SAFECALL(micgreens->mic_IntegrationGreensFunction(mem_ptr, tmp_ptr, I, J, K)); + return MIC_SAFECALL(micgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K)); DEBUG_MSG("No implementation for selceted platform"); return DKS_ERROR; @@ -637,9 +637,9 @@ int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr, int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) { if (apiCuda()) - return CUDA_SAFECALL(cgreens->cuda_MirrorRhoField(mem_ptr, I, J, K, streamId)); + return CUDA_SAFECALL(cgreens->mirrorRhoField(mem_ptr, I, J, K, streamId)); else if (apiOpenMP()) - return MIC_SAFECALL(micgreens->mic_MirrorRhoField(mem_ptr, I, J, K)); + return MIC_SAFECALL(micgreens->mirrorRhoField(mem_ptr, I, J, K)); DEBUG_MSG("No implementation for selceted platform"); return DKS_ERROR; @@ -648,9 +648,9 @@ int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) { if (apiCuda()) - return CUDA_SAFECALL(cgreens->cuda_MultiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId)); + return CUDA_SAFECALL(cgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId)); else if (apiOpenMP()) - return MIC_SAFECALL(micgreens->mic_MultiplyCompelxFields(mem_ptr1, mem_ptr2, size)); + return MIC_SAFECALL(micgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size)); DEBUG_MSG("No implementation for selceted platform"); return DKS_ERROR; diff --git a/src/MIC/MICGreensFunction.cpp b/src/MIC/MICGreensFunction.cpp index 6725a1e..33cb72a 100644 --- a/src/MIC/MICGreensFunction.cpp +++ b/src/MIC/MICGreensFunction.cpp @@ -55,11 +55,11 @@ MICGreensFunction::~MICGreensFunction() { } */ -int MICGreensFunction::mic_GreensIntegral(void * tmp_ptr_, int I,int J, int K, double hr_m0, - double hr_m1, double hr_m2) +int MICGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ, + double hr_m0, double hr_m1, double hr_m2, int streamId) { - double *tmp_ptr = (double*) tmp_ptr_; + double *tmp_ptr = (double*) tmpgreen; #pragma offload target(mic:0) in(tmp_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I, J,K, hr_m0, hr_m1, hr_m2) { std::memset(tmp_ptr,0,I*J*K); @@ -173,12 +173,14 @@ return 0; */ //CUDA similar version: -int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K) { - double *tmpgreen = (double*) tmp_ptr_; - double *mem_ptr = (double*) mem_ptr_; +int MICGreensFunction::integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K, + int streamId) +{ + double *tmpgreen_ptr = (double*) tmpgreen; + double *mem_ptr = (double*) rho2_m; // the actual integration -#pragma offload target(mic:0) in(tmpgreen:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K) +#pragma offload target(mic:0) in(tmpgreen_ptr:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K) { int II = 2*(I-1); int JJ=2*(J-1); int KK=2*(K-1); std::memset(mem_ptr,0,II*JJ*KK); @@ -197,27 +199,27 @@ int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp tmp4 = 0; tmp5 = 0; tmp6 = 0; tmp7 = 0; if (i+1 < NI_tmp && j+1 < NJ_tmp && k+1 < NK_tmp) - tmp0 = tmpgreen[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; + tmp0 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; if (i+1 < NI_tmp) - tmp1 = tmpgreen[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp]; + tmp1 = tmpgreen_ptr[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp]; if (j+1 < NJ_tmp) - tmp2 = tmpgreen[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp]; + tmp2 = tmpgreen_ptr[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp]; if (k+1 < NK_tmp) - tmp3 = tmpgreen[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; + tmp3 = tmpgreen_ptr[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; if (i+1 < NI_tmp && j+1 < NJ_tmp) - tmp4 = tmpgreen[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp]; + tmp4 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp]; if (i+1 < NI_tmp && k+1 < NK_tmp) - tmp5 = tmpgreen[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; + tmp5 = tmpgreen_ptr[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; if (j+1 < NJ_tmp && k+1 < NK_tmp) - tmp6 = tmpgreen[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; + tmp6 = tmpgreen_ptr[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp]; - tmp7 = tmpgreen[ i + j * NI_tmp + k * NI_tmp * NJ_tmp]; + tmp7 = tmpgreen_ptr[ i + j * NI_tmp + k * NI_tmp * NJ_tmp]; double tmp_rho = tmp0 + tmp1 + tmp2 + tmp3 - tmp4 - tmp5 - tmp6 - tmp7; @@ -234,8 +236,8 @@ int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp -int MICGreensFunction::mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K) { - double *mem_ptr = (double*) mem_ptr_; +int MICGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId) { + double *mem_ptr = (double*) rho2_m; #pragma offload target(mic:0) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K) { @@ -281,11 +283,11 @@ int MICGreensFunction::mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K) } /*multiply complex fields*/ -int MICGreensFunction::mic_MultiplyCompelxFields(void * mem_ptr1_, void * mem_ptr2_, int size) { +int MICGreensFunction::multiplyCompelxFields(void * ptr1, void * ptr2, int size) { // double *mem_ptr1 = (double*) mem_ptr1_; // double *mem_ptr2 = (double*) mem_ptr2_; - _Complex double *mem_ptr1 = (_Complex double *) mem_ptr1_; - _Complex double *mem_ptr2 = (_Complex double *) mem_ptr2_; + _Complex double *mem_ptr1 = (_Complex double *) ptr1; + _Complex double *mem_ptr2 = (_Complex double *) ptr2; #pragma offload target(mic:0) in(mem_ptr1:length(0) DKS_RETAIN DKS_REUSE) in (mem_ptr2:length(0) DKS_RETAIN DKS_REUSE) in(size) { diff --git a/src/MIC/MICGreensFunction.hpp b/src/MIC/MICGreensFunction.hpp index 0b83d56..dc2641c 100644 --- a/src/MIC/MICGreensFunction.hpp +++ b/src/MIC/MICGreensFunction.hpp @@ -9,12 +9,13 @@ #include #include +#include "../Algorithms/GreensFunction.h" #include "MICBase.h" #define DKS_SUCCESS 0 #define DKS_ERROR 1 -class MICGreensFunction { +class MICGreensFunction : public GreensFunction { private: MICBase *m_micbase; @@ -28,16 +29,18 @@ public: ~MICGreensFunction(); /* compute greens integral analytically */ - int mic_GreensIntegral(void * tmp_ptr_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2); + int greensIntegral(void * tmpgreen_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2, + int streamId = -1); /* perform the actual integration */ - int mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K); + int integrationGreensFunction(void * rho2_m, void * tmpgreen,int I,int J, int K, + int stremaId = -1); /* Mirror rho-Field */ - int mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K); + int mirrorRhoField(void * rho2_m, int I, int J, int K, int streamId = -1); /*multiply complex fields*/ - int mic_MultiplyCompelxFields(void * mem_ptr1_, void * mem_ptr2_, int size); + int multiplyCompelxFields(void * ptr1, void * ptr2, int size, int streamId = -1); }; diff --git a/src/OpenCL/OpenCLBase.cpp b/src/OpenCL/OpenCLBase.cpp index b40fd64..dcb46ab 100644 --- a/src/OpenCL/OpenCLBase.cpp +++ b/src/OpenCL/OpenCLBase.cpp @@ -613,12 +613,12 @@ int OpenCLBase::ocl_loadKernel(const char * kernel_file) { } } - if (ierr != OCL_SUCCESS) { + if (ierr != DKS_SUCCESS) { DEBUG_MSG("Failed to build kernel file " << kernel_file); - return OCL_ERROR; + return DKS_ERROR; } - return OCL_SUCCESS; + return DKS_SUCCESS; } //compile kernel form source code provided diff --git a/src/OpenCL/OpenCLFFT.cpp b/src/OpenCL/OpenCLFFT.cpp index 66ab3fe..9e986d1 100644 --- a/src/OpenCL/OpenCLFFT.cpp +++ b/src/OpenCL/OpenCLFFT.cpp @@ -31,7 +31,6 @@ int OpenCLFFT::ocl_callFFTKernel(cl_mem &data, int cdim, int ndim, int N, bool f if (m_oclbase->ocl_setKernelArg(3, sizeof(int), &f) != OCL_SUCCESS) return OCL_ERROR; - //execute kernel for (int step = 1; step < N; step <<= 1) {