Greens function calculation for OPAL rewriten with abstract base class

This commit is contained in:
Uldis Locans
2016-11-17 18:02:48 +01:00
parent 87cdf52f07
commit 63a008d111
8 changed files with 104 additions and 78 deletions

View File

@ -0,0 +1,29 @@
#ifndef H_GREENSFUNCTION
#define H_GREENSFUNCTION
#include <iostream>
#include <cmath>
class GreensFunction {
public:
virtual ~GreensFunction() { }
/** calc greens integral, as defined in OPAL */
virtual int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2, int streamId = -1) = 0;
/** integration if rho2_m, see OPAL for more details */
virtual int integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1) = 0;
/** mirror rho2_m field */
virtual int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1) = 0;
/** multiply two complex fields from device memory */
virtual int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1) = 0;
};
#endif

View File

@ -275,26 +275,19 @@ __global__ void mirroredRhoField(double *rho2_m,
double data = rho2_m[id1];
if (i != 0)
rho2_m[id2] = data;
if (i != 0) rho2_m[id2] = data;
if (j != 0)
rho2_m[id3] = data;
if (j != 0) rho2_m[id3] = data;
if (i != 0 && j != 0)
rho2_m[id4] = data;
if (i != 0 && j != 0) rho2_m[id4] = data;
if (k != 0)
rho2_m[id5] = data;
if (k != 0) rho2_m[id5] = data;
if (k != 0 && i != 0)
rho2_m[id6] = data;
if (k != 0 && i != 0) rho2_m[id6] = data;
if (k!= 0 && j != 0)
rho2_m[id7] = data;
if (k!= 0 && j != 0) rho2_m[id7] = data;
if (k != 0 && j != 0 & i != 0)
rho2_m[id8] = data;
if (k != 0 && j != 0 & i != 0) rho2_m[id8] = data;
}
@ -363,9 +356,9 @@ CudaGreensFunction::~CudaGreensFunction() {
delete m_base;
}
int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2,
int streamId)
int CudaGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2,
int streamId)
{
int thread = 128;
@ -373,7 +366,7 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i
//if no stream specified use default stream
if (streamId == -1) {
kernelTmpgreen_2<<< block, thread >>>((double*)tmpptr, hr_m0, hr_m1, hr_m2, I, J, K);
kernelTmpgreen_2<<< block, thread >>>((double*)tmpgreen, hr_m0, hr_m1, hr_m2, I, J, K);
return DKS_SUCCESS;
}
@ -381,7 +374,7 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i
if (streamId < m_base->cuda_numberOfStreams()) {
cudaStream_t cs = m_base->cuda_getStream(streamId);
kernelTmpgreen_2<<< block, thread, 0, cs>>>((double*)tmpptr, hr_m0, hr_m1, hr_m2, I, J, K);
kernelTmpgreen_2<<< block, thread, 0, cs>>>((double*)tmpgreen, hr_m0, hr_m1, hr_m2, I, J, K);
return DKS_SUCCESS;
}
@ -389,9 +382,9 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i
}
int CudaGreensFunction::cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen,
int I, int J, int K,
int streamId)
int CudaGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen,
int I, int J, int K,
int streamId)
{
int thread = 128;
@ -415,22 +408,22 @@ int CudaGreensFunction::cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgr
return DKS_ERROR;
}
int CudaGreensFunction::cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
int CudaGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId) {
int thread = 128;
int block = ( (I + 1) * (J + 1) * (K + 1) / thread) + 1;
if (streamId == -1) {
mirroredRhoField0<<< 1, 1>>>( (double *)mem_ptr, 2*I, 2*J);
mirroredRhoField<<< block, thread >>>( (double *) mem_ptr, 2*I, 2*J, 2*K, I + 1, J + 1, K + 1);
mirroredRhoField0<<< 1, 1>>>( (double *)rho2_m, 2*I, 2*J);
mirroredRhoField<<< block, thread >>>( (double *) rho2_m, 2*I, 2*J, 2*K, I + 1, J + 1, K + 1);
return DKS_SUCCESS;
}
if (streamId < m_base->cuda_numberOfStreams()) {
cudaStream_t cs = m_base->cuda_getStream(streamId);
mirroredRhoField0<<< 1, 1, 0, cs>>>( (double *)mem_ptr, 2*I, 2*J);
mirroredRhoField<<< block, thread, 0, cs>>>( (double *) mem_ptr, 2*I, 2*J, 2*K, I+1, J+1, K+1);
mirroredRhoField0<<< 1, 1, 0, cs>>>( (double *)rho2_m, 2*I, 2*J);
mirroredRhoField<<< block, thread, 0, cs>>>( (double *) rho2_m, 2*I, 2*J, 2*K, I+1, J+1, K+1);
return DKS_SUCCESS;
}
@ -440,13 +433,13 @@ int CudaGreensFunction::cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K,
return DKS_ERROR;
}
int CudaGreensFunction::cuda_MultiplyCompelxFields(void *ptr1, void *ptr2,
int size, int streamId) {
int CudaGreensFunction::multiplyCompelxFields(void *ptr1, void *ptr2,
int size, int streamId) {
int threads = 128;
int blocks = size / threads + 1;
int datasize = 2 * threads * sizeof(cuDoubleComplex);
if (streamId == -1) {
multiplyComplexFields_2<<<blocks, threads, datasize>>> ( (cuDoubleComplex*)ptr1,
(cuDoubleComplex*)ptr2,

View File

@ -2,17 +2,17 @@
#define H_CUDA_GREENSFUNCTION
#include <iostream>
#include <math.h>
#include <cmath>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuComplex.h>
#include "cublas_v2.h"
#include "../Algorithms/GreensFunction.h"
#include "CudaBase.cuh"
class CudaGreensFunction {
class CudaGreensFunction : public GreensFunction{
private:
@ -34,28 +34,28 @@ public:
Info: calc itegral on device memory (taken from OPAL src code)
Return: success or error code
*/
int cuda_GreensIntegral(void *tmpptr, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2,
int streamId = -1);
int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2,
int streamId = -1);
/**
Info: integration of rho2_m field (taken from OPAL src code)
Return: success or error code
*/
int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1);
int integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1);
/**
Info: mirror rho field (taken from OPAL src code)
Return: succes or error code
*/
int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1);
/**
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
Return: success or error code
*/
int cuda_MultiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);
int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);
};

View File

@ -611,11 +611,11 @@ int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int
double hz_m0, double hz_m1, double hz_m2, int streamId) {
if (apiCuda()) {
return CUDA_SAFECALL(cgreens->cuda_GreensIntegral(tmp_ptr, I, J, K, NI, NJ,
hz_m0, hz_m1, hz_m2, streamId) );
return CUDA_SAFECALL(cgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
hz_m0, hz_m1, hz_m2, streamId) );
} else if (apiOpenMP()) {
//BENI:
return MIC_SAFECALL(micgreens->mic_GreensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2));
return MIC_SAFECALL(micgreens->greensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2));
}
DEBUG_MSG("No implementation for selceted platform");
@ -626,9 +626,9 @@ int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
int I, int J, int K, int streamId) {
if (apiCuda())
return CUDA_SAFECALL(cgreens->cuda_IntegrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId));
return CUDA_SAFECALL(cgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId));
else if (apiOpenMP())
return MIC_SAFECALL(micgreens->mic_IntegrationGreensFunction(mem_ptr, tmp_ptr, I, J, K));
return MIC_SAFECALL(micgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K));
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
@ -637,9 +637,9 @@ int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
if (apiCuda())
return CUDA_SAFECALL(cgreens->cuda_MirrorRhoField(mem_ptr, I, J, K, streamId));
return CUDA_SAFECALL(cgreens->mirrorRhoField(mem_ptr, I, J, K, streamId));
else if (apiOpenMP())
return MIC_SAFECALL(micgreens->mic_MirrorRhoField(mem_ptr, I, J, K));
return MIC_SAFECALL(micgreens->mirrorRhoField(mem_ptr, I, J, K));
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;
@ -648,9 +648,9 @@ int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId
int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) {
if (apiCuda())
return CUDA_SAFECALL(cgreens->cuda_MultiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId));
return CUDA_SAFECALL(cgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId));
else if (apiOpenMP())
return MIC_SAFECALL(micgreens->mic_MultiplyCompelxFields(mem_ptr1, mem_ptr2, size));
return MIC_SAFECALL(micgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size));
DEBUG_MSG("No implementation for selceted platform");
return DKS_ERROR;

View File

@ -55,11 +55,11 @@ MICGreensFunction::~MICGreensFunction() {
}
*/
int MICGreensFunction::mic_GreensIntegral(void * tmp_ptr_, int I,int J, int K, double hr_m0,
double hr_m1, double hr_m2)
int MICGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2, int streamId)
{
double *tmp_ptr = (double*) tmp_ptr_;
double *tmp_ptr = (double*) tmpgreen;
#pragma offload target(mic:0) in(tmp_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I, J,K, hr_m0, hr_m1, hr_m2)
{
std::memset(tmp_ptr,0,I*J*K);
@ -173,12 +173,14 @@ return 0;
*/
//CUDA similar version:
int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K) {
double *tmpgreen = (double*) tmp_ptr_;
double *mem_ptr = (double*) mem_ptr_;
int MICGreensFunction::integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K,
int streamId)
{
double *tmpgreen_ptr = (double*) tmpgreen;
double *mem_ptr = (double*) rho2_m;
// the actual integration
#pragma offload target(mic:0) in(tmpgreen:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
#pragma offload target(mic:0) in(tmpgreen_ptr:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
{
int II = 2*(I-1); int JJ=2*(J-1); int KK=2*(K-1);
std::memset(mem_ptr,0,II*JJ*KK);
@ -197,27 +199,27 @@ int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp
tmp4 = 0; tmp5 = 0; tmp6 = 0; tmp7 = 0;
if (i+1 < NI_tmp && j+1 < NJ_tmp && k+1 < NK_tmp)
tmp0 = tmpgreen[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
tmp0 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
if (i+1 < NI_tmp)
tmp1 = tmpgreen[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp];
tmp1 = tmpgreen_ptr[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp];
if (j+1 < NJ_tmp)
tmp2 = tmpgreen[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
tmp2 = tmpgreen_ptr[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
if (k+1 < NK_tmp)
tmp3 = tmpgreen[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
tmp3 = tmpgreen_ptr[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
if (i+1 < NI_tmp && j+1 < NJ_tmp)
tmp4 = tmpgreen[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
tmp4 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
if (i+1 < NI_tmp && k+1 < NK_tmp)
tmp5 = tmpgreen[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
tmp5 = tmpgreen_ptr[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
if (j+1 < NJ_tmp && k+1 < NK_tmp)
tmp6 = tmpgreen[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
tmp6 = tmpgreen_ptr[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
tmp7 = tmpgreen[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];
tmp7 = tmpgreen_ptr[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];
double tmp_rho = tmp0 + tmp1 + tmp2 + tmp3 - tmp4 - tmp5 - tmp6 - tmp7;
@ -234,8 +236,8 @@ int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp
int MICGreensFunction::mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K) {
double *mem_ptr = (double*) mem_ptr_;
int MICGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId) {
double *mem_ptr = (double*) rho2_m;
#pragma offload target(mic:0) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
{
@ -281,11 +283,11 @@ int MICGreensFunction::mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K)
}
/*multiply complex fields*/
int MICGreensFunction::mic_MultiplyCompelxFields(void * mem_ptr1_, void * mem_ptr2_, int size) {
int MICGreensFunction::multiplyCompelxFields(void * ptr1, void * ptr2, int size) {
// double *mem_ptr1 = (double*) mem_ptr1_;
// double *mem_ptr2 = (double*) mem_ptr2_;
_Complex double *mem_ptr1 = (_Complex double *) mem_ptr1_;
_Complex double *mem_ptr2 = (_Complex double *) mem_ptr2_;
_Complex double *mem_ptr1 = (_Complex double *) ptr1;
_Complex double *mem_ptr2 = (_Complex double *) ptr2;
#pragma offload target(mic:0) in(mem_ptr1:length(0) DKS_RETAIN DKS_REUSE) in (mem_ptr2:length(0) DKS_RETAIN DKS_REUSE) in(size)
{

View File

@ -9,12 +9,13 @@
#include <offload.h>
#include <mkl_dfti.h>
#include "../Algorithms/GreensFunction.h"
#include "MICBase.h"
#define DKS_SUCCESS 0
#define DKS_ERROR 1
class MICGreensFunction {
class MICGreensFunction : public GreensFunction {
private:
MICBase *m_micbase;
@ -28,16 +29,18 @@ public:
~MICGreensFunction();
/* compute greens integral analytically */
int mic_GreensIntegral(void * tmp_ptr_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2);
int greensIntegral(void * tmpgreen_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2,
int streamId = -1);
/* perform the actual integration */
int mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K);
int integrationGreensFunction(void * rho2_m, void * tmpgreen,int I,int J, int K,
int stremaId = -1);
/* Mirror rho-Field */
int mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K);
int mirrorRhoField(void * rho2_m, int I, int J, int K, int streamId = -1);
/*multiply complex fields*/
int mic_MultiplyCompelxFields(void * mem_ptr1_, void * mem_ptr2_, int size);
int multiplyCompelxFields(void * ptr1, void * ptr2, int size, int streamId = -1);
};

View File

@ -613,12 +613,12 @@ int OpenCLBase::ocl_loadKernel(const char * kernel_file) {
}
}
if (ierr != OCL_SUCCESS) {
if (ierr != DKS_SUCCESS) {
DEBUG_MSG("Failed to build kernel file " << kernel_file);
return OCL_ERROR;
return DKS_ERROR;
}
return OCL_SUCCESS;
return DKS_SUCCESS;
}
//compile kernel form source code provided

View File

@ -31,7 +31,6 @@ int OpenCLFFT::ocl_callFFTKernel(cl_mem &data, int cdim, int ndim, int N, bool f
if (m_oclbase->ocl_setKernelArg(3, sizeof(int), &f) != OCL_SUCCESS)
return OCL_ERROR;
//execute kernel
for (int step = 1; step < N; step <<= 1) {