Greens function calculation for OPAL rewriten with abstract base class
This commit is contained in:
29
src/Algorithms/GreensFunction.h
Normal file
29
src/Algorithms/GreensFunction.h
Normal file
@ -0,0 +1,29 @@
|
||||
#ifndef H_GREENSFUNCTION
|
||||
#define H_GREENSFUNCTION
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
class GreensFunction {
|
||||
|
||||
public:
|
||||
|
||||
virtual ~GreensFunction() { }
|
||||
|
||||
/** calc greens integral, as defined in OPAL */
|
||||
virtual int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
|
||||
double hr_m0, double hr_m1, double hr_m2, int streamId = -1) = 0;
|
||||
|
||||
/** integration if rho2_m, see OPAL for more details */
|
||||
virtual int integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int streamId = -1) = 0;
|
||||
|
||||
/** mirror rho2_m field */
|
||||
virtual int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1) = 0;
|
||||
|
||||
/** multiply two complex fields from device memory */
|
||||
virtual int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1) = 0;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -275,26 +275,19 @@ __global__ void mirroredRhoField(double *rho2_m,
|
||||
|
||||
|
||||
double data = rho2_m[id1];
|
||||
if (i != 0)
|
||||
rho2_m[id2] = data;
|
||||
if (i != 0) rho2_m[id2] = data;
|
||||
|
||||
if (j != 0)
|
||||
rho2_m[id3] = data;
|
||||
if (j != 0) rho2_m[id3] = data;
|
||||
|
||||
if (i != 0 && j != 0)
|
||||
rho2_m[id4] = data;
|
||||
if (i != 0 && j != 0) rho2_m[id4] = data;
|
||||
|
||||
if (k != 0)
|
||||
rho2_m[id5] = data;
|
||||
if (k != 0) rho2_m[id5] = data;
|
||||
|
||||
if (k != 0 && i != 0)
|
||||
rho2_m[id6] = data;
|
||||
if (k != 0 && i != 0) rho2_m[id6] = data;
|
||||
|
||||
if (k!= 0 && j != 0)
|
||||
rho2_m[id7] = data;
|
||||
if (k!= 0 && j != 0) rho2_m[id7] = data;
|
||||
|
||||
if (k != 0 && j != 0 & i != 0)
|
||||
rho2_m[id8] = data;
|
||||
if (k != 0 && j != 0 & i != 0) rho2_m[id8] = data;
|
||||
|
||||
}
|
||||
|
||||
@ -363,7 +356,7 @@ CudaGreensFunction::~CudaGreensFunction() {
|
||||
delete m_base;
|
||||
}
|
||||
|
||||
int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, int NI, int NJ,
|
||||
int CudaGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
|
||||
double hr_m0, double hr_m1, double hr_m2,
|
||||
int streamId)
|
||||
{
|
||||
@ -373,7 +366,7 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i
|
||||
|
||||
//if no stream specified use default stream
|
||||
if (streamId == -1) {
|
||||
kernelTmpgreen_2<<< block, thread >>>((double*)tmpptr, hr_m0, hr_m1, hr_m2, I, J, K);
|
||||
kernelTmpgreen_2<<< block, thread >>>((double*)tmpgreen, hr_m0, hr_m1, hr_m2, I, J, K);
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
@ -381,7 +374,7 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i
|
||||
|
||||
if (streamId < m_base->cuda_numberOfStreams()) {
|
||||
cudaStream_t cs = m_base->cuda_getStream(streamId);
|
||||
kernelTmpgreen_2<<< block, thread, 0, cs>>>((double*)tmpptr, hr_m0, hr_m1, hr_m2, I, J, K);
|
||||
kernelTmpgreen_2<<< block, thread, 0, cs>>>((double*)tmpgreen, hr_m0, hr_m1, hr_m2, I, J, K);
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
@ -389,7 +382,7 @@ int CudaGreensFunction::cuda_GreensIntegral(void *tmpptr, int I, int J, int K, i
|
||||
|
||||
}
|
||||
|
||||
int CudaGreensFunction::cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen,
|
||||
int CudaGreensFunction::integrationGreensFunction(void *rho2_m, void *tmpgreen,
|
||||
int I, int J, int K,
|
||||
int streamId)
|
||||
{
|
||||
@ -415,22 +408,22 @@ int CudaGreensFunction::cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgr
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
int CudaGreensFunction::cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
|
||||
int CudaGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId) {
|
||||
|
||||
int thread = 128;
|
||||
int block = ( (I + 1) * (J + 1) * (K + 1) / thread) + 1;
|
||||
|
||||
if (streamId == -1) {
|
||||
mirroredRhoField0<<< 1, 1>>>( (double *)mem_ptr, 2*I, 2*J);
|
||||
mirroredRhoField<<< block, thread >>>( (double *) mem_ptr, 2*I, 2*J, 2*K, I + 1, J + 1, K + 1);
|
||||
mirroredRhoField0<<< 1, 1>>>( (double *)rho2_m, 2*I, 2*J);
|
||||
mirroredRhoField<<< block, thread >>>( (double *) rho2_m, 2*I, 2*J, 2*K, I + 1, J + 1, K + 1);
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
if (streamId < m_base->cuda_numberOfStreams()) {
|
||||
cudaStream_t cs = m_base->cuda_getStream(streamId);
|
||||
mirroredRhoField0<<< 1, 1, 0, cs>>>( (double *)mem_ptr, 2*I, 2*J);
|
||||
mirroredRhoField<<< block, thread, 0, cs>>>( (double *) mem_ptr, 2*I, 2*J, 2*K, I+1, J+1, K+1);
|
||||
mirroredRhoField0<<< 1, 1, 0, cs>>>( (double *)rho2_m, 2*I, 2*J);
|
||||
mirroredRhoField<<< block, thread, 0, cs>>>( (double *) rho2_m, 2*I, 2*J, 2*K, I+1, J+1, K+1);
|
||||
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
@ -440,7 +433,7 @@ int CudaGreensFunction::cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K,
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
int CudaGreensFunction::cuda_MultiplyCompelxFields(void *ptr1, void *ptr2,
|
||||
int CudaGreensFunction::multiplyCompelxFields(void *ptr1, void *ptr2,
|
||||
int size, int streamId) {
|
||||
|
||||
int threads = 128;
|
||||
|
@ -2,17 +2,17 @@
|
||||
#define H_CUDA_GREENSFUNCTION
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <cmath>
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuComplex.h>
|
||||
#include "cublas_v2.h"
|
||||
|
||||
|
||||
#include "../Algorithms/GreensFunction.h"
|
||||
#include "CudaBase.cuh"
|
||||
|
||||
class CudaGreensFunction {
|
||||
class CudaGreensFunction : public GreensFunction{
|
||||
|
||||
private:
|
||||
|
||||
@ -34,7 +34,7 @@ public:
|
||||
Info: calc itegral on device memory (taken from OPAL src code)
|
||||
Return: success or error code
|
||||
*/
|
||||
int cuda_GreensIntegral(void *tmpptr, int I, int J, int K, int NI, int NJ,
|
||||
int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
|
||||
double hr_m0, double hr_m1, double hr_m2,
|
||||
int streamId = -1);
|
||||
|
||||
@ -42,20 +42,20 @@ public:
|
||||
Info: integration of rho2_m field (taken from OPAL src code)
|
||||
Return: success or error code
|
||||
*/
|
||||
int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int integrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: mirror rho field (taken from OPAL src code)
|
||||
Return: succes or error code
|
||||
*/
|
||||
int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
|
||||
int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1);
|
||||
|
||||
/**
|
||||
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
|
||||
Return: success or error code
|
||||
*/
|
||||
int cuda_MultiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);
|
||||
int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1);
|
||||
|
||||
|
||||
};
|
||||
|
@ -611,11 +611,11 @@ int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int
|
||||
double hz_m0, double hz_m1, double hz_m2, int streamId) {
|
||||
|
||||
if (apiCuda()) {
|
||||
return CUDA_SAFECALL(cgreens->cuda_GreensIntegral(tmp_ptr, I, J, K, NI, NJ,
|
||||
return CUDA_SAFECALL(cgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
|
||||
hz_m0, hz_m1, hz_m2, streamId) );
|
||||
} else if (apiOpenMP()) {
|
||||
//BENI:
|
||||
return MIC_SAFECALL(micgreens->mic_GreensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2));
|
||||
return MIC_SAFECALL(micgreens->greensIntegral(tmp_ptr, I, J, K, hz_m0, hz_m1, hz_m2));
|
||||
}
|
||||
|
||||
DEBUG_MSG("No implementation for selceted platform");
|
||||
@ -626,9 +626,9 @@ int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
|
||||
int I, int J, int K, int streamId) {
|
||||
|
||||
if (apiCuda())
|
||||
return CUDA_SAFECALL(cgreens->cuda_IntegrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId));
|
||||
return CUDA_SAFECALL(cgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId));
|
||||
else if (apiOpenMP())
|
||||
return MIC_SAFECALL(micgreens->mic_IntegrationGreensFunction(mem_ptr, tmp_ptr, I, J, K));
|
||||
return MIC_SAFECALL(micgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K));
|
||||
|
||||
DEBUG_MSG("No implementation for selceted platform");
|
||||
return DKS_ERROR;
|
||||
@ -637,9 +637,9 @@ int DKSBase::callGreensIntegration(void *mem_ptr, void *tmp_ptr,
|
||||
int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId) {
|
||||
|
||||
if (apiCuda())
|
||||
return CUDA_SAFECALL(cgreens->cuda_MirrorRhoField(mem_ptr, I, J, K, streamId));
|
||||
return CUDA_SAFECALL(cgreens->mirrorRhoField(mem_ptr, I, J, K, streamId));
|
||||
else if (apiOpenMP())
|
||||
return MIC_SAFECALL(micgreens->mic_MirrorRhoField(mem_ptr, I, J, K));
|
||||
return MIC_SAFECALL(micgreens->mirrorRhoField(mem_ptr, I, J, K));
|
||||
|
||||
DEBUG_MSG("No implementation for selceted platform");
|
||||
return DKS_ERROR;
|
||||
@ -648,9 +648,9 @@ int DKSBase::callMirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId
|
||||
int DKSBase::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size, int streamId) {
|
||||
|
||||
if (apiCuda())
|
||||
return CUDA_SAFECALL(cgreens->cuda_MultiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId));
|
||||
return CUDA_SAFECALL(cgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId));
|
||||
else if (apiOpenMP())
|
||||
return MIC_SAFECALL(micgreens->mic_MultiplyCompelxFields(mem_ptr1, mem_ptr2, size));
|
||||
return MIC_SAFECALL(micgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size));
|
||||
|
||||
DEBUG_MSG("No implementation for selceted platform");
|
||||
return DKS_ERROR;
|
||||
|
@ -55,11 +55,11 @@ MICGreensFunction::~MICGreensFunction() {
|
||||
}
|
||||
*/
|
||||
|
||||
int MICGreensFunction::mic_GreensIntegral(void * tmp_ptr_, int I,int J, int K, double hr_m0,
|
||||
double hr_m1, double hr_m2)
|
||||
int MICGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
|
||||
double hr_m0, double hr_m1, double hr_m2, int streamId)
|
||||
{
|
||||
|
||||
double *tmp_ptr = (double*) tmp_ptr_;
|
||||
double *tmp_ptr = (double*) tmpgreen;
|
||||
#pragma offload target(mic:0) in(tmp_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I, J,K, hr_m0, hr_m1, hr_m2)
|
||||
{
|
||||
std::memset(tmp_ptr,0,I*J*K);
|
||||
@ -173,12 +173,14 @@ return 0;
|
||||
*/
|
||||
|
||||
//CUDA similar version:
|
||||
int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K) {
|
||||
double *tmpgreen = (double*) tmp_ptr_;
|
||||
double *mem_ptr = (double*) mem_ptr_;
|
||||
int MICGreensFunction::integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K,
|
||||
int streamId)
|
||||
{
|
||||
double *tmpgreen_ptr = (double*) tmpgreen;
|
||||
double *mem_ptr = (double*) rho2_m;
|
||||
|
||||
// the actual integration
|
||||
#pragma offload target(mic:0) in(tmpgreen:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
|
||||
#pragma offload target(mic:0) in(tmpgreen_ptr:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
|
||||
{
|
||||
int II = 2*(I-1); int JJ=2*(J-1); int KK=2*(K-1);
|
||||
std::memset(mem_ptr,0,II*JJ*KK);
|
||||
@ -197,27 +199,27 @@ int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp
|
||||
tmp4 = 0; tmp5 = 0; tmp6 = 0; tmp7 = 0;
|
||||
|
||||
if (i+1 < NI_tmp && j+1 < NJ_tmp && k+1 < NK_tmp)
|
||||
tmp0 = tmpgreen[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
tmp0 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
if (i+1 < NI_tmp)
|
||||
tmp1 = tmpgreen[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
tmp1 = tmpgreen_ptr[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
if (j+1 < NJ_tmp)
|
||||
tmp2 = tmpgreen[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
tmp2 = tmpgreen_ptr[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
if (k+1 < NK_tmp)
|
||||
tmp3 = tmpgreen[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
tmp3 = tmpgreen_ptr[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
if (i+1 < NI_tmp && j+1 < NJ_tmp)
|
||||
tmp4 = tmpgreen[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
tmp4 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
if (i+1 < NI_tmp && k+1 < NK_tmp)
|
||||
tmp5 = tmpgreen[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
tmp5 = tmpgreen_ptr[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
if (j+1 < NJ_tmp && k+1 < NK_tmp)
|
||||
tmp6 = tmpgreen[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
tmp6 = tmpgreen_ptr[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];
|
||||
|
||||
tmp7 = tmpgreen[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
tmp7 = tmpgreen_ptr[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];
|
||||
|
||||
double tmp_rho = tmp0 + tmp1 + tmp2 + tmp3 - tmp4 - tmp5 - tmp6 - tmp7;
|
||||
|
||||
@ -234,8 +236,8 @@ int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp
|
||||
|
||||
|
||||
|
||||
int MICGreensFunction::mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K) {
|
||||
double *mem_ptr = (double*) mem_ptr_;
|
||||
int MICGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId) {
|
||||
double *mem_ptr = (double*) rho2_m;
|
||||
|
||||
#pragma offload target(mic:0) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
|
||||
{
|
||||
@ -281,11 +283,11 @@ int MICGreensFunction::mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K)
|
||||
}
|
||||
|
||||
/*multiply complex fields*/
|
||||
int MICGreensFunction::mic_MultiplyCompelxFields(void * mem_ptr1_, void * mem_ptr2_, int size) {
|
||||
int MICGreensFunction::multiplyCompelxFields(void * ptr1, void * ptr2, int size) {
|
||||
// double *mem_ptr1 = (double*) mem_ptr1_;
|
||||
// double *mem_ptr2 = (double*) mem_ptr2_;
|
||||
_Complex double *mem_ptr1 = (_Complex double *) mem_ptr1_;
|
||||
_Complex double *mem_ptr2 = (_Complex double *) mem_ptr2_;
|
||||
_Complex double *mem_ptr1 = (_Complex double *) ptr1;
|
||||
_Complex double *mem_ptr2 = (_Complex double *) ptr2;
|
||||
|
||||
#pragma offload target(mic:0) in(mem_ptr1:length(0) DKS_RETAIN DKS_REUSE) in (mem_ptr2:length(0) DKS_RETAIN DKS_REUSE) in(size)
|
||||
{
|
||||
|
@ -9,12 +9,13 @@
|
||||
#include <offload.h>
|
||||
#include <mkl_dfti.h>
|
||||
|
||||
#include "../Algorithms/GreensFunction.h"
|
||||
#include "MICBase.h"
|
||||
|
||||
#define DKS_SUCCESS 0
|
||||
#define DKS_ERROR 1
|
||||
|
||||
class MICGreensFunction {
|
||||
class MICGreensFunction : public GreensFunction {
|
||||
|
||||
private:
|
||||
MICBase *m_micbase;
|
||||
@ -28,16 +29,18 @@ public:
|
||||
~MICGreensFunction();
|
||||
|
||||
/* compute greens integral analytically */
|
||||
int mic_GreensIntegral(void * tmp_ptr_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2);
|
||||
int greensIntegral(void * tmpgreen_, int I, int J, int K, double hr_m0, double hr_m1, double hr_m2,
|
||||
int streamId = -1);
|
||||
|
||||
/* perform the actual integration */
|
||||
int mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K);
|
||||
int integrationGreensFunction(void * rho2_m, void * tmpgreen,int I,int J, int K,
|
||||
int stremaId = -1);
|
||||
|
||||
/* Mirror rho-Field */
|
||||
int mic_MirrorRhoField(void * mem_ptr_, int I, int J, int K);
|
||||
int mirrorRhoField(void * rho2_m, int I, int J, int K, int streamId = -1);
|
||||
|
||||
/*multiply complex fields*/
|
||||
int mic_MultiplyCompelxFields(void * mem_ptr1_, void * mem_ptr2_, int size);
|
||||
int multiplyCompelxFields(void * ptr1, void * ptr2, int size, int streamId = -1);
|
||||
|
||||
};
|
||||
|
||||
|
@ -613,12 +613,12 @@ int OpenCLBase::ocl_loadKernel(const char * kernel_file) {
|
||||
}
|
||||
}
|
||||
|
||||
if (ierr != OCL_SUCCESS) {
|
||||
if (ierr != DKS_SUCCESS) {
|
||||
DEBUG_MSG("Failed to build kernel file " << kernel_file);
|
||||
return OCL_ERROR;
|
||||
return DKS_ERROR;
|
||||
}
|
||||
|
||||
return OCL_SUCCESS;
|
||||
return DKS_SUCCESS;
|
||||
}
|
||||
|
||||
//compile kernel form source code provided
|
||||
|
@ -32,7 +32,6 @@ int OpenCLFFT::ocl_callFFTKernel(cl_mem &data, int cdim, int ndim, int N, bool f
|
||||
if (m_oclbase->ocl_setKernelArg(3, sizeof(int), &f) != OCL_SUCCESS)
|
||||
return OCL_ERROR;
|
||||
|
||||
|
||||
//execute kernel
|
||||
for (int step = 1; step < N; step <<= 1) {
|
||||
if (m_oclbase->ocl_setKernelArg(1, sizeof(int), &step) != OCL_SUCCESS)
|
||||
|
Reference in New Issue
Block a user