// Source listing: DKS/src/MIC/MICGreensFunction.cpp
// (C++, listed as 310 lines, 8.5 KiB)

#include "MICGreensFunction.hpp"
#include<stdio.h>
#include<complex>
#include <cstring>
/**
 * Constructor.
 *
 * @param base  MIC base object; the pointer is stored in m_micbase as-is
 *              (no copy is made and no ownership handling happens here).
 */
MICGreensFunction::MICGreensFunction(MICBase *base) {
  this->m_micbase = base;
}
/** Destructor. The body is empty: nothing is released here. */
MICGreensFunction::~MICGreensFunction() {
  // intentionally empty
}
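/* compute greens integral analytically */
// Disabled variant that uses an extended (I+1) x (J+1) x (K+1) domain, kept for reference only; the active implementation is greensIntegral() below.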
/*
int MICGreensFunction::mic_GreensIntegral(void * tmp_ptr_, int I,int J, int K, double hr_m0,double hr_m1, double hr_m2) {
double *tmp_ptr = (double*) tmp_ptr_;
#pragma offload target(mic:0) in(tmp_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I, J,K, hr_m0, hr_m1, hr_m2)
{
std::memset(tmp_ptr,0,(I+1)*(J+1)*(K+1));
double cellVolume = hr_m0 * hr_m1 * hr_m2;
#pragma omp parallel for collapse(3) schedule(dynamic)
for (int k = 0; k < K; k++) {
for (int j = 0; j < J; j++) {
for (int i = 0; i < I; i++) {
double vv0 = i * hr_m0 - hr_m0 / 2;
double vv1 = j * hr_m1 - hr_m1 / 2;
double vv2 = k * hr_m2 - hr_m2 / 2;
double r = sqrt(vv0 * vv0 + vv1 * vv1 + vv2 * vv2);
double tmpgrn = 0;
tmpgrn += -vv2*vv2 * atan(vv0 * vv1 / (vv2 * r) );
tmpgrn += -vv1*vv1 * atan(vv0 * vv2 / (vv1 * r) );
tmpgrn += -vv0*vv0 * atan(vv1 * vv2 / (vv0 * r) );
tmpgrn = tmpgrn / 2;
tmpgrn += vv1 * vv2 * log(vv0 + r);
tmpgrn += vv0 * vv2 * log(vv1 + r);
tmpgrn += vv0 * vv1 * log(vv2 + r);
tmpgrn = tmpgrn / cellVolume;
tmp_ptr[k*(J+1)*(I+1) + j*(I+1) + i] = tmpgrn;
}
}
}
}
return 0;
}
*/
/**
 * Evaluate the analytic Green's function integral on an I x J x K grid.
 *
 * For every grid index (i, j, k) the coordinates
 *   (i*hr_m0 - hr_m0/2, j*hr_m1 - hr_m1/2, k*hr_m2 - hr_m2/2)
 * are inserted into the closed-form expression below; the result, divided by
 * the cell volume hr_m0*hr_m1*hr_m2, is stored at
 *   tmpgreen[k*J*I + j*I + i].
 *
 * The work runs inside an offload region targeting mic:0. The buffer is
 * passed with length(0), i.e. this pragma itself requests no data transfer;
 * what DKS_RETAIN / DKS_REUSE expand to is defined outside this file.
 *
 * @param tmpgreen  output buffer, interpreted as double*; it is indexed up to
 *                  I*J*K - 1, so it must hold at least I*J*K doubles
 * @param I         number of grid points along the fastest-varying index
 * @param J         number of grid points along the middle index
 * @param K         number of grid points along the slowest-varying index
 * @param NI        not used by this implementation
 * @param NJ        not used by this implementation
 * @param hr_m0     mesh spacing paired with index i
 * @param hr_m1     mesh spacing paired with index j
 * @param hr_m2     mesh spacing paired with index k
 * @param streamId  not used by this implementation
 * @return always 0
 */
int MICGreensFunction::greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
                                      double hr_m0, double hr_m1, double hr_m2, int streamId)
{
  double *tmp_ptr = (double*) tmpgreen;

#pragma offload target(mic:0) in(tmp_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I, J,K, hr_m0, hr_m1, hr_m2)
  {
    // memset takes a BYTE count: scale the element count by sizeof(double).
    // The product is evaluated in size_t so it cannot overflow int, and the
    // call is skipped for non-positive extents (the loops below do not run
    // in that case either).
    if (I > 0 && J > 0 && K > 0)
      std::memset(tmp_ptr, 0, sizeof(double) * I * J * K);

    double cellVolume = hr_m0 * hr_m1 * hr_m2;

#pragma omp parallel for collapse(3) schedule(dynamic)
    for (int k = 0; k < K; k++) {
      for (int j = 0; j < J; j++) {
        for (int i = 0; i < I; i++) {

          // evaluation point: grid position shifted back by half a cell
          double vv0 = i * hr_m0 - hr_m0 / 2;
          double vv1 = j * hr_m1 - hr_m1 / 2;
          double vv2 = k * hr_m2 - hr_m2 / 2;

          double r = sqrt(vv0 * vv0 + vv1 * vv1 + vv2 * vv2);

          // arctangent terms (halved below) ...
          double tmpgrn = 0;
          tmpgrn += -vv2*vv2 * atan(vv0 * vv1 / (vv2 * r) );
          tmpgrn += -vv1*vv1 * atan(vv0 * vv2 / (vv1 * r) );
          tmpgrn += -vv0*vv0 * atan(vv1 * vv2 / (vv0 * r) );

          tmpgrn = tmpgrn / 2;

          // ... plus logarithmic terms
          tmpgrn += vv1 * vv2 * log(vv0 + r);
          tmpgrn += vv0 * vv2 * log(vv1 + r);
          tmpgrn += vv0 * vv1 * log(vv2 + r);

          tmpgrn = tmpgrn / cellVolume;

          tmp_ptr[k*(J)*(I) + j*(I) + i] = tmpgrn;
        }
      }
    }
  }

  return 0;
}
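/* perform the actual integration */
// Two disabled variants follow, kept for reference only (the first one widens I, J, K by one for the extended domain); the active implementation is integrationGreensFunction() below.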
/*
int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K) {
double *tmp_ptr = (double*) tmp_ptr_;
double *mem_ptr = (double*) mem_ptr_;
// the actual integration
#pragma offload target(mic:0) in(tmp_ptr:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
{
int Ii = I;
int Jj = J;
int Kk = K;
int II = 2*(I-1); int JJ=2*(J-1); int KK=2*(K-1);
std::memset(mem_ptr,0,II*JJ*KK);
I=I+1; J=J+1; K=K+1;
#pragma omp parallel for collapse(3)
for (int i=0; i<Ii; i++) {
for (int j=0; j<Jj; j++) {
for (int k=0; k<Kk; k++) {
//mem_ptr[k*JJ*II + j*II + i] = 0.0;
mem_ptr[k*JJ*II + j*II + i] = tmp_ptr[(k+1)*J*I + (j+1)*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] += tmp_ptr[k*J*I + j*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] += tmp_ptr[k*J*I + (j+1)*I + i];
mem_ptr[k*JJ*II + j*II + i] += tmp_ptr[(k+1)*J*I + j*I + i];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[k*J*I + (j+1)*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[(k+1)*J*I + j*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[(k+1)*J*I + (j+1)*I + i];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[k*J*I + j*I + i];
}
}
}
}
return 0;
}
*/
/*
int MICGreensFunction::mic_IntegrationGreensFunction(void * mem_ptr_, void * tmp_ptr_,int I,int J, int K) {
double *tmp_ptr = (double*) tmp_ptr_;
double *mem_ptr = (double*) mem_ptr_;
// the actual integration
#pragma offload target(mic:0) in(tmp_ptr:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
{
int Ii = I;
int Jj = J;
int Kk = K;
int II = 2*(I-1); int JJ=2*(J-1); int KK=2*(K-1);
std::memset(mem_ptr,0,II*JJ*KK);
//I=I+1; J=J+1; K=K+1;
#pragma omp parallel for collapse(3)
for (int i=0; i<Ii; i++) {
for (int j=0; j<Jj; j++) {
for (int k=0; k<Kk; k++) {
//mem_ptr[k*JJ*II + j*II + i] = 0.0;
mem_ptr[k*JJ*II + j*II + i] = tmp_ptr[(k+1)*J*I + (j+1)*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] += tmp_ptr[k*J*I + j*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] += tmp_ptr[k*J*I + (j+1)*I + i];
mem_ptr[k*JJ*II + j*II + i] += tmp_ptr[(k+1)*J*I + j*I + i];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[k*J*I + (j+1)*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[(k+1)*J*I + j*I + (i+1)];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[(k+1)*J*I + (j+1)*I + i];
mem_ptr[k*JJ*II + j*II + i] -= tmp_ptr[k*J*I + j*I + i];
}
}
}
}
return 0;
}
*/
//CUDA similar version:
/**
 * Combine the eight neighbouring values of tmpgreen into rho2_m.
 *
 * For every (i, j, k) in [0,I) x [0,J) x [0,K) the value
 *   g(i+1,j+1,k+1) + g(i+1,j,k) + g(i,j+1,k) + g(i,j,k+1)
 *   - g(i+1,j+1,k) - g(i+1,j,k+1) - g(i,j+1,k+1) - g(i,j,k)
 * is written to rho2_m[i + j*II + k*II*JJ], where g is tmpgreen laid out as
 * an I x J x K array (i fastest) and II = 2*(I-1), JJ = 2*(J-1).
 * Neighbours whose index would fall outside the I x J x K array contribute 0.
 *
 * The work runs inside an offload region targeting mic:0; both buffers are
 * passed with length(0), i.e. this pragma itself requests no data transfer.
 *
 * @param rho2_m    output buffer, interpreted as double*; it is cleared as an
 *                  array of II*JJ*KK doubles (KK = 2*(K-1)) before being
 *                  filled, so it is assumed to be at least that large
 *                  (NOTE(review): confirm against the allocation at the caller)
 * @param tmpgreen  input buffer, interpreted as double*, read as I*J*K doubles
 * @param I         extent of tmpgreen along the fastest-varying index
 * @param J         extent of tmpgreen along the middle index
 * @param K         extent of tmpgreen along the slowest-varying index
 * @param streamId  not used by this implementation
 * @return always 0
 */
int MICGreensFunction::integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K,
                                                 int streamId)
{
  double *tmpgreen_ptr = (double*) tmpgreen;
  double *mem_ptr = (double*) rho2_m;

  // the actual integration
#pragma offload target(mic:0) in(tmpgreen_ptr:length(0) DKS_RETAIN DKS_REUSE) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
  {
    int II = 2*(I-1); int JJ = 2*(J-1); int KK = 2*(K-1);

    // memset takes a BYTE count: scale the element count by sizeof(double).
    // Only the [0,I) x [0,J) x [0,K) corner is overwritten below, so the rest
    // of the output really depends on this clear. Skipped when an extent is
    // non-positive, which would otherwise turn into a huge size_t.
    if (II > 0 && JJ > 0 && KK > 0)
      std::memset(mem_ptr, 0, sizeof(double) * II * JJ * KK);

    int NI_tmp = I;
    int NJ_tmp = J;
    int NK_tmp = K;

#pragma omp parallel for collapse(3)
    for (int i = 0; i < I; i++) {
      for (int j = 0; j < J; j++) {
        for (int k = 0; k < K; k++) {

          // These temporaries must live inside the loop body: declared
          // outside the parallel loop they would be shared between threads
          // and every iteration would race on them.
          double tmp0 = 0, tmp1 = 0, tmp2 = 0, tmp3 = 0;
          double tmp4 = 0, tmp5 = 0, tmp6 = 0, tmp7 = 0;

          if (i+1 < NI_tmp && j+1 < NJ_tmp && k+1 < NK_tmp)
            tmp0 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];

          if (i+1 < NI_tmp)
            tmp1 = tmpgreen_ptr[(i+1) + j * NI_tmp + k * NI_tmp * NJ_tmp];

          if (j+1 < NJ_tmp)
            tmp2 = tmpgreen_ptr[ i + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];

          if (k+1 < NK_tmp)
            tmp3 = tmpgreen_ptr[ i + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];

          if (i+1 < NI_tmp && j+1 < NJ_tmp)
            tmp4 = tmpgreen_ptr[(i+1) + (j+1) * NI_tmp + k * NI_tmp * NJ_tmp];

          if (i+1 < NI_tmp && k+1 < NK_tmp)
            tmp5 = tmpgreen_ptr[(i+1) + j * NI_tmp + (k+1) * NI_tmp * NJ_tmp];

          if (j+1 < NJ_tmp && k+1 < NK_tmp)
            tmp6 = tmpgreen_ptr[ i + (j+1) * NI_tmp + (k+1) * NI_tmp * NJ_tmp];

          // (i, j, k) itself is always inside the array
          tmp7 = tmpgreen_ptr[ i + j * NI_tmp + k * NI_tmp * NJ_tmp];

          double tmp_rho = tmp0 + tmp1 + tmp2 + tmp3 - tmp4 - tmp5 - tmp6 - tmp7;

          mem_ptr[i + j*II + k*II*JJ] = tmp_rho;
        }
      }
    }
  }

  return 0;
}
/**
 * Mirror the values of rho2_m into the upper index ranges of a
 * (2*I) x (2*J) x ... array (row stride II = 2*I, plane stride II*JJ = 2*I*2*J).
 *
 * Steps, in order:
 *   0. mem_ptr[0] is overwritten with mem_ptr[II*JJ], i.e. element (0,0,1).
 *   1. For ie in (I, 2*I), j in [0,J], k in [0,K]:
 *        element (ie, j, k)  <- element (2*I - ie, j, k)
 *   2. For ai in [0,2*I), je in (J, 2*J), k in [0,K]:
 *        element (ai, je, k) <- element (ai, 2*J - je, k)
 *   3. For ai in [0,2*I), aj in [0,2*J), ke in (K, 2*K):
 *        element (ai, aj, ke) <- element (ai, aj, 2*K - ke)
 * Each step reads data produced by the previous one, so the three parallel
 * loops are kept as separate worksharing constructs.
 *
 * The work runs inside an offload region targeting mic:0; the buffer is
 * passed with length(0), i.e. this pragma itself requests no data transfer.
 *
 * @param rho2_m    buffer, interpreted as double*, modified in place; the
 *                  largest index written is (2*K-1)*II*JJ + (2*J-1)*II + 2*I-1
 * @param I         half extent along the fastest-varying index
 * @param J         half extent along the middle index
 * @param K         half extent along the slowest-varying index
 * @param streamId  not used by this implementation
 * @return always 0
 */
int MICGreensFunction::mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId) {

  double *mem_ptr = (double*) rho2_m;

#pragma offload target(mic:0) in(mem_ptr:length(0) DKS_RETAIN DKS_REUSE) in(I,J,K)
  {
    int II = 2*I; int JJ = 2*J;

    mem_ptr[0] = mem_ptr[II*JJ];

    // NOTE: the index temporaries are declared inside each loop body. Declared
    // once outside the parallel loops they would be shared between threads,
    // and concurrent iterations would overwrite each other's indices.

    // step 1: mirror along the fastest-varying index
#pragma omp parallel for collapse(3) schedule(dynamic)
    for (int ie = I+1; ie < 2*I; ++ie) {
      for (int j = 0; j <= J; ++j) {
        for (int k = 0; k <= K; ++k) {
          const int id      = k * II * JJ + j * II + ie;
          const int id_mirr = k * II * JJ + j * II + (2*I-ie);
          mem_ptr[id] = mem_ptr[id_mirr];
        }
      }
    }

    // step 2: mirror along the middle index
#pragma omp parallel for collapse(3) schedule(dynamic)
    for (int ai = 0; ai < 2*I; ++ai) {
      for (int je = J+1; je < 2*J; ++je) {
        for (int k = 0; k <= K; ++k) {
          const int id      = k * II * JJ + je * II + ai;
          const int id_mirr = k * II * JJ + (2*J-je) * II + ai;
          mem_ptr[id] = mem_ptr[id_mirr];
        }
      }
    }

    // step 3: mirror along the slowest-varying index
#pragma omp parallel for collapse(3) schedule(dynamic)
    for (int ai = 0; ai < 2*I; ++ai) {
      for (int aj = 0; aj < 2*J; ++aj) {
        for (int ke = K+1; ke < 2*K; ++ke) {
          const int id      = ke * II * JJ + aj * II + ai;
          const int id_mirr = (2*K-ke) * II * JJ + aj * II + ai;
          mem_ptr[id] = mem_ptr[id_mirr];
        }
      }
    }
  }

  return 0;
}
/**
 * Element-wise complex multiplication: ptr1[n] = ptr1[n] * ptr2[n] for
 * n in [0, size).
 *
 * Both buffers are interpreted as arrays of _Complex double. The loop runs
 * inside an offload region targeting mic:0; the buffers are passed with
 * length(0), i.e. this pragma itself requests no data transfer.
 *
 * @param ptr1  first factor, overwritten with the product
 * @param ptr2  second factor, only read
 * @param size  number of complex elements to process
 * @return always 0
 */
int MICGreensFunction::multiplyCompelxFields(void * ptr1, void * ptr2, int size) {

  _Complex double *lhs = static_cast<_Complex double *>(ptr1);
  _Complex double *rhs = static_cast<_Complex double *>(ptr2);

#pragma offload target(mic:0) in(lhs:length(0) DKS_RETAIN DKS_REUSE) in (rhs:length(0) DKS_RETAIN DKS_REUSE) in(size)
  {
#pragma omp parallel for
    for (int n = 0; n < size; ++n) {
      lhs[n] = lhs[n] * rhs[n];
    }
  }

  return 0;
}