Applied a few minor optimizations to the field-distribution calculations

This commit is contained in:
Bastian M. Wojek 2011-05-28 13:07:36 +00:00
parent db4425e352
commit e22256ef8a
5 changed files with 1423 additions and 922 deletions

View File

@ -57,10 +57,13 @@ void TBofZCalc::Calculate()
fZ.resize(fSteps); fZ.resize(fSteps);
fBZ.resize(fSteps); fBZ.resize(fSteps);
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic) int chunk = fSteps/omp_get_num_procs();
#endif if (chunk < 10)
for (j=0; j<fSteps; j++) { chunk = 10;
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic,chunk)
#endif
for (j=0; j<fSteps; ++j) {
ZZ = fParam[1] + static_cast<double>(j)*fDZ; ZZ = fParam[1] + static_cast<double>(j)*fDZ;
fZ[j] = ZZ; fZ[j] = ZZ;
fBZ[j] = GetBofZ(ZZ); fBZ[j] = GetBofZ(ZZ);

File diff suppressed because it is too large Load Diff

View File

@ -290,7 +290,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix // First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) int chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][1] = fFFTin[l][0]; fBkMatrix[l][1] = fFFTin[l][0];
@ -299,61 +302,86 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
// sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z) // sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z)
// First multiply the aK with Kx^2, then call FFTW // First multiply the aK with Kx^2, then call FFTW
float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));; float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));
// k = 0 // k = 0
#ifdef HAVE_GOMP
// even rows #pragma omp parallel default(shared) private(i,j)
for (i = 0; i < NFFT; i += 2) { {
// j = 0 #pragma omp sections
fFFTin[fStepsZ*NFFT*i][0] = 0.0f; {
// j != 0
for (j = 2; j < NFFT_2; j += 2) {
fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j);
}
for (j = NFFT_2; j < NFFT; j += 2) {
fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT));
}
}
// odd rows
for (i = 1; i < NFFT; i += 2) {
for (j = 0; j < NFFT_2; j += 2) {
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
}
for (j = NFFT_2; j < NFFT; j += 2) {
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
}
}
// k != 0
if (fFind3dSolution) {
for (k = 1; k < NFFTz; ++k) {
// even rows // even rows
#pragma omp section
#endif
for (i = 0; i < NFFT; i += 2) { for (i = 0; i < NFFT; i += 2) {
// j = 0 // j = 0
fFFTin[k + NFFTz*NFFT*i][0] = 0.0f; fFFTin[fStepsZ*NFFT*i][0] = 0.0f;
// j != 0 // j != 0
for (j = 2; j < NFFT_2; j += 2) { for (j = 2; j < NFFT_2; j += 2) {
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j); fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j);
} }
for (j = NFFT_2; j < NFFT; j += 2) { for (j = NFFT_2; j < NFFT; j += 2) {
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT)); fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT));
} }
} }
// odd rows // odd rows
#ifdef HAVE_GOMP
#pragma omp section
#endif
for (i = 1; i < NFFT; i += 2) { for (i = 1; i < NFFT; i += 2) {
for (j = 0; j < NFFT_2; j += 2) { for (j = 0; j < NFFT_2; j += 2) {
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1)); fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
} }
for (j = NFFT_2; j < NFFT; j += 2) { for (j = NFFT_2; j < NFFT; j += 2) {
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT)); fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
} }
} }
} #ifdef HAVE_GOMP
} // else do nothing since the other aK are already zero since the former aK manipulation } // end omp sections
#endif
// k != 0
if (fFind3dSolution) {
for (k = 1; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp sections
{
// even rows
#pragma omp section
#endif
for (i = 0; i < NFFT; i += 2) {
// j = 0
fFFTin[k + NFFTz*NFFT*i][0] = 0.0f;
// j != 0
for (j = 2; j < NFFT_2; j += 2) {
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j);
}
for (j = NFFT_2; j < NFFT; j += 2) {
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT));
}
}
// odd rows
#ifdef HAVE_GOMP
#pragma omp section
#endif
for (i = 1; i < NFFT; i += 2) {
for (j = 0; j < NFFT_2; j += 2) {
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
}
for (j = NFFT_2; j < NFFT; j += 2) {
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
}
}
#ifdef HAVE_GOMP
}
#endif
}
} // else do nothing since the other aK are already zero since the former aK manipulation
#ifdef HAVE_GOMP
} // end omp parallel
#endif
fftwf_execute(fFFTplan); fftwf_execute(fFFTplan);
@ -363,7 +391,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fGstorage[k] = fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0]; fGstorage[k] = fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
} }
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1];
@ -473,7 +501,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0]; fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
} }
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1];
@ -583,7 +611,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1]; fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1];
} }
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1];
@ -612,13 +640,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
const int NFFTz_2(fStepsZ/2); const int NFFTz_2(fStepsZ/2);
int i, j, k, l, index; int i, j, k, l, index;
#ifdef HAVE_GOMP
int chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// Take the derivative of the Fourier sum of omega // Take the derivative of the Fourier sum of omega
// This is going to be a bit lengthy... // This is going to be a bit lengthy...
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix // First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][1] = fFFTin[l][0]; fBkMatrix[l][1] = fFFTin[l][0];
@ -687,7 +720,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// Copy the results to the gradient matrix and restore the original aK-matrix // Copy the results to the gradient matrix and restore the original aK-matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1]; fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1];
@ -777,7 +810,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// Copy the results to the gradient matrix and restore the original aK-matrix // Copy the results to the gradient matrix and restore the original aK-matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1]; fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1];
@ -838,10 +871,16 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// 3D transform // 3D transform
fftwf_execute(fFFTplan); fftwf_execute(fFFTplan);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk // Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1]; fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1];
@ -850,7 +889,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// Restore the original aK-matrix // Restore the original aK-matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1];
@ -859,7 +901,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
} else { } else {
// For the 2D solution, dw/dz = 0 // For the 2D solution, dw/dz = 0
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fOmegaDiffMatrix[2][l] = 0.0; fOmegaDiffMatrix[2][l] = 0.0;
@ -1004,9 +1046,15 @@ void TFilmTriVortexNGLFieldCalc::FillAbrikosovCoefficients(const float reducedFi
fFFTin[0][0] = 0.0; fFFTin[0][0] = 0.0;
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 1; k < NFFTz; ++k) { for (k = 1; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fFFTin[k + NFFTz*index][0] = 0.0; fFFTin[k + NFFTz*index][0] = 0.0;
@ -1699,9 +1747,16 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsB() const {
} }
*/ */
} else { // for 2D solution only } else { // for 2D solution only
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 1; k < NFFTz; ++k) { for (k = 1; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[k + NFFTz*index][0] = 0.0; fBkMatrix[k + NFFTz*index][0] = 0.0;
@ -1971,10 +2026,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXFirst() c
int i, j, k, kx, ky, kz, index; int i, j, k, kx, ky, kz, index;
float ii; float ii;
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// k = 0 // k = 0
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f; fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2116,10 +2176,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXSecond()
int i, j, k, kx, ky, kz, index; int i, j, k, kx, ky, kz, index;
float ii; float ii;
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// k = 0 // k = 0
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f; fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2260,7 +2325,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYFirst() c
// k = 0 // k = 0
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f; fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2397,7 +2465,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYSecond()
// k = 0 // k = 0
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f; fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2575,11 +2646,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
const int NFFTz_2(fStepsZ/2); const int NFFTz_2(fStepsZ/2);
const int NFFTsqStZ_2(NFFTsq*(fStepsZ/2 + 1)); const int NFFTsqStZ_2(NFFTsq*(fStepsZ/2 + 1));
#ifdef HAVE_GOMP
int chunk;
#endif
// first check that the field is not larger than Hc2 and that we are dealing with a type II SC ... // first check that the field is not larger than Hc2 and that we are dealing with a type II SC ...
if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) {
int m; int m;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(m) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk)
#endif #endif
for (m = 0; m < NFFTsqStZ; ++m) { for (m = 0; m < NFFTsqStZ; ++m) {
fBout[0][m] = 0.0f; fBout[0][m] = 0.0f;
@ -2601,7 +2679,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic) chunk = NFFT/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic,chunk)
#endif #endif
for (j = 0; j < NFFT; ++j) { for (j = 0; j < NFFT; ++j) {
index = k + NFFTz*j; index = k + NFFTz*j;
@ -2626,7 +2707,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
for (j = 0; j < NFFT; ++j) { for (j = 0; j < NFFT; ++j) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i,index) schedule(dynamic) #pragma omp parallel for default(shared) private(i,index) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < NFFT; ++i) { for (i = 0; i < NFFT; ++i) {
index = k + NFFTz*(j + NFFT*i); index = k + NFFTz*(j + NFFT*i);
@ -2645,7 +2726,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
int indexQA; int indexQA;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic) chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) { if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) {
@ -2672,7 +2756,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0]; fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0];
@ -2682,7 +2766,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// initialize the bK-Matrix // initialize the bK-Matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][0] = 0.0; fBkMatrix[l][0] = 0.0;
@ -2708,7 +2795,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// } // }
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; l++) { for (l = 0; l < NFFTsqStZ; l++) {
if (fOmegaMatrix[l]) { if (fOmegaMatrix[l]) {
@ -2726,9 +2816,6 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// since all of this should be a smooth function anyway, I set the value of the next neighbour r // since all of this should be a smooth function anyway, I set the value of the next neighbour r
// for the two vortex core positions in my matrix // for the two vortex core positions in my matrix
// If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment) // If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment)
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(k) schedule(dynamic)
#endif
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k]; fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k];
fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k]; fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k];
@ -2747,9 +2834,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplan); fftwf_execute(fFFTplan);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0]; fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
@ -2794,7 +2887,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Multiply the aK with the spacial averages // Multiply the aK with the spacial averages
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum; fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum;
@ -2823,9 +2916,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
} }
if (!akConverged) { if (!akConverged) {
#ifdef HAVE_GOMP
chunk = NFFT/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j, index) schedule(dynamic) #pragma omp parallel for default(shared) private(j, index) schedule(dynamic,chunk)
#endif #endif
for (j = 0; j < NFFT; ++j) { for (j = 0; j < NFFT; ++j) {
index = k + NFFTz*j; index = k + NFFTz*j;
@ -2856,9 +2954,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplan); fftwf_execute(fFFTplan);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0]; fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
@ -2869,6 +2973,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// first calculate PK (use the Q-Matrix memory for the second part) // first calculate PK (use the Q-Matrix memory for the second part)
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
@ -2905,7 +3012,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
if (firstBkCalculation) { if (firstBkCalculation) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTStZ; ++l) { for (l = 0; l < NFFTStZ; ++l) {
fCheckBkConvergence[l] = 0.0f; fCheckBkConvergence[l] = 0.0f;
@ -2937,9 +3047,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// cout << "Bk Convergence: " << bkConverged << endl; // cout << "Bk Convergence: " << bkConverged << endl;
if (!bkConverged) { if (!bkConverged) {
#ifdef HAVE_GOMP
chunk = NFFT/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic) #pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
#endif #endif
for (j = 0; j < NFFT; ++j) { for (j = 0; j < NFFT; ++j) {
index = k + NFFTz*j; index = k + NFFTz*j;
@ -2950,7 +3065,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix // In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][1] = fBkMatrix[l][0]; fFFTin[l][1] = fBkMatrix[l][0];
@ -2986,9 +3104,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplanBkToBandQ); fftwf_execute(fFFTplanBkToBandQ);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1]; fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1];
@ -2997,7 +3121,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Restore bKs for Qy calculation and Fourier transform to get Qy // Restore bKs for Qy calculation and Fourier transform to get Qy
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][0] = fFFTin[l][1]; fBkMatrix[l][0] = fFFTin[l][1];
@ -3008,9 +3135,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplanBkToBandQ); fftwf_execute(fFFTplanBkToBandQ);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) { for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1]; fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1];
@ -3019,7 +3152,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Restore bKs for the next iteration // Restore bKs for the next iteration
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][0] = fFFTin[l][1]; fBkMatrix[l][0] = fFFTin[l][1];
@ -3035,7 +3171,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation // Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBout[0][l] = fBkMatrix[l][0]; fBout[0][l] = fBkMatrix[l][0];
@ -3050,7 +3189,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the By-calculation // Fill in the B-Matrix and restore the bKs for the By-calculation
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa; fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa;
@ -3064,7 +3203,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation // Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBout[1][l] = fBkMatrix[l][0]; fBout[1][l] = fBkMatrix[l][0];
@ -3078,7 +3217,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation // Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa; fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa;
@ -3089,7 +3228,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplanBkToBandQ); fftwf_execute(fFFTplanBkToBandQ);
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic) #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif #endif
for (l = 0; l < NFFTsqStZ; ++l) { for (l = 0; l < NFFTsqStZ; ++l) {
fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa; fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa;
@ -3099,7 +3238,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Save a copy of the BkS - where does not matter since this is the very end of the calculation... // Save a copy of the BkS - where does not matter since this is the very end of the calculation...
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fFFTin[index][1] = fBkS[index][0]; fFFTin[index][1] = fBkS[index][0];
@ -3112,7 +3254,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Write the surface fields to the field-Matrix and restore the BkS for the By-calculation // Write the surface fields to the field-Matrix and restore the BkS for the By-calculation
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa; fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
@ -3127,7 +3269,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Write the surface fields to the field-Matrix // Write the surface fields to the field-Matrix
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic) #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif #endif
for (index = 0; index < NFFTsq; ++index) { for (index = 0; index < NFFTsq; ++index) {
fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa; fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;

View File

@ -62,7 +62,10 @@ TPofBCalc::TPofBCalc(const vector<double> &para) : fBmin(0.0), fBmax(0.0), fDT(p
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) { for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fB[i] = static_cast<double>(i)*fDB; fB[i] = static_cast<double>(i)*fDB;
@ -83,7 +86,10 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) { for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fB[i] = b[i]; fB[i] = b[i];
@ -121,9 +127,12 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
void TPofBCalc::UnsetPBExists() { void TPofBCalc::UnsetPBExists() {
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = fPBSize/omp_get_num_procs();
#endif if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) { for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fPB[i] = 0.0; fPB[i] = 0.0;
} }
@ -140,14 +149,17 @@ void TPofBCalc::Normalize(unsigned int minFilledIndex = 0, unsigned int maxFille
double pBsum(0.0); double pBsum(0.0);
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum) int chunk = (maxFilledIndex-minFilledIndex)/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
#endif #endif
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i) for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
pBsum += fPB[i]; pBsum += fPB[i];
pBsum *= fDB; pBsum *= fDB;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i) for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
fPB[i] /= pBsum; fPB[i] /= pBsum;
@ -160,7 +172,10 @@ void TPofBCalc::SetPB(const vector<double> &pb) const {
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) { for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fPB[i] = pb[i]; fPB[i] = pb[i];
@ -190,14 +205,20 @@ void TPofBCalc::Calculate(const string &type, const vector<double> &para) {
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = B0Index/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < B0Index; ++i) { for (i = 0; i < B0Index; ++i) {
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expominus); fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expominus);
} }
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) chunk = (BmaxIndex-B0Index)/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = B0Index; i <= BmaxIndex; ++i) { for (i = B0Index; i <= BmaxIndex; ++i) {
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expoplus); fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expoplus);
@ -546,6 +567,10 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
// cannot use a reduction clause here (like e.g. in Normalize()), since pBvec[] is not a scalar variable // cannot use a reduction clause here (like e.g. in Normalize()), since pBvec[] is not a scalar variable
// therefore, we need to work on it a bit more // therefore, we need to work on it a bit more
int n(omp_get_num_procs()), tid, offset; int n(omp_get_num_procs()), tid, offset;
int chunk = fPBSize/n;
if (chunk < 10)
chunk = 10;
vector< vector<unsigned int> > pBvec(n, vector<unsigned int>(fPBSize, 0)); vector< vector<unsigned int> > pBvec(n, vector<unsigned int>(fPBSize, 0));
int indexStep(static_cast<int>(floor(static_cast<float>(numberOfSteps_2)/static_cast<float>(n)))); int indexStep(static_cast<int>(floor(static_cast<float>(numberOfSteps_2)/static_cast<float>(n))));
@ -577,7 +602,7 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
} }
for (j = 0; j < n; ++j) { for (j = 0; j < n; ++j) {
#pragma omp parallel for default(shared) private(i) schedule(dynamic) #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
for (i = 0; i < static_cast<int>(fPBSize); ++i) { for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fPB[i] += static_cast<double>(pBvec[j][i]); fPB[i] += static_cast<double>(pBvec[j][i]);
} }
@ -624,7 +649,10 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
// calculate Gaussian background // calculate Gaussian background
double bg[fPBSize]; double bg[fPBSize];
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for(i = 0; i < static_cast<int>(fPBSize); ++i) { for(i = 0; i < static_cast<int>(fPBSize); ++i) {
bg[i] = exp(-(fB[i]-B)*(fB[i]-B)/(2.0*BsSq)); bg[i] = exp(-(fB[i]-B)*(fB[i]-B)/(2.0*BsSq));
@ -634,7 +662,7 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
double bgsum(0.0); double bgsum(0.0);
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:bgsum) #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:bgsum)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) for (i = 0; i < static_cast<int>(fPBSize); ++i)
bgsum += bg[i]; bgsum += bg[i];
@ -642,14 +670,14 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
bgsum *= fDB; bgsum *= fDB;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) for (i = 0; i < static_cast<int>(fPBSize); ++i)
bg[i] /= bgsum; bg[i] /= bgsum;
// add background to P(B) // add background to P(B)
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) for (i = 0; i < static_cast<int>(fPBSize); ++i)
fPB[i] = (1.0 - w)*fPB[i] + w*bg[i]; fPB[i] = (1.0 - w)*fPB[i] + w*bg[i];
@ -691,7 +719,10 @@ void TPofBCalc::ConvolveGss(double w) {
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic) int chunk = (NFFT/2+1)/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic,chunk)
#endif #endif
for (i = 0; i < static_cast<int>(NFFT/2+1); ++i) { for (i = 0; i < static_cast<int>(NFFT/2+1); ++i) {
GssInTimeDomain = exp(expo*static_cast<double>(i)*static_cast<double>(i)); GssInTimeDomain = exp(expo*static_cast<double>(i)*static_cast<double>(i));
@ -725,7 +756,10 @@ double TPofBCalc::GetFirstMoment() const {
double pBsum(0.0); double pBsum(0.0);
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum) int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) for (i = 0; i < static_cast<int>(fPBSize); ++i)
pBsum += fB[i]*fPB[i]; pBsum += fB[i]*fPB[i];
@ -746,7 +780,10 @@ double TPofBCalc::GetCentralMoment(unsigned int n) const {
double pBsum(0.0); double pBsum(0.0);
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic) reduction(+:pBsum) int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic,chunk) reduction(+:pBsum)
#endif #endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) { for (i = 0; i < static_cast<int>(fPBSize); ++i) {
diff = fB[i]-firstMoment; diff = fB[i]-firstMoment;

View File

@ -102,10 +102,13 @@ TPofTCalc::TPofTCalc (const TPofBCalc *PofB, const string &wisdom, const vector<
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = NFFT_2p1/omp_get_num_procs();
#endif if (chunk < 10)
for (i = 0; i < NFFT_2p1; i++) { chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < NFFT_2p1; ++i) {
fT[i] = static_cast<double>(i)*fTBin; fT[i] = static_cast<double>(i)*fTBin;
} }
@ -190,10 +193,13 @@ void TPofTCalc::CalcPol(const vector<double> &par) {
double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0)); double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0));
int i; int i;
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) int chunk = (fNFFT/2+1)/omp_get_num_procs();
#endif if (chunk < 10)
for (i=0; i<fNFFT/2+1; i++){ chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i=0; i<fNFFT/2+1; ++i) {
fPT[i] = (cosph*fFFTout[i][0] + sinph*fFFTout[i][1])*par[2]; fPT[i] = (cosph*fFFTout[i][0] + sinph*fFFTout[i][1])*par[2];
} }
} }
@ -240,7 +246,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
vector<double> N0; vector<double> N0;
vector<double> bg; vector<double> bg;
for(unsigned int i(0); i<numHist; i++) { for(unsigned int i(0); i<numHist; ++i) {
t0.push_back(int(par[i+4+numHist*2])); t0.push_back(int(par[i+4+numHist*2]));
asy0.push_back(par[i+4]); asy0.push_back(par[i+4]);
phase0.push_back(par[i+4+numHist]); phase0.push_back(par[i+4+numHist]);
@ -258,15 +264,21 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
double ttime; double ttime;
int j,k; int j,k;
for(unsigned int i(0); i<numHist; i++) { #ifdef HAVE_GOMP
int chunk = nChannels/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for(unsigned int i(0); i<numHist; ++i) {
param[0]=phase0[i]; param[0]=phase0[i];
// calculate asymmetry // calculate asymmetry
CalcPol(param); CalcPol(param);
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic) #pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic,chunk)
#endif #endif
for(j=0; j<nChannels; j++) { for(j=0; j<nChannels; ++j) {
ttime=j*par[2]; ttime=j*par[2];
k = static_cast<int>(floor(ttime/fTBin)); k = static_cast<int>(floor(ttime/fTBin));
asydata[j]=asy0[i]*(fPT[k]+(fPT[k+1]-fPT[k])/fTBin*(ttime-fT[k])); asydata[j]=asy0[i]*(fPT[k]+(fPT[k+1]-fPT[k])/fTBin*(ttime-fT[k]));
@ -289,12 +301,12 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
vector< vector<double> > histo; vector< vector<double> > histo;
vector<double> data(nChannels); vector<double> data(nChannels);
for (unsigned int i(0); i<numHist; i++) { // loop over all histos for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic) #pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
#endif #endif
for (j = 0; j<nChannels; j++) { // loop over time for (j = 0; j<nChannels; ++j) { // loop over time
if (j < t0[i]) // j<t0 if (j < t0[i]) // j<t0
data[j] = bg[i]; // background data[j] = bg[i]; // background
else else
@ -315,7 +327,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
vector<TH1F*> histoData; vector<TH1F*> histoData;
TString name; TString name;
for (unsigned int i(0); i<numHist; i++) { // loop over all histos for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
// create histos // create histos
name = "theoHisto"; name = "theoHisto";
name += i; name += i;
@ -327,10 +339,10 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
name += i; name += i;
fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]); fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]);
// fill theoHisto // fill theoHisto
#ifdef HAVE_GOMP #ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic) #pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
#endif #endif
for (j = 0; j<nChannels; j++) for (j = 0; j<nChannels; ++j)
theoHisto->SetBinContent(j, histo[i][j]); theoHisto->SetBinContent(j, histo[i][j]);
// end omp // end omp