Applied a few minor optimizations to the field-distribution calculations

This commit is contained in:
Bastian M. Wojek 2011-05-28 13:07:36 +00:00
parent db4425e352
commit e22256ef8a
5 changed files with 1423 additions and 922 deletions

View File

@ -57,10 +57,13 @@ void TBofZCalc::Calculate()
fZ.resize(fSteps);
fBZ.resize(fSteps);
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic)
#endif
for (j=0; j<fSteps; j++) {
#ifdef HAVE_GOMP
int chunk = fSteps/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic,chunk)
#endif
for (j=0; j<fSteps; ++j) {
ZZ = fParam[1] + static_cast<double>(j)*fDZ;
fZ[j] = ZZ;
fBZ[j] = GetBofZ(ZZ);

File diff suppressed because it is too large Load Diff

View File

@ -290,7 +290,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
int chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][1] = fFFTin[l][0];
@ -299,11 +302,17 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
// sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z)
// First multiply the aK with Kx^2, then call FFTW
float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));;
float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));
// k = 0
#ifdef HAVE_GOMP
#pragma omp parallel default(shared) private(i,j)
{
#pragma omp sections
{
// even rows
#pragma omp section
#endif
for (i = 0; i < NFFT; i += 2) {
// j = 0
fFFTin[fStepsZ*NFFT*i][0] = 0.0f;
@ -317,6 +326,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
}
// odd rows
#ifdef HAVE_GOMP
#pragma omp section
#endif
for (i = 1; i < NFFT; i += 2) {
for (j = 0; j < NFFT_2; j += 2) {
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
@ -325,12 +337,19 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
}
}
#ifdef HAVE_GOMP
} // end omp sections
#endif
// k != 0
if (fFind3dSolution) {
for (k = 1; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp sections
{
// even rows
#pragma omp section
#endif
for (i = 0; i < NFFT; i += 2) {
// j = 0
fFFTin[k + NFFTz*NFFT*i][0] = 0.0f;
@ -344,6 +363,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
}
// odd rows
#ifdef HAVE_GOMP
#pragma omp section
#endif
for (i = 1; i < NFFT; i += 2) {
for (j = 0; j < NFFT_2; j += 2) {
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
@ -352,8 +374,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
}
}
#ifdef HAVE_GOMP
}
#endif
}
} // else do nothing since the other aK are already zero since the former aK manipulation
#ifdef HAVE_GOMP
} // end omp parallel
#endif
fftwf_execute(fFFTplan);
@ -363,7 +391,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fGstorage[k] = fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
}
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1];
@ -473,7 +501,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
}
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1];
@ -583,7 +611,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1];
}
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1];
@ -612,13 +640,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
const int NFFTz_2(fStepsZ/2);
int i, j, k, l, index;
#ifdef HAVE_GOMP
int chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// Take the derivative of the Fourier sum of omega
// This is going to be a bit lengthy...
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][1] = fFFTin[l][0];
@ -687,7 +720,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// Copy the results to the gradient matrix and restore the original aK-matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1];
@ -777,7 +810,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// Copy the results to the gradient matrix and restore the original aK-matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1];
@ -838,10 +871,16 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// 3D transform
fftwf_execute(fFFTplan);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1];
@ -850,7 +889,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
// Restore the original aK-matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][0] = fBkMatrix[l][1];
@ -859,7 +901,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
} else {
// For the 2D solution, dw/dz = 0
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fOmegaDiffMatrix[2][l] = 0.0;
@ -1004,9 +1046,15 @@ void TFilmTriVortexNGLFieldCalc::FillAbrikosovCoefficients(const float reducedFi
fFFTin[0][0] = 0.0;
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 1; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fFFTin[k + NFFTz*index][0] = 0.0;
@ -1699,9 +1747,16 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsB() const {
}
*/
} else { // for 2D solution only
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 1; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[k + NFFTz*index][0] = 0.0;
@ -1971,10 +2026,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXFirst() c
int i, j, k, kx, ky, kz, index;
float ii;
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// k = 0
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2116,10 +2176,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXSecond()
int i, j, k, kx, ky, kz, index;
float ii;
#ifdef HAVE_GOMP
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
// k = 0
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2260,7 +2325,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYFirst() c
// k = 0
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2397,7 +2465,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYSecond()
// k = 0
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
int chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fBkMatrix[NFFTz*index][0] = 0.0f;
@ -2575,11 +2646,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
const int NFFTz_2(fStepsZ/2);
const int NFFTsqStZ_2(NFFTsq*(fStepsZ/2 + 1));
#ifdef HAVE_GOMP
int chunk;
#endif
// first check that the field is not larger than Hc2 and that we are dealing with a type II SC ...
if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) {
int m;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(m) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk)
#endif
for (m = 0; m < NFFTsqStZ; ++m) {
fBout[0][m] = 0.0f;
@ -2601,7 +2679,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic)
chunk = NFFT/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic,chunk)
#endif
for (j = 0; j < NFFT; ++j) {
index = k + NFFTz*j;
@ -2626,7 +2707,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
for (k = 0; k < NFFTz; ++k) {
for (j = 0; j < NFFT; ++j) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i,index) schedule(dynamic)
#pragma omp parallel for default(shared) private(i,index) schedule(dynamic,chunk)
#endif
for (i = 0; i < NFFT; ++i) {
index = k + NFFTz*(j + NFFT*i);
@ -2645,7 +2726,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
int indexQA;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic)
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) {
@ -2672,7 +2756,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0];
@ -2682,7 +2766,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// initialize the bK-Matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][0] = 0.0;
@ -2708,7 +2795,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// }
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; l++) {
if (fOmegaMatrix[l]) {
@ -2726,9 +2816,6 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// since all of this should be a smooth function anyway, I set the value of the next neighbour r
// for the two vortex core positions in my matrix
// If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment)
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(k) schedule(dynamic)
#endif
for (k = 0; k < NFFTz; ++k) {
fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k];
fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k];
@ -2747,9 +2834,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplan);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
@ -2794,7 +2887,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Multiply the aK with the spacial averages
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum;
@ -2823,9 +2916,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
}
if (!akConverged) {
#ifdef HAVE_GOMP
chunk = NFFT/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j, index) schedule(dynamic)
#pragma omp parallel for default(shared) private(j, index) schedule(dynamic,chunk)
#endif
for (j = 0; j < NFFT; ++j) {
index = k + NFFTz*j;
@ -2856,9 +2954,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplan);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
@ -2869,6 +2973,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// first calculate PK (use the Q-Matrix memory for the second part)
#ifdef HAVE_GOMP
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
@ -2905,7 +3012,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
if (firstBkCalculation) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTStZ; ++l) {
fCheckBkConvergence[l] = 0.0f;
@ -2937,9 +3047,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// cout << "Bk Convergence: " << bkConverged << endl;
if (!bkConverged) {
#ifdef HAVE_GOMP
chunk = NFFT/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
#endif
for (j = 0; j < NFFT; ++j) {
index = k + NFFTz*j;
@ -2950,7 +3065,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fFFTin[l][1] = fBkMatrix[l][0];
@ -2986,9 +3104,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplanBkToBandQ);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1];
@ -2997,7 +3121,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Restore bKs for Qy calculation and Fourier transform to get Qy
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][0] = fFFTin[l][1];
@ -3008,9 +3135,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplanBkToBandQ);
#ifdef HAVE_GOMP
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for (k = 0; k < NFFTz; ++k) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1];
@ -3019,7 +3152,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Restore bKs for the next iteration
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBkMatrix[l][0] = fFFTin[l][1];
@ -3035,7 +3171,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
chunk = NFFTsqStZ/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBout[0][l] = fBkMatrix[l][0];
@ -3050,7 +3189,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the By-calculation
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa;
@ -3064,7 +3203,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBout[1][l] = fBkMatrix[l][0];
@ -3078,7 +3217,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa;
@ -3089,7 +3228,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
fftwf_execute(fFFTplanBkToBandQ);
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
#endif
for (l = 0; l < NFFTsqStZ; ++l) {
fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa;
@ -3099,7 +3238,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Save a copy of the BkS - where does not matter since this is the very end of the calculation...
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
chunk = NFFTsq/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fFFTin[index][1] = fBkS[index][0];
@ -3112,7 +3254,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Write the surface fields to the field-Matrix and restore the BkS for the By-calculation
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
@ -3127,7 +3269,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
// Write the surface fields to the field-Matrix
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
#endif
for (index = 0; index < NFFTsq; ++index) {
fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;

View File

@ -62,7 +62,10 @@ TPofBCalc::TPofBCalc(const vector<double> &para) : fBmin(0.0), fBmax(0.0), fDT(p
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fB[i] = static_cast<double>(i)*fDB;
@ -83,7 +86,10 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fB[i] = b[i];
@ -121,9 +127,12 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
void TPofBCalc::UnsetPBExists() {
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
#endif
#ifdef HAVE_GOMP
int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fPB[i] = 0.0;
}
@ -140,14 +149,17 @@ void TPofBCalc::Normalize(unsigned int minFilledIndex = 0, unsigned int maxFille
double pBsum(0.0);
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum)
int chunk = (maxFilledIndex-minFilledIndex)/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
#endif
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
pBsum += fPB[i];
pBsum *= fDB;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
fPB[i] /= pBsum;
@ -160,7 +172,10 @@ void TPofBCalc::SetPB(const vector<double> &pb) const {
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fPB[i] = pb[i];
@ -190,14 +205,20 @@ void TPofBCalc::Calculate(const string &type, const vector<double> &para) {
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
int chunk = B0Index/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < B0Index; ++i) {
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expominus);
}
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
chunk = (BmaxIndex-B0Index)/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = B0Index; i <= BmaxIndex; ++i) {
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expoplus);
@ -546,6 +567,10 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
// cannot use a reduction clause here (like e.g. in Normalize()), since pBvec[] is not a scalar variable
// therefore, we need to work on it a bit more
int n(omp_get_num_procs()), tid, offset;
int chunk = fPBSize/n;
if (chunk < 10)
chunk = 10;
vector< vector<unsigned int> > pBvec(n, vector<unsigned int>(fPBSize, 0));
int indexStep(static_cast<int>(floor(static_cast<float>(numberOfSteps_2)/static_cast<float>(n))));
@ -577,7 +602,7 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
}
for (j = 0; j < n; ++j) {
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
fPB[i] += static_cast<double>(pBvec[j][i]);
}
@ -624,7 +649,10 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
// calculate Gaussian background
double bg[fPBSize];
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for(i = 0; i < static_cast<int>(fPBSize); ++i) {
bg[i] = exp(-(fB[i]-B)*(fB[i]-B)/(2.0*BsSq));
@ -634,7 +662,7 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
double bgsum(0.0);
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:bgsum)
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:bgsum)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i)
bgsum += bg[i];
@ -642,14 +670,14 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
bgsum *= fDB;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i)
bg[i] /= bgsum;
// add background to P(B)
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i)
fPB[i] = (1.0 - w)*fPB[i] + w*bg[i];
@ -691,7 +719,10 @@ void TPofBCalc::ConvolveGss(double w) {
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic)
int chunk = (NFFT/2+1)/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic,chunk)
#endif
for (i = 0; i < static_cast<int>(NFFT/2+1); ++i) {
GssInTimeDomain = exp(expo*static_cast<double>(i)*static_cast<double>(i));
@ -725,7 +756,10 @@ double TPofBCalc::GetFirstMoment() const {
double pBsum(0.0);
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum)
int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i)
pBsum += fB[i]*fPB[i];
@ -746,7 +780,10 @@ double TPofBCalc::GetCentralMoment(unsigned int n) const {
double pBsum(0.0);
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic) reduction(+:pBsum)
int chunk = fPBSize/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic,chunk) reduction(+:pBsum)
#endif
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
diff = fB[i]-firstMoment;

View File

@ -102,10 +102,13 @@ TPofTCalc::TPofTCalc (const TPofBCalc *PofB, const string &wisdom, const vector<
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
#endif
for (i = 0; i < NFFT_2p1; i++) {
#ifdef HAVE_GOMP
int chunk = NFFT_2p1/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i = 0; i < NFFT_2p1; ++i) {
fT[i] = static_cast<double>(i)*fTBin;
}
@ -190,10 +193,13 @@ void TPofTCalc::CalcPol(const vector<double> &par) {
double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0));
int i;
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
#endif
for (i=0; i<fNFFT/2+1; i++){
#ifdef HAVE_GOMP
int chunk = (fNFFT/2+1)/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
#endif
for (i=0; i<fNFFT/2+1; ++i) {
fPT[i] = (cosph*fFFTout[i][0] + sinph*fFFTout[i][1])*par[2];
}
}
@ -240,7 +246,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
vector<double> N0;
vector<double> bg;
for(unsigned int i(0); i<numHist; i++) {
for(unsigned int i(0); i<numHist; ++i) {
t0.push_back(int(par[i+4+numHist*2]));
asy0.push_back(par[i+4]);
phase0.push_back(par[i+4+numHist]);
@ -258,15 +264,21 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
double ttime;
int j,k;
for(unsigned int i(0); i<numHist; i++) {
#ifdef HAVE_GOMP
int chunk = nChannels/omp_get_num_procs();
if (chunk < 10)
chunk = 10;
#endif
for(unsigned int i(0); i<numHist; ++i) {
param[0]=phase0[i];
// calculate asymmetry
CalcPol(param);
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic)
#endif
for(j=0; j<nChannels; j++) {
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic,chunk)
#endif
for(j=0; j<nChannels; ++j) {
ttime=j*par[2];
k = static_cast<int>(floor(ttime/fTBin));
asydata[j]=asy0[i]*(fPT[k]+(fPT[k+1]-fPT[k])/fTBin*(ttime-fT[k]));
@ -289,12 +301,12 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
vector< vector<double> > histo;
vector<double> data(nChannels);
for (unsigned int i(0); i<numHist; i++) { // loop over all histos
for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
#endif
for (j = 0; j<nChannels; j++) { // loop over time
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
#endif
for (j = 0; j<nChannels; ++j) { // loop over time
if (j < t0[i]) // j<t0
data[j] = bg[i]; // background
else
@ -315,7 +327,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
vector<TH1F*> histoData;
TString name;
for (unsigned int i(0); i<numHist; i++) { // loop over all histos
for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
// create histos
name = "theoHisto";
name += i;
@ -327,10 +339,10 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
name += i;
fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]);
// fill theoHisto
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
#endif
for (j = 0; j<nChannels; j++)
#ifdef HAVE_GOMP
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
#endif
for (j = 0; j<nChannels; ++j)
theoHisto->SetBinContent(j, histo[i][j]);
// end omp