diff --git a/src/external/libFitPofB/classes/TBofZCalc.cpp b/src/external/libFitPofB/classes/TBofZCalc.cpp index 91361198..593218d2 100644 --- a/src/external/libFitPofB/classes/TBofZCalc.cpp +++ b/src/external/libFitPofB/classes/TBofZCalc.cpp @@ -57,10 +57,13 @@ void TBofZCalc::Calculate() fZ.resize(fSteps); fBZ.resize(fSteps); -#ifdef HAVE_GOMP -#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic) -#endif - for (j=0; j(j)*fDZ; fZ[j] = ZZ; fBZ[j] = GetBofZ(ZZ); diff --git a/src/external/libFitPofB/classes/TBulkTriVortexFieldCalc.cpp b/src/external/libFitPofB/classes/TBulkTriVortexFieldCalc.cpp index 04f8d99e..923d0691 100644 --- a/src/external/libFitPofB/classes/TBulkTriVortexFieldCalc.cpp +++ b/src/external/libFitPofB/classes/TBulkTriVortexFieldCalc.cpp @@ -198,15 +198,21 @@ void TBulkTriVortexLondonFieldCalc::CalculateGrid() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + // fill the field Fourier components in the matrix // ... 
but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif - for (m = 0; m < NFFTsq; m++) { + for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; } // Set the flag which shows that the calculation has been done @@ -289,7 +295,7 @@ void TBulkTriVortexLondonFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] *= field; @@ -375,13 +381,19 @@ void TBulkSqVortexLondonFieldCalc::CalculateGrid() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); - // fill the field Fourier components in the matrix + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + + // fill the field Fourier components in the matrix // ... 
but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif for (m = 0; m < NFFTsq; m++) { fFFTout[m] = field; @@ -395,25 +407,36 @@ void TBulkSqVortexLondonFieldCalc::CalculateGrid() const { double Gsq, ll; int k, l, lNFFT_2; - for (l = 0; l < NFFT_2; ++l) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast(l*l); - for (k = 0; k <= NFFT_2; ++k) { - Gsq = static_cast(k*k) + ll; - fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(l,k,lNFFT_2,ll,Gsq) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; ++l) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast(l*l); + for (k = 0; k <= NFFT_2; ++k) { + Gsq = static_cast(k*k) + ll; + fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + } } - } - for (l = NFFT_2; l < NFFT; ++l) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k <= NFFT_2; ++k) { - Gsq = static_cast(k*k) + ll; - fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; ++l) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k <= NFFT_2; ++k) { + Gsq = static_cast(k*k) + ll; + fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + } } + #ifdef HAVE_GOMP } + #endif // Do the Fourier transform to get B(x,y) @@ -421,9 +444,9 @@ void TBulkSqVortexLondonFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for 
default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= field; } @@ -506,15 +529,21 @@ void TBulkTriVortexMLFieldCalc::CalculateGrid() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); - // fill the field Fourier components in the matrix + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + + // fill the field Fourier components in the matrix // ... but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif - for (m = 0; m < NFFTsq; m++) { + for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; } // Set the flag which shows that the calculation has been done @@ -530,70 +559,85 @@ void TBulkTriVortexMLFieldCalc::CalculateGrid() const { double Gsq, ll; int k, l, lNFFT_2; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(l,k,lNFFT_2,ll,Gsq) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast(k*k) + ll; + fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; Gsq = static_cast(k*k) + ll; fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = 
NFFT_2; - Gsq = static_cast(k*k) + ll; - fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast(k*k) + ll; + fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; Gsq = static_cast(k*k) + ll; fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - Gsq = static_cast(k*k) + ll; - fFFTin[lNFFT_2 + k][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - // intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast((k + 1)*(k + 1)) + ll; + // intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast((k + 1)*(k + 1)) + ll; + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - } 
- for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast((k+1)*(k+1)) + ll; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast((k+1)*(k+1)) + ll; + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-xisq_2_scaled*Gsq)/(1.0+lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; + #ifdef HAVE_GOMP } + #endif // Do the Fourier transform to get B(x,y) @@ -601,9 +645,9 @@ void TBulkTriVortexMLFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= field; } @@ -615,8 +659,6 @@ void TBulkTriVortexMLFieldCalc::CalculateGrid() const { } - - TBulkTriVortexAGLFieldCalc::TBulkTriVortexAGLFieldCalc(const string& wisdom, const unsigned int steps) { fWisdom = wisdom; if (steps % 2) { @@ -687,15 +729,21 @@ void TBulkTriVortexAGLFieldCalc::CalculateGrid() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); - // fill the field Fourier components in the matrix + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + + // fill the field Fourier components in the matrix // ... 
but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif - for (m = 0; m < NFFTsq; m++) { + for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; } // Set the flag which shows that the calculation has been done @@ -711,77 +759,93 @@ void TBulkTriVortexAGLFieldCalc::CalculateGrid() const { double Gsq, sqrtfInfSqPlusLsqGsq, ll; int k, l, lNFFT_2; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast(k*k) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][0] = ((!k && !l) ? 1.0 : \ - fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf))); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - k = NFFT_2; - Gsq = static_cast(k*k) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(l,k,lNFFT_2,ll,Gsq,sqrtfInfSqPlusLsqGsq) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast(k*k) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = ((!k && !l) ? 
1.0 : \ + fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf))); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; Gsq = static_cast(k*k) + ll; sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - Gsq = static_cast(k*k) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - // intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast((k + 1)*(k + 1)) + ll; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast(k*k) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; + Gsq = static_cast(k*k) + ll; sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k][1] = 0.0; + } + + // intermediate rows + + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = 
l*(NFFT_2 + 1); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast((k + 1)*(k + 1)) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - } - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast((k+1)*(k+1)) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast((k+1)*(k+1)) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; + #ifdef HAVE_GOMP } + #endif // Do the Fourier transform to get B(x,y) @@ -789,9 +853,9 @@ 
void TBulkTriVortexAGLFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= field; } @@ -874,13 +938,19 @@ void TBulkTriVortexAGLIIFieldCalc::CalculateGrid() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); - // fill the field Fourier components in the matrix + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + + // fill the field Fourier components in the matrix // ... but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xiV/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; @@ -899,77 +969,92 @@ void TBulkTriVortexAGLIIFieldCalc::CalculateGrid() const { double Gsq, sqrtfInfSqPlusLsqGsq, ll; int k, l, lNFFT_2; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast(k*k) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][0] = ((!k && !l) ? 
1.0 : \ - fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf))); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - k = NFFT_2; - Gsq = static_cast(k*k) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(l,k,lNFFT_2,ll,Gsq,sqrtfInfSqPlusLsqGsq) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast(k*k) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = ((!k && !l) ? 
1.0 : \ + fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf))); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; Gsq = static_cast(k*k) + ll; sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - Gsq = static_cast(k*k) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); - fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - // intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast((k + 1)*(k + 1)) + ll; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast(k*k) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; + Gsq = static_cast(k*k) + ll; sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k][1] = 0.0; + } + + // intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = 
l*(NFFT_2 + 1); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast((k + 1)*(k + 1)) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - } - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast((k+1)*(k+1)) + ll; - sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = 3.0*static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast((k+1)*(k+1)) + ll; + sqrtfInfSqPlusLsqGsq = sqrt(fInf*fInf + lambdasq_scaled*Gsq); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = fInf*TMath::BesselK1(xiV/lambda*sqrtfInfSqPlusLsqGsq)/(sqrtfInfSqPlusLsqGsq*TMath::BesselK1(xiV/lambda*fInf)); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; + #ifdef HAVE_GOMP } + #endif // Do the Fourier transform to get B(x,y) @@ -977,7 +1062,7 @@ 
void TBulkTriVortexAGLIIFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= field; @@ -991,8 +1076,6 @@ void TBulkTriVortexAGLIIFieldCalc::CalculateGrid() const { } - - TBulkTriVortexNGLFieldCalc::TBulkTriVortexNGLFieldCalc(const string& wisdom, const unsigned int steps) : fLatticeConstant(0.0), fKappa(0.0), fSumAk(0.0), fSumOmegaSq(0.0), fSumSum(0.0) { @@ -1108,13 +1191,19 @@ void TBulkTriVortexNGLFieldCalc::CalculateGradient() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); - int i, j, l, index; + int i, j, l; + + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif // Take the derivative of the Fourier sum of omega // First save a copy of the real aK-matrix in the imaginary part of the bK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; ++l) { fBkMatrix[l][1] = fFFTin[l][0]; @@ -1125,34 +1214,45 @@ void TBulkTriVortexNGLFieldCalc::CalculateGradient() const { const double coeffKx(TWOPI/(sqrt3*fLatticeConstant)); - // even rows - for (i = 0; i < NFFT; i += 2) { - // j = 0 - fFFTin[NFFT*i][0] = 0.0; - // j != 0 - for (j = 2; j < NFFT_2; j += 2) { - fFFTin[(j + NFFT*i)][0] *= coeffKx*static_cast(j); + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(i,j) + { + // even rows + #pragma omp section + #endif + for (i = 0; i < NFFT; i += 2) { + // j = 0 + fFFTin[NFFT*i][0] = 0.0; + // j != 0 + for (j = 2; j < NFFT_2; j += 2) { + fFFTin[(j + NFFT*i)][0] *= coeffKx*static_cast(j); + } + for (j = NFFT_2; j < NFFT; j += 2) { + fFFTin[(j + NFFT*i)][0] *= coeffKx*static_cast(j - NFFT); + } } - for (j = 
NFFT_2; j < NFFT; j += 2) { - fFFTin[(j + NFFT*i)][0] *= coeffKx*static_cast(j - NFFT); - } - } - // odd rows - for (i = 1; i < NFFT; i += 2) { - for (j = 0; j < NFFT_2; j += 2) { - fFFTin[(j + 1 + NFFT*i)][0] *= coeffKx*static_cast(j + 1); - } - for (j = NFFT_2; j < NFFT; j += 2) { - fFFTin[(j + 1 + NFFT*i)][0] *= coeffKx*static_cast(j + 1 - NFFT); + // odd rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (i = 1; i < NFFT; i += 2) { + for (j = 0; j < NFFT_2; j += 2) { + fFFTin[(j + 1 + NFFT*i)][0] *= coeffKx*static_cast(j + 1); + } + for (j = NFFT_2; j < NFFT; j += 2) { + fFFTin[(j + 1 + NFFT*i)][0] *= coeffKx*static_cast(j + 1 - NFFT); + } } + #ifdef HAVE_GOMP } + #endif fftw_execute(fFFTplan); // Copy the results to the gradient matrix and restore the original aK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; ++l) { fOmegaDiffMatrix[l][0] = fRealSpaceMatrix[l][1]; @@ -1165,44 +1265,64 @@ void TBulkTriVortexNGLFieldCalc::CalculateGradient() const { const double coeffKy(TWOPI/fLatticeConstant); double ky; - // even rows - // i = 0 - for (j = 0; j < NFFT; j += 2) { - fFFTin[j][0] = 0.0; - } - // i != 0 - for (i = 2; i < NFFT_2; i += 2) { - ky = coeffKy*static_cast(i); + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(i,j,ky) + { + // even rows + // i = 0 + #pragma omp section + #endif for (j = 0; j < NFFT; j += 2) { - fFFTin[(j + NFFT*i)][0] *= ky; + fFFTin[j][0] = 0.0; } - } - for (i = NFFT_2; i < NFFT; i += 2) { - ky = coeffKy*static_cast(i - NFFT); - for (j = 0; j < NFFT; j += 2) { - fFFTin[(j + NFFT*i)][0] *= ky; + // i != 0 + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (i = 2; i < NFFT_2; i += 2) { + ky = coeffKy*static_cast(i); + for (j = 0; j < NFFT; j += 2) { + fFFTin[(j + NFFT*i)][0] *= ky; + } + } + #ifdef HAVE_GOMP + #pragma omp section + #endif + for 
(i = NFFT_2; i < NFFT; i += 2) { + ky = coeffKy*static_cast(i - NFFT); + for (j = 0; j < NFFT; j += 2) { + fFFTin[(j + NFFT*i)][0] *= ky; + } } - } - // odd rows - for (i = 1; i < NFFT_2; i += 2) { - ky = coeffKy*static_cast(i); - for (j = 0; j < NFFT; j += 2) { - fFFTin[(j + 1 + NFFT*i)][0] *= ky; + // odd rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (i = 1; i < NFFT_2; i += 2) { + ky = coeffKy*static_cast(i); + for (j = 0; j < NFFT; j += 2) { + fFFTin[(j + 1 + NFFT*i)][0] *= ky; + } } - } - for (i = NFFT_2 + 1; i < NFFT; i += 2) { - ky = coeffKy*static_cast(i - NFFT); - for (j = 0; j < NFFT; j += 2) { - fFFTin[(j + 1 + NFFT*i)][0] *= ky; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (i = NFFT_2 + 1; i < NFFT; i += 2) { + ky = coeffKy*static_cast(i - NFFT); + for (j = 0; j < NFFT; j += 2) { + fFFTin[(j + 1 + NFFT*i)][0] *= ky; + } } + #ifdef HAVE_GOMP } + #endif fftw_execute(fFFTplan); // Copy the results to the gradient matrix and restore the original aK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; ++l) { fOmegaDiffMatrix[l][1] = fRealSpaceMatrix[l][1]; @@ -1282,96 +1402,113 @@ void TBulkTriVortexNGLFieldCalc::FillAbrikosovCoefficients() const { double Gsq, sign, ll; int k, l, lNFFT_2; - for (l = 0; l < NFFT_2; l += 2) { - if (!(l % 4)) { - sign = 1.0; - } else { - sign = -1.0; + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(k,l,lNFFT_2,Gsq,sign,ll) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + if (!(l % 4)) { + sign = 1.0; + } else { + sign = -1.0; + } + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + sign = -sign; + Gsq = static_cast(k*k) + ll; + fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 
1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + sign = -sign; + Gsq = static_cast((k-NFFT)*(k-NFFT)) + ll; + fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + } - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - sign = -sign; - Gsq = static_cast(k*k) + ll; - fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - for (k = NFFT_2; k < NFFT; k += 2) { - sign = -sign; - Gsq = static_cast((k-NFFT)*(k-NFFT)) + ll; - fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - } - for (l = NFFT_2; l < NFFT; l += 2) { - if (!(l % 4)) { - sign = 1.0; - } else { - sign = -1.0; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + if (!(l % 4)) { + sign = 1.0; + } else { + sign = -1.0; + } + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2; k += 2) { + sign = -sign; + Gsq = static_cast(k*k) + ll; + fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + sign = -sign; + Gsq = static_cast((k-NFFT)*(k-NFFT)) + ll; + fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } } - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2; k += 2) { - sign = -sign; - Gsq = static_cast(k*k) + ll; - fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - for (k = NFFT_2; k < 
NFFT_2; k += 2) { - sign = -sign; - Gsq = static_cast((k-NFFT)*(k-NFFT)) + ll; - fFFTin[lNFFT_2 + k][0] = sign*exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - } - // intermediate rows - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = static_cast((k + 1)*(k + 1)) + ll; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; + // intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = static_cast((k + 1)*(k + 1)) + ll; + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + Gsq = static_cast((k + 1 - NFFT)*(k + 1 - NFFT)) + ll; + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } } - for (k = NFFT_2; k < NFFT; k += 2) { - Gsq = static_cast((k + 1 - NFFT)*(k + 1 - NFFT)) + ll; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - } - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - Gsq = static_cast((k+1)*(k+1)) + ll; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - for (k = NFFT_2; k < NFFT; k += 2) { - Gsq = static_cast((k+1 - NFFT)*(k+1 - NFFT)) + ll; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - 
fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { + Gsq = static_cast((k+1)*(k+1)) + ll; + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + Gsq = static_cast((k+1 - NFFT)*(k+1 - NFFT)) + ll; + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-pi_4sqrt3*Gsq); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } } + #ifdef HAVE_GOMP } + #endif fFFTin[0][0] = 0.0; @@ -1389,83 +1526,99 @@ void TBulkTriVortexNGLFieldCalc::ManipulateFourierCoefficientsA() const { int lNFFT_2, l, k; double Gsq, ll; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = coeff1*(static_cast(k*k) + ll); - fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(k,l,lNFFT_2,Gsq,ll) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = coeff1*(static_cast(k*k) + ll); + fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + Gsq = coeff1*(static_cast((k - NFFT)*(k - NFFT)) + ll); + fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } } - for (k = NFFT_2; k < NFFT; k += 2) { - Gsq = 
coeff1*(static_cast((k - NFFT)*(k - NFFT)) + ll); - fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - } - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - Gsq = coeff1*(static_cast(k*k) + ll); - fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { + Gsq = coeff1*(static_cast(k*k) + ll); + fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + Gsq = coeff1*(static_cast((k-NFFT)*(k-NFFT)) + ll); + fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } } - for (k = NFFT_2; k < NFFT; k += 2) { - Gsq = coeff1*(static_cast((k-NFFT)*(k-NFFT)) + ll); - fFFTin[lNFFT_2 + k][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - } - //intermediate rows + //intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + fFFTin[lNFFT_2 + k][0] = 0.0; + 
fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + } - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - for (k = NFFT_2; k < NFFT; k += 2) { - Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - } - - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT); - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2; k += 2) { - Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - for (k = NFFT_2; k < NFFT; k += 2) { - Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT); + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2; k += 2) { + Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + for (k = NFFT_2; k < NFFT; k += 2) { + Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] *= coeff2/(Gsq+coeff3); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } } + #ifdef HAVE_GOMP } + #endif 
fFFTin[0][0] = 0.0; @@ -1483,87 +1636,103 @@ void TBulkTriVortexNGLFieldCalc::ManipulateFourierCoefficientsB() const { int lNFFT, l, k; double Gsq, ll; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2 - 1; k += 2) { - Gsq = coeff1*(static_cast(k*k) + ll); - fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 1][0] = 0.0; - fBkMatrix[lNFFT + k + 1][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(k,l,lNFFT,Gsq,ll) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2 - 1; k += 2) { + Gsq = coeff1*(static_cast(k*k) + ll); + fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] = 0.0; + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + Gsq = coeff1*(static_cast((k-NFFT)*(k-NFFT)) + ll); + fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] = 0.0; + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } } - for (k = NFFT_2; k < NFFT - 1; k += 2) { - Gsq = coeff1*(static_cast((k-NFFT)*(k-NFFT)) + ll); - fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 1][0] = 0.0; - fBkMatrix[lNFFT + k + 1][1] = 0.0; - } - } - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - Gsq = coeff1*(static_cast(k*k) + ll); - fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 1][0] = 0.0; - fBkMatrix[lNFFT + k + 1][1] = 0.0; - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - Gsq = coeff1*(static_cast((k-NFFT)*(k-NFFT)) + ll); - fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 
1][0] = 0.0; - fBkMatrix[lNFFT + k + 1][1] = 0.0; - } - } - - //intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2 - 1; k += 2) { - Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); - fBkMatrix[lNFFT + k][0] = 0.0; - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k + 1][1] = 0.0; - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); - fBkMatrix[lNFFT + k][0] = 0.0; - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k + 1][1] = 0.0; - } - } - - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); - fBkMatrix[lNFFT + k][0] = 0.0; - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k + 1][1] = 0.0; - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); - fBkMatrix[lNFFT + k][0] = 0.0; - fBkMatrix[lNFFT + k][1] = 0.0; - fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); - fBkMatrix[lNFFT + k + 1][1] = 0.0; + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { + Gsq = coeff1*(static_cast(k*k) + ll); + fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] = 0.0; + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + Gsq = coeff1*(static_cast((k-NFFT)*(k-NFFT)) + ll); + fBkMatrix[lNFFT + k][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] = 0.0; + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } } + + //intermediate rows + #ifdef HAVE_GOMP + 
#pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2 - 1; k += 2) { + Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); + fBkMatrix[lNFFT + k][0] = 0.0; + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + fBkMatrix[lNFFT + k][0] = 0.0; + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } + } + + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { + Gsq = coeff1*(static_cast((k+1)*(k+1)) + ll); + fBkMatrix[lNFFT + k][0] = 0.0; + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + Gsq = coeff1*(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + fBkMatrix[lNFFT + k][0] = 0.0; + fBkMatrix[lNFFT + k][1] = 0.0; + fBkMatrix[lNFFT + k + 1][0] *= coeff2/(Gsq+coeff3); + fBkMatrix[lNFFT + k + 1][1] = 0.0; + } + } + #ifdef HAVE_GOMP } + #endif fBkMatrix[0][0] = 0.0; @@ -1578,58 +1747,74 @@ void TBulkTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForQx() const { int lNFFT, l, k; double ll; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2 - 1; k += 2) { - if (!k && !l) - fBkMatrix[0][0] = 0.0; - else - fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l)/(static_cast(k*k) + ll); + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(k,l,lNFFT,ll) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2 - 1; k += 2) { + if (!k && !l) + 
fBkMatrix[0][0] = 0.0; + else + fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l)/(static_cast(k*k) + ll); + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l)/(static_cast((k-NFFT)*(k-NFFT)) + ll); + } } - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l)/(static_cast((k-NFFT)*(k-NFFT)) + ll); - } - } - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l-NFFT)/(static_cast(k*k) + ll); - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l-NFFT)/(static_cast((k-NFFT)*(k-NFFT)) + ll); - } - } - - //intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2 - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l)/(static_cast((k+1)*(k+1)) + ll); - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); - } - } - - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l-NFFT)/(static_cast((k+1)*(k+1)) + ll); - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l-NFFT)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { + fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l-NFFT)/(static_cast(k*k) + ll); + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(l-NFFT)/(static_cast((k-NFFT)*(k-NFFT)) + ll); + } } + + //intermediate rows + 
#ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2 - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l)/(static_cast((k+1)*(k+1)) + ll); + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + } + } + + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l-NFFT)/(static_cast((k+1)*(k+1)) + ll); + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(l-NFFT)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + } + } + #ifdef HAVE_GOMP } + #endif return; } @@ -1641,65 +1826,81 @@ void TBulkTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForQy() const { int lNFFT, l, k; double ll; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2 - 1; k += 2) { - if (!k && !l) - fBkMatrix[0][0] = 0.0; - else + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(k,l,lNFFT,ll) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2 - 1; k += 2) { + if (!k && !l) + fBkMatrix[0][0] = 0.0; + else + fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(k)/(static_cast(k*k) + ll); + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(k-NFFT)/(static_cast((k-NFFT)*(k-NFFT)) + ll); + } + } + + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(k)/(static_cast(k*k) + ll); + } + 
+ for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(k-NFFT)/(static_cast((k-NFFT)*(k-NFFT)) + ll); + } } - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(k-NFFT)/(static_cast((k-NFFT)*(k-NFFT)) + ll); - } - } - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(k)/(static_cast(k*k) + ll); - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k][0] *= coeff1*static_cast(k-NFFT)/(static_cast((k-NFFT)*(k-NFFT)) + ll); - } - } - - //intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast(l*l); - for (k = 0; k < NFFT_2 - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1)/(static_cast((k+1)*(k+1)) + ll); - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1-NFFT)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); - } - } - - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT = l*NFFT; - ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); - for (k = 0; k < NFFT_2 - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1)/(static_cast((k+1)*(k+1)) + ll); - } - - for (k = NFFT_2; k < NFFT - 1; k += 2) { - fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1-NFFT)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + //intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT = l*NFFT; + ll = 3.0*static_cast(l*l); + for (k = 0; k < NFFT_2 - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1)/(static_cast((k+1)*(k+1)) + ll); + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1-NFFT)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + } } + + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT 
= l*NFFT; + ll = 3.0*static_cast((l-NFFT)*(l-NFFT)); + for (k = 0; k < NFFT_2 - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1)/(static_cast((k+1)*(k+1)) + ll); + } + + for (k = NFFT_2; k < NFFT - 1; k += 2) { + fBkMatrix[lNFFT + k + 1][0] *= coeff1*static_cast(k+1-NFFT)/(static_cast((k+1-NFFT)*(k+1-NFFT)) + ll); + } + } + #ifdef HAVE_GOMP } + #endif return; } void TBulkTriVortexNGLFieldCalc::CalculateSumAk() const { - const int NFFT_2(fSteps/2); - const int NFFTsq_2((fSteps/2 + 1)*fSteps); +// const int NFFT_2(fSteps/2); +// const int NFFTsq_2((fSteps/2 + 1)*fSteps); const int NFFTsq(fSteps*fSteps); /* double SumFirstColumn(0.0); @@ -1713,11 +1914,21 @@ void TBulkTriVortexNGLFieldCalc::CalculateSumAk() const { } fSumAk = 2.0*fSumAk - SumFirstColumn; */ - fSumAk = 0.0; - for (int l(0); l < NFFTsq; ++l) { + double sumAk(0.0); + int l; + + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) reduction(+:sumAk) + #endif + for (l=0; l < NFFTsq; ++l) { fSumAk += fFFTin[l][0]; } + fSumAk = sumAk; + return; } @@ -1743,13 +1954,20 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { const int NFFTsq(fSteps*fSteps); const int NFFTsq_2((fSteps/2 + 1)*fSteps); + #ifdef HAVE_GOMP + int chunk; + #endif + // first check that the field is not larger than Hc2 and that we are dealing with a type II SC ... 
if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif - for (m = 0; m < NFFTsq; m++) { + for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; } // Set the flag which shows that the calculation has been done @@ -1764,9 +1982,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { FillAbrikosovCoefficients(); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFT/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFT; l++) { + for (l = 0; l < NFFT; ++l) { fCheckAkConvergence[l] = fFFTin[l][0]; } @@ -1779,9 +2000,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fOmegaMatrix[l] = fSumAk - fRealSpaceMatrix[l][0]; } @@ -1794,9 +2018,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { double denomQA; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l,denomQA) schedule(dynamic) + #pragma omp parallel for default(shared) private(l,denomQA) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { if (!fOmegaMatrix[l] || !l || (l == (NFFT+1)*NFFT_2)) { fQMatrixA[l][0] = 0.0; fQMatrixA[l][1] = 0.0; @@ -1821,23 +2045,25 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // initialize B(x,y) with the mean field #ifdef HAVE_GOMP - #pragma omp parallel for 
default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fFFTout[l] = scaledB; } bool akConverged(false), bkConverged(false), akInitiallyConverged(false), firstBkCalculation(true); - double fourKappaSq(4.0*fKappa*fKappa); - + double fourKappaSq(4.0*fKappa*fKappa), sumSum, sumOmegaSq; while (!akConverged || !bkConverged) { // First iteration step for aK #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { if (fOmegaMatrix[l]) { fRealSpaceMatrix[l][0] = fOmegaMatrix[l]*(fOmegaMatrix[l] + fQMatrix[l][0]*fQMatrix[l][0] + fQMatrix[l][1]*fQMatrix[l][1] - 2.0) + \ (fOmegaDiffMatrix[l][0]*fOmegaDiffMatrix[l][0] + fOmegaDiffMatrix[l][1]*fOmegaDiffMatrix[l][1])/(fourKappaSq*fOmegaMatrix[l]); @@ -1865,9 +2091,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // Need a copy of the aK-matrix since FFTW is manipulating the input in c2r and r2c transforms // Store it in the first half of the bK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq_2/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq_2; l++) { + for (l = 0; l < NFFTsq_2; ++l) { fBkMatrix[l][0] = fFFTin[l][0]; } @@ -1876,9 +2105,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) 
#endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fOmegaMatrix[l] = fSumAk - fRealSpaceMatrix[l][0]; } @@ -1886,27 +2118,35 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // Get the spacial averages of the second iteration step for aK - fSumSum = 0.0; - fSumOmegaSq = 0.0; - for (l = 0; l < NFFTsq; l++) { - fSumOmegaSq += fOmegaMatrix[l]*fOmegaMatrix[l]; + sumSum = 0.0; + sumOmegaSq = 0.0; + #ifdef HAVE_GOMP + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) reduction(+:sumSum,sumOmegaSq) + #endif + for (l = 0; l < NFFTsq; ++l) { + sumOmegaSq += fOmegaMatrix[l]*fOmegaMatrix[l]; if(fOmegaMatrix[l]){ - fSumSum += fOmegaMatrix[l]*(1.0 - (fQMatrix[l][0]*fQMatrix[l][0] + fQMatrix[l][1]*fQMatrix[l][1])) - \ + sumSum += fOmegaMatrix[l]*(1.0 - (fQMatrix[l][0]*fQMatrix[l][0] + fQMatrix[l][1]*fQMatrix[l][1])) - \ (fOmegaDiffMatrix[l][0]*fOmegaDiffMatrix[l][0] + fOmegaDiffMatrix[l][1]*fOmegaDiffMatrix[l][1])/(fourKappaSq*fOmegaMatrix[l]); } else { if (l < NFFTsq - NFFT && fOmegaMatrix[l+NFFT]) { - fSumSum += fOmegaMatrix[l+NFFT]*(1.0 - (fQMatrix[l+NFFT][0]*fQMatrix[l+NFFT][0] + fQMatrix[l+NFFT][1]*fQMatrix[l+NFFT][1])) - \ + sumSum += fOmegaMatrix[l+NFFT]*(1.0 - (fQMatrix[l+NFFT][0]*fQMatrix[l+NFFT][0] + fQMatrix[l+NFFT][1]*fQMatrix[l+NFFT][1])) - \ (fOmegaDiffMatrix[l+NFFT][0]*fOmegaDiffMatrix[l+NFFT][0] + \ fOmegaDiffMatrix[l+NFFT][1]*fOmegaDiffMatrix[l+NFFT][1])/(fourKappaSq*fOmegaMatrix[l+NFFT]); } } } + fSumSum = sumSum; + fSumOmegaSq = sumOmegaSq; // reduction clauses do not work with class members that have been declared elsewhere // Restore the aK-matrix from the bK-space and multiply with the spacial averages #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq_2/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq_2; l++) { + for (l = 0; l < 
NFFTsq_2; ++l) { fFFTin[l][0] = fBkMatrix[l][0]*fSumSum/fSumOmegaSq; fFFTin[l][1] = 0.0; } @@ -1915,7 +2155,7 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { akConverged = true; - for (l = 0; l < NFFT; l++) { + for (l = 0; l < NFFT; ++l) { if (fFFTin[l][0]){ if (((fabs(fFFTin[l][0]) > 1.0E-6) && (fabs(fCheckAkConvergence[l] - fFFTin[l][0])/fFFTin[l][0] > 1.0E-3)) || \ (fCheckAkConvergence[l]/fFFTin[l][0] < 0.0)) { @@ -1928,10 +2168,13 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { } if (!akConverged) { - #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) - #endif - for (l = 0; l < NFFT; l++) { + #ifdef HAVE_GOMP + chunk = NFFT/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) + #endif + for (l = 0; l < NFFT; ++l) { fCheckAkConvergence[l] = fFFTin[l][0]; } } else { @@ -1952,9 +2195,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fOmegaMatrix[l] = fSumAk - fRealSpaceMatrix[l][0]; } @@ -1963,9 +2209,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { if (akInitiallyConverged) { // if the aK iterations converged, go on with the bK calculation //cout << "converged, count=" << count << endl; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fBkMatrix[l][0] = fOmegaMatrix[l]*fFFTout[l] + fSumAk*(scaledB - fFFTout[l]) + \ fQMatrix[l][1]*fOmegaDiffMatrix[l][0] - 
fQMatrix[l][0]*fOmegaDiffMatrix[l][1]; fBkMatrix[l][1] = 0.0; @@ -1984,9 +2230,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { if (firstBkCalculation) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFT/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFT; l++) { + for (l = 0; l < NFFT; ++l) { fCheckBkConvergence[l] = 0.0; } firstBkCalculation = false; @@ -1995,7 +2244,7 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { bkConverged = true; - for (l = 0; l < NFFT; l++) { + for (l = 0; l < NFFT; ++l) { if (fBkMatrix[l][0]) { if (((fabs(fBkMatrix[l][0]) > 1.0E-6) && (fabs(fCheckBkConvergence[l] - fBkMatrix[l][0])/fabs(fBkMatrix[l][0]) > 1.0E-3)) || \ (fCheckBkConvergence[l]/fBkMatrix[l][0] < 0.0)) { @@ -2010,9 +2259,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { if (!bkConverged) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFT/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFT; l++) { + for (l = 0; l < NFFT; ++l) { fCheckBkConvergence[l] = fBkMatrix[l][0]; } } @@ -2022,7 +2274,10 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix // Since aK is only half the size of bK, store every second entry in the imaginary part of aK #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; l+=2) { fFFTin[l/2][0] = fBkMatrix[l][0]; @@ -2034,9 +2289,9 @@ void 
TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplanBkToBandQ); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fFFTout[l] = scaledB + fBkMatrix[l][0]; } @@ -2045,7 +2300,7 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // Restore bKs for Qx calculation and Fourier transform to get Qx #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; l+=2) { fBkMatrix[l][0] = fFFTin[l/2][0]; @@ -2059,16 +2314,16 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplanBkToBandQ); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fQMatrix[l][0] = fQMatrixA[l][0] - fBkMatrix[l][1]; } // Restore bKs for Qy calculation and Fourier transform to get Qy #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; l+=2) { fBkMatrix[l][0] = fFFTin[l/2][0]; @@ -2082,9 +2337,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplanBkToBandQ); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fQMatrix[l][1] = fQMatrixA[l][1] + fBkMatrix[l][1]; } } // end if (akInitiallyConverged) @@ -2093,9 +2348,12 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // If the 
iterations have converged, rescale the field from Brandt's units to Gauss #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif - for (l = 0; l < NFFTsq; l++) { + for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= Hc2_kappa; } @@ -2184,13 +2442,20 @@ void TBulkAnisotropicTriVortexLondonFieldCalc::CalculateGrid() const { const int NFFTsq(fSteps*fSteps); const int NFFTsq_2((fSteps/2 + 1) * fSteps); + #ifdef HAVE_GOMP + int chunk; + #endif + // fill the field Fourier components in the matrix // ... but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (sqrt(lambdaX*lambdaY) < sqrt(xiX*xiY)/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; @@ -2206,7 +2471,10 @@ void TBulkAnisotropicTriVortexLondonFieldCalc::CalculateGrid() const { // zero first everything since the r2c FFT changes the input, too #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(k) schedule(dynamic) + chunk = NFFTsq_2/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(k) schedule(dynamic,chunk) #endif for (k = 0; k < NFFTsq_2; ++k) { fFFTin[k][0] = 0.0; @@ -2214,90 +2482,84 @@ void TBulkAnisotropicTriVortexLondonFieldCalc::CalculateGrid() const { } #ifdef HAVE_GOMP - #pragma omp parallel private(k, l, lNFFT_2, kk, ll) num_threads(4) + #pragma omp parallel sections default(shared) private(k, l, lNFFT_2, kk, ll) { - switch(omp_get_thread_num()) { - case 0: + #pragma omp section #endif - for (l = 0; l < NFFT_2; l += 2) { - lNFFT_2 
= l*(NFFT_2 + 1); - ll = static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast(k*k); - fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + for (l = 0; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast(k*k); + fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); +// fFFTin[lNFFT_2 + k][1] = 0.0; +// fFFTin[lNFFT_2 + k + 1][0] = 0.0; +// fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + + k = NFFT_2; + kk = static_cast(k*k); + fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); +// fFFTin[lNFFT_2 + k][1] = 0.0; + } + + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast(k*k); + fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); // fFFTin[lNFFT_2 + k][1] = 0.0; // fFFTin[lNFFT_2 + k + 1][0] = 0.0; // fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } + } - k = NFFT_2; - kk = static_cast(k*k); - fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + k = NFFT_2; + kk = static_cast(k*k); + fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); // fFFTin[lNFFT_2 + k][1] = 0.0; + } - } - #ifdef HAVE_GOMP - break; - case 1: - #endif - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast(k*k); - fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); -// fFFTin[lNFFT_2 + k][1] = 0.0; -// 
fFFTin[lNFFT_2 + k + 1][0] = 0.0; -// fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - - k = NFFT_2; - kk = static_cast(k*k); - fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); -// fFFTin[lNFFT_2 + k][1] = 0.0; - } - #ifdef HAVE_GOMP - break; - case 2: - #endif - // intermediate rows - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast((k + 1)*(k + 1)); + // intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast((k + 1)*(k + 1)); // fFFTin[lNFFT_2 + k][0] = 0.0; // fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); // fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } + } // k = NFFT_2; // fFFTin[lNFFT_2 + k][0] = 0.0; // fFFTin[lNFFT_2 + k][1] = 0.0; - } - #ifdef HAVE_GOMP - break; - case 3: - #endif - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast((k+1)*(k+1)); + } + + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast((k+1)*(k+1)); // fFFTin[lNFFT_2 + k][0] = 0.0; // fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); // fFFTin[lNFFT_2 + k + 1][1] = 0.0; - 
} - k = NFFT_2; + } + k = NFFT_2; // fFFTin[lNFFT_2 + k][0] = 0.0; // fFFTin[lNFFT_2 + k][1] = 0.0; - } - #ifdef HAVE_GOMP - break; - default: - break; } + #ifdef HAVE_GOMP } #endif @@ -2307,7 +2569,10 @@ void TBulkAnisotropicTriVortexLondonFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= field; @@ -2398,13 +2663,19 @@ void TBulkAnisotropicTriVortexMLFieldCalc::CalculateGrid() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + // fill the field Fourier components in the matrix // ... but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (sqrt(lambdaX*lambdaY) < sqrt(xiX*xiY)/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; @@ -2418,70 +2689,85 @@ void TBulkAnisotropicTriVortexMLFieldCalc::CalculateGrid() const { double kk, ll; int k, l, lNFFT_2; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(k, l, lNFFT_2, kk, ll) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast(k*k); + fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k][1] 
= 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; kk = static_cast(k*k); fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - kk = static_cast(k*k); - fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast(k*k); + fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; kk = static_cast(k*k); fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - kk = static_cast(k*k); - fFFTin[lNFFT_2 + k][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - // intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast((k + 1)*(k + 1)); + // intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast((k + 1)*(k + 
1)); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - } - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast((k+1)*(k+1)); + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast((k+1)*(k+1)); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = exp(-(xiXsq_2_scaled*kk + xiYsq_2_scaled*ll))/(1.0+lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; + #ifdef HAVE_GOMP } + #endif // Do the Fourier transform to get B(x,y) @@ -2489,7 +2775,7 @@ void TBulkAnisotropicTriVortexMLFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= field; @@ -2583,13 +2869,19 @@ void 
TBulkAnisotropicTriVortexAGLFieldCalc::CalculateGrid() const { const int NFFT_2(fSteps/2); const int NFFTsq(fSteps*fSteps); + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + // fill the field Fourier components in the matrix // ... but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (sqrt(lambdaX*lambdaY) < sqrt(xiX*xiY)/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif for (m = 0; m < NFFTsq; ++m) { fFFTout[m] = field; @@ -2603,76 +2895,91 @@ void TBulkAnisotropicTriVortexAGLFieldCalc::CalculateGrid() const { double kk, ll, u; int k, l, lNFFT_2; - for (l = 0; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast(k*k); - u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); - fFFTin[lNFFT_2 + k][0] = ((!k && !l) ? 1.0 : coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk)); - fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; - } - k = NFFT_2; - kk = static_cast(k*k); - u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); - fFFTin[lNFFT_2 + k][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - - - for (l = NFFT_2; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { + #ifdef HAVE_GOMP + #pragma omp parallel sections default(shared) private(k, l, lNFFT_2, kk, ll, u) + { + #pragma omp section + #endif + for (l = 0; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast(k*k); + u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); + fFFTin[lNFFT_2 + k][0] = ((!k && !l) ? 
1.0 : coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk)); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; kk = static_cast(k*k); u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); fFFTin[lNFFT_2 + k][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = 0.0; - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - kk = static_cast(k*k); - u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); - fFFTin[lNFFT_2 + k][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k][1] = 0.0; - } - // intermediate rows - - for (l = 1; l < NFFT_2; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast(l*l); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast((k + 1)*(k + 1)); + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast(k*k); + u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); + fFFTin[lNFFT_2 + k][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 0.0; + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; + kk = static_cast(k*k); u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); + fFFTin[lNFFT_2 + k][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k][1] = 0.0; + } + + // intermediate rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = 1; l < NFFT_2; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast(l*l); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast((k + 1)*(k + 1)); + u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = 
coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; - } - for (l = NFFT_2 + 1; l < NFFT; l += 2) { - lNFFT_2 = l*(NFFT_2 + 1); - ll = static_cast((NFFT-l)*(NFFT-l)); - for (k = 0; k < NFFT_2; k += 2) { - kk = static_cast((k+1)*(k+1)); - u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (l = NFFT_2 + 1; l < NFFT; l += 2) { + lNFFT_2 = l*(NFFT_2 + 1); + ll = static_cast((NFFT-l)*(NFFT-l)); + for (k = 0; k < NFFT_2; k += 2) { + kk = static_cast((k+1)*(k+1)); + u = sqrt(xiXsq_scaled*kk + xiYsq_scaled*ll); + fFFTin[lNFFT_2 + k][0] = 0.0; + fFFTin[lNFFT_2 + k][1] = 0.0; + fFFTin[lNFFT_2 + k + 1][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); + fFFTin[lNFFT_2 + k + 1][1] = 0.0; + } + k = NFFT_2; fFFTin[lNFFT_2 + k][0] = 0.0; fFFTin[lNFFT_2 + k][1] = 0.0; - fFFTin[lNFFT_2 + k + 1][0] = coeff1*u*TMath::BesselK1(u)/(lambdaXsq_scaled*ll+lambdaYsq_scaled*kk); - fFFTin[lNFFT_2 + k + 1][1] = 0.0; } - k = NFFT_2; - fFFTin[lNFFT_2 + k][0] = 0.0; - fFFTin[lNFFT_2 + k][1] = 0.0; + #ifdef HAVE_GOMP } + #endif // Do the Fourier transform to get B(x,y) @@ -2680,7 +2987,7 @@ void TBulkAnisotropicTriVortexAGLFieldCalc::CalculateGrid() const { // Multiply by the applied field #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsq; ++l) { fFFTout[l] *= field; diff --git a/src/external/libFitPofB/classes/TFilmTriVortexFieldCalc.cpp b/src/external/libFitPofB/classes/TFilmTriVortexFieldCalc.cpp index 6fdd0675..b97aa37f 100644 --- 
a/src/external/libFitPofB/classes/TFilmTriVortexFieldCalc.cpp +++ b/src/external/libFitPofB/classes/TFilmTriVortexFieldCalc.cpp @@ -290,7 +290,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { // First save a copy of the real aK-matrix in the imaginary part of the bK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + int chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][1] = fFFTin[l][0]; @@ -299,61 +302,86 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { // sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z) // First multiply the aK with Kx^2, then call FFTW - float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));; + float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f)); // k = 0 - - // even rows - for (i = 0; i < NFFT; i += 2) { - // j = 0 - fFFTin[fStepsZ*NFFT*i][0] = 0.0f; - // j != 0 - for (j = 2; j < NFFT_2; j += 2) { - fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast(j*j); - } - for (j = NFFT_2; j < NFFT; j += 2) { - fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast((j - NFFT)*(j - NFFT)); - } - } - - // odd rows - for (i = 1; i < NFFT; i += 2) { - for (j = 0; j < NFFT_2; j += 2) { - fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1)*(j + 1)); - } - for (j = NFFT_2; j < NFFT; j += 2) { - fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1 - NFFT)*(j + 1 - NFFT)); - } - } - - // k != 0 - - if (fFind3dSolution) { - for (k = 1; k < NFFTz; ++k) { + #ifdef HAVE_GOMP + #pragma omp parallel default(shared) private(i,j) + { + #pragma omp sections + { // even rows + #pragma omp section + #endif for (i = 0; i < NFFT; i += 2) { // j = 0 - fFFTin[k + NFFTz*NFFT*i][0] = 0.0f; + fFFTin[fStepsZ*NFFT*i][0] = 0.0f; // j != 0 for (j = 2; j < NFFT_2; j += 2) { - fFFTin[k + NFFTz*(j + NFFT*i)][0] *= 
coeffKx*static_cast(j*j); + fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast(j*j); } for (j = NFFT_2; j < NFFT; j += 2) { - fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast((j - NFFT)*(j - NFFT)); + fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast((j - NFFT)*(j - NFFT)); } } // odd rows + #ifdef HAVE_GOMP + #pragma omp section + #endif for (i = 1; i < NFFT; i += 2) { for (j = 0; j < NFFT_2; j += 2) { - fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1)*(j + 1)); + fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1)*(j + 1)); } for (j = NFFT_2; j < NFFT; j += 2) { - fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1 - NFFT)*(j + 1 - NFFT)); + fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1 - NFFT)*(j + 1 - NFFT)); } } - } - } // else do nothing since the other aK are already zero since the former aK manipulation + #ifdef HAVE_GOMP + } // end omp sections + #endif + + // k != 0 + if (fFind3dSolution) { + for (k = 1; k < NFFTz; ++k) { + #ifdef HAVE_GOMP + #pragma omp sections + { + // even rows + #pragma omp section + #endif + for (i = 0; i < NFFT; i += 2) { + // j = 0 + fFFTin[k + NFFTz*NFFT*i][0] = 0.0f; + // j != 0 + for (j = 2; j < NFFT_2; j += 2) { + fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast(j*j); + } + for (j = NFFT_2; j < NFFT; j += 2) { + fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast((j - NFFT)*(j - NFFT)); + } + } + + // odd rows + #ifdef HAVE_GOMP + #pragma omp section + #endif + for (i = 1; i < NFFT; i += 2) { + for (j = 0; j < NFFT_2; j += 2) { + fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1)*(j + 1)); + } + for (j = NFFT_2; j < NFFT; j += 2) { + fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast((j + 1 - NFFT)*(j + 1 - NFFT)); + } + } + #ifdef HAVE_GOMP + } + #endif + } + } // else do nothing since the other aK are already zero since the former aK manipulation + #ifdef HAVE_GOMP + } // end omp parallel + #endif 
fftwf_execute(fFFTplan); @@ -363,7 +391,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { fGstorage[k] = fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0]; } #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; @@ -473,7 +501,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0]; } #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; @@ -583,7 +611,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1]; } #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; @@ -612,13 +640,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { const int NFFTz_2(fStepsZ/2); int i, j, k, l, index; + #ifdef HAVE_GOMP + int chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif // Take the derivative of the Fourier sum of omega // This is going to be a bit lengthy... 
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][1] = fFFTin[l][0]; @@ -687,7 +720,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { // Copy the results to the gradient matrix and restore the original aK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1]; @@ -777,7 +810,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { // Copy the results to the gradient matrix and restore the original aK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1]; @@ -838,10 +871,16 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { // 3D transform fftwf_execute(fFFTplan); + #ifdef HAVE_GOMP + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + // Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1]; @@ -850,7 +889,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { // Restore the original aK-matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) 
private(l) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; @@ -859,7 +901,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { } else { // For the 2D solution, dw/dz = 0 #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fOmegaDiffMatrix[2][l] = 0.0; @@ -1004,9 +1046,15 @@ void TFilmTriVortexNGLFieldCalc::FillAbrikosovCoefficients(const float reducedFi fFFTin[0][0] = 0.0; + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + for (k = 1; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fFFTin[k + NFFTz*index][0] = 0.0; @@ -1699,9 +1747,16 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsB() const { } */ } else { // for 2D solution only + + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + for (k = 1; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[k + NFFTz*index][0] = 0.0; @@ -1971,10 +2026,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXFirst() c int i, j, k, kx, ky, kz, index; float ii; + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif // k = 0 #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) 
schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; @@ -2116,10 +2176,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXSecond() int i, j, k, kx, ky, kz, index; float ii; + #ifdef HAVE_GOMP + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif // k = 0 #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; @@ -2260,7 +2325,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYFirst() c // k = 0 #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; @@ -2397,7 +2465,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYSecond() // k = 0 #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + int chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; @@ -2575,11 +2646,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { const int NFFTz_2(fStepsZ/2); const int NFFTsqStZ_2(NFFTsq*(fStepsZ/2 + 1)); + #ifdef HAVE_GOMP + int chunk; + #endif + // first check that the field is not larger than Hc2 and that we are dealing with a type II SC ... 
if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(m) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk) #endif for (m = 0; m < NFFTsqStZ; ++m) { fBout[0][m] = 0.0f; @@ -2601,7 +2679,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(j,index) schedule(dynamic) + chunk = NFFT/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(j,index) schedule(dynamic,chunk) #endif for (j = 0; j < NFFT; ++j) { index = k + NFFTz*j; @@ -2626,7 +2707,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { for (k = 0; k < NFFTz; ++k) { for (j = 0; j < NFFT; ++j) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i,index) schedule(dynamic) + #pragma omp parallel for default(shared) private(i,index) schedule(dynamic,chunk) #endif for (i = 0; i < NFFT; ++i) { index = k + NFFTz*(j + NFFT*i); @@ -2645,7 +2726,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { int indexQA; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) { @@ -2672,7 +2756,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fQMatrix[k + 
NFFTz*index][0] = fQMatrixA[index][0]; @@ -2682,7 +2766,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // initialize the bK-Matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][0] = 0.0; @@ -2708,7 +2795,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // } #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; l++) { if (fOmegaMatrix[l]) { @@ -2726,9 +2816,6 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // since all of this should be a smooth function anyway, I set the value of the next neighbour r // for the two vortex core positions in my matrix // If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment) - #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(k) schedule(dynamic) - #endif for (k = 0; k < NFFTz; ++k) { fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k]; fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k]; @@ -2747,9 +2834,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplan); + #ifdef HAVE_GOMP + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fOmegaMatrix[k + NFFTz*index] = 
fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0]; @@ -2794,7 +2887,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Multiply the aK with the spacial averages for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum; @@ -2823,9 +2916,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { } if (!akConverged) { + #ifdef HAVE_GOMP + chunk = NFFT/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(j, index) schedule(dynamic) + #pragma omp parallel for default(shared) private(j, index) schedule(dynamic,chunk) #endif for (j = 0; j < NFFT; ++j) { index = k + NFFTz*j; @@ -2856,9 +2954,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplan); + #ifdef HAVE_GOMP + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0]; @@ -2869,6 +2973,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // first calculate PK (use the Q-Matrix memory for the second part) #ifdef HAVE_GOMP + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; #pragma omp parallel for default(shared) private(l) schedule(dynamic) #endif for (l = 0; l < NFFTsqStZ; ++l) { @@ -2905,7 +3012,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { if (firstBkCalculation) { #ifdef HAVE_GOMP - #pragma omp 
parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTStZ; ++l) { fCheckBkConvergence[l] = 0.0f; @@ -2937,9 +3047,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // cout << "Bk Convergence: " << bkConverged << endl; if (!bkConverged) { + #ifdef HAVE_GOMP + chunk = NFFT/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(j) schedule(dynamic) + #pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk) #endif for (j = 0; j < NFFT; ++j) { index = k + NFFTz*j; @@ -2950,7 +3065,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][1] = fBkMatrix[l][0]; @@ -2986,9 +3104,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); + #ifdef HAVE_GOMP + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1]; @@ -2997,7 +3121,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Restore bKs for Qy calculation and Fourier transform to get Qy 
#ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][0] = fFFTin[l][1]; @@ -3008,9 +3135,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); + #ifdef HAVE_GOMP + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #endif + for (k = 0; k < NFFTz; ++k) { #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1]; @@ -3019,7 +3152,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Restore bKs for the next iteration #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][0] = fFFTin[l][1]; @@ -3035,7 +3171,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + chunk = NFFTsqStZ/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[0][l] = fBkMatrix[l][0]; @@ -3050,7 +3189,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Fill in the B-Matrix and restore the bKs for the By-calculation #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) 
schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa; @@ -3064,7 +3203,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Fill in the B-Matrix and restore the bKs for the second part of the By-calculation #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[1][l] = fBkMatrix[l][0]; @@ -3078,7 +3217,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Fill in the B-Matrix and restore the bKs for the second part of the By-calculation #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa; @@ -3089,7 +3228,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk) #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa; @@ -3099,7 +3238,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Save a copy of the BkS - where does not matter since this is the very end of the calculation... 
#ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + chunk = NFFTsq/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fFFTin[index][1] = fBkS[index][0]; @@ -3112,7 +3254,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Write the surface fields to the field-Matrix and restore the BkS for the By-calculation #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa; @@ -3127,7 +3269,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Write the surface fields to the field-Matrix #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk) #endif for (index = 0; index < NFFTsq; ++index) { fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa; diff --git a/src/external/libFitPofB/classes/TPofBCalc.cpp b/src/external/libFitPofB/classes/TPofBCalc.cpp index 42b55019..d37ed24b 100644 --- a/src/external/libFitPofB/classes/TPofBCalc.cpp +++ b/src/external/libFitPofB/classes/TPofBCalc.cpp @@ -62,7 +62,10 @@ TPofBCalc::TPofBCalc(const vector ¶) : fBmin(0.0), fBmax(0.0), fDT(p int i; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + int chunk = fPBSize/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = 0; i < static_cast(fPBSize); ++i) { fB[i] = static_cast(i)*fDB; @@ -83,7 +86,10 @@ TPofBCalc::TPofBCalc(const vector& b, const vector& pb, double d int i; #ifdef HAVE_GOMP - #pragma omp parallel for 
default(shared) private(i) schedule(dynamic) + int chunk = fPBSize/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = 0; i < static_cast(fPBSize); ++i) { fB[i] = b[i]; @@ -121,9 +127,12 @@ TPofBCalc::TPofBCalc(const vector& b, const vector& pb, double d void TPofBCalc::UnsetPBExists() { int i; -#ifdef HAVE_GOMP -#pragma omp parallel for default(shared) private(i) schedule(dynamic) -#endif + #ifdef HAVE_GOMP + int chunk = fPBSize/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) + #endif for (i = 0; i < static_cast(fPBSize); ++i) { fPB[i] = 0.0; } @@ -140,14 +149,17 @@ void TPofBCalc::Normalize(unsigned int minFilledIndex = 0, unsigned int maxFille double pBsum(0.0); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum) + int chunk = (maxFilledIndex-minFilledIndex)/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum) #endif for (i = minFilledIndex; i <= static_cast(maxFilledIndex); ++i) pBsum += fPB[i]; pBsum *= fDB; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = minFilledIndex; i <= static_cast(maxFilledIndex); ++i) fPB[i] /= pBsum; @@ -160,7 +172,10 @@ void TPofBCalc::SetPB(const vector &pb) const { int i; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + int chunk = fPBSize/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = 0; i < static_cast(fPBSize); ++i) { fPB[i] = pb[i]; @@ -190,14 +205,20 @@ void TPofBCalc::Calculate(const string &type, const vector ¶) { int i; #ifdef 
HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + int chunk = B0Index/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = 0; i < B0Index; ++i) { fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expominus); } #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + chunk = (BmaxIndex-B0Index)/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = B0Index; i <= BmaxIndex; ++i) { fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expoplus); @@ -546,6 +567,10 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto // cannot use a reduction clause here (like e.g. in Normalize()), since pBvec[] is not a scalar variable // therefore, we need to work on it a bit more int n(omp_get_num_procs()), tid, offset; + int chunk = fPBSize/n; + if (chunk < 10) + chunk = 10; + vector< vector > pBvec(n, vector(fPBSize, 0)); int indexStep(static_cast(floor(static_cast(numberOfSteps_2)/static_cast(n)))); @@ -577,7 +602,7 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto } for (j = 0; j < n; ++j) { - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) for (i = 0; i < static_cast(fPBSize); ++i) { fPB[i] += static_cast(pBvec[j][i]); } @@ -624,7 +649,10 @@ void TPofBCalc::AddBackground(double B, double s, double w) { // calculate Gaussian background double bg[fPBSize]; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + int chunk = fPBSize/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for(i = 0; i < static_cast(fPBSize); ++i) { bg[i] = exp(-(fB[i]-B)*(fB[i]-B)/(2.0*BsSq)); @@ -634,7 
+662,7 @@ void TPofBCalc::AddBackground(double B, double s, double w) { double bgsum(0.0); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:bgsum) + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:bgsum) #endif for (i = 0; i < static_cast(fPBSize); ++i) bgsum += bg[i]; @@ -642,14 +670,14 @@ void TPofBCalc::AddBackground(double B, double s, double w) { bgsum *= fDB; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = 0; i < static_cast(fPBSize); ++i) bg[i] /= bgsum; // add background to P(B) #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) #endif for (i = 0; i < static_cast(fPBSize); ++i) fPB[i] = (1.0 - w)*fPB[i] + w*bg[i]; @@ -691,7 +719,10 @@ void TPofBCalc::ConvolveGss(double w) { int i; #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic) + int chunk = (NFFT/2+1)/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic,chunk) #endif for (i = 0; i < static_cast(NFFT/2+1); ++i) { GssInTimeDomain = exp(expo*static_cast(i)*static_cast(i)); @@ -725,7 +756,10 @@ double TPofBCalc::GetFirstMoment() const { double pBsum(0.0); #ifdef HAVE_GOMP - #pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum) + int chunk = fPBSize/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum) #endif for (i = 0; i < static_cast(fPBSize); ++i) pBsum += fB[i]*fPB[i]; @@ -746,7 +780,10 @@ double TPofBCalc::GetCentralMoment(unsigned int n) const { double pBsum(0.0); #ifdef HAVE_GOMP - #pragma omp 
parallel for default(shared) private(i, diff) schedule(dynamic) reduction(+:pBsum) + int chunk = fPBSize/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i, diff) schedule(dynamic,chunk) reduction(+:pBsum) #endif for (i = 0; i < static_cast(fPBSize); ++i) { diff = fB[i]-firstMoment; diff --git a/src/external/libFitPofB/classes/TPofTCalc.cpp b/src/external/libFitPofB/classes/TPofTCalc.cpp index 30a61b62..fe11bb1b 100644 --- a/src/external/libFitPofB/classes/TPofTCalc.cpp +++ b/src/external/libFitPofB/classes/TPofTCalc.cpp @@ -102,10 +102,13 @@ TPofTCalc::TPofTCalc (const TPofBCalc *PofB, const string &wisdom, const vector< int i; -#ifdef HAVE_GOMP -#pragma omp parallel for default(shared) private(i) schedule(dynamic) -#endif - for (i = 0; i < NFFT_2p1; i++) { + #ifdef HAVE_GOMP + int chunk = NFFT_2p1/omp_get_num_procs(); + if (chunk < 10) + chunk = 10; + #pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) + #endif + for (i = 0; i < NFFT_2p1; ++i) { fT[i] = static_cast(i)*fTBin; } @@ -190,10 +193,13 @@ void TPofTCalc::CalcPol(const vector &par) { double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0)); int i; -#ifdef HAVE_GOMP -#pragma omp parallel for default(shared) private(i) schedule(dynamic) -#endif - for (i=0; i vector N0; vector bg; - for(unsigned int i(0); i double ttime; int j,k; - for(unsigned int i(0); i(floor(ttime/fTBin)); asydata[j]=asy0[i]*(fPT[k]+(fPT[k+1]-fPT[k])/fTBin*(ttime-fT[k])); @@ -289,12 +301,12 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector vector< vector > histo; vector data(nChannels); - for (unsigned int i(0); i vector histoData; TString name; - for (unsigned int i(0); i name += i; fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]); // fill theoHisto -#ifdef HAVE_GOMP -#pragma omp parallel for default(shared) private(j) schedule(dynamic) -#endif - for (j = 0; jSetBinContent(j, 
histo[i][j]); // end omp