diff --git a/configure.ac b/configure.ac index e3f96afb8..cfc3e7b3e 100644 --- a/configure.ac +++ b/configure.ac @@ -209,18 +209,6 @@ if test "${FFTW3_FOUND}" != "1"; then FFTW3_LIBS="-L${FFTW3_PREFIX}/lib -lfftw3 -lm" FFTW3_CFLAGS="-I${FFTW3_PREFIX}/include" fi -# Check for fftw3_threads-library. If available musrfit is also linked against it (used in libTFitPofB). -SAVED_CFLAGS="$CFLAGS" -CFLAGS="$CFLAGS $FFTW3_CFLAGS" -SAVED_LIBSS="$LIBS" -LIBS="$LIBS $FFTW3_LIBS" -AC_SEARCH_LIBS([fftw_init_threads], [fftw3_threads], [FFTW3_LIBS="$FFTW3_LIBS -lfftw3_threads -lpthread" - AC_DEFINE([HAVE_LIBFFTW3_THREADS], [1], [Define to 1 if fftw3_threads are available])], [], [-lpthread]) -CFLAGS="$SAVED_CFLAGS" -LIBS="$SAVED_LIBS" -AC_SUBST(FFTW3_LIBS) -AC_SUBST(FFTW3_CFLAGS) - dnl ----------------------------------------------- dnl Ask user for path to gsl @@ -487,7 +475,28 @@ if test "${BUILD_BMW_LIBS}" = "1"; then TFITPOFB_CFLAGS="-I${TFITPOFB_SRCDIR}/include" AC_SUBST(TFITPOFB_LIBS) AC_SUBST(TFITPOFB_CFLAGS) + + # Check for fftw3_threads-library. If available musrfit is also linked against it (used in libTFitPofB). CFLAGS and LIBS are saved and restored around the probe. + SAVED_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $FFTW3_CFLAGS" + SAVED_LIBS="$LIBS" + LIBS="$LIBS $FFTW3_LIBS" + AC_SEARCH_LIBS([fftw_init_threads], [fftw3_threads], [FFTW3_LIBS="$FFTW3_LIBS -lfftw3_threads -lpthread" + AC_DEFINE([HAVE_LIBFFTW3_THREADS], [1], [Define to 1 if fftw3_threads are available])], [], [-lpthread]) + CFLAGS="$SAVED_CFLAGS" + LIBS="$SAVED_LIBS" + + # Check for gomp-library. If available musrfit is also linked against it (used in libTFitPofB). 
+ SAVED_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="$CXXFLAGS -fopenmp" + SAVED_LIBS="$LIBS" + LIBS="$LIBS -fopenmp" + AC_SEARCH_LIBS([omp_get_num_procs], [gomp], [AC_DEFINE([HAVE_GOMP], [1], [Define to 1 if gomp is available])], + [CXXFLAGS="$SAVED_CXXFLAGS" LIBS="$SAVED_LIBS"], []) fi +AC_SUBST(FFTW3_LIBS) +AC_SUBST(FFTW3_CFLAGS) + dnl ----------------------------------------------- dnl Set host specific compiler and linker flags diff --git a/src/external/TFitPofB-lib/classes/TBofZCalc.cpp b/src/external/TFitPofB-lib/classes/TBofZCalc.cpp index 808f7f84f..5cc2b2b11 100644 --- a/src/external/TFitPofB-lib/classes/TBofZCalc.cpp +++ b/src/external/TFitPofB-lib/classes/TBofZCalc.cpp @@ -50,7 +50,9 @@ void TBofZCalc::Calculate() fZ.resize(fSteps); fBZ.resize(fSteps); +#ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic) +#endif for (j=0; j(j)*fDZ; fZ[j] = ZZ; diff --git a/src/external/TFitPofB-lib/classes/TBulkTriVortexFieldCalc.cpp b/src/external/TFitPofB-lib/classes/TBulkTriVortexFieldCalc.cpp index b00275f4c..467c0f8b0 100644 --- a/src/external/TFitPofB-lib/classes/TBulkTriVortexFieldCalc.cpp +++ b/src/external/TFitPofB-lib/classes/TBulkTriVortexFieldCalc.cpp @@ -130,8 +130,13 @@ TBulkTriVortexLondonFieldCalc::TBulkTriVortexLondonFieldCalc(const string& wisdo #ifdef HAVE_LIBFFTW3_THREADS int init_threads(fftw_init_threads()); - if (init_threads) + if (init_threads) { +#ifdef HAVE_GOMP + fftw_plan_with_nthreads(omp_get_num_procs()); +#else fftw_plan_with_nthreads(2); +#endif /* HAVE_GOMP */ + } #endif /* HAVE_LIBFFTW3_THREADS */ fFFTin = new fftw_complex[(fSteps/2 + 1) * fSteps]; @@ -191,7 +196,9 @@ void TBulkTriVortexLondonFieldCalc::CalculateGrid() const { // ... 
but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #endif for (m = 0; m < NFFTsq; m++) { fFFTout[m] = field; } @@ -274,7 +281,9 @@ void TBulkTriVortexLondonFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); // Multiply by the applied field + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] *= field; } @@ -298,8 +307,13 @@ TBulkSqVortexLondonFieldCalc::TBulkSqVortexLondonFieldCalc(const string& wisdom, #ifdef HAVE_LIBFFTW3_THREADS int init_threads(fftw_init_threads()); - if (init_threads) + if (init_threads) { +#ifdef HAVE_GOMP + fftw_plan_with_nthreads(omp_get_num_procs()); +#else fftw_plan_with_nthreads(2); +#endif /* HAVE_GOMP */ + } #endif /* HAVE_LIBFFTW3_THREADS */ fFFTin = new fftw_complex[(fSteps/2 + 1) * fSteps]; @@ -359,7 +373,9 @@ void TBulkSqVortexLondonFieldCalc::CalculateGrid() const { // ... 
but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #endif for (m = 0; m < NFFTsq; m++) { fFFTout[m] = field; } @@ -397,7 +413,9 @@ void TBulkSqVortexLondonFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); // Multiply by the applied field + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] *= field; } @@ -423,8 +441,13 @@ TBulkTriVortexMLFieldCalc::TBulkTriVortexMLFieldCalc(const string& wisdom, const #ifdef HAVE_LIBFFTW3_THREADS int init_threads(fftw_init_threads()); - if (init_threads) + if (init_threads) { +#ifdef HAVE_GOMP + fftw_plan_with_nthreads(omp_get_num_procs()); +#else fftw_plan_with_nthreads(2); +#endif /* HAVE_GOMP */ + } #endif /* HAVE_LIBFFTW3_THREADS */ fFFTin = new fftw_complex[(fSteps/2 + 1) * fSteps]; @@ -481,7 +504,9 @@ void TBulkTriVortexMLFieldCalc::CalculateGrid() const { // ... 
but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #endif for (m = 0; m < NFFTsq; m++) { fFFTout[m] = field; } @@ -568,7 +593,9 @@ void TBulkTriVortexMLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); // Multiply by the applied field + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] *= field; } @@ -595,8 +622,13 @@ TBulkTriVortexAGLFieldCalc::TBulkTriVortexAGLFieldCalc(const string& wisdom, con #ifdef HAVE_LIBFFTW3_THREADS int init_threads(fftw_init_threads()); - if (init_threads) + if (init_threads) { +#ifdef HAVE_GOMP + fftw_plan_with_nthreads(omp_get_num_procs()); +#else fftw_plan_with_nthreads(2); +#endif /* HAVE_GOMP */ + } #endif /* HAVE_LIBFFTW3_THREADS */ fFFTin = new fftw_complex[(fSteps/2 + 1) * fSteps]; @@ -653,7 +685,9 @@ void TBulkTriVortexAGLFieldCalc::CalculateGrid() const { // ... 
but first check that the field is not larger than Hc2 and that we are dealing with a type II SC if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #endif for (m = 0; m < NFFTsq; m++) { fFFTout[m] = field; } @@ -747,7 +781,9 @@ void TBulkTriVortexAGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); // Multiply by the applied field + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] *= field; } @@ -787,8 +823,13 @@ TBulkTriVortexNGLFieldCalc::TBulkTriVortexNGLFieldCalc(const string& wisdom, con #ifdef HAVE_LIBFFTW3_THREADS int init_threads(fftw_init_threads()); - if (init_threads) + if (init_threads) { +#ifdef HAVE_GOMP + fftw_plan_with_nthreads(omp_get_num_procs()); +#else fftw_plan_with_nthreads(2); +#endif /* HAVE_GOMP */ + } #endif /* HAVE_LIBFFTW3_THREADS */ const unsigned int stepsSq(fSteps*fSteps); @@ -876,7 +917,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGradient() const { // Take the derivative of the Fourier sum of omega // First save a copy of the real aK-matrix in the imaginary part of the bK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; ++l) { fBkMatrix[l][1] = fFFTin[l][0]; } @@ -912,7 +955,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGradient() const { fftw_execute(fFFTplan); // Copy the results to the gradient matrix and restore the original aK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; ++l) { fOmegaDiffMatrix[l][0] = fRealSpaceMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1]; @@ -960,7 +1005,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGradient() const { fftw_execute(fFFTplan); // Copy the results to the gradient matrix and restore the original aK-matrix + #ifdef HAVE_GOMP #pragma omp 
parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; ++l) { fOmegaDiffMatrix[l][1] = fRealSpaceMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1]; @@ -1503,7 +1550,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // first check that the field is not larger than Hc2 and that we are dealing with a type II SC ... if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #endif for (m = 0; m < NFFTsq; m++) { fFFTout[m] = field; } @@ -1518,7 +1567,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { FillAbrikosovCoefficients(); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFT; l++) { fCheckAkConvergence[l] = fFFTin[l][0]; } @@ -1531,7 +1582,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fOmegaMatrix[l] = fSumAk - fRealSpaceMatrix[l][0]; } @@ -1544,7 +1597,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { double denomQA; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l,denomQA) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { if (!fOmegaMatrix[l] || !l || (l == (NFFT+1)*NFFT_2)) { fQMatrixA[l][0] = 0.0; @@ -1569,7 +1624,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { */ // initialize B(x,y) with the mean field + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] = scaledB; } @@ -1581,8 +1638,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { while (!akConverged || !bkConverged) { // First iteration step for aK - + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { if 
(fOmegaMatrix[l]) { fRealSpaceMatrix[l][0] = fOmegaMatrix[l]*(fOmegaMatrix[l] + fQMatrix[l][0]*fQMatrix[l][0] + fQMatrix[l][1]*fQMatrix[l][1] - 2.0) + \ @@ -1610,7 +1668,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // Need a copy of the aK-matrix since FFTW is manipulating the input in c2r and r2c transforms // Store it in the first half of the bK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq_2; l++) { fBkMatrix[l][0] = fFFTin[l][0]; } @@ -1619,7 +1679,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fOmegaMatrix[l] = fSumAk - fRealSpaceMatrix[l][0]; } @@ -1645,7 +1707,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { } // Restore the aK-matrix from the bK-space and multiply with the spacial averages + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq_2; l++) { fFFTin[l][0] = fBkMatrix[l][0]*fSumSum/fSumOmegaSq; fFFTin[l][1] = 0.0; @@ -1668,7 +1732,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { } if (!akConverged) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFT; l++) { fCheckAkConvergence[l] = fFFTin[l][0]; } @@ -1689,7 +1755,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplan); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fOmegaMatrix[l] = fSumAk - fRealSpaceMatrix[l][0]; } @@ -1698,7 +1766,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { if (akInitiallyConverged) { // if the aK iterations converged, go on with the bK calculation //cout << "converged, count=" << count << endl; + #ifdef HAVE_GOMP #pragma omp 
parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fBkMatrix[l][0] = fOmegaMatrix[l]*fFFTout[l] + fSumAk*(scaledB - fFFTout[l]) + \ fQMatrix[l][1]*fOmegaDiffMatrix[l][0] - fQMatrix[l][0]*fOmegaDiffMatrix[l][1]; @@ -1717,7 +1787,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // Check the convergence of the bK-iterations if (firstBkCalculation) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFT; l++) { fCheckBkConvergence[l] = 0.0; } @@ -1741,7 +1813,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // cout << "Bk Convergence: " << bkConverged << endl; if (!bkConverged) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFT; l++) { fCheckBkConvergence[l] = fBkMatrix[l][0]; } @@ -1751,7 +1825,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix // Since aK is only half the size of bK, store every second entry in the imaginary part of aK + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l+=2) { fFFTin[l/2][0] = fBkMatrix[l][0]; fFFTin[l/2][1] = fBkMatrix[l+1][0]; @@ -1761,7 +1837,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplanBkToBandQ); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] = scaledB + fBkMatrix[l][0]; } @@ -1770,7 +1848,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { break; // Restore bKs for Qx calculation and Fourier transform to get Qx + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l+=2) { fBkMatrix[l][0] = fFFTin[l/2][0]; 
fBkMatrix[l+1][0] = fFFTin[l/2][1]; @@ -1782,13 +1862,18 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplanBkToBandQ); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fQMatrix[l][0] = fQMatrixA[l][0] - fBkMatrix[l][1]; } // Restore bKs for Qy calculation and Fourier transform to get Qy + + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l+=2) { fBkMatrix[l][0] = fFFTin[l/2][0]; fBkMatrix[l+1][0] = fFFTin[l/2][1]; @@ -1800,7 +1885,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { fftw_execute(fFFTplanBkToBandQ); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fQMatrix[l][1] = fQMatrixA[l][1] + fBkMatrix[l][1]; } @@ -1809,7 +1896,9 @@ void TBulkTriVortexNGLFieldCalc::CalculateGrid() const { // If the iterations have converged, rescale the field from Brandt's units to Gauss + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsq; l++) { fFFTout[l] *= Hc2_kappa; } diff --git a/src/external/TFitPofB-lib/classes/TFilmTriVortexFieldCalc.cpp b/src/external/TFitPofB-lib/classes/TFilmTriVortexFieldCalc.cpp index 28a31bd9a..26286bbb0 100644 --- a/src/external/TFitPofB-lib/classes/TFilmTriVortexFieldCalc.cpp +++ b/src/external/TFitPofB-lib/classes/TFilmTriVortexFieldCalc.cpp @@ -40,8 +40,8 @@ using namespace std; #include "TMath.h" -#define PI 3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067 -#define TWOPI (2.0*3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067) +#define PI 3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067f +#define TWOPI 
(2.0f*3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067f) const float fluxQuantum(2.067833667e7f); // in this case this is Gauss times square nm @@ -150,8 +150,13 @@ TFilmTriVortexNGLFieldCalc::TFilmTriVortexNGLFieldCalc(const string& wisdom, con #ifdef HAVE_LIBFFTW3_THREADS int init_threads(fftwf_init_threads()); - if (init_threads) + if (init_threads) { +#ifdef HAVE_GOMP + fftwf_plan_with_nthreads(omp_get_num_procs()); +#else fftwf_plan_with_nthreads(2); +#endif /* HAVE_GOMP */ + } #endif /* HAVE_LIBFFTW3_THREADS */ const unsigned int stepsSqStZ(fSteps*fSteps*fStepsZ); @@ -277,7 +282,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { int i, j, k, l, index; // First save a copy of the real aK-matrix in the imaginary part of the bK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][1] = fFFTin[l][0]; } @@ -285,14 +292,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { // sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z) // First multiply the aK with Kx^2, then call FFTW - float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0));; + float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));; // k = 0 // even rows for (i = 0; i < NFFT; i += 2) { // j = 0 - fFFTin[fStepsZ*NFFT*i][0] = 0.0; + fFFTin[fStepsZ*NFFT*i][0] = 0.0f; // j != 0 for (j = 2; j < NFFT_2; j += 2) { fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast(j*j); @@ -319,7 +326,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { // even rows for (i = 0; i < NFFT; i += 2) { // j = 0 - fFFTin[k + NFFTz*NFFT*i][0] = 0.0; + fFFTin[k + NFFTz*NFFT*i][0] = 0.0f; // j != 0 for (j = 2; j < NFFT_2; j += 2) { fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast(j*j); @@ -348,7 +355,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { denom[k] = fRealSpaceMatrix[k][0]; fGstorage[k] = 
fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0]; } + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; } @@ -356,14 +365,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { // sum_K aK Kx Ky cos(Kx*x + Ky*y) cos(Kz*z) // First multiply the aK with Kx*Ky, then call FFTW - const float coeffKxKy = (4.0/sqrt3*pow(PI/fLatticeConstant, 2.0)); + const float coeffKxKy = (4.0f/sqrt3*pow(PI/fLatticeConstant, 2.0f)); // k = 0 // even rows for (i = 0; i < NFFT_2; i += 2) { // j = 0 - fFFTin[fStepsZ*NFFT*i][0] = 0.0; + fFFTin[fStepsZ*NFFT*i][0] = 0.0f; // j != 0 for (j = 2; j < NFFT_2; j += 2) { fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKxKy*static_cast(j*i); @@ -409,7 +418,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { // even rows for (i = 0; i < NFFT_2; i += 2) { // j = 0 - fFFTin[k + fStepsZ*NFFT*i][0] = 0.0; + fFFTin[k + fStepsZ*NFFT*i][0] = 0.0f; // j != 0 for (j = 2; j < NFFT_2; j += 2) { fFFTin[k + fStepsZ*(j + NFFT*i)][0] *= coeffKxKy*static_cast(j*i); @@ -420,7 +429,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { } for (i = NFFT_2; i < NFFT; i += 2) { // j = 0 - fFFTin[k + fStepsZ*NFFT*i][0] = 0.0; + fFFTin[k + fStepsZ*NFFT*i][0] = 0.0f; // j != 0 for (j = 2; j < NFFT_2; j += 2) { fFFTin[k + fStepsZ*(j + NFFT*i)][0] *= coeffKxKy*static_cast(j*(i - NFFT)); @@ -456,7 +465,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { for (k = 0; k < NFFTz; ++k) { fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0]; } + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; } @@ -564,7 +575,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const { for (k = 0; k < NFFTz; ++k) { fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1]; } + #ifdef HAVE_GOMP 
#pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; } @@ -597,7 +610,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { // This is going to be a bit lengthy... // First save a copy of the real aK-matrix in the imaginary part of the bK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][1] = fFFTin[l][0]; } @@ -664,7 +679,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { fftwf_execute(fFFTplan); // Copy the results to the gradient matrix and restore the original aK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1]; @@ -752,7 +769,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { fftwf_execute(fFFTplan); // Copy the results to the gradient matrix and restore the original aK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1]; fFFTin[l][0] = fBkMatrix[l][1]; @@ -814,21 +833,27 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const { // Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1]; } } // Restore the original aK-matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][0] = fBkMatrix[l][1]; fBkMatrix[l][1] = 0.0; } } else { // 
For the 2D solution, dw/dz = 0 + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fOmegaDiffMatrix[2][l] = 0.0; fBkMatrix[l][1] = 0.0; @@ -973,7 +998,9 @@ void TFilmTriVortexNGLFieldCalc::FillAbrikosovCoefficients(const float reducedFi fFFTin[0][0] = 0.0; for (k = 1; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fFFTin[k + NFFTz*index][0] = 0.0; fFFTin[k + NFFTz*index][1] = 0.0; @@ -1666,7 +1693,9 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsB() const { */ } else { // for 2D solution only for (k = 1; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[k + NFFTz*index][0] = 0.0; fBkMatrix[k + NFFTz*index][1] = 0.0; @@ -1937,7 +1966,9 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXFirst() c float ii; // k = 0 + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; } @@ -2080,7 +2111,9 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXSecond() float ii; // k = 0 + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; } @@ -2219,7 +2252,9 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYFirst() c float ii; // k = 0 + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; } @@ -2354,7 +2389,9 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYSecond() float ii; // k = 0 + #ifdef 
HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fBkMatrix[NFFTz*index][0] = 0.0f; } @@ -2534,7 +2571,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // first check that the field is not larger than Hc2 and that we are dealing with a type II SC ... if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) { int m; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(m) schedule(dynamic) + #endif for (m = 0; m < NFFTsqStZ; ++m) { fBout[0][m] = 0.0f; fBout[1][m] = 0.0f; @@ -2554,7 +2593,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // save a few coefficients for the convergence check for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(j,index) schedule(dynamic) + #endif for (j = 0; j < NFFT; ++j) { index = k + NFFTz*j; fCheckAkConvergence[index] = fFFTin[index][0]; @@ -2577,7 +2618,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { for (k = 0; k < NFFTz; ++k) { for (j = 0; j < NFFT; ++j) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(i,index) schedule(dynamic) + #endif for (i = 0; i < NFFT; ++i) { index = k + NFFTz*(j + NFFT*i); fOmegaMatrix[index] = fSumAk[k][0] - fRealSpaceMatrix[index][0]; @@ -2594,7 +2637,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { float denomQAInv; int indexQA; + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) { fQMatrixA[index][0] = 0.0; @@ -2619,7 +2664,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Initialize the Q-Matrix with Q-Abrikosov for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fQMatrix[k + 
NFFTz*index][0] = fQMatrixA[index][0]; fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1]; @@ -2627,7 +2674,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { } // initialize the bK-Matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][0] = 0.0; // fBkMatrix[l][1] = 0.0; @@ -2651,7 +2700,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // cout << "g[" << k << "] = " << fGstorage[k] << endl; // } + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; l++) { if (fOmegaMatrix[l]) { fRealSpaceMatrix[l][0] = fOmegaMatrix[l]*(fOmegaMatrix[l] + fQMatrix[l][0]*fQMatrix[l][0] + fQMatrix[l][1]*fQMatrix[l][1] - 2.0) + \ @@ -2668,7 +2719,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // since all of this should be a smooth function anyway, I set the value of the next neighbour r // for the two vortex core positions in my matrix // If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment) + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(k) schedule(dynamic) + #endif for (k = 0; k < NFFTz; ++k) { fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k]; fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k]; @@ -2688,7 +2741,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplan); for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0]; } @@ -2731,7 +2786,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Multiply the aK with the spacial averages for (k = 0; k < NFFTz; ++k) { + #ifdef 
HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum; } @@ -2760,7 +2817,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { if (!akConverged) { for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(j, index) schedule(dynamic) + #endif for (j = 0; j < NFFT; ++j) { index = k + NFFTz*j; fCheckAkConvergence[index] = fFFTin[index][0]; @@ -2791,7 +2850,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplan); for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0]; } @@ -2800,7 +2861,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { CalculateGradient(); // first calculate PK (use the Q-Matrix memory for the second part) + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fPkMatrix[l][0] = fOmegaMatrix[l]*fQMatrix[l][1]; fQMatrix[l][0] = fOmegaMatrix[l]*fQMatrix[l][0]; @@ -2834,7 +2897,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Check the convergence of the bK-iterations if (firstBkCalculation) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTStZ; ++l) { fCheckBkConvergence[l] = 0.0f; } @@ -2866,7 +2931,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { if (!bkConverged) { for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(j) schedule(dynamic) + #endif for (j = 0; j < NFFT; ++j) { index = k + NFFTz*j; fCheckBkConvergence[index] = fBkMatrix[index][0]; @@ -2875,7 +2942,9 @@ void 
TFilmTriVortexNGLFieldCalc::CalculateGrid() const { } // In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fFFTin[l][1] = fBkMatrix[l][0]; } @@ -2911,14 +2980,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1]; } } // Restore bKs for Qy calculation and Fourier transform to get Qy + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][0] = fFFTin[l][1]; fBkMatrix[l][1] = 0.0; @@ -2929,14 +3002,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); for (k = 0; k < NFFTz; ++k) { + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1]; } } // Restore bKs for the next iteration + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBkMatrix[l][0] = fFFTin[l][1]; fBkMatrix[l][1] = 0.0; @@ -2950,7 +3027,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); // Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[0][l] = fBkMatrix[l][0]; fBkMatrix[l][0] = fFFTin[l][1]; @@ -2962,7 +3041,10 @@ void 
TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); // Fill in the B-Matrix and restore the bKs for the By-calculation + + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa; fBkMatrix[l][0] = fFFTin[l][1]; @@ -2974,7 +3056,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); // Fill in the B-Matrix and restore the bKs for the second part of the By-calculation + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[1][l] = fBkMatrix[l][0]; fBkMatrix[l][0] = fFFTin[l][1]; @@ -2986,7 +3070,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); // Fill in the B-Matrix and restore the bKs for the second part of the By-calculation + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa; fBkMatrix[l][0] = fFFTin[l][1]; @@ -2995,7 +3081,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { fftwf_execute(fFFTplanBkToBandQ); + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(l) schedule(dynamic) + #endif for (l = 0; l < NFFTsqStZ; ++l) { fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa; } @@ -3003,7 +3091,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Since the surface is not included in the Bx and By-calculation above, we do another step // Save a copy of the BkS - where does not matter since this is the very end of the calculation... 
+ #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fFFTin[index][1] = fBkS[index][0]; } @@ -3014,7 +3104,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Write the surface fields to the field-Matrix and restore the BkS for the By-calculation + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa; fBkS[index][0] = fFFTin[index][1]; @@ -3027,7 +3119,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const { // Write the surface fields to the field-Matrix + #ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(index) schedule(dynamic) + #endif for (index = 0; index < NFFTsq; ++index) { fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa; } diff --git a/src/external/TFitPofB-lib/classes/TPofBCalc.cpp b/src/external/TFitPofB-lib/classes/TPofBCalc.cpp index d6e5fef65..29478db7e 100644 --- a/src/external/TFitPofB-lib/classes/TPofBCalc.cpp +++ b/src/external/TFitPofB-lib/classes/TPofBCalc.cpp @@ -108,7 +108,9 @@ TPofBCalc::TPofBCalc(const vector& b, const vector& pb, double d void TPofBCalc::UnsetPBExists() { int i; +#ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(i) schedule(dynamic) +#endif for (i = 0; i < static_cast(fPBSize); i++) { fPB[i] = 0.0; } @@ -478,7 +480,9 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto sum += fPB[i]; sum *= fDB; int i; +#ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(i) schedule(dynamic) +#endif for (i = 0; i < static_cast(fPBSize); ++i) fPB[i] /= sum; // end pragma omp parallel diff --git a/src/external/TFitPofB-lib/classes/TPofTCalc.cpp b/src/external/TFitPofB-lib/classes/TPofTCalc.cpp index da53c8dae..32bcc031c 100644 --- a/src/external/TFitPofB-lib/classes/TPofTCalc.cpp +++ 
b/src/external/TFitPofB-lib/classes/TPofTCalc.cpp @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -65,12 +66,18 @@ TPofTCalc::TPofTCalc (const TPofBCalc *PofB, const string &wisdom, const vector &par) : fWisdom(wisdom) { + #ifdef HAVE_LIBFFTW3_THREADS int init_threads(fftw_init_threads()); if (!init_threads) cout << "TPofTCalc::TPofTCalc: Couldn't initialize multiple FFTW-threads ..." << endl; - else + else { +#ifdef HAVE_GOMP + fftw_plan_with_nthreads(omp_get_num_procs()); +#else fftw_plan_with_nthreads(2); +#endif /* HAVE_GOMP */ + } #endif /* HAVE_LIBFFTW3_THREADS */ fNFFT = static_cast(1.0/(gBar*par[1]*par[2])); @@ -91,7 +98,9 @@ TPofTCalc::TPofTCalc (const TPofBCalc *PofB, const string &wisdom, const vector< int i; +#ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(i) schedule(dynamic) +#endif for (i = 0; i < NFFT_2p1; i++) { fT[i] = static_cast(i)*fTBin; } @@ -177,7 +186,9 @@ void TPofTCalc::CalcPol(const vector &par) { double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0)); int i; +#ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(i) schedule(dynamic) +#endif for (i=0; i // calculate asymmetry CalcPol(param); +#ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic) +#endif for(j=0; j(floor(ttime/fTBin)); @@ -274,7 +287,9 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector for (unsigned int i(0); i name += i; fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]); // fill theoHisto +#ifdef HAVE_GOMP #pragma omp parallel for default(shared) private(j) schedule(dynamic) +#endif for (j = 0; jSetBinContent(j, histo[i][j]); // end omp