Applied a few minor optimizations to the field-distribution calculations
This commit is contained in:
parent
db4425e352
commit
e22256ef8a
11
src/external/libFitPofB/classes/TBofZCalc.cpp
vendored
11
src/external/libFitPofB/classes/TBofZCalc.cpp
vendored
@ -57,10 +57,13 @@ void TBofZCalc::Calculate()
|
|||||||
fZ.resize(fSteps);
|
fZ.resize(fSteps);
|
||||||
fBZ.resize(fSteps);
|
fBZ.resize(fSteps);
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic)
|
int chunk = fSteps/omp_get_num_procs();
|
||||||
#endif
|
if (chunk < 10)
|
||||||
for (j=0; j<fSteps; j++) {
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic,chunk)
|
||||||
|
#endif
|
||||||
|
for (j=0; j<fSteps; ++j) {
|
||||||
ZZ = fParam[1] + static_cast<double>(j)*fDZ;
|
ZZ = fParam[1] + static_cast<double>(j)*fDZ;
|
||||||
fZ[j] = ZZ;
|
fZ[j] = ZZ;
|
||||||
fBZ[j] = GetBofZ(ZZ);
|
fBZ[j] = GetBofZ(ZZ);
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -290,7 +290,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
|||||||
|
|
||||||
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
|
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
int chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBkMatrix[l][1] = fFFTin[l][0];
|
fBkMatrix[l][1] = fFFTin[l][0];
|
||||||
@ -299,61 +302,86 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
|||||||
// sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z)
|
// sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z)
|
||||||
// First multiply the aK with Kx^2, then call FFTW
|
// First multiply the aK with Kx^2, then call FFTW
|
||||||
|
|
||||||
float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));;
|
float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));
|
||||||
|
|
||||||
// k = 0
|
// k = 0
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
// even rows
|
#pragma omp parallel default(shared) private(i,j)
|
||||||
for (i = 0; i < NFFT; i += 2) {
|
{
|
||||||
// j = 0
|
#pragma omp sections
|
||||||
fFFTin[fStepsZ*NFFT*i][0] = 0.0f;
|
{
|
||||||
// j != 0
|
|
||||||
for (j = 2; j < NFFT_2; j += 2) {
|
|
||||||
fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j);
|
|
||||||
}
|
|
||||||
for (j = NFFT_2; j < NFFT; j += 2) {
|
|
||||||
fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// odd rows
|
|
||||||
for (i = 1; i < NFFT; i += 2) {
|
|
||||||
for (j = 0; j < NFFT_2; j += 2) {
|
|
||||||
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
|
|
||||||
}
|
|
||||||
for (j = NFFT_2; j < NFFT; j += 2) {
|
|
||||||
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// k != 0
|
|
||||||
|
|
||||||
if (fFind3dSolution) {
|
|
||||||
for (k = 1; k < NFFTz; ++k) {
|
|
||||||
// even rows
|
// even rows
|
||||||
|
#pragma omp section
|
||||||
|
#endif
|
||||||
for (i = 0; i < NFFT; i += 2) {
|
for (i = 0; i < NFFT; i += 2) {
|
||||||
// j = 0
|
// j = 0
|
||||||
fFFTin[k + NFFTz*NFFT*i][0] = 0.0f;
|
fFFTin[fStepsZ*NFFT*i][0] = 0.0f;
|
||||||
// j != 0
|
// j != 0
|
||||||
for (j = 2; j < NFFT_2; j += 2) {
|
for (j = 2; j < NFFT_2; j += 2) {
|
||||||
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j);
|
fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j);
|
||||||
}
|
}
|
||||||
for (j = NFFT_2; j < NFFT; j += 2) {
|
for (j = NFFT_2; j < NFFT; j += 2) {
|
||||||
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT));
|
fFFTin[fStepsZ*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// odd rows
|
// odd rows
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
#pragma omp section
|
||||||
|
#endif
|
||||||
for (i = 1; i < NFFT; i += 2) {
|
for (i = 1; i < NFFT; i += 2) {
|
||||||
for (j = 0; j < NFFT_2; j += 2) {
|
for (j = 0; j < NFFT_2; j += 2) {
|
||||||
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
|
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
|
||||||
}
|
}
|
||||||
for (j = NFFT_2; j < NFFT; j += 2) {
|
for (j = NFFT_2; j < NFFT; j += 2) {
|
||||||
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
|
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
#ifdef HAVE_GOMP
|
||||||
} // else do nothing since the other aK are already zero since the former aK manipulation
|
} // end omp sections
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// k != 0
|
||||||
|
if (fFind3dSolution) {
|
||||||
|
for (k = 1; k < NFFTz; ++k) {
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
#pragma omp sections
|
||||||
|
{
|
||||||
|
// even rows
|
||||||
|
#pragma omp section
|
||||||
|
#endif
|
||||||
|
for (i = 0; i < NFFT; i += 2) {
|
||||||
|
// j = 0
|
||||||
|
fFFTin[k + NFFTz*NFFT*i][0] = 0.0f;
|
||||||
|
// j != 0
|
||||||
|
for (j = 2; j < NFFT_2; j += 2) {
|
||||||
|
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>(j*j);
|
||||||
|
}
|
||||||
|
for (j = NFFT_2; j < NFFT; j += 2) {
|
||||||
|
fFFTin[k + NFFTz*(j + NFFT*i)][0] *= coeffKx*static_cast<float>((j - NFFT)*(j - NFFT));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// odd rows
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
#pragma omp section
|
||||||
|
#endif
|
||||||
|
for (i = 1; i < NFFT; i += 2) {
|
||||||
|
for (j = 0; j < NFFT_2; j += 2) {
|
||||||
|
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
|
||||||
|
}
|
||||||
|
for (j = NFFT_2; j < NFFT; j += 2) {
|
||||||
|
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} // else do nothing since the other aK are already zero since the former aK manipulation
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
} // end omp parallel
|
||||||
|
#endif
|
||||||
|
|
||||||
fftwf_execute(fFFTplan);
|
fftwf_execute(fFFTplan);
|
||||||
|
|
||||||
@ -363,7 +391,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
|||||||
fGstorage[k] = fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
|
fGstorage[k] = fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
|
||||||
}
|
}
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fFFTin[l][0] = fBkMatrix[l][1];
|
fFFTin[l][0] = fBkMatrix[l][1];
|
||||||
@ -473,7 +501,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
|||||||
fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
|
fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
|
||||||
}
|
}
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fFFTin[l][0] = fBkMatrix[l][1];
|
fFFTin[l][0] = fBkMatrix[l][1];
|
||||||
@ -583,7 +611,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
|||||||
fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1];
|
fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1];
|
||||||
}
|
}
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fFFTin[l][0] = fBkMatrix[l][1];
|
fFFTin[l][0] = fBkMatrix[l][1];
|
||||||
@ -612,13 +640,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
|||||||
const int NFFTz_2(fStepsZ/2);
|
const int NFFTz_2(fStepsZ/2);
|
||||||
|
|
||||||
int i, j, k, l, index;
|
int i, j, k, l, index;
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
int chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Take the derivative of the Fourier sum of omega
|
// Take the derivative of the Fourier sum of omega
|
||||||
// This is going to be a bit lengthy...
|
// This is going to be a bit lengthy...
|
||||||
|
|
||||||
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
|
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBkMatrix[l][1] = fFFTin[l][0];
|
fBkMatrix[l][1] = fFFTin[l][0];
|
||||||
@ -687,7 +720,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
|||||||
|
|
||||||
// Copy the results to the gradient matrix and restore the original aK-matrix
|
// Copy the results to the gradient matrix and restore the original aK-matrix
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1];
|
fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1];
|
||||||
@ -777,7 +810,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
|||||||
|
|
||||||
// Copy the results to the gradient matrix and restore the original aK-matrix
|
// Copy the results to the gradient matrix and restore the original aK-matrix
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1];
|
fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1];
|
||||||
@ -838,10 +871,16 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
|||||||
// 3D transform
|
// 3D transform
|
||||||
fftwf_execute(fFFTplan);
|
fftwf_execute(fFFTplan);
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk
|
// Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1];
|
fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1];
|
||||||
@ -850,7 +889,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
|||||||
|
|
||||||
// Restore the original aK-matrix
|
// Restore the original aK-matrix
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fFFTin[l][0] = fBkMatrix[l][1];
|
fFFTin[l][0] = fBkMatrix[l][1];
|
||||||
@ -859,7 +901,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
|||||||
} else {
|
} else {
|
||||||
// For the 2D solution, dw/dz = 0
|
// For the 2D solution, dw/dz = 0
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fOmegaDiffMatrix[2][l] = 0.0;
|
fOmegaDiffMatrix[2][l] = 0.0;
|
||||||
@ -1004,9 +1046,15 @@ void TFilmTriVortexNGLFieldCalc::FillAbrikosovCoefficients(const float reducedFi
|
|||||||
|
|
||||||
fFFTin[0][0] = 0.0;
|
fFFTin[0][0] = 0.0;
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
int chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (k = 1; k < NFFTz; ++k) {
|
for (k = 1; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fFFTin[k + NFFTz*index][0] = 0.0;
|
fFFTin[k + NFFTz*index][0] = 0.0;
|
||||||
@ -1699,9 +1747,16 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsB() const {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
} else { // for 2D solution only
|
} else { // for 2D solution only
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
int chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (k = 1; k < NFFTz; ++k) {
|
for (k = 1; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fBkMatrix[k + NFFTz*index][0] = 0.0;
|
fBkMatrix[k + NFFTz*index][0] = 0.0;
|
||||||
@ -1971,10 +2026,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXFirst() c
|
|||||||
|
|
||||||
int i, j, k, kx, ky, kz, index;
|
int i, j, k, kx, ky, kz, index;
|
||||||
float ii;
|
float ii;
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
int chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
// k = 0
|
// k = 0
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||||
@ -2116,10 +2176,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXSecond()
|
|||||||
|
|
||||||
int i, j, k, kx, ky, kz, index;
|
int i, j, k, kx, ky, kz, index;
|
||||||
float ii;
|
float ii;
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
int chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
// k = 0
|
// k = 0
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||||
@ -2260,7 +2325,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYFirst() c
|
|||||||
|
|
||||||
// k = 0
|
// k = 0
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
int chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||||
@ -2397,7 +2465,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYSecond()
|
|||||||
|
|
||||||
// k = 0
|
// k = 0
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
int chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||||
@ -2575,11 +2646,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
const int NFFTz_2(fStepsZ/2);
|
const int NFFTz_2(fStepsZ/2);
|
||||||
const int NFFTsqStZ_2(NFFTsq*(fStepsZ/2 + 1));
|
const int NFFTsqStZ_2(NFFTsq*(fStepsZ/2 + 1));
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
int chunk;
|
||||||
|
#endif
|
||||||
|
|
||||||
// first check that the field is not larger than Hc2 and that we are dealing with a type II SC ...
|
// first check that the field is not larger than Hc2 and that we are dealing with a type II SC ...
|
||||||
if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) {
|
if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) {
|
||||||
int m;
|
int m;
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(m) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (m = 0; m < NFFTsqStZ; ++m) {
|
for (m = 0; m < NFFTsqStZ; ++m) {
|
||||||
fBout[0][m] = 0.0f;
|
fBout[0][m] = 0.0f;
|
||||||
@ -2601,7 +2679,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic)
|
chunk = NFFT/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (j = 0; j < NFFT; ++j) {
|
for (j = 0; j < NFFT; ++j) {
|
||||||
index = k + NFFTz*j;
|
index = k + NFFTz*j;
|
||||||
@ -2626,7 +2707,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
for (j = 0; j < NFFT; ++j) {
|
for (j = 0; j < NFFT; ++j) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i,index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(i,index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < NFFT; ++i) {
|
for (i = 0; i < NFFT; ++i) {
|
||||||
index = k + NFFTz*(j + NFFT*i);
|
index = k + NFFTz*(j + NFFT*i);
|
||||||
@ -2645,7 +2726,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
int indexQA;
|
int indexQA;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic)
|
chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) {
|
if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) {
|
||||||
@ -2672,7 +2756,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0];
|
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0];
|
||||||
@ -2682,7 +2766,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// initialize the bK-Matrix
|
// initialize the bK-Matrix
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBkMatrix[l][0] = 0.0;
|
fBkMatrix[l][0] = 0.0;
|
||||||
@ -2708,7 +2795,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; l++) {
|
for (l = 0; l < NFFTsqStZ; l++) {
|
||||||
if (fOmegaMatrix[l]) {
|
if (fOmegaMatrix[l]) {
|
||||||
@ -2726,9 +2816,6 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
// since all of this should be a smooth function anyway, I set the value of the next neighbour r
|
// since all of this should be a smooth function anyway, I set the value of the next neighbour r
|
||||||
// for the two vortex core positions in my matrix
|
// for the two vortex core positions in my matrix
|
||||||
// If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment)
|
// If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment)
|
||||||
#ifdef HAVE_GOMP
|
|
||||||
#pragma omp parallel for default(shared) private(k) schedule(dynamic)
|
|
||||||
#endif
|
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k];
|
fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k];
|
||||||
fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k];
|
fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k];
|
||||||
@ -2747,9 +2834,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
fftwf_execute(fFFTplan);
|
fftwf_execute(fFFTplan);
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
|
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
|
||||||
@ -2794,7 +2887,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
// Multiply the aK with the spacial averages
|
// Multiply the aK with the spacial averages
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum;
|
fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum;
|
||||||
@ -2823,9 +2916,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!akConverged) {
|
if (!akConverged) {
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFT/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(j, index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(j, index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (j = 0; j < NFFT; ++j) {
|
for (j = 0; j < NFFT; ++j) {
|
||||||
index = k + NFFTz*j;
|
index = k + NFFTz*j;
|
||||||
@ -2856,9 +2954,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
fftwf_execute(fFFTplan);
|
fftwf_execute(fFFTplan);
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
|
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
|
||||||
@ -2869,6 +2973,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// first calculate PK (use the Q-Matrix memory for the second part)
|
// first calculate PK (use the Q-Matrix memory for the second part)
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
@ -2905,7 +3012,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
if (firstBkCalculation) {
|
if (firstBkCalculation) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTStZ; ++l) {
|
for (l = 0; l < NFFTStZ; ++l) {
|
||||||
fCheckBkConvergence[l] = 0.0f;
|
fCheckBkConvergence[l] = 0.0f;
|
||||||
@ -2937,9 +3047,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
// cout << "Bk Convergence: " << bkConverged << endl;
|
// cout << "Bk Convergence: " << bkConverged << endl;
|
||||||
|
|
||||||
if (!bkConverged) {
|
if (!bkConverged) {
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFT/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (j = 0; j < NFFT; ++j) {
|
for (j = 0; j < NFFT; ++j) {
|
||||||
index = k + NFFTz*j;
|
index = k + NFFTz*j;
|
||||||
@ -2950,7 +3065,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix
|
// In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fFFTin[l][1] = fBkMatrix[l][0];
|
fFFTin[l][1] = fBkMatrix[l][0];
|
||||||
@ -2986,9 +3104,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
fftwf_execute(fFFTplanBkToBandQ);
|
fftwf_execute(fFFTplanBkToBandQ);
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1];
|
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1];
|
||||||
@ -2997,7 +3121,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// Restore bKs for Qy calculation and Fourier transform to get Qy
|
// Restore bKs for Qy calculation and Fourier transform to get Qy
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBkMatrix[l][0] = fFFTin[l][1];
|
fBkMatrix[l][0] = fFFTin[l][1];
|
||||||
@ -3008,9 +3135,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
fftwf_execute(fFFTplanBkToBandQ);
|
fftwf_execute(fFFTplanBkToBandQ);
|
||||||
|
|
||||||
|
#ifdef HAVE_GOMP
|
||||||
|
chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (k = 0; k < NFFTz; ++k) {
|
for (k = 0; k < NFFTz; ++k) {
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1];
|
fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1];
|
||||||
@ -3019,7 +3152,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// Restore bKs for the next iteration
|
// Restore bKs for the next iteration
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBkMatrix[l][0] = fFFTin[l][1];
|
fBkMatrix[l][0] = fFFTin[l][1];
|
||||||
@ -3035,7 +3171,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation
|
// Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBout[0][l] = fBkMatrix[l][0];
|
fBout[0][l] = fBkMatrix[l][0];
|
||||||
@ -3050,7 +3189,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
// Fill in the B-Matrix and restore the bKs for the By-calculation
|
// Fill in the B-Matrix and restore the bKs for the By-calculation
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa;
|
fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa;
|
||||||
@ -3064,7 +3203,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
|
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBout[1][l] = fBkMatrix[l][0];
|
fBout[1][l] = fBkMatrix[l][0];
|
||||||
@ -3078,7 +3217,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
|
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa;
|
fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa;
|
||||||
@ -3089,7 +3228,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
fftwf_execute(fFFTplanBkToBandQ);
|
fftwf_execute(fFFTplanBkToBandQ);
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||||
fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa;
|
fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa;
|
||||||
@ -3099,7 +3238,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
|
|
||||||
// Save a copy of the BkS - where does not matter since this is the very end of the calculation...
|
// Save a copy of the BkS - where does not matter since this is the very end of the calculation...
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
chunk = NFFTsq/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fFFTin[index][1] = fBkS[index][0];
|
fFFTin[index][1] = fBkS[index][0];
|
||||||
@ -3112,7 +3254,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
// Write the surface fields to the field-Matrix and restore the BkS for the By-calculation
|
// Write the surface fields to the field-Matrix and restore the BkS for the By-calculation
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
|
fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
|
||||||
@ -3127,7 +3269,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
|||||||
// Write the surface fields to the field-Matrix
|
// Write the surface fields to the field-Matrix
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (index = 0; index < NFFTsq; ++index) {
|
for (index = 0; index < NFFTsq; ++index) {
|
||||||
fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
|
fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
|
||||||
|
73
src/external/libFitPofB/classes/TPofBCalc.cpp
vendored
73
src/external/libFitPofB/classes/TPofBCalc.cpp
vendored
@ -62,7 +62,10 @@ TPofBCalc::TPofBCalc(const vector<double> ¶) : fBmin(0.0), fBmax(0.0), fDT(p
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = fPBSize/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||||
fB[i] = static_cast<double>(i)*fDB;
|
fB[i] = static_cast<double>(i)*fDB;
|
||||||
@ -83,7 +86,10 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = fPBSize/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||||
fB[i] = b[i];
|
fB[i] = b[i];
|
||||||
@ -121,9 +127,12 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
|
|||||||
|
|
||||||
void TPofBCalc::UnsetPBExists() {
|
void TPofBCalc::UnsetPBExists() {
|
||||||
int i;
|
int i;
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = fPBSize/omp_get_num_procs();
|
||||||
#endif
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||||
fPB[i] = 0.0;
|
fPB[i] = 0.0;
|
||||||
}
|
}
|
||||||
@ -140,14 +149,17 @@ void TPofBCalc::Normalize(unsigned int minFilledIndex = 0, unsigned int maxFille
|
|||||||
double pBsum(0.0);
|
double pBsum(0.0);
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum)
|
int chunk = (maxFilledIndex-minFilledIndex)/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
|
||||||
#endif
|
#endif
|
||||||
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
|
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
|
||||||
pBsum += fPB[i];
|
pBsum += fPB[i];
|
||||||
pBsum *= fDB;
|
pBsum *= fDB;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
|
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
|
||||||
fPB[i] /= pBsum;
|
fPB[i] /= pBsum;
|
||||||
@ -160,7 +172,10 @@ void TPofBCalc::SetPB(const vector<double> &pb) const {
|
|||||||
|
|
||||||
int i;
|
int i;
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = fPBSize/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||||
fPB[i] = pb[i];
|
fPB[i] = pb[i];
|
||||||
@ -190,14 +205,20 @@ void TPofBCalc::Calculate(const string &type, const vector<double> ¶) {
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = B0Index/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < B0Index; ++i) {
|
for (i = 0; i < B0Index; ++i) {
|
||||||
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expominus);
|
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expominus);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
chunk = (BmaxIndex-B0Index)/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = B0Index; i <= BmaxIndex; ++i) {
|
for (i = B0Index; i <= BmaxIndex; ++i) {
|
||||||
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expoplus);
|
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expoplus);
|
||||||
@ -546,6 +567,10 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
|
|||||||
// cannot use a reduction clause here (like e.g. in Normalize()), since pBvec[] is not a scalar variable
|
// cannot use a reduction clause here (like e.g. in Normalize()), since pBvec[] is not a scalar variable
|
||||||
// therefore, we need to work on it a bit more
|
// therefore, we need to work on it a bit more
|
||||||
int n(omp_get_num_procs()), tid, offset;
|
int n(omp_get_num_procs()), tid, offset;
|
||||||
|
int chunk = fPBSize/n;
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
|
||||||
vector< vector<unsigned int> > pBvec(n, vector<unsigned int>(fPBSize, 0));
|
vector< vector<unsigned int> > pBvec(n, vector<unsigned int>(fPBSize, 0));
|
||||||
|
|
||||||
int indexStep(static_cast<int>(floor(static_cast<float>(numberOfSteps_2)/static_cast<float>(n))));
|
int indexStep(static_cast<int>(floor(static_cast<float>(numberOfSteps_2)/static_cast<float>(n))));
|
||||||
@ -577,7 +602,7 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0; j < n; ++j) {
|
for (j = 0; j < n; ++j) {
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||||
fPB[i] += static_cast<double>(pBvec[j][i]);
|
fPB[i] += static_cast<double>(pBvec[j][i]);
|
||||||
}
|
}
|
||||||
@ -624,7 +649,10 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
|
|||||||
// calculate Gaussian background
|
// calculate Gaussian background
|
||||||
double bg[fPBSize];
|
double bg[fPBSize];
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = fPBSize/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for(i = 0; i < static_cast<int>(fPBSize); ++i) {
|
for(i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||||
bg[i] = exp(-(fB[i]-B)*(fB[i]-B)/(2.0*BsSq));
|
bg[i] = exp(-(fB[i]-B)*(fB[i]-B)/(2.0*BsSq));
|
||||||
@ -634,7 +662,7 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
|
|||||||
|
|
||||||
double bgsum(0.0);
|
double bgsum(0.0);
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:bgsum)
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:bgsum)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||||
bgsum += bg[i];
|
bgsum += bg[i];
|
||||||
@ -642,14 +670,14 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
|
|||||||
bgsum *= fDB;
|
bgsum *= fDB;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||||
bg[i] /= bgsum;
|
bg[i] /= bgsum;
|
||||||
|
|
||||||
// add background to P(B)
|
// add background to P(B)
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||||
fPB[i] = (1.0 - w)*fPB[i] + w*bg[i];
|
fPB[i] = (1.0 - w)*fPB[i] + w*bg[i];
|
||||||
@ -691,7 +719,10 @@ void TPofBCalc::ConvolveGss(double w) {
|
|||||||
|
|
||||||
int i;
|
int i;
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic)
|
int chunk = (NFFT/2+1)/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(NFFT/2+1); ++i) {
|
for (i = 0; i < static_cast<int>(NFFT/2+1); ++i) {
|
||||||
GssInTimeDomain = exp(expo*static_cast<double>(i)*static_cast<double>(i));
|
GssInTimeDomain = exp(expo*static_cast<double>(i)*static_cast<double>(i));
|
||||||
@ -725,7 +756,10 @@ double TPofBCalc::GetFirstMoment() const {
|
|||||||
double pBsum(0.0);
|
double pBsum(0.0);
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum)
|
int chunk = fPBSize/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||||
pBsum += fB[i]*fPB[i];
|
pBsum += fB[i]*fPB[i];
|
||||||
@ -746,7 +780,10 @@ double TPofBCalc::GetCentralMoment(unsigned int n) const {
|
|||||||
double pBsum(0.0);
|
double pBsum(0.0);
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic) reduction(+:pBsum)
|
int chunk = fPBSize/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic,chunk) reduction(+:pBsum)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||||
diff = fB[i]-firstMoment;
|
diff = fB[i]-firstMoment;
|
||||||
|
60
src/external/libFitPofB/classes/TPofTCalc.cpp
vendored
60
src/external/libFitPofB/classes/TPofTCalc.cpp
vendored
@ -102,10 +102,13 @@ TPofTCalc::TPofTCalc (const TPofBCalc *PofB, const string &wisdom, const vector<
|
|||||||
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = NFFT_2p1/omp_get_num_procs();
|
||||||
#endif
|
if (chunk < 10)
|
||||||
for (i = 0; i < NFFT_2p1; i++) {
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
|
#endif
|
||||||
|
for (i = 0; i < NFFT_2p1; ++i) {
|
||||||
fT[i] = static_cast<double>(i)*fTBin;
|
fT[i] = static_cast<double>(i)*fTBin;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,10 +193,13 @@ void TPofTCalc::CalcPol(const vector<double> &par) {
|
|||||||
double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0));
|
double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0));
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
int chunk = (fNFFT/2+1)/omp_get_num_procs();
|
||||||
#endif
|
if (chunk < 10)
|
||||||
for (i=0; i<fNFFT/2+1; i++){
|
chunk = 10;
|
||||||
|
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||||
|
#endif
|
||||||
|
for (i=0; i<fNFFT/2+1; ++i) {
|
||||||
fPT[i] = (cosph*fFFTout[i][0] + sinph*fFFTout[i][1])*par[2];
|
fPT[i] = (cosph*fFFTout[i][0] + sinph*fFFTout[i][1])*par[2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -240,7 +246,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
|||||||
vector<double> N0;
|
vector<double> N0;
|
||||||
vector<double> bg;
|
vector<double> bg;
|
||||||
|
|
||||||
for(unsigned int i(0); i<numHist; i++) {
|
for(unsigned int i(0); i<numHist; ++i) {
|
||||||
t0.push_back(int(par[i+4+numHist*2]));
|
t0.push_back(int(par[i+4+numHist*2]));
|
||||||
asy0.push_back(par[i+4]);
|
asy0.push_back(par[i+4]);
|
||||||
phase0.push_back(par[i+4+numHist]);
|
phase0.push_back(par[i+4+numHist]);
|
||||||
@ -258,15 +264,21 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
|||||||
double ttime;
|
double ttime;
|
||||||
int j,k;
|
int j,k;
|
||||||
|
|
||||||
for(unsigned int i(0); i<numHist; i++) {
|
#ifdef HAVE_GOMP
|
||||||
|
int chunk = nChannels/omp_get_num_procs();
|
||||||
|
if (chunk < 10)
|
||||||
|
chunk = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for(unsigned int i(0); i<numHist; ++i) {
|
||||||
param[0]=phase0[i];
|
param[0]=phase0[i];
|
||||||
// calculate asymmetry
|
// calculate asymmetry
|
||||||
CalcPol(param);
|
CalcPol(param);
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for(j=0; j<nChannels; j++) {
|
for(j=0; j<nChannels; ++j) {
|
||||||
ttime=j*par[2];
|
ttime=j*par[2];
|
||||||
k = static_cast<int>(floor(ttime/fTBin));
|
k = static_cast<int>(floor(ttime/fTBin));
|
||||||
asydata[j]=asy0[i]*(fPT[k]+(fPT[k+1]-fPT[k])/fTBin*(ttime-fT[k]));
|
asydata[j]=asy0[i]*(fPT[k]+(fPT[k+1]-fPT[k])/fTBin*(ttime-fT[k]));
|
||||||
@ -289,12 +301,12 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
|||||||
vector< vector<double> > histo;
|
vector< vector<double> > histo;
|
||||||
vector<double> data(nChannels);
|
vector<double> data(nChannels);
|
||||||
|
|
||||||
for (unsigned int i(0); i<numHist; i++) { // loop over all histos
|
for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
|
||||||
|
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (j = 0; j<nChannels; j++) { // loop over time
|
for (j = 0; j<nChannels; ++j) { // loop over time
|
||||||
if (j < t0[i]) // j<t0
|
if (j < t0[i]) // j<t0
|
||||||
data[j] = bg[i]; // background
|
data[j] = bg[i]; // background
|
||||||
else
|
else
|
||||||
@ -315,7 +327,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
|||||||
vector<TH1F*> histoData;
|
vector<TH1F*> histoData;
|
||||||
|
|
||||||
TString name;
|
TString name;
|
||||||
for (unsigned int i(0); i<numHist; i++) { // loop over all histos
|
for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
|
||||||
// create histos
|
// create histos
|
||||||
name = "theoHisto";
|
name = "theoHisto";
|
||||||
name += i;
|
name += i;
|
||||||
@ -327,10 +339,10 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
|||||||
name += i;
|
name += i;
|
||||||
fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]);
|
fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]);
|
||||||
// fill theoHisto
|
// fill theoHisto
|
||||||
#ifdef HAVE_GOMP
|
#ifdef HAVE_GOMP
|
||||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
|
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
|
||||||
#endif
|
#endif
|
||||||
for (j = 0; j<nChannels; j++)
|
for (j = 0; j<nChannels; ++j)
|
||||||
theoHisto->SetBinContent(j, histo[i][j]);
|
theoHisto->SetBinContent(j, histo[i][j]);
|
||||||
// end omp
|
// end omp
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user