Applied a few minor optimizations to the field-distribution calculations
This commit is contained in:
parent
db4425e352
commit
e22256ef8a
11
src/external/libFitPofB/classes/TBofZCalc.cpp
vendored
11
src/external/libFitPofB/classes/TBofZCalc.cpp
vendored
@ -57,10 +57,13 @@ void TBofZCalc::Calculate()
|
||||
fZ.resize(fSteps);
|
||||
fBZ.resize(fSteps);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic)
|
||||
#endif
|
||||
for (j=0; j<fSteps; j++) {
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = fSteps/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(j,ZZ) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (j=0; j<fSteps; ++j) {
|
||||
ZZ = fParam[1] + static_cast<double>(j)*fDZ;
|
||||
fZ[j] = ZZ;
|
||||
fBZ[j] = GetBofZ(ZZ);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -290,7 +290,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
|
||||
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
int chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBkMatrix[l][1] = fFFTin[l][0];
|
||||
@ -299,11 +302,17 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
// sum_K aK Kx^2 cos(Kx*x + Ky*y) cos(Kz*z)
|
||||
// First multiply the aK with Kx^2, then call FFTW
|
||||
|
||||
float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));;
|
||||
float coeffKx(4.0/3.0*pow(PI/fLatticeConstant, 2.0f));
|
||||
|
||||
// k = 0
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel default(shared) private(i,j)
|
||||
{
|
||||
#pragma omp sections
|
||||
{
|
||||
// even rows
|
||||
#pragma omp section
|
||||
#endif
|
||||
for (i = 0; i < NFFT; i += 2) {
|
||||
// j = 0
|
||||
fFFTin[fStepsZ*NFFT*i][0] = 0.0f;
|
||||
@ -317,6 +326,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
}
|
||||
|
||||
// odd rows
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp section
|
||||
#endif
|
||||
for (i = 1; i < NFFT; i += 2) {
|
||||
for (j = 0; j < NFFT_2; j += 2) {
|
||||
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
|
||||
@ -325,12 +337,19 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
fFFTin[fStepsZ*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
|
||||
}
|
||||
}
|
||||
#ifdef HAVE_GOMP
|
||||
} // end omp sections
|
||||
#endif
|
||||
|
||||
// k != 0
|
||||
|
||||
if (fFind3dSolution) {
|
||||
for (k = 1; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp sections
|
||||
{
|
||||
// even rows
|
||||
#pragma omp section
|
||||
#endif
|
||||
for (i = 0; i < NFFT; i += 2) {
|
||||
// j = 0
|
||||
fFFTin[k + NFFTz*NFFT*i][0] = 0.0f;
|
||||
@ -344,6 +363,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
}
|
||||
|
||||
// odd rows
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp section
|
||||
#endif
|
||||
for (i = 1; i < NFFT; i += 2) {
|
||||
for (j = 0; j < NFFT_2; j += 2) {
|
||||
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1)*(j + 1));
|
||||
@ -352,8 +374,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
fFFTin[k + NFFTz*(j + 1 + NFFT*i)][0] *= coeffKx*static_cast<float>((j + 1 - NFFT)*(j + 1 - NFFT));
|
||||
}
|
||||
}
|
||||
#ifdef HAVE_GOMP
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} // else do nothing since the other aK are already zero since the former aK manipulation
|
||||
#ifdef HAVE_GOMP
|
||||
} // end omp parallel
|
||||
#endif
|
||||
|
||||
fftwf_execute(fFFTplan);
|
||||
|
||||
@ -363,7 +391,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
fGstorage[k] = fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
|
||||
}
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fFFTin[l][0] = fBkMatrix[l][1];
|
||||
@ -473,7 +501,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
fGstorage[k] += fRealSpaceMatrix[k][0]*fRealSpaceMatrix[k][0];
|
||||
}
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fFFTin[l][0] = fBkMatrix[l][1];
|
||||
@ -583,7 +611,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGatVortexCore() const {
|
||||
fGstorage[k] += fRealSpaceMatrix[k][1]*fRealSpaceMatrix[k][1];
|
||||
}
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fFFTin[l][0] = fBkMatrix[l][1];
|
||||
@ -612,13 +640,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
||||
const int NFFTz_2(fStepsZ/2);
|
||||
|
||||
int i, j, k, l, index;
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
// Take the derivative of the Fourier sum of omega
|
||||
// This is going to be a bit lengthy...
|
||||
|
||||
// First save a copy of the real aK-matrix in the imaginary part of the bK-matrix
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBkMatrix[l][1] = fFFTin[l][0];
|
||||
@ -687,7 +720,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
||||
|
||||
// Copy the results to the gradient matrix and restore the original aK-matrix
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fOmegaDiffMatrix[0][l] = fRealSpaceMatrix[l][1];
|
||||
@ -777,7 +810,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
||||
|
||||
// Copy the results to the gradient matrix and restore the original aK-matrix
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fOmegaDiffMatrix[1][l] = fRealSpaceMatrix[l][1];
|
||||
@ -838,10 +871,16 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
||||
// 3D transform
|
||||
fftwf_execute(fFFTplan);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
// Copy the results to the gradient matrix - with the 1D-FORWARD-transform we have to _add_ fSumAk
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fOmegaDiffMatrix[2][k + NFFTz*index] = fRealSpaceMatrix[k + NFFTz*index][1] + fSumAk[k][1];
|
||||
@ -850,7 +889,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
||||
|
||||
// Restore the original aK-matrix
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fFFTin[l][0] = fBkMatrix[l][1];
|
||||
@ -859,7 +901,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGradient() const {
|
||||
} else {
|
||||
// For the 2D solution, dw/dz = 0
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fOmegaDiffMatrix[2][l] = 0.0;
|
||||
@ -1004,9 +1046,15 @@ void TFilmTriVortexNGLFieldCalc::FillAbrikosovCoefficients(const float reducedFi
|
||||
|
||||
fFFTin[0][0] = 0.0;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
for (k = 1; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fFFTin[k + NFFTz*index][0] = 0.0;
|
||||
@ -1699,9 +1747,16 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsB() const {
|
||||
}
|
||||
*/
|
||||
} else { // for 2D solution only
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
for (k = 1; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fBkMatrix[k + NFFTz*index][0] = 0.0;
|
||||
@ -1971,10 +2026,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXFirst() c
|
||||
|
||||
int i, j, k, kx, ky, kz, index;
|
||||
float ii;
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
// k = 0
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||
@ -2116,10 +2176,15 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpXSecond()
|
||||
|
||||
int i, j, k, kx, ky, kz, index;
|
||||
float ii;
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
// k = 0
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||
@ -2260,7 +2325,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYFirst() c
|
||||
|
||||
// k = 0
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
int chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||
@ -2397,7 +2465,10 @@ void TFilmTriVortexNGLFieldCalc::ManipulateFourierCoefficientsForBperpYSecond()
|
||||
|
||||
// k = 0
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
int chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fBkMatrix[NFFTz*index][0] = 0.0f;
|
||||
@ -2575,11 +2646,18 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
const int NFFTz_2(fStepsZ/2);
|
||||
const int NFFTsqStZ_2(NFFTsq*(fStepsZ/2 + 1));
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk;
|
||||
#endif
|
||||
|
||||
// first check that the field is not larger than Hc2 and that we are dealing with a type II SC ...
|
||||
if ((field >= Hc2) || (lambda < xi/sqrt(2.0))) {
|
||||
int m;
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(m) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(m) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (m = 0; m < NFFTsqStZ; ++m) {
|
||||
fBout[0][m] = 0.0f;
|
||||
@ -2601,7 +2679,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic)
|
||||
chunk = NFFT/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(j,index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (j = 0; j < NFFT; ++j) {
|
||||
index = k + NFFTz*j;
|
||||
@ -2626,7 +2707,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
for (j = 0; j < NFFT; ++j) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i,index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(i,index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < NFFT; ++i) {
|
||||
index = k + NFFTz*(j + NFFT*i);
|
||||
@ -2645,7 +2726,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
int indexQA;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic)
|
||||
chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(index,denomQAInv) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
if (!fOmegaMatrix[NFFTz*index] || !index || (index == (NFFT+1)*NFFT_2)) {
|
||||
@ -2672,7 +2756,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0];
|
||||
@ -2682,7 +2766,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// initialize the bK-Matrix
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBkMatrix[l][0] = 0.0;
|
||||
@ -2708,7 +2795,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
// }
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; l++) {
|
||||
if (fOmegaMatrix[l]) {
|
||||
@ -2726,9 +2816,6 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
// since all of this should be a smooth function anyway, I set the value of the next neighbour r
|
||||
// for the two vortex core positions in my matrix
|
||||
// If this was not enough we can get the g(0)-values by an expensive CalculateGatVortexCore()-invocation (not working at the moment)
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(k) schedule(dynamic)
|
||||
#endif
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
fRealSpaceMatrix[k][0] = fRealSpaceMatrix[k + fStepsZ*fSteps][0];//fGstorage[k];
|
||||
fRealSpaceMatrix[k + NFFTz*(NFFT+1)*NFFT_2][0] = fRealSpaceMatrix[k][0];//fGstorage[k];
|
||||
@ -2747,9 +2834,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
fftwf_execute(fFFTplan);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
|
||||
@ -2794,7 +2887,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
// Multiply the aK with the spacial averages
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fFFTin[k + NFFTz*index][0] = fFFTin[k + NFFTz*index][0]*fSumSum;
|
||||
@ -2823,9 +2916,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
}
|
||||
|
||||
if (!akConverged) {
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFT/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j, index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(j, index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (j = 0; j < NFFT; ++j) {
|
||||
index = k + NFFTz*j;
|
||||
@ -2856,9 +2954,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
fftwf_execute(fFFTplan);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fOmegaMatrix[k + NFFTz*index] = fSumAk[k][0] - fRealSpaceMatrix[k + NFFTz*index][0];
|
||||
@ -2869,6 +2973,9 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// first calculate PK (use the Q-Matrix memory for the second part)
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
@ -2905,7 +3012,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
if (firstBkCalculation) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTStZ; ++l) {
|
||||
fCheckBkConvergence[l] = 0.0f;
|
||||
@ -2937,9 +3047,14 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
// cout << "Bk Convergence: " << bkConverged << endl;
|
||||
|
||||
if (!bkConverged) {
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFT/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (j = 0; j < NFFT; ++j) {
|
||||
index = k + NFFTz*j;
|
||||
@ -2950,7 +3065,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// In order to save memory I will not allocate more space for another matrix but save a copy of the bKs in the aK-Matrix
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fFFTin[l][1] = fBkMatrix[l][0];
|
||||
@ -2986,9 +3104,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
fftwf_execute(fFFTplanBkToBandQ);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fQMatrix[k + NFFTz*index][0] = fQMatrixA[index][0] - fBkMatrix[k + NFFTz*index][1];
|
||||
@ -2997,7 +3121,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// Restore bKs for Qy calculation and Fourier transform to get Qy
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBkMatrix[l][0] = fFFTin[l][1];
|
||||
@ -3008,9 +3135,15 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
fftwf_execute(fFFTplanBkToBandQ);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
for (k = 0; k < NFFTz; ++k) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fQMatrix[k + NFFTz*index][1] = fQMatrixA[index][1] + fBkMatrix[k + NFFTz*index][1];
|
||||
@ -3019,7 +3152,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// Restore bKs for the next iteration
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBkMatrix[l][0] = fFFTin[l][1];
|
||||
@ -3035,7 +3171,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// Fill in the B-Matrix and restore the bKs for the second part of the Bx-calculation
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
chunk = NFFTsqStZ/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBout[0][l] = fBkMatrix[l][0];
|
||||
@ -3050,7 +3189,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
// Fill in the B-Matrix and restore the bKs for the By-calculation
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBout[0][l] = 0.5f*(fBkMatrix[l][0] - fBout[0][l])*Hc2_kappa;
|
||||
@ -3064,7 +3203,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBout[1][l] = fBkMatrix[l][0];
|
||||
@ -3078,7 +3217,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// Fill in the B-Matrix and restore the bKs for the second part of the By-calculation
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBout[1][l] = 0.5f*(fBkMatrix[l][0] - fBout[1][l])*Hc2_kappa;
|
||||
@ -3089,7 +3228,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
fftwf_execute(fFFTplanBkToBandQ);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(l) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (l = 0; l < NFFTsqStZ; ++l) {
|
||||
fBout[2][l] = (scaledB + fBkMatrix[l][0])*Hc2_kappa;
|
||||
@ -3099,7 +3238,10 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
|
||||
// Save a copy of the BkS - where does not matter since this is the very end of the calculation...
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
chunk = NFFTsq/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fFFTin[index][1] = fBkS[index][0];
|
||||
@ -3112,7 +3254,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
// Write the surface fields to the field-Matrix and restore the BkS for the By-calculation
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fBout[0][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
|
||||
@ -3127,7 +3269,7 @@ void TFilmTriVortexNGLFieldCalc::CalculateGrid() const {
|
||||
// Write the surface fields to the field-Matrix
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(index) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (index = 0; index < NFFTsq; ++index) {
|
||||
fBout[1][NFFTz/2 + NFFTz*index] = fBkS[index][1]*Hc2_kappa;
|
||||
|
73
src/external/libFitPofB/classes/TPofBCalc.cpp
vendored
73
src/external/libFitPofB/classes/TPofBCalc.cpp
vendored
@ -62,7 +62,10 @@ TPofBCalc::TPofBCalc(const vector<double> ¶) : fBmin(0.0), fBmax(0.0), fDT(p
|
||||
int i;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
int chunk = fPBSize/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||
fB[i] = static_cast<double>(i)*fDB;
|
||||
@ -83,7 +86,10 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
|
||||
int i;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
int chunk = fPBSize/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||
fB[i] = b[i];
|
||||
@ -121,9 +127,12 @@ TPofBCalc::TPofBCalc(const vector<double>& b, const vector<double>& pb, double d
|
||||
|
||||
void TPofBCalc::UnsetPBExists() {
|
||||
int i;
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
#endif
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = fPBSize/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||
fPB[i] = 0.0;
|
||||
}
|
||||
@ -140,14 +149,17 @@ void TPofBCalc::Normalize(unsigned int minFilledIndex = 0, unsigned int maxFille
|
||||
double pBsum(0.0);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum)
|
||||
int chunk = (maxFilledIndex-minFilledIndex)/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
|
||||
#endif
|
||||
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
|
||||
pBsum += fPB[i];
|
||||
pBsum *= fDB;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = minFilledIndex; i <= static_cast<int>(maxFilledIndex); ++i)
|
||||
fPB[i] /= pBsum;
|
||||
@ -160,7 +172,10 @@ void TPofBCalc::SetPB(const vector<double> &pb) const {
|
||||
|
||||
int i;
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
int chunk = fPBSize/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||
fPB[i] = pb[i];
|
||||
@ -190,14 +205,20 @@ void TPofBCalc::Calculate(const string &type, const vector<double> ¶) {
|
||||
int i;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
int chunk = B0Index/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < B0Index; ++i) {
|
||||
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expominus);
|
||||
}
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
chunk = (BmaxIndex-B0Index)/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = B0Index; i <= BmaxIndex; ++i) {
|
||||
fPB[i] = exp(-(fB[i]-B0)*(fB[i]-B0)/expoplus);
|
||||
@ -546,6 +567,10 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
|
||||
// cannot use a reduction clause here (like e.g. in Normalize()), since pBvec[] is not a scalar variable
|
||||
// therefore, we need to work on it a bit more
|
||||
int n(omp_get_num_procs()), tid, offset;
|
||||
int chunk = fPBSize/n;
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
|
||||
vector< vector<unsigned int> > pBvec(n, vector<unsigned int>(fPBSize, 0));
|
||||
|
||||
int indexStep(static_cast<int>(floor(static_cast<float>(numberOfSteps_2)/static_cast<float>(n))));
|
||||
@ -577,7 +602,7 @@ void TPofBCalc::Calculate(const TBulkVortexFieldCalc *vortexLattice, const vecto
|
||||
}
|
||||
|
||||
for (j = 0; j < n; ++j) {
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||
fPB[i] += static_cast<double>(pBvec[j][i]);
|
||||
}
|
||||
@ -624,7 +649,10 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
|
||||
// calculate Gaussian background
|
||||
double bg[fPBSize];
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
int chunk = fPBSize/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for(i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||
bg[i] = exp(-(fB[i]-B)*(fB[i]-B)/(2.0*BsSq));
|
||||
@ -634,7 +662,7 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
|
||||
|
||||
double bgsum(0.0);
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:bgsum)
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:bgsum)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||
bgsum += bg[i];
|
||||
@ -642,14 +670,14 @@ void TPofBCalc::AddBackground(double B, double s, double w) {
|
||||
bgsum *= fDB;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||
bg[i] /= bgsum;
|
||||
|
||||
// add background to P(B)
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||
fPB[i] = (1.0 - w)*fPB[i] + w*bg[i];
|
||||
@ -691,7 +719,10 @@ void TPofBCalc::ConvolveGss(double w) {
|
||||
|
||||
int i;
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic)
|
||||
int chunk = (NFFT/2+1)/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i, GssInTimeDomain) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(NFFT/2+1); ++i) {
|
||||
GssInTimeDomain = exp(expo*static_cast<double>(i)*static_cast<double>(i));
|
||||
@ -725,7 +756,10 @@ double TPofBCalc::GetFirstMoment() const {
|
||||
double pBsum(0.0);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic) reduction(+:pBsum)
|
||||
int chunk = fPBSize/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk) reduction(+:pBsum)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i)
|
||||
pBsum += fB[i]*fPB[i];
|
||||
@ -746,7 +780,10 @@ double TPofBCalc::GetCentralMoment(unsigned int n) const {
|
||||
double pBsum(0.0);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic) reduction(+:pBsum)
|
||||
int chunk = fPBSize/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i, diff) schedule(dynamic,chunk) reduction(+:pBsum)
|
||||
#endif
|
||||
for (i = 0; i < static_cast<int>(fPBSize); ++i) {
|
||||
diff = fB[i]-firstMoment;
|
||||
|
60
src/external/libFitPofB/classes/TPofTCalc.cpp
vendored
60
src/external/libFitPofB/classes/TPofTCalc.cpp
vendored
@ -102,10 +102,13 @@ TPofTCalc::TPofTCalc (const TPofBCalc *PofB, const string &wisdom, const vector<
|
||||
|
||||
int i;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
#endif
|
||||
for (i = 0; i < NFFT_2p1; i++) {
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = NFFT_2p1/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i = 0; i < NFFT_2p1; ++i) {
|
||||
fT[i] = static_cast<double>(i)*fTBin;
|
||||
}
|
||||
|
||||
@ -190,10 +193,13 @@ void TPofTCalc::CalcPol(const vector<double> &par) {
|
||||
double sinph(sin(par[0]*PI/180.0)), cosph(cos(par[0]*PI/180.0));
|
||||
int i;
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic)
|
||||
#endif
|
||||
for (i=0; i<fNFFT/2+1; i++){
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = (fNFFT/2+1)/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#pragma omp parallel for default(shared) private(i) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (i=0; i<fNFFT/2+1; ++i) {
|
||||
fPT[i] = (cosph*fFFTout[i][0] + sinph*fFFTout[i][1])*par[2];
|
||||
}
|
||||
}
|
||||
@ -240,7 +246,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
||||
vector<double> N0;
|
||||
vector<double> bg;
|
||||
|
||||
for(unsigned int i(0); i<numHist; i++) {
|
||||
for(unsigned int i(0); i<numHist; ++i) {
|
||||
t0.push_back(int(par[i+4+numHist*2]));
|
||||
asy0.push_back(par[i+4]);
|
||||
phase0.push_back(par[i+4+numHist]);
|
||||
@ -258,15 +264,21 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
||||
double ttime;
|
||||
int j,k;
|
||||
|
||||
for(unsigned int i(0); i<numHist; i++) {
|
||||
#ifdef HAVE_GOMP
|
||||
int chunk = nChannels/omp_get_num_procs();
|
||||
if (chunk < 10)
|
||||
chunk = 10;
|
||||
#endif
|
||||
|
||||
for(unsigned int i(0); i<numHist; ++i) {
|
||||
param[0]=phase0[i];
|
||||
// calculate asymmetry
|
||||
CalcPol(param);
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic)
|
||||
#endif
|
||||
for(j=0; j<nChannels; j++) {
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j,ttime,k) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for(j=0; j<nChannels; ++j) {
|
||||
ttime=j*par[2];
|
||||
k = static_cast<int>(floor(ttime/fTBin));
|
||||
asydata[j]=asy0[i]*(fPT[k]+(fPT[k+1]-fPT[k])/fTBin*(ttime-fT[k]));
|
||||
@ -289,12 +301,12 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
||||
vector< vector<double> > histo;
|
||||
vector<double> data(nChannels);
|
||||
|
||||
for (unsigned int i(0); i<numHist; i++) { // loop over all histos
|
||||
for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
|
||||
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
|
||||
#endif
|
||||
for (j = 0; j<nChannels; j++) { // loop over time
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (j = 0; j<nChannels; ++j) { // loop over time
|
||||
if (j < t0[i]) // j<t0
|
||||
data[j] = bg[i]; // background
|
||||
else
|
||||
@ -315,7 +327,7 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
||||
vector<TH1F*> histoData;
|
||||
|
||||
TString name;
|
||||
for (unsigned int i(0); i<numHist; i++) { // loop over all histos
|
||||
for (unsigned int i(0); i<numHist; ++i) { // loop over all histos
|
||||
// create histos
|
||||
name = "theoHisto";
|
||||
name += i;
|
||||
@ -327,10 +339,10 @@ void TPofTCalc::FakeData(const string &rootOutputFileName, const vector<double>
|
||||
name += i;
|
||||
fakeHisto = new TH1F(name.Data(), name.Data(), int(par[3]), -par[2]/2.0, (par[3]+0.5)*par[2]);
|
||||
// fill theoHisto
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic)
|
||||
#endif
|
||||
for (j = 0; j<nChannels; j++)
|
||||
#ifdef HAVE_GOMP
|
||||
#pragma omp parallel for default(shared) private(j) schedule(dynamic,chunk)
|
||||
#endif
|
||||
for (j = 0; j<nChannels; ++j)
|
||||
theoHisto->SetBinContent(j, histo[i][j]);
|
||||
// end omp
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user