diff --git a/ChangeLog b/ChangeLog
index e01af54e..b82d1c67 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,8 +6,10 @@
 
 changes since 0.9.0
 ===================================
-NEW any2many: force the user to define the exact NeXus ouput formate (HDF4,
-HDF5, XML)
+NEW the chi^2 calculation in single-histogram and asymmetry fits is parallelized
+    if musrfit is built using a compiler supporting OpenMP (e.g. GCC >= 4.2)
+NEW any2many: force the user to define the exact NeXus ouput format (HDF4,HDF5,XML)
+FIXED the evaluation of the LF depolarization functions (before numerous unnecessary calculations were performed)
 FIXED casting problem between uint32 and time_t for some compilers (MUSR-185)
 FIXED bug reported in MUSR-183: missing background for 2nd histo in asymmetry fits when using musrt0.
 FIXED Makefiles so that the NeXus support will not be built if it has not been enabled during the configure stage
diff --git a/configure.ac b/configure.ac
index 1c82456d..a658e2b9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -668,7 +668,7 @@ if test "${BUILD_BMW_LIBS}" = "1"; then
   AC_SUBST(FITPOFB_LIBS)
   AC_SUBST(FITPOFB_CFLAGS)
 
-  # Check for fftw3_threads-library. If available musrfit is also linked against it (used in libTFitPofB).
+  # Check for fftw3_threads-library. If available musrfit is also linked against it (used in libFitPofB).
   SAVED_CFLAGS="$CFLAGS"
   CFLAGS="$CFLAGS $FFTW3_CFLAGS"
   SAVED_LIBSS="$LIBS"
@@ -686,18 +686,18 @@ if test "${BUILD_BMW_LIBS}" = "1"; then
 
   CFLAGS="$SAVED_CFLAGS"
   LIBS="$SAVED_LIBS"
-
-  # Check for gomp-library. If available musrfit is also linked against it (used in libTFitPofB).
-  SAVED_CXXFLAGS="$CXXFLAGS"
-  CXXFLAGS="$CXXFLAGS -fopenmp"
-  SAVED_LIBSS="$LIBS"
-  LIBS="$LIBS -fopenmp -lgomp"
-  AC_SEARCH_LIBS([omp_get_num_procs], [gomp], [AC_DEFINE([HAVE_GOMP], [1], [Define to 1 if gomp is available])],
-                                      [CXXFLAGS="$SAVED_CXXFLAGS" LIBS="$SAVED_LIBS"], [])
 fi
+
 AC_SUBST(FFTW3_LIBS)
 AC_SUBST(FFTW3_CFLAGS)
 
+# Check for gomp library. If available musrfit is also linked against it (used for parallel chisq calculation and in libFitPofB).
+SAVED_CXXFLAGS="$CXXFLAGS"
+CXXFLAGS="$CXXFLAGS -fopenmp"
+SAVED_LIBSS="$LIBS"
+LIBS="$LIBS -fopenmp -lgomp"
+AC_SEARCH_LIBS([omp_get_num_procs], [gomp], [AC_DEFINE([HAVE_GOMP], [1], [Define to 1 if gomp is available])],
+                                    [CXXFLAGS="$SAVED_CXXFLAGS" LIBS="$SAVED_LIBS"], [])
 
 dnl -----------------------------------------------
 dnl Set host specific compiler and linker flags
diff --git a/src/classes/PRunAsymmetry.cpp b/src/classes/PRunAsymmetry.cpp
index e981c521..33363158 100644
--- a/src/classes/PRunAsymmetry.cpp
+++ b/src/classes/PRunAsymmetry.cpp
@@ -29,6 +29,14 @@
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************/
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_GOMP
+#include <omp.h>
+#endif
+
 #include <stdio.h>
 
 #include <iostream>
@@ -154,9 +162,20 @@ Double_t PRunAsymmetry::CalcChiSquare(const std::vector<Double_t>& par)
     fFuncValues[i] = fMsrInfo->EvalFunc(fMsrInfo->GetFuncNo(i), *fRunInfo->GetMap(), par);
   }
 
-  // calculate chisq
-  Double_t time;
-  for (UInt_t i=0; i<fData.GetValue()->size(); i++) {
+  // calculate chi square
+  Double_t time(1.0);
+  Int_t i, N(static_cast<Int_t>(fData.GetValue()->size()));
+
+  // Calculate the theory function once to ensure one function evaluation for the current set of parameters.
+  // This is needed for the LF and user functions where some non-thread-save calculations only need to be calculated once
+  // for a given set of parameters---which should be done outside of the parallelized loop.
+  // For all other functions it means a tiny and acceptable overhead.
+  asymFcnValue = fTheory->Func(time, par, fFuncValues);
+
+  #ifdef HAVE_GOMP
+  #pragma omp parallel for default(shared) private(i,time,diff,asymFcnValue) schedule(dynamic,N/(2*omp_get_num_procs())) reduction(+:chisq)
+  #endif
+  for (i=0; i < N; ++i) {
     time = fData.GetDataTimeStart() + (Double_t)i*fData.GetDataTimeStep();
     if ((time>=fFitStartTime) && (time<=fFitEndTime)) {
       switch (fAlphaBetaTag) {
diff --git a/src/classes/PRunSingleHisto.cpp b/src/classes/PRunSingleHisto.cpp
index 0ffe76f7..a4446aee 100644
--- a/src/classes/PRunSingleHisto.cpp
+++ b/src/classes/PRunSingleHisto.cpp
@@ -29,6 +29,14 @@
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************/
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_GOMP
+#include <omp.h>
+#endif
+
 #include <iostream>
 #include <fstream>
 using namespace std;
@@ -139,8 +147,19 @@ Double_t PRunSingleHisto::CalcChiSquare(const std::vector<Double_t>& par)
   }
 
   // calculate chi square
-  Double_t time;
-  for (UInt_t i=0; i<fData.GetValue()->size(); i++) {
+  Double_t time(1.0);
+  Int_t i, N(static_cast<Int_t>(fData.GetValue()->size()));
+
+  // Calculate the theory function once to ensure one function evaluation for the current set of parameters.
+  // This is needed for the LF and user functions where some non-thread-save calculations only need to be calculated once
+  // for a given set of parameters---which should be done outside of the parallelized loop.
+  // For all other functions it means a tiny and acceptable overhead.
+  time = fTheory->Func(time, par, fFuncValues);
+
+  #ifdef HAVE_GOMP
+  #pragma omp parallel for default(shared) private(i,time,diff) schedule(dynamic,N/(2*omp_get_num_procs())) reduction(+:chisq)
+  #endif
+  for (i=0; i < N; ++i) {
     time = fData.GetDataTimeStart() + (Double_t)i*fData.GetDataTimeStep();
     if ((time>=fFitStartTime) && (time<=fFitEndTime)) {
       diff = fData.GetValue()->at(i) -
@@ -212,12 +231,25 @@ Double_t PRunSingleHisto::CalcMaxLikelihood(const std::vector<Double_t>& par)
   // calculate maximum log likelihood
   Double_t theo;
   Double_t data;
-  Double_t time;
+  Double_t time(1.0);
+  Int_t i, N(static_cast<Int_t>(fData.GetValue()->size()));
+
   // norm is needed since there is no simple scaling like in chisq case to get the correct Max.Log.Likelihood value when normlizing N(t) to 1/ns
   Double_t normalizer = 1.0;
+
   if (fScaleN0AndBkg)
     normalizer = fRunInfo->GetPacking() * (fTimeResolution * 1.0e3);
-  for (UInt_t i=0; i<fData.GetValue()->size(); i++) {
+
+  // Calculate the theory function once to ensure one function evaluation for the current set of parameters.
+  // This is needed for the LF and user functions where some non-thread-save calculations only need to be calculated once
+  // for a given set of parameters---which should be done outside of the parallelized loop.
+  // For all other functions it means a tiny and acceptable overhead.
+  time = fTheory->Func(time, par, fFuncValues);
+
+  #ifdef HAVE_GOMP
+  #pragma omp parallel for default(shared) private(i,time,theo,data) schedule(dynamic,N/(2*omp_get_num_procs())) reduction(-:mllh)
+  #endif
+  for (i=0; i < N; ++i) {
     time = fData.GetDataTimeStart() + (Double_t)i*fData.GetDataTimeStep();
     if ((time>=fFitStartTime) && (time<=fFitEndTime)) {
       // calculate theory for the given parameter set
diff --git a/src/classes/PTheory.cpp b/src/classes/PTheory.cpp
index 7abc8f0c..27f17c10 100644
--- a/src/classes/PTheory.cpp
+++ b/src/classes/PTheory.cpp
@@ -29,6 +29,14 @@
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************/
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_GOMP
+#include <omp.h>
+#endif
+
 #include <iostream>
 #include <vector>
 using namespace std;
@@ -1185,6 +1193,7 @@ Double_t PTheory::StaticGaussKT(register Double_t t, const PDoubleVector& paramV
  */
 Double_t PTheory::StaticGaussKTLF(register Double_t t, const PDoubleVector& paramValues, const PDoubleVector& funcValues) const
 {
+
   // expected parameters: frequency damping [tshift]
 
   Double_t val[3];
@@ -1206,8 +1215,9 @@ Double_t PTheory::StaticGaussKTLF(register Double_t t, const PDoubleVector& para
     return 1.0;
 
   // check if the parameter values have changed, and if yes recalculate the non-analytic integral
+  // check only the first two parameters since the tshift is irrelevant for the LF-integral calculation!!
   Bool_t newParam = false;
-  for (UInt_t i=0; i<3; i++) {
+  for (UInt_t i=0; i<2; i++) {
     if (val[i] != fPrevParam[i]) {
       newParam = true;
       break;
@@ -1215,9 +1225,11 @@ Double_t PTheory::StaticGaussKTLF(register Double_t t, const PDoubleVector& para
   }
 
   if (newParam) { // new parameters found
-    for (UInt_t i=0; i<3; i++)
-      fPrevParam[i] = val[i];
-    CalculateGaussLFIntegral(val);
+    {
+      for (UInt_t i=0; i<2; i++)
+        fPrevParam[i] = val[i];
+      CalculateGaussLFIntegral(val);
+    }
   }
 
   Double_t tt;
@@ -1246,6 +1258,7 @@ Double_t PTheory::StaticGaussKTLF(register Double_t t, const PDoubleVector& para
   }
 
   return result;
+
 }
 
 //--------------------------------------------------------------------------
@@ -1301,8 +1314,9 @@ Double_t PTheory::DynamicGaussKTLF(register Double_t t, const PDoubleVector& par
 
   if (!useKeren) {
     // check if the parameter values have changed, and if yes recalculate the non-analytic integral
+    // check only the first three parameters since the tshift is irrelevant for the LF-integral calculation!!
     Bool_t newParam = false;
-    for (UInt_t i=0; i<4; i++) {
+    for (UInt_t i=0; i<3; i++) {
       if (val[i] != fPrevParam[i]) {
         newParam = true;
         break;
@@ -1310,7 +1324,7 @@ Double_t PTheory::DynamicGaussKTLF(register Double_t t, const PDoubleVector& par
     }
 
     if (newParam) { // new parameters found
-      for (UInt_t i=0; i<4; i++)
+      for (UInt_t i=0; i<3; i++)
         fPrevParam[i] = val[i];
       CalculateDynKTLF(val, 0); // 0 means Gauss
     }
@@ -1340,6 +1354,7 @@ Double_t PTheory::DynamicGaussKTLF(register Double_t t, const PDoubleVector& par
   }
 
   return result;
+
 }
 
 //--------------------------------------------------------------------------
@@ -1423,8 +1438,9 @@ Double_t PTheory::StaticLorentzKTLF(register Double_t t, const PDoubleVector& pa
     return 1.0;
 
   // check if the parameter values have changed, and if yes recalculate the non-analytic integral
+  // check only the first two parameters since the tshift is irrelevant for the LF-integral calculation!!
   Bool_t newParam = false;
-  for (UInt_t i=0; i<3; i++) {
+  for (UInt_t i=0; i<2; i++) {
     if (val[i] != fPrevParam[i]) {
       newParam = true;
       break;
@@ -1432,7 +1448,7 @@ Double_t PTheory::StaticLorentzKTLF(register Double_t t, const PDoubleVector& pa
   }
 
   if (newParam) { // new parameters found
-    for (UInt_t i=0; i<3; i++)
+    for (UInt_t i=0; i<2; i++)
       fPrevParam[i] = val[i];
     CalculateLorentzLFIntegral(val);
   }
@@ -1472,6 +1488,7 @@ Double_t PTheory::StaticLorentzKTLF(register Double_t t, const PDoubleVector& pa
   }
 
   return result;
+
 }
 
 //--------------------------------------------------------------------------
@@ -1562,8 +1579,9 @@ Double_t PTheory::DynamicLorentzKTLF(register Double_t t, const PDoubleVector& p
   }
 
   // check if the parameter values have changed, and if yes recalculate the non-analytic integral
+  // check only the first three parameters since the tshift is irrelevant for the LF-integral calculation!!
   Bool_t newParam = false;
-  for (UInt_t i=0; i<4; i++) {
+  for (UInt_t i=0; i<3; i++) {
     if (val[i] != fPrevParam[i]) {
       newParam = true;
       break;
@@ -1571,14 +1589,15 @@ Double_t PTheory::DynamicLorentzKTLF(register Double_t t, const PDoubleVector& p
   }
 
   if (newParam) { // new parameters found
-    for (UInt_t i=0; i<4; i++)
+    for (UInt_t i=0; i<3; i++)
       fPrevParam[i] = val[i];
-    CalculateDynKTLF(val, 1); // 0 means Lorentz
+    CalculateDynKTLF(val, 1); // 1 means Lorentz
   }
 
   result = GetDynKTLFValue(tt);
 
   return result;
+
 }
 
 //--------------------------------------------------------------------------