dont link with boost filesystem

Merge branch 'master' of gitlab.psi.ch:uldis_l/DKS
remove any OpenCL calls when DKS compiled without OpenCL
2017-06-07 11:10:38 +02:00 · 2017-06-06 14:24:20 +02:00 · 2017-06-06 14:23:24 +02:00 · 2017-06-06 14:22:34 +02:00 · 2017-06-06 14:22:00 +02:00 · 2017-05-30 11:07:22 +02:00
30 changed files with 657 additions and 335 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.2)
 PROJECT (DKS)
 SET (DKS_VERSION_MAJOR 1)
 SET (DKS_VERSION_MINOR 1)
-SET (DKS_VERSION_PATCH 0)
+SET (DKS_VERSION_PATCH 2)
 set (DKS_VERSION ${DKS_VERSION_MAJOR}.${DKS_VERSION_MINOR}.${DKS_VERSION_PATCH})
 SET (PACKAGE \"dks\")
 SET (PACKAGE_BUGREPORT \"locans.uldis@psi.ch\")
@ -28,11 +28,13 @@ MESSAGE (STATUS "OpenCL kernel files: ${OPENCL_KERNELS}")
 set (BOOSTROOT $ENV{BOOST_DIR})
 SET (Boost_USE_STATIC_LIBS OFF)
 SET (Boost_USE_STATIC_RUNTIME OFF)
-FIND_PACKAGE(Boost 1.55.0 REQUIRED COMPONENTS filesystem system)
+#FIND_PACKAGE(Boost 1.55 REQUIRED COMPONENTS filesystem system)
+FIND_PACKAGE(Boost 1.41 REQUIRED)
 IF (Boost_FOUND)
+  MESSAGE (STATUS "Boost version: ${Boost_VERSION}")
  MESSAGE (STATUS "Found boost include dir: ${Boost_INCLUDE_DIRS}")
  MESSAGE (STATUS "Found boost library dir: ${Boost_LIBRARY_DIRS}")
-  MESSAGE (STATUS "Found boost libraries: ${Boost_LIBRARIES}")
+  #MESSAGE (STATUS "Found boost libraries: ${Boost_LIBRARIES}")
  INCLUDE_DIRECTORIES (${Boost_INCLUDE_DIRS})
  LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
 ENDIF (Boost_FOUND)
@ -79,7 +81,7 @@ OPTION (USE_UQTK "Use UQTK" OFF)
 IF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL)

  #for intel compiler turn on openmp and opencl
-  OPTION (USE_OPENCL "Use OpenCL" ON)
+  OPTION (USE_OPENCL "Use OpenCL" OFF)
  OPTION (USE_CUDA "Use CUDA" OFF)
  OPTION (USE_MIC "Use intel MIC" ON)
  
@ -113,15 +115,21 @@ ENDIF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL)
 IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") AND NOT USE_INTEL)
  
  
-  OPTION (USE_OPENCL "Use OpenCL" ON)
+  OPTION (USE_OPENCL "Use OpenCL" OFF)
  OPTION (USE_CUDA "Use CUDA" OFF)
  OPTION (USE_MIC "Use intel MIC" OFF)
-  
+  OPTION (STATIC_CUDA "Link static cuda libraries" OFF)
+
+  IF (ENABLE_MUSR)
+    SET (USE_OPENCL ON)
+  ENDIF (ENABLE_MUSR)
+
  SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -DDEBUG -O3 -Wall -fopenmp -std=c++11 -D__wsu")

  FIND_PACKAGE(CUDA)
  IF (CUDA_FOUND)
    SET (USE_CUDA ON)
+    OPTION(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cuda libraries" OFF)
    INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
    LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
    LINK_DIRECTORIES(${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs)
@ -131,20 +139,27 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
    MESSAGE (STATUS "cuda version: ${CUDA_VERSION}")
    SET(CUDA_PROPAGATE_HOST_FLAGS OFF)

-    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lcudart -lcufft -lcublas -lnvToolsExt -DDKS_CUDA")
-    SET (CUDA_NVCC_FLAGS "-arch=sm_35 -DDEBUG -lcufft -lcublas -lcudart -fmad=false")
-    SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}  -DDEBUG -std=c++11 -D__wsu")    
-    SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${OPENCL_KERNELS}")
+    SET (CUDA_NVCC_FLAGS "-arch=sm_35;-DDEBUG;-std=c++11;-D__wsu;-fmad=false")    
+    SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${OPENCL_KERNELS}")
+
+    IF (NOT STATIC_CUDA)
+      SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_CUDA")
+      SET (DKS_CUDA_LIBS "-lcudadevrt -lcudart -lcufft -lcublas")
+    ELSE (NOT STATIC_CUDA) 
+      SET (CUDA_SEPARABLE_COMPILATION ON)
+      SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_CUDA -fPIC")
+      SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-rdc=true;-lcufft_static;-lcublas_static;-lcurand_static")
+      SET (DKS_CUDA_LIBS "-lcudadevrt -lcudart_static -lcufft_static -lcublas_static -lculibos")
+    ENDIF (NOT STATIC_CUDA)

    #if cuda version >= 7.0 add runtime commpilation flags
-    IF (NOT CUDA_VERSION VERSION_LESS "7.0")
+    IF (NOT CUDA_VERSION VERSION_LESS "7.0" AND ENABLE_MUSR)
      SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lnvrtc -lcuda")
-    ENDIF (NOT CUDA_VERSION VERSION_LESS "7.0")
+    ENDIF (NOT CUDA_VERSION VERSION_LESS "7.0" AND ENABLE_MUSR)
    
    MESSAGE (STATUS "nvcc flags: ${CUDA_NVCC_FLAGS}")
    
    SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
-    #set(CUDA_SEPARABLE_COMPILATION ON)
    SET(BUILD_SHARED_LIBS OFF)

  ENDIF (CUDA_FOUND)
@ -171,9 +186,9 @@ IF ( (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "
  ENDIF(APPLE AND NOT CUDA_FOUND)

  #if cuda found set cuda opencl flags
-  IF (CUDA_FOUND)
+  IF (CUDA_FOUND AND USE_OPENCL)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL -lpthread -DDKS_OPENCL")
-  ENDIF (CUDA_FOUND)
+  ENDIF (CUDA_FOUND AND USE_OPENCL)

  #if cuda not found but amd opencl found set opencl flags
  IF (NOT CUDA_FOUND AND OpenCL_FOUND)
--- a/ReadMe.first
+++ b/ReadMe.first
@ -29,30 +29,30 @@ Intel MIC compilers (optional)
 ######Source######
 https://gitlab.psi.ch/uldis_l/DKS

+######Changes from DKS-1.0.x version######
+DKS is split into three modules that can be enabled/disabled at compile time depending on which software it is used for.
+By default only DKSBase and DKSFFT modules are enabled. In order to install other modules the necessary otion needs to be enabled.
+Supported options are:
+-DENABLE_OPAL option should be enabled if DKS will be used for OPAL
+-DENABLE_MUSR option should be enable if DKS will be used for musrfit
+-DENABLE_PET option should be enabled if DKS will be used for PET image reconstruction
+
+See install instructions for more details on how to enable the necessary options in DKS
+
 ######Install######
+#consult the https://gitlab.psi.ch/uldis_l/DKS/wikis/home for full install isntructions

 #clone DKS
 git clone git@gitlab.psi.ch:uldis_l/DKS.git DKS

-#set compilers to use
-#supported c++ compilers: g++, icpc, mpicxx whith g++
-#supported c compilers: gcc, icc, mpicc whith gcc
-export CXX_COMPILER=cpp_compiler_name
-export CC_COMPILER=c_compiler_name
+#switch to the desired version (OPTIONAL)
+git checkout DKS-1.1.0

-#set dks root directory directory
-cd DKS
-export DKS_ROOT = $PWD
-
-#set build directory
-mkdir $DKS_BUILD_DIR
-cd $DKS_BUILD_DIR
-
-#set install directory
-export DKS_INSTALL_DIR = $DKS_BUILD_DIR #default is /usr/local/
-
-CXX=$CXX_COMPILER CC=$CC_COMPILER cmake -DCMAKE_INSTALL_PREFIX=$DKS_BUILD_DIR $DKS_ROOT
+#configure installation in build directory
+#enable DKS modules to compile -DENABLE_OPAL, -DENABLE_MUSR, -DENABLE_PET
+CXX=<c++ compiler> CC=<c compiler> -DCMAKE_INSTALL_PREFIX=<install dir> <path to DKS source> [-DENABLE_OPAL=1 -DENABLE_MUSR=1 -DENABLE_PET=1] 

+#install DKS
 make
 make install

--- a/auto-tuning/CMakeLists.txt
+++ b/auto-tuning/CMakeLists.txt
@ -4,28 +4,30 @@ LINK_DIRECTORIES( ${CMAKE_SOURCE_DIR}/src )
 #chi square kernel tests
 IF (ENABLE_MUSR)
  ADD_EXECUTABLE(testChiSquareRT testChiSquareRT.cpp)
-  TARGET_LINK_LIBRARIES(testChiSquareRT dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+  TARGET_LINK_LIBRARIES(testChiSquareRT dks ${CLFFT_LIBRARIES})

  ADD_EXECUTABLE(testChiSquareRTRandom testChiSquareRTRandom.cpp)
-  TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+  TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${CLFFT_LIBRARIES})

  IF (USE_UQTK)
    ADD_EXECUTABLE(testChiSquareRTUQTK testChiSquareRTUQTK.cpp)
-    TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
+    TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${CLFFT_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
  ENDIF (USE_UQTK)
-  #TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES})

  #test to verify search functions
  ADD_EXECUTABLE(testSearch testSearch.cpp)
-  TARGET_LINK_LIBRARIES(testSearch dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+  TARGET_LINK_LIBRARIES(testSearch dks ${CLFFT_LIBRARIES})
 ENDIF (ENABLE_MUSR)

 IF (ENABLE_OPAL)
  ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
-  TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+  TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${CLFFT_LIBRARIES})

  ADD_EXECUTABLE(testPushKick testPushKick.cpp)
-  TARGET_LINK_LIBRARIES(testPushKick dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+  TARGET_LINK_LIBRARIES(testPushKick dks ${CLFFT_LIBRARIES})
 ENDIF(ENABLE_OPAL)

+ADD_EXECUTABLE(testFFT testFFT.cpp)
+TARGET_LINK_LIBRARIES(testFFT dks ${CLFFT_LIBRARIES})
+

--- a/auto-tuning/testFFT.cpp
+++ b/auto-tuning/testFFT.cpp
@ -0,0 +1,214 @@
+#include <iostream>
+#include <cstdlib>
+#include <complex>
+
+#include "Utility/TimeStamp.h"
+#include "DKSFFT.h"
+
+using namespace std;
+
+void compareData(complex<double>* data1, complex<double>* data2, int N, int dim);
+void compareData(double* data1, double *data2, int N, int dim);
+
+void initData(complex<double> *data, int dimsize[3], int dim);
+void initData(double *data, int dimsize[3], int dim);
+
+bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim, 
+		char *api_name, char *device_name);
+
+void printHelp();
+
+int main(int argc, char *argv[]) {
+
+  int ierr;
+  int N1 = 8;
+  int N2 = 8;
+  int N3 = 8;
+  int dim = 3;
+  char *api_name = new char[10];
+  char *device_name = new char[10];
+
+  if ( readParams(argc, argv, N1, N2, N3, dim, api_name, device_name) )
+    return 0;
+
+  cout << "Use api: " << api_name << ", " << device_name << endl;
+
+  int dimsize[3] = {N1, N2, N3};
+  int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
+  int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2];
+
+  double *rdata = new double[sizereal];
+  double *ordata = new double[sizereal];
+  complex<double> *cdata = new complex<double>[sizereal];
+  complex<double> *codata = new complex<double>[sizereal];
+
+  initData(rdata, dimsize, 3);
+  initData(cdata, dimsize, 3);
+
+  /* init DKSBase */
+  cout << "Init device and set function" << endl;
+  DKSFFT base;
+  base.setAPI(api_name, strlen(api_name));
+  base.setDevice(device_name, strlen(device_name));
+  cout << "init device" << endl;
+  base.initDevice();
+  cout << "setup fft" << endl;
+  base.setupFFT(dim, dimsize);
+
+  //Test RC FFT -> CR FFT
+  void *real_ptr, *comp_ptr, *res_ptr;
+  cout << "allocate memory" << endl;
+  real_ptr = base.allocateMemory<double>(sizereal, ierr);
+  res_ptr = base.allocateMemory<double>(sizereal, ierr);
+  comp_ptr = base.allocateMemory< complex<double> >(sizecomp, ierr);
+
+  cout << "write data" << endl;
+  base.writeData<double>(real_ptr, rdata, sizereal);
+
+  cout << "perform fft" << endl;
+  base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
+  base.callC2RFFT(res_ptr, comp_ptr, dim, dimsize);
+  base.callNormalizeC2RFFT(res_ptr, dim, dimsize);
+
+  cout << "read data" << endl;
+  base.readData<double>(res_ptr, ordata, sizereal);
+
+  compareData(rdata, ordata, N1, 3);
+
+  base.freeMemory<double>(real_ptr, sizereal);
+  base.freeMemory<double>(res_ptr, sizereal);
+  base.freeMemory< complex<double> >(comp_ptr, sizecomp);
+
+  //Test CC FFT
+  void *mem_ptr;
+  mem_ptr = base.allocateMemory< complex<double> >(sizereal, ierr);
+  base.writeData< complex<double> >(mem_ptr, cdata, sizereal);
+  base.callFFT(mem_ptr, 3, dimsize);
+  base.callIFFT(mem_ptr, 3, dimsize);
+  base.callNormalizeFFT(mem_ptr, 3, dimsize);
+  base.readData< complex<double> >(mem_ptr, codata, sizereal);
+
+  compareData(cdata, codata, N1, 3);
+
+  base.freeMemory< complex<double> > (mem_ptr, sizereal);
+
+  delete[] rdata;
+  delete[] ordata;
+  delete[] cdata;
+  delete[] codata;
+
+}
+
+void compareData(complex<double>* data1, complex<double>* data2, int N, int dim) {
+  int ni, nj, nk, id;
+  ni = (dim > 2) ? N : 1;
+  nj = (dim > 1) ? N : 1;
+  nk = N;
+  double sum = 0;
+  for (int i = 0; i < ni; i++) {
+    for (int j = 0; j < nj; j++) {
+      for (int k = 0; k < nk; k++) {
+	id = i*ni*ni + j*nj + k;
+	sum += fabs(data1[id].real() - data2[id].real());
+	sum += fabs(data1[id].imag() - data2[id].imag());
+      }
+    }
+  }
+  cout << "Size " << N << " CC <--> CC diff: " << sum << endl;
+}
+
+void compareData(double* data1, double* data2, int N, int dim) {
+  int ni, nj, nk, id;
+  ni = (dim > 2) ? N : 1;
+  nj = (dim > 1) ? N : 1;
+  nk = N;
+  double sum = 0;
+  for (int i = 0; i < ni; i++) {
+    for (int j = 0; j < nj; j++) {
+      for (int k = 0; k < nk; k++) {
+	id = i*ni*ni + j*nj + k;
+	sum += fabs(data1[id] - data2[id]);
+      }
+    }
+  }
+  cout << "Size " << N << " RC <--> CR diff: " << sum << endl;
+}
+
+void initData(complex<double> *data, int dimsize[3], int dim) {
+  if (dim == 3) {
+    for (int i = 0; i < dimsize[2]; i++)
+      for (int j = 0; j < dimsize[1]; j++) 
+	for (int k = 0; k < dimsize[0]; k++) 
+	  data[i*dimsize[1]*dimsize[0] + j*dimsize[0] + k] = complex<double>(sin(k), 0.0);
+  } else if (dim == 2) {
+    for (int j = 0; j < dimsize[1]; j++) {
+      for (int k = 0; k < dimsize[0]; k++) {
+	data[j*dimsize[0] + k] = complex<double>(sin(k), 0.0);
+      }
+    }
+  } else {
+    for (int k = 0; k < dimsize[0]; k++) 
+      data[k] = complex<double>(sin(k), 0.0);
+  }
+}
+
+void initData(double *data, int dimsize[3], int dim) {
+  if (dim == 3) {
+    for (int i = 0; i < dimsize[2]; i++)
+      for (int j = 0; j < dimsize[1]; j++) 
+	for (int k = 0; k < dimsize[0]; k++) 
+	  data[i*dimsize[1]*dimsize[0] + j*dimsize[0] + k] = sin(k);
+  } else if (dim == 2) {
+    for (int j = 0; j < dimsize[1]; j++) {
+      for (int k = 0; k < dimsize[0]; k++) {
+	data[j*dimsize[0] + k] = sin(k);
+      }
+    }
+  } else {
+    for (int k = 0; k < dimsize[0]; k++) 
+      data[k] = sin(k);
+  }
+}
+
+bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim,
+		char *api_name, char *device_name) 
+{
+
+  for (int i = 1; i < argc; i++) {
+
+    if ( argv[i] == std::string("-dim")) {
+      dim = atoi(argv[i + 1]);
+      i++;
+    }
+
+    if ( argv[i] == std::string("-grid") ) {
+      N1 = atoi(argv[i + 1]);
+      N2 = atoi(argv[i + 2]);
+      N3 = atoi(argv[i + 3]);
+      i += 3;
+    }
+
+    if (argv[i] == string("-cuda")) {
+      strcpy(api_name, "Cuda");
+      strcpy(device_name, "-gpu");
+    } 
+
+    if (argv[i] == string("-opencl")) {
+      strcpy(api_name, "OpenCL");
+      strcpy(device_name, "-gpu");
+    } 
+
+    if (argv[i] == string("-mic")) {
+      strcpy(api_name, "OpenMP");
+      strcpy(device_name, "-mic");
+    } 
+
+    if (argv[i] == string("-cpu")) {
+      strcpy(api_name, "OpenCL");
+      strcpy(device_name, "-cpu");
+    }
+  }
+
+  return false;
+}
+
--- a/cmake/DKSConfig.cmake.in
+++ b/cmake/DKSConfig.cmake.in
@ -3,5 +3,7 @@ SET(${PROJECT_NAME}_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
 SET(${PROJECT_NAME}_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/lib")
 SET(${PROJECT_NAME}_LIBRARY "dks")
 SET(CMAKE_SKIP_RPATH ${CMAKE_SKIP_RPATH})
+SET(DKS_CUDA_STATIC ${STATIC_CUDA})
+SET(DKS_CUDA_LIBS "${DKS_CUDA_LIBS}")
 SET(DKS_VERSION ${DKS_VERSION})
 SET(DKS_VERSION_STR ${DKS_VERSION_STR})
--- a/doc/refman.pdf
+++ b/doc/refman.pdf
--- a/src/Algorithms/CollimatorPhysics.h
+++ b/src/Algorithms/CollimatorPhysics.h
@ -16,7 +16,8 @@ public:
  
  virtual ~DKSCollimatorPhysics() { }
  
-  virtual int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numpartices) = 0;
+  virtual int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numpartices, 
+				bool enableRutherforScattering = true) = 0;

  virtual int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr, 
 				   void *rx_ptr, void *ry_ptr, void *rz_ptr, 
--- a/src/Algorithms/FFT.h
+++ b/src/Algorithms/FFT.h
@ -6,7 +6,7 @@

 #include "../DKSDefinitions.h"

-class DKSFFT {
+class BaseFFT {

 protected:
  int defaultN[3];
@ -22,7 +22,7 @@ protected:

 public:

-  virtual ~DKSFFT() { }
+  virtual ~BaseFFT() { }

  virtual int setupFFT(int ndim, int N[3]) = 0;
  virtual int setupFFTRC(int ndim, int N[3], double scale = 1.0) = 0;
--- a/src/AutoTuning/DKSConfig.h
+++ b/src/AutoTuning/DKSConfig.h
@ -11,7 +11,7 @@
 #include <boost/optional/optional.hpp>
 #include <boost/property_tree/xml_parser.hpp>
 #include <boost/foreach.hpp>
-#include <boost/filesystem.hpp>
+//#include <boost/filesystem.hpp>
 #include <string>
 #include <iostream>
 #include <cstdlib>
@ -24,7 +24,7 @@
 #include "../DKSDefinitions.h"

 namespace pt = boost::property_tree;
-namespace fs = boost::filesystem;
+//namespace fs = boost::filesystem;

 const std::string config_dir = "/.config/DKS";
 const std::string config_file = "/autotuning.xml";
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -35,12 +35,12 @@ ENDMACRO ()
 SET (DKS_BASEDIR_HDRS
  DKSBase.h
  DKSDefinitions.h
-  DKSOPAL.h
+  DKSFFT.h
  )

 SET (DKS_BASEDIR_SRCS
  DKSBase.cpp
-  DKSOPAL.cpp
+  DKSFFT.cpp
  )

 #add opal to DKS if enable_opal is set
@ -112,26 +112,18 @@ IF (USE_CUDA)
  CUDA_ADD_LIBRARY(dks ${DKS_SRCS})
  CUDA_ADD_LIBRARY(dksshared SHARED ${DKS_SRCS})

-  IF (USE_UQTK)
-    TARGET_LINK_LIBRARIES(dks cudadevrt lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
-    TARGET_LINK_LIBRARIES(dksshared cudadevrt lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
-  ELSE (USE_UQTK)
-    TARGET_LINK_LIBRARIES(dks cudadevrt)
-    TARGET_LINK_LIBRARIES(dksshared cudadevrt)
-  ENDIF (USE_UQTK)
+  TARGET_LINK_LIBRARIES(dks ${DKS_CUDA_LIBS})
+  TARGET_LINK_LIBRARIES(dksshared ${DKS_CUDA_LIBS})
+  #TARGET_LINK_LIBRARIES(dks)
+  #TARGET_LINK_LIBRARIES(dksshared)

 ELSE (USE_CUDA)
  MESSAGE (STATUS "DKS srcs: ${DKS_SRCS}")
  ADD_LIBRARY(dks ${DKS_SRCS})
  ADD_LIBRARY(dksshared SHARED ${DKS_SRCS})

-  IF (USE_UQTK)
-    TARGET_LINK_LIBRARIES(dks lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
-    TARGET_LINK_LIBRARIES(dksshared lreg UQTk quad uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
-  ELSE (USE_UQTK)
-    TARGET_LINK_LIBRARIES(dks)
-    TARGET_LINK_LIBRARIES(dksshared)
-  ENDIF(USE_UQTK)
+  TARGET_LINK_LIBRARIES(dks)
+  TARGET_LINK_LIBRARIES(dksshared)

 ENDIF (USE_CUDA)

--- a/src/CUDA/CMakeLists.txt
+++ b/src/CUDA/CMakeLists.txt
@ -1,9 +1,9 @@
-SET (_HDRS CudaBase.cuh)
-SET (_SRCS CudaBase.cu)
+SET (_HDRS CudaBase.cuh CudaFFT.cuh)
+SET (_SRCS CudaBase.cu CudaFFT.cu)

 IF (ENABLE_OPAL)
-  SET (_HDRS ${_HDRS} CudaFFT.cuh CudaGreensFunction.cuh CudaCollimatorPhysics.cuh)
-  SET (_SRCS ${_SRCS} CudaFFT.cu CudaGreensFunction.cu CudaCollimatorPhysics.cu)
+  SET (_HDRS ${_HDRS} CudaGreensFunction.cuh CudaCollimatorPhysics.cuh)
+  SET (_SRCS ${_SRCS} CudaGreensFunction.cu CudaCollimatorPhysics.cu)
 ENDIF (ENABLE_OPAL)

 IF (ENABLE_MUSR)
--- a/src/CUDA/CudaBase.cuh
+++ b/src/CUDA/CudaBase.cuh
@ -12,7 +12,6 @@
 #include <cufft.h>
 #include <cublas_v2.h>
 #include <curand_kernel.h>
-#include <nvToolsExt.h>
 #include <time.h>

 #define BLOCK_SIZE 128
--- a/src/CUDA/CudaCollimatorPhysics.cu
+++ b/src/CUDA/CudaCollimatorPhysics.cu
@ -23,9 +23,10 @@
 #define X0_M 9
 #define I_M 10
 #define DT_M 11
+#define LOWENERGY_THR 12

 #define BLOCK_SIZE 128
-#define NUMPAR 12
+#define NUMPAR 13

 __device__ inline double dot(double3 &d1, double3 &d2) {

@ -41,6 +42,23 @@ __device__ inline double3 cross(double3 &lhs, double3 &rhs) {
  return tmp;
 }

+__device__ inline double3 ArbitraryRotation(double3 &W, double3 &Rorg, double Theta) {
+  double c=cos(Theta);
+  double s=sin(Theta);
+  double dotW = sqrt(dot(W,W));
+  W.x = W.x / dotW;
+  W.y = W.y / dotW;
+  W.z = W.z / dotW;
+
+  double dotWR = dot(W, Rorg) * (1.0 - c);
+  double3 crossW = cross(W, Rorg);
+  double3 tmp;
+  tmp.x = Rorg.x * c + crossW.x * s + W.x * dotWR;
+  tmp.y = Rorg.y * c + crossW.y * s + W.y * dotWR;
+  tmp.z = Rorg.z * c + crossW.z * s + W.z * dotWR;
+  return tmp;
+} 
+
 __device__ inline bool checkHit(double &z, double *par) {

  /* check if particle is in the degrader material */
@ -89,7 +107,7 @@ __device__ inline void energyLoss(double &Eng, bool &pdead, curandState &state,
    Eng = Eng + delta_E / 1E3;
  }

-  pdead = ((Eng<1E-4) || (dEdx>0));
+  pdead = ( (Eng < par[LOWENERGY_THR]) || (dEdx > 0) );

 }

@ -100,6 +118,7 @@ __device__ inline void Rot(double &px, double &pz, double &x, double &z, double
  double Psixz;
  double pxz;

+  /*
  if (px>=0 && pz>=0)
    Psixz = atan(px/pz);
  else if (px>0 && pz<0)
@ -108,7 +127,8 @@ __device__ inline void Rot(double &px, double &pz, double &x, double &z, double
    Psixz = atan(px/pz) + 2*PI;
  else
    Psixz = atan(px/pz) + PI;
-
+  */
+  Psixz = atan2(px, pz);
  pxz = sqrt(px*px + pz*pz);

  if(coord==1) {
@ -125,7 +145,9 @@ __device__ inline void Rot(double &px, double &pz, double &x, double &z, double
  pz = -pxz*sin(Psixz)*sin(thetacou) + pxz*cos(Psixz)*cos(thetacou);
 }

-__device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, double* par) {
+__device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, double* par,
+				   bool enableRutherfordScattering) 
+{

  double Eng = sqrt(dot(P, P) + 1.0) * M_P - M_P;
  double gamma = (Eng + M_P) / M_P;
@ -148,20 +170,9 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
  }

  //__syncthreads();  
-
  double xplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
  Rot(P.x, P.z, R.x, R.z, xplane, normP, thetacou, deltas, 1, par);

-  double P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
-  if(P2 < 0.0047) {
-    double P3 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
-    double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
-    double P4 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
-    if(P4 > 0.5)
-      thetaru = -thetaru;
-    Rot(P.x,P.z,R.x,R.z, xplane, normP, thetaru, deltas, 0, par);
-  }
-
  // y-direction: See Physical Review, "Multiple Scattering"
  z1 = curand_normal_double(&state);//gsl_ran_gaussian(rGen_m,1.0);
  z2 = curand_normal_double(&state);//gsl_ran_gaussian(rGen_m,1.0);
@ -178,14 +189,23 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d
  double yplane = z1 * deltas * theta0 / sqrt(12.0) + z2 * deltas * theta0 / 2.0;
  Rot(P.y,P.z,R.y,R.z, yplane, normP, thetacou, deltas, 2, par);

-  P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
-  if(P2 < 0.0047) {
+  double P2 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
+  if( (P2 < 0.0047) && enableRutherfordScattering) {
    double P3 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
-    double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
-    double P4 = curand_uniform_double(&state);//gsl_rng_uniform(rGen_m);
-    if(P4 > 0.5)
-      thetaru = -thetaru;
-    Rot(P.y,P.z,R.y,R.z, yplane, normP, thetaru, deltas, 0, par);
+    //double thetaru = 2.5 * sqrt(1 / P3) * sqrt(2.0) * theta0;
+    double thetaru = 2.5 * sqrt(1 / P3) * 2.0 * theta0;
+    double phiru = 2.0 * M_PI * curand_uniform_double(&state);
+    double th0=atan2(sqrt(P.x*P.x+P.y*P.y),fabs(P.z));
+    double3 W,X;
+    
+    double dotP = sqrt(dot(P,P));
+    X.x = cos(phiru)*sin(thetaru) * dotP;
+    X.y = sin(phiru)*sin(thetaru) * dotP;
+    X.z = cos(thetaru) * dotP;
+    W.x = -P.y;
+    W.y = P.x;
+    W.z = 0.0;
+    P = ArbitraryRotation(W, X, th0);
  }

 }
@ -193,7 +213,7 @@ __device__ inline void coulombScat(double3 &R, double3 &P, curandState &state, d

 template <typename T>
 __global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state,
-					int numparticles)
+					int numparticles, bool enableRutherfordScattering)
 {

  //get global id and thread id
@ -235,7 +255,7 @@ __global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state
 	P.x = P.x * ptot / sq;
 	P.y = P.y * ptot / sq;
 	P.z = P.z * ptot / sq;
-	coulombScat(R[tid], P, s, p); 
+	coulombScat(R[tid], P, s, p, enableRutherfordScattering); 

 	data[idx].Pincol = P;
      } else {
@ -258,7 +278,8 @@ __global__ void kernelCollimatorPhysics(T *data, double *par, curandState *state
 }

 __global__ void kernelCollimatorPhysics2(CUDA_PART2_SMALL data, double *par, 
-					 curandState *state, int numparticles)
+					 curandState *state, int numparticles,
+					 bool enableRutherfordScattering)
 {

  //get global id and thread id
@ -296,7 +317,7 @@ __global__ void kernelCollimatorPhysics2(CUDA_PART2_SMALL data, double *par,
    	P.x = P.x * ptot / sq;
    	P.y = P.y * ptot / sq;
    	P.z = P.z * ptot / sq;
-    	coulombScat(R[tid], P, s, p); 
+    	coulombScat(R[tid], P, s, p, enableRutherfordScattering); 
      
    	data.Pincol[idx] = P;
    } else {
@ -663,7 +684,8 @@ struct less_then
  }
 };

-int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles)
+int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
+					     bool enableRutherfordScattering)
 {

  int threads = BLOCK_SIZE;
@ -676,7 +698,8 @@ int CudaCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int n
  kernelCollimatorPhysics<<<blocks, threads, smem_size>>>((CUDA_PART_SMALL*)mem_ptr, 
 							  (double*)par_ptr,
 							  m_base->cuda_getCurandStates(),
-							  numparticles);
+							  numparticles,
+							  enableRutherfordScattering);

  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess)
--- a/src/CUDA/CudaCollimatorPhysics.cuh
+++ b/src/CUDA/CudaCollimatorPhysics.cuh
@ -110,7 +110,7 @@ public:
   *
   */
  int CollimatorPhysics(void *mem_ptr, void *par_ptr, 
-			int numpartices);
+			int numpartices, bool enableRutherforScattering = true);

  int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr, 
 			   void *rx_ptr, void *ry_ptr, void *rz_ptr, 
--- a/src/CUDA/CudaFFT.cuh
+++ b/src/CUDA/CudaFFT.cuh
@ -10,7 +10,7 @@
 #include "../Algorithms/FFT.h"
 #include "CudaBase.cuh"

-class CudaFFT : public DKSFFT{
+class CudaFFT : public BaseFFT {

 private:

--- a/src/DKSBase.h
+++ b/src/DKSBase.h
@ -33,7 +33,6 @@

 #ifdef DKS_CUDA
 #include "CUDA/CudaBase.cuh"
-#include "nvToolsExt.h"
 #endif

 #ifdef DKS_MIC
@ -889,9 +888,10 @@ public:
   * TODO: opencl and mic imlementation
   */
  int callMemInfo() {
+    #ifdef DKS_CUDA
    if (apiCuda())
      return CUDA_SAFECALL(cbase->cuda_memInfo());
-
+    #endif
    return DKS_ERROR;
  }

@ -900,11 +900,13 @@ public:
   * Used for debuging and timing purposes only.
   */
  void oclEventInfo() {
+    #ifdef DKS_OPENCL
    if (apiOpenCL())
      return OPENCL_SAFECALL(oclbase->ocl_eventInfo());
-
+    #endif
  }

+
  /** 
   * Test function to profile opencl kernel calls.
   * Used for debuging and timing purposes only.
--- a/src/DKSBaseMuSR.h
+++ b/src/DKSBaseMuSR.h
@ -8,6 +8,7 @@
 #include "AutoTuning/DKSAutoTuningTester.h"

 #include "DKSBase.h"
+#include "DKSFFT.h"

 #include "Algorithms/ChiSquareRuntime.h"

@ -19,7 +20,7 @@
 #include "OpenCL/OpenCLChiSquareRuntime.h"
 #endif

-class DKSBaseMuSR : public DKSBase {
+class DKSBaseMuSR : public DKSFFT {

 private:

--- a/src/DKSFFT.cpp
+++ b/src/DKSFFT.cpp
@ -0,0 +1,147 @@
+#include "DKSFFT.h"
+
+DKSFFT::DKSFFT() {
+  dksfft = nullptr;
+}
+
+DKSFFT::~DKSFFT() {
+  delete dksfft;
+}
+
+/* setup fft plans to reuse if multiple ffts of same size are needed */
+int DKSFFT::setupFFT(int ndim, int N[3]) {
+
+  if (apiCuda()) {
+    dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
+    return dksfft->setupFFT(ndim, N);
+  } else if (apiOpenCL()) {
+    dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
+    int ierr1 = dksfft->setupFFT(ndim, N);
+    int ierr2 = dksfft->setupFFTRC(ndim, N);
+    int ierr3 = dksfft->setupFFTCR(ndim, N);
+    if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
+      return DKS_ERROR;
+
+    return DKS_SUCCESS;
+  } else if (apiOpenMP()) {
+    //micbase.mic_setupFFT(ndim, N);
+    //BENI: setting up RC and CR transformations on MIC
+    dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
+    int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
+    int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
+    if (ierr1 != DKS_SUCCESS)
+      return ierr1;
+    if (ierr2 != DKS_SUCCESS)
+      return ierr2;
+    return DKS_SUCCESS;
+  }
+
+  return DKS_ERROR;
+
+}
+//BENI:
+int DKSFFT::setupFFTRC(int ndim, int N[3], double scale) {
+
+  if (apiCuda())
+    return dksfft->setupFFT(ndim, N);
+  if (apiOpenCL())
+    return dksfft->setupFFTRC(ndim, N);
+  else if (apiOpenMP())
+    return dksfft->setupFFTRC(ndim, N, scale);
+
+  return DKS_ERROR;
+
+}
+
+//BENI:
+int DKSFFT::setupFFTCR(int ndim, int N[3], double scale) {
+
+  if (apiCuda())
+    return dksfft->setupFFT(ndim, N);
+  if (apiOpenCL())
+    return dksfft->setupFFTCR(ndim, N);
+  else if (apiOpenMP())
+    return dksfft->setupFFTCR(ndim, N, scale);
+
+  return DKS_ERROR;
+
+}
+
+/* call OpenCL FFT function for selected platform */
+int DKSFFT::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
+
+  if (apiOpenCL() || apiOpenMP()) 
+    return dksfft->executeFFT(data_ptr, ndim, dimsize);
+  else if (apiCuda())
+    return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
+
+  DEBUG_MSG("No implementation for selected platform");
+  return DKS_ERROR;
+}
+
+/* call OpenCL IFFT function for selected platform */
+int DKSFFT::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
+  if (apiOpenCL() || apiOpenMP())
+      return dksfft->executeIFFT(data_ptr, ndim, dimsize);
+  else if (apiCuda()) 
+    return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
+
+  DEBUG_MSG("No implementation for selected platform");
+  return DKS_ERROR;
+}
+
+/* call normalize FFT function for selected platform */
+int DKSFFT::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
+
+  if (apiOpenCL()) {
+    if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
+      return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
+    else 
+      return DKS_ERROR;
+  } else if (apiCuda()) {
+    return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
+  } else if (apiOpenMP()) {
+    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
+  }
+
+  DEBUG_MSG("No implementation for selected platform");
+  return DKS_ERROR;
+}
+
+/* call real to complex FFT */
+int DKSFFT::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
+
+  if (apiCuda())
+    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
+  else if (apiOpenCL() || apiOpenMP())
+    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
+
+  DEBUG_MSG("No implementation for selected platform");
+  return DKS_ERROR;
+}
+
+/* call complex to real FFT */
+int DKSFFT::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
+  if (apiCuda())
+    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
+  else if (apiOpenCL() || apiOpenMP())
+    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
+
+  DEBUG_MSG("No implementation for selected platform");
+  return DKS_ERROR;
+}
+
+/* normalize complex to real iFFT */
+int DKSFFT::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
+  if (apiCuda())
+    return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
+  else if (apiOpenCL())
+    return DKS_ERROR;
+  else if (apiOpenMP())
+    return DKS_ERROR;
+
+  DEBUG_MSG("No implementation for selected platform");
+  return DKS_ERROR;
+}
+
+
--- a/src/DKSFFT.h
+++ b/src/DKSFFT.h
@ -0,0 +1,108 @@
+#ifndef H_DKSBASE_FFT
+#define H_DKSBASE_FFT
+
+#include <iostream>
+#include "AutoTuning/DKSAutoTuning.h"
+
+#include "DKSBase.h"
+
+#include "DKSDefinitions.h"
+
+#include "Algorithms/GreensFunction.h"
+#include "Algorithms/CollimatorPhysics.h"
+#include "Algorithms/FFT.h"
+
+#ifdef DKS_AMD
+#include "OpenCL/OpenCLFFT.h"
+#endif
+
+#ifdef DKS_CUDA
+#include "CUDA/CudaFFT.cuh"
+#endif
+
+#ifdef DKS_MIC
+#include "MIC/MICFFT.h"
+#endif
+
+class DKSFFT : public DKSBase {
+
+private:
+
+  BaseFFT *dksfft;
+
+  int initFFT();
+  
+public:
+  
+  DKSFFT();
+  ~DKSFFT();
+
+  /** 
+   * Setup FFT function.
+   * Initializes parameters for fft executuin. If ndim > 0 initializes handles for fft calls.
+   * If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case 
+   * each fft will do its own setup according to fft size and dimensions.
+   * TODO: opencl and mic implementations
+   */
+  int setupFFT(int ndim, int N[3]);
+  //BENI:
+  int setupFFTRC(int ndim, int N[3], double scale = 1.0);
+  //BENI:
+  int setupFFTCR(int ndim, int N[3], double scale = 1.0);
+
+  /** 
+   * Call complex-to-complex fft.
+   * Executes in place complex to compelx fft on the device on data pointed by data_ptr.
+   * stream id can be specified to use other streams than default.
+   * TODO: mic implementation
+   */
+  int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
+
+  /** 
+   * Call complex-to-complex ifft.
+   * Executes in place complex to compelx ifft on the device on data pointed by data_ptr.
+   * stream id can be specified to use other streams than default.
+   * TODO: mic implementation.
+   */
+  int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
+
+  /** 
+   * Normalize complex to complex ifft.
+   * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by 
+   * fft size
+   * TODO: mic implementation.
+   */
+  int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
+
+  /** 
+   * Call real to complex FFT.
+   * Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points
+   * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
+   * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
+   * (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
+   * TODO: opencl and mic implementations
+   */
+  int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
+
+  /** 
+   * Call complex to real iFFT.
+   * Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points
+   * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
+   * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
+   * (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
+   * TODO: opencl and mic implementations.
+   */
+  int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
+
+  /** 
+   * Normalize compelx to real ifft.
+   * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by 
+   * fft size.
+   * TODO: opencl and mic implementations.
+   */
+  int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
+
+
+};
+
+#endif
--- a/src/DKSOPAL.cpp
+++ b/src/DKSOPAL.cpp
@ -1,7 +1,6 @@
 #include "DKSOPAL.h"

 DKSOPAL::DKSOPAL() {
-  dksfft = nullptr;
  dkscol = nullptr;
  dksgreens = nullptr;
 }
@ -12,7 +11,6 @@ DKSOPAL::DKSOPAL(const char* api_name, const char* device_name) {
 }

 DKSOPAL::~DKSOPAL() {
-  delete dksfft;
  delete dkscol;
  delete dksgreens;
 }
@ -22,17 +20,14 @@ int DKSOPAL::setupOPAL() {
  if (apiOpenCL()) {
    ierr = OPENCL_SAFECALL( DKS_SUCCESS );
    //TODO: only enable if AMD libraries are available
-    dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
    dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) );
    dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) );
  } else if (apiCuda()) {
    ierr = CUDA_SAFECALL( DKS_SUCCESS );
-    dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
    dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) );
    dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) );
  } else if (apiOpenMP()) {
    ierr = MIC_SAFECALL( DKS_SUCCESS );
-    dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
    dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) );
    dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) );
  } else {
@ -50,139 +45,6 @@ int DKSOPAL::initDevice() {

 }

-/* setup fft plans to reuse if multiple ffts of same size are needed */
-int DKSOPAL::setupFFT(int ndim, int N[3]) {
-
-  if (apiCuda()) {
-    return dksfft->setupFFT(ndim, N);
-  } else if (apiOpenCL()) {
-    int ierr1 = dksfft->setupFFT(ndim, N);
-    int ierr2 = dksfft->setupFFTRC(ndim, N);
-    int ierr3 = dksfft->setupFFTCR(ndim, N);
-    if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
-      return DKS_ERROR;
-
-    return DKS_SUCCESS;
-  } else if (apiOpenMP()) {
-    //micbase.mic_setupFFT(ndim, N);
-    //BENI: setting up RC and CR transformations on MIC
-    int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
-    int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
-    if (ierr1 != DKS_SUCCESS)
-      return ierr1;
-    if (ierr2 != DKS_SUCCESS)
-      return ierr2;
-    return DKS_SUCCESS;
-  }
-
-  return DKS_ERROR;
-
-}
-//BENI:
-int DKSOPAL::setupFFTRC(int ndim, int N[3], double scale) {
-
-  if (apiCuda())
-    return dksfft->setupFFT(ndim, N);
-  if (apiOpenCL())
-    return dksfft->setupFFTRC(ndim, N);
-  else if (apiOpenMP())
-    return dksfft->setupFFTRC(ndim, N, scale);
-
-  return DKS_ERROR;
-
-}
-
-//BENI:
-int DKSOPAL::setupFFTCR(int ndim, int N[3], double scale) {
-
-  if (apiCuda())
-    return dksfft->setupFFT(ndim, N);
-  if (apiOpenCL())
-    return dksfft->setupFFTCR(ndim, N);
-  else if (apiOpenMP())
-    return dksfft->setupFFTCR(ndim, N, scale);
-
-  return DKS_ERROR;
-
-}
-
-/* call OpenCL FFT function for selected platform */
-int DKSOPAL::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
-
-  if (apiOpenCL() || apiOpenMP()) 
-    return dksfft->executeFFT(data_ptr, ndim, dimsize);
-  else if (apiCuda())
-    return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
-
-  DEBUG_MSG("No implementation for selected platform");
-  return DKS_ERROR;
-}
-
-/* call OpenCL IFFT function for selected platform */
-int DKSOPAL::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
-  if (apiOpenCL() || apiOpenMP())
-      return dksfft->executeIFFT(data_ptr, ndim, dimsize);
-  else if (apiCuda()) 
-    return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
-
-  DEBUG_MSG("No implementation for selected platform");
-  return DKS_ERROR;
-}
-
-/* call normalize FFT function for selected platform */
-int DKSOPAL::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId) {
-
-  if (apiOpenCL()) {
-    if ( loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") == DKS_SUCCESS )
-      return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
-    else 
-      return DKS_ERROR;
-  } else if (apiCuda()) {
-    return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
-  } else if (apiOpenMP()) {
-    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
-  }
-
-  DEBUG_MSG("No implementation for selected platform");
-  return DKS_ERROR;
-}
-
-/* call real to complex FFT */
-int DKSOPAL::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
-
-  if (apiCuda())
-    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
-  else if (apiOpenCL() || apiOpenMP())
-    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
-
-  DEBUG_MSG("No implementation for selected platform");
-  return DKS_ERROR;
-}
-
-/* call complex to real FFT */
-int DKSOPAL::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
-  if (apiCuda())
-    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
-  else if (apiOpenCL() || apiOpenMP())
-    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
-
-  DEBUG_MSG("No implementation for selected platform");
-  return DKS_ERROR;
-}
-
-/* normalize complex to real iFFT */
-int DKSOPAL::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {
-  if (apiCuda())
-    return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
-  else if (apiOpenCL())
-    return DKS_ERROR;
-  else if (apiOpenMP())
-    return DKS_ERROR;
-
-  DEBUG_MSG("No implementation for selected platform");
-  return DKS_ERROR;
-}
-
 int DKSOPAL::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ, 
 				double hz_m0, double hz_m1, double hz_m2, int streamId) {

@ -209,18 +71,20 @@ int DKSOPAL::callMultiplyComplexFields(void *mem_ptr1, void *mem_ptr2, int size,

 int DKSOPAL::callCollimatorPhysics(void *mem_ptr, void *par_ptr, 
 				   int numparticles, int numparams,
-				   int &numaddback, int &numdead) 
+				   int &numaddback, int &numdead, 
+				   bool enableRutherforScattering) 
 {

-  return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
+  return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles, enableRutherforScattering);

 }


-int DKSOPAL::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles) 
+int DKSOPAL::callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles,
+				    bool enableRutherforScattering) 
 {

-  return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
+  return dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles, enableRutherforScattering);
  
 }

--- a/src/DKSOPAL.h
+++ b/src/DKSOPAL.h
@ -5,6 +5,7 @@
 #include "AutoTuning/DKSAutoTuning.h"

 #include "DKSBase.h"
+#include "DKSFFT.h"

 #include "DKSDefinitions.h"

@ -32,11 +33,10 @@
 #include "MIC/MICCollimatorPhysics.h"
 #endif

-class DKSOPAL : public DKSBase {
+class DKSOPAL : public DKSFFT {

 private: 

-  DKSFFT *dksfft;
  DKSCollimatorPhysics *dkscol;
  GreensFunction *dksgreens;

@ -56,71 +56,6 @@ public:
  ///////Function library part of dksbase////////
  ///////////////////////////////////////////////

-  /** 
-   * Setup FFT function.
-   * Initializes parameters for fft executuin. If ndim > 0 initializes handles for fft calls.
-   * If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case 
-   * each fft will do its own setup according to fft size and dimensions.
-   * TODO: opencl and mic implementations
-   */
-  int setupFFT(int ndim, int N[3]);
-  //BENI:
-  int setupFFTRC(int ndim, int N[3], double scale = 1.0);
-  //BENI:
-  int setupFFTCR(int ndim, int N[3], double scale = 1.0);
-
-/** 
-   * Call complex-to-complex fft.
-   * Executes in place complex to compelx fft on the device on data pointed by data_ptr.
-   * stream id can be specified to use other streams than default.
-   * TODO: mic implementation
-   */
-  int callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
-
-  /** 
-   * Call complex-to-complex ifft.
-   * Executes in place complex to compelx ifft on the device on data pointed by data_ptr.
-   * stream id can be specified to use other streams than default.
-   * TODO: mic implementation.
-   */
-  int callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
-
-  /** 
-   * Normalize complex to complex ifft.
-   * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by 
-   * fft size
-   * TODO: mic implementation.
-   */
-  int callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId = -1);
-
-  /** 
-   * Call real to complex FFT.
-   * Executes out of place real to complex fft, real_ptr points to real data, comp_pt - points
-   * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
-   * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
-   * (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
-   * TODO: opencl and mic implementations
-   */
-  int callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
-
-  /** 
-   * Call complex to real iFFT.
-   * Executes out of place complex to real ifft, real_ptr points to real data, comp_pt - points
-   * to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
-   * should be dimsize[0]*dimsize[1]*disize[2], comp_ptr size should be atleast
-   * (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
-   * TODO: opencl and mic implementations.
-   */
-  int callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId = -1);
-
-  /** 
-   * Normalize compelx to real ifft.
-   * Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by 
-   * fft size.
-   * TODO: opencl and mic implementations.
-   */
-  int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
-
  /** 
   * Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
   * For specifics check OPAL docs.
@ -159,14 +94,16 @@ public:
   */
  int callCollimatorPhysics(void *mem_ptr, void *par_ptr, 
 			    int numparticles, int numparams, 
-			    int &numaddback, int &numdead);
+			    int &numaddback, int &numdead,
+			    bool enableRutherfordScattering = true);
  
  /** 
   * Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
   * For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
   * TODO: opencl and mic implementations.
   */
-  int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles);
+  int callCollimatorPhysics2(void *mem_ptr, void *par_ptr, int numparticles, 
+			     bool enableRutherfordScattering = true);

  /** 
   * Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
--- a/src/MIC/CMakeLists.txt
+++ b/src/MIC/CMakeLists.txt
@ -1,11 +1,10 @@
-SET (_SRCS MICBase.cpp)
-SET (_HDRS MICBase.h)
+SET (_SRCS MICBase.cpp MICFFT.cpp)
+SET (_HDRS MICBase.h MICFFT.h)

 IF (ENABLE_OPAL)
  SET (_SRCS
    ${_SRCS}
    MICChiSquare.cpp
-    MICFFT.cpp
    MICGreensFunction.cpp  
    MICCollimatorPhysics.cpp
    )
@ -13,7 +12,6 @@ IF (ENABLE_OPAL)
  SET (_HDRS
    ${_HDRS}
    MICChiSquare.h
-    MICFFT.h
    MICCollimatorPhysics.h
    MICGreensFunction.hpp    
    MICMergeSort.h
--- a/src/MIC/MICCollimatorPhysics.cpp
+++ b/src/MIC/MICCollimatorPhysics.cpp
@ -368,7 +368,9 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {

 }

-int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles) {
+int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles,
+					    bool enableRutherforScattering) 
+{

  //cast device memory pointers to appropriate types
  MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
--- a/src/MIC/MICCollimatorPhysics.h
+++ b/src/MIC/MICCollimatorPhysics.h
@ -40,7 +40,8 @@ public:

  ~MICCollimatorPhysics() {  };

-  int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);
+  int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles, 
+			bool enableRutherforScattering = true);

  int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr, 
 			   void *rx_ptr, void *ry_ptr, void *rz_ptr, 
--- a/src/MIC/MICFFT.h
+++ b/src/MIC/MICFFT.h
@ -10,7 +10,7 @@
 #include "../Algorithms/FFT.h"
 #include "MICBase.h"

-class MICFFT : public DKSFFT {
+class MICFFT : public BaseFFT {

 private:

--- a/src/OpenCL/CMakeLists.txt
+++ b/src/OpenCL/CMakeLists.txt
@ -4,6 +4,25 @@ SET (_HDRS OpenCLBase.h)
 SET (_SRCS OpenCLBase.cpp)
 SET (_KERNELS "")

+IF (ENABLE_AMD)
+  SET (_SRCS
+    ${_SRCS}
+    OpenCLFFT.cpp
+    )
+
+  SET (_HDRS
+    ${_HDRS}
+    OpenCLFFT.h
+    )
+
+  SET (_KERNELS
+    ${_KERNELS}
+    OpenCLKernels/OpenCLFFT.cl
+    OpenCLKernels/OpenCLFFTStockham.cl
+    OpenCLKernels/OpenCLTranspose.cl
+  )
+ENDIF (ENABLE_AMD)
+
 IF (ENABLE_MUSR)
  SET (_HDRS ${_HDRS} OpenCLChiSquareRuntime.h)
  SET (_SRCS ${_SRCS} OpenCLChiSquareRuntime.cpp)
@ -13,23 +32,18 @@ ENDIF (ENABLE_MUSR)
 IF (ENABLE_AMD AND ENABLE_OPAL)
  SET (_SRCS
    ${_SRCS}
-    OpenCLFFT.cpp
    OpenCLCollimatorPhysics.cpp
    OpenCLGreensFunction.cpp
    )

  SET (_HDRS
    ${_HDRS}
-    OpenCLFFT.h
    OpenCLCollimatorPhysics.h
    OpenCLGreensFunction.h
    )

  SET (_KERNELS
    ${_KERNELS}
-    OpenCLKernels/OpenCLFFT.cl
-    OpenCLKernels/OpenCLFFTStockham.cl
-    OpenCLKernels/OpenCLTranspose.cl
    OpenCLKernels/OpenCLCollimatorPhysics.cl
    OpenCLKernels/OpenCLGreensFunction.cl
  )
--- a/src/OpenCL/OpenCLCollimatorPhysics.cpp
+++ b/src/OpenCL/OpenCLCollimatorPhysics.cpp
@ -34,7 +34,7 @@ TODO:
 2. boost.compute sort for user defined structure crashes
 */
 int OpenCLCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, 
-					       int numparticles) 
+					       int numparticles, bool enableRutherforScattering) 
 {
  /*
  //set number of total threads, and number threads per block
--- a/src/OpenCL/OpenCLCollimatorPhysics.h
+++ b/src/OpenCL/OpenCLCollimatorPhysics.h
@ -52,7 +52,8 @@ public:
  }

  /* execute degrader code on device */
-  int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);
+  int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles, 
+			bool enableRutherforScattering = true);

  int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr, 
 			   void *rx_ptr, void *ry_ptr, void *rz_ptr, 
--- a/src/OpenCL/OpenCLFFT.h
+++ b/src/OpenCL/OpenCLFFT.h
@ -22,7 +22,7 @@

 #include "clFFT.h"

-class OpenCLFFT : public DKSFFT {
+class OpenCLFFT : public BaseFFT {

 private:

@ -112,10 +112,9 @@ public:
 		   int streamId = -1);
  int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3], 
 		   int streamId = -1);
-  int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1)
-    {
-      return DKS_ERROR;
-    }
+  int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) {
+    return DKS_ERROR;
+  }

  //void printData3DN4(cl_double2* &data, int N);

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -39,8 +39,8 @@ ADD_EXECUTABLE(testFFTSolverMIC testFFTSolver_MIC.cpp)
 #TARGET_LINK_LIBRARIES(testFFT dks)
 #TARGET_LINK_LIBRARIES(testMIC dks)
 #TARGET_LINK_LIBRARIES(testMICOpenCL dks)
-TARGET_LINK_LIBRARIES(testFFT3D dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
-TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+TARGET_LINK_LIBRARIES(testFFT3D dks ${CLFFT_LIBRARIES})
+TARGET_LINK_LIBRARIES(testFFT3DRC dks ${CLFFT_LIBRARIES})
 #TARGET_LINK_LIBRARIES(testFFT3DRC_MIC dks)
 #TARGET_LINK_LIBRARIES(testFFT3DTiming dks)
 #TARGET_LINK_LIBRARIES(testStockhamFFT dks)
@ -54,11 +54,11 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
 #TARGET_LINK_LIBRARIES(testGather dks)
 #TARGET_LINK_LIBRARIES(testGatherAsync dks)
 #TARGET_LINK_LIBRARIES(testTranspose dks)
-TARGET_LINK_LIBRARIES(testRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
-TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
-TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+TARGET_LINK_LIBRARIES(testRandom dks ${CLFFT_LIBRARIES})
+TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${CLFFT_LIBRARIES})
+TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${CLFFT_LIBRARIES})
 #TARGET_LINK_LIBRARIES(testPush dks)
-TARGET_LINK_LIBRARIES(testFFTSolverMIC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
+TARGET_LINK_LIBRARIES(testFFTSolverMIC dks ${CLFFT_LIBRARIES})
 #TARGET_LINK_LIBRARIES(testIntegration dks)
 #TARGET_LINK_LIBRARIES(testImageReconstruction dks)
Author	SHA1	Message	Date
Uldis Locans	7ca93a3a49	dont link with boost filesystem	2017-06-07 11:10:38 +02:00
Uldis Locans	aa14065994	Merge branch 'master' of gitlab.psi.ch:uldis_l/DKS	2017-06-06 14:24:20 +02:00
Uldis Locans	50ecb31042	remove any OpenCL calls when DKS compiled without OpenCL	2017-06-06 14:23:24 +02:00
Uldis Locans	3d130aa01f	write CUDA libraries that are needed for linking in config file	2017-06-06 14:22:34 +02:00
Uldis Locans	5071ea5741	add option to link DKS with static cuda libraries	2017-06-06 14:22:00 +02:00
Uldis Locans	efe5f0db38	remove nvToolsExt for manual profiling	2017-05-30 11:07:22 +02:00
Uldis Locans	1d420504cc	fix FFT test when OPAL and/or Musr modules are not compiled	2017-05-30 11:05:12 +02:00
Uldis Locans	cc59f550ab	move patch version forward	2017-05-29 13:29:08 +02:00
Uldis Locans	d20fea2caa	readme updated	2017-05-29 13:28:23 +02:00
Uldis Locans	8b7d824b3a	updated documentation	2017-05-29 13:21:34 +02:00
Uldis Locans	2c9fe4ea6f	fix for seperate FFT	2017-05-29 13:18:37 +02:00
Uldis Locans	e32f9aaff2	include FFT in DKSOPAL and DKSBaseMuSR	2017-05-29 12:49:32 +02:00
Uldis Locans	f3527969cb	seperate FFT from DKSOPAL	2017-05-29 09:39:25 +02:00
Uldis Locans	cadd258668	update DKS version	2017-04-27 18:15:48 +02:00
Uldis Locans	a94ed9f3b8	Update CUDA particle matter interaction according to latest changes in opal	2017-04-27 18:15:35 +02:00
Uldis Locans	61919ae53c	add enablerutherford scattering flag to opal collimatorphysics	2017-04-27 10:54:02 +02:00