From 5fb2ce777e564d52160fcaad3032c6f72c43a653 Mon Sep 17 00:00:00 2001 From: Andreas Suter Date: Thu, 10 Mar 2016 15:52:48 +0100 Subject: [PATCH] some more work towards DKS/GPU support of musrfit --- doc/examples/test-histo-PSI-BIN.msr | 1 + src/classes/PFitter.cpp | 31 +++++++++--- src/classes/PFitterFcnDKS.cpp | 77 +++++++++++++++++++++++++++-- src/classes/PMsrHandler.cpp | 46 +++++++++++++++++ src/include/PFitterFcnDKS.h | 27 ++++++---- src/include/PMsrHandler.h | 1 + src/include/PMusr.h | 7 +++ 7 files changed, 170 insertions(+), 20 deletions(-) diff --git a/doc/examples/test-histo-PSI-BIN.msr b/doc/examples/test-histo-PSI-BIN.msr index 081079e3..c385586d 100644 --- a/doc/examples/test-histo-PSI-BIN.msr +++ b/doc/examples/test-histo-PSI-BIN.msr @@ -47,6 +47,7 @@ t0 202.0 ############################################################### COMMANDS +CUDA SCALE_N0_BKG TRUE MINIMIZE MINOS diff --git a/src/classes/PFitter.cpp b/src/classes/PFitter.cpp index c8e34adc..7ae78193 100644 --- a/src/classes/PFitter.cpp +++ b/src/classes/PFitter.cpp @@ -120,22 +120,31 @@ PFitter::PFitter(PMsrHandler *runInfo, PRunListCollection *runListCollection, Bo return; } + // get the DKS tag from the commands block + UInt_t dksTag = fRunInfo->GetDKSTag(); + // check if the theory function can already run on the GPU string theo = fRunInfo->GetDKSTheoryString(); - if (!theo.compare("??")) { // theory not yet DKS ready - cout << endl << ">> PFitter::PFitter(): **INFO** theory not yet DKS/GPU ready. Will run on the CPU." << endl; - } else { - fDKSReady = true; - cout << endl << ">> PFitter::PFitter(): **INFO** theory DKS/GPU ready. Will run on the GPU." << endl; + if (dksTag != DKS_CPU_OPENMP) { + if (!theo.compare("??")) { // theory not yet DKS ready + cout << endl << ">> PFitter::PFitter(): **INFO** theory not yet DKS/GPU ready. Will run on the CPU." << endl; + } else { + fDKSReady = true; + cout << endl << ">> PFitter::PFitter(): **INFO** theory DKS/GPU ready. Will run on the GPU." 
<< endl; + } } // create fit function object depending whether DKS/GPU can be used or not - if (fDKSReady) { // run on the GPU - fFitterFcnDKS = new PFitterFcnDKS(runListCollection, fUseChi2); + if (fDKSReady && (dksTag != DKS_CPU_OPENMP)) { // run on the GPU + fFitterFcnDKS = new PFitterFcnDKS(runListCollection, fUseChi2, dksTag, theo); if (!fFitterFcnDKS) { fIsValid = false; } + if (!fFitterFcnDKS->IsValid()) { + fIsValid = false; + } } else { // run on the CPU + fDKSReady = false; // needed in case dksTag == DKS_CPU_OPENMP fFitterFcn = new PFitterFcn(runListCollection, fUseChi2); if (!fFitterFcn) { fIsValid = false; @@ -388,6 +397,14 @@ Bool_t PFitter::CheckCommands() cmdLineNo++; if (it->fLine.Contains("COMMANDS", TString::kIgnoreCase)) { continue; + } else if (it->fLine.Contains("OpenMP", TString::kIgnoreCase)) { // run on CPU using OpenMP + continue; + } else if (it->fLine.Contains("CUDA", TString::kIgnoreCase)) { // try to run DKS/GPU CUDA + continue; + } else if (it->fLine.Contains("OpenCL-GPU", TString::kIgnoreCase)) { // try to run DKS/GPU OpenCL + continue; + } else if (it->fLine.Contains("OpenCL-CPU", TString::kIgnoreCase)) { // try to run DKS/CPU OpenCL + continue; } else if (it->fLine.Contains("SET BATCH", TString::kIgnoreCase)) { // needed for backward compatibility continue; } else if (it->fLine.Contains("END RETURN", TString::kIgnoreCase)) { // needed for backward compatibility diff --git a/src/classes/PFitterFcnDKS.cpp b/src/classes/PFitterFcnDKS.cpp index f4db5c22..305be307 100644 --- a/src/classes/PFitterFcnDKS.cpp +++ b/src/classes/PFitterFcnDKS.cpp @@ -30,6 +30,7 @@ #include using namespace std; +#include "PMusr.h" #include "PFitterFcnDKS.h" //-------------------------------------------------------------------------- @@ -41,14 +42,20 @@ using namespace std; * \param runList run list collection * \param useChi2 if true, a chisq fit will be performed, otherwise a log max-likelihood fit will be carried out. 
*/ -PFitterFcnDKS::PFitterFcnDKS(PRunListCollection *runList, Bool_t useChi2) : - fRunListCollection(runList), - fUseChi2(useChi2) +PFitterFcnDKS::PFitterFcnDKS(PRunListCollection *runList, const Bool_t useChi2, const UInt_t dksTag, + const std::string theo) : + fTheoStr(theo), + fUseChi2(useChi2), + fRunListCollection(runList) { + fValid = false; + if (fUseChi2) fUp = 1.0; else fUp = 0.5; + + InitDKS(dksTag); } //-------------------------------------------------------------------------- @@ -78,7 +85,7 @@ Double_t PFitterFcnDKS::operator()(const std::vector& par) const } //-------------------------------------------------------------------------- -// CalcExpectedChiSquare() +// CalcExpectedChiSquare (public) //-------------------------------------------------------------------------- /** *

Calculates the expected chisq, expected chisq per run, and chisq per run, if applicable. @@ -105,3 +112,65 @@ void PFitterFcnDKS::CalcExpectedChiSquare(const std::vector &par, Doub } } } + +//-------------------------------------------------------------------------- +// InitDKS (private) +//-------------------------------------------------------------------------- +/** + *

Initializes the DKS interface: selects the API and device according to dksTag, initializes the device, compiles the theory string, and runs the muSR kernel checks. fValid is set to true only if both the compilation and the kernel checks succeed. + * + */ +void PFitterFcnDKS::InitDKS(const UInt_t dksTag) +{ + // if any device was allocated before, free the device resources + FreeDKS(); + + // select framework + if (dksTag == DKS_GPU_CUDA) + fDKS.setAPI("Cuda"); + else + fDKS.setAPI("OpenCL"); + + // select device + if (dksTag == DKS_CPU_OPENCL) + fDKS.setDevice("-cpu"); + else + fDKS.setDevice("-gpu"); + + // init device + fDKS.initDevice(); + + // init chisq buffer on the GPU + + // allocate memory for the data on the GPU/CPU and transfer the data sets + + // set the function string and compile the program + Int_t ierr = fDKS.callCompileProgram(fTheoStr, !fUseChi2); + if (ierr != 0) { + cerr << ">> PFitterFcnDKS::InitDKS: **ERROR** failed to compile theory!" << endl; + fValid = false; + return; + } + + // checks device properties if openCL + ierr = fDKS.checkMuSRKernels(); + if (ierr != 0) { + cerr << ">> PFitterFcnDKS::InitDKS: **ERROR** muSR kernel checks failed!" << endl; + fValid = false; + return; + } + + fValid = true; +} + +//-------------------------------------------------------------------------- +// FreeDKS (private) +//-------------------------------------------------------------------------- +/** + *

Cleans up DKS/GPU memory. Currently an empty placeholder: no resources are released yet. + * + */ +void PFitterFcnDKS::FreeDKS() +{ + +} diff --git a/src/classes/PMsrHandler.cpp b/src/classes/PMsrHandler.cpp index b475e517..c33a3295 100644 --- a/src/classes/PMsrHandler.cpp +++ b/src/classes/PMsrHandler.cpp @@ -6090,6 +6090,52 @@ std::string PMsrHandler::GetDKSTheoryString() return result; } +//-------------------------------------------------------------------------- +// GetDKSTag (public) +//-------------------------------------------------------------------------- +/** + *

Checks the COMMANDS block for DKS related tags. Currently the following + * tags are allowed: + * (i) OpenMP: which means run on the CPU with OpenMP + * (ii) Cuda: which means run on the GPU using Cuda + * (iii) OpenCL-CPU: which means run on the CPU with OpenCL + * (iv) OpenCL-GPU: which means run on the GPU with OpenCL + * + * @return DKS tag + */ +UInt_t PMsrHandler::GetDKSTag() +{ + UInt_t count=0; + UInt_t tag = DKS_CPU_OPENMP; + TString last(""); + + for (UInt_t i=0; i 1) { + cerr << ">> PMsrHandler::GetDKSTag(): **WARNING** found multiple DKS tags, will use the last one found: '" << last.Data() << "'" << endl; + } + + return tag; +} + //-------------------------------------------------------------------------- // HandleTheoryArguments (private) //-------------------------------------------------------------------------- diff --git a/src/include/PFitterFcnDKS.h b/src/include/PFitterFcnDKS.h index 37bb4348..8cf2e67f 100644 --- a/src/include/PFitterFcnDKS.h +++ b/src/include/PFitterFcnDKS.h @@ -31,9 +31,9 @@ #define _PFITTERFCNDKS_H_ #include - +#include #include "Minuit2/FCNBase.h" - +#include "DKSBaseMuSR.h" #include "PRunListCollection.h" /** @@ -42,20 +42,29 @@ class PFitterFcnDKS : public ROOT::Minuit2::FCNBase { public: - PFitterFcnDKS(PRunListCollection *runList, Bool_t useChi2); - ~PFitterFcnDKS(); + PFitterFcnDKS(PRunListCollection *runList, const Bool_t useChi2, const UInt_t dksTag, const std::string theo); + virtual ~PFitterFcnDKS(); - Double_t Up() const { return fUp; } - Double_t operator()(const std::vector &par) const; + virtual Bool_t IsValid() { return fValid; } - UInt_t GetTotalNoOfFittedBins() { return fRunListCollection->GetTotalNoOfBinsFitted(); } - UInt_t GetNoOfFittedBins(const UInt_t idx) { return fRunListCollection->GetNoOfBinsFitted(idx); } - void CalcExpectedChiSquare(const std::vector &par, Double_t &totalExpectedChisq, std::vector &expectedChisqPerRun); + virtual Double_t Up() const { return fUp; } + virtual Double_t operator()(const 
std::vector &par) const; + + virtual UInt_t GetTotalNoOfFittedBins() { return fRunListCollection->GetTotalNoOfBinsFitted(); } + virtual UInt_t GetNoOfFittedBins(const UInt_t idx) { return fRunListCollection->GetNoOfBinsFitted(idx); } + virtual void CalcExpectedChiSquare(const std::vector &par, Double_t &totalExpectedChisq, std::vector &expectedChisqPerRun); private: + Bool_t fValid; ///< flag needed to ensure a valid state + std::string fTheoStr; ///< theory string for DKS compilation Double_t fUp; ///< for chisq == 1.0, i.e. errors are 1 std. deviation errors. for log max-likelihood == 0.5, i.e. errors are 1 std. deviation errors (for details see the minuit2 user manual). Bool_t fUseChi2; ///< true = chisq fit, false = log max-likelihood fit PRunListCollection *fRunListCollection; ///< pre-processed data to be fitted + + mutable DKSBaseMuSR fDKS; + + virtual void InitDKS(const UInt_t dksTag); + virtual void FreeDKS(); }; #endif // _PFITTERFCNDKS_H_ diff --git a/src/include/PMsrHandler.h b/src/include/PMsrHandler.h index 5d272f35..95e3bf76 100644 --- a/src/include/PMsrHandler.h +++ b/src/include/PMsrHandler.h @@ -112,6 +112,7 @@ class PMsrHandler virtual Double_t GetAlphaEstimateN0(); virtual std::string GetDKSTheoryString(); + virtual UInt_t GetDKSTag(); private: Bool_t fFourierOnly; ///< flag indicating if Fourier transform only is wished. If yes, some part of the msr-file blocks are not needed. diff --git a/src/include/PMusr.h b/src/include/PMusr.h index 5aa435f6..4457a40d 100644 --- a/src/include/PMusr.h +++ b/src/include/PMusr.h @@ -148,6 +148,13 @@ typedef struct { char a[7]; } __float128; // needed since cint doesn't know it #define RRF_FREQ_UNDEF 1.0e10 +//------------------------------------------------------------- +// DKS related tags +#define DKS_CPU_OPENMP 0 +#define DKS_CPU_OPENCL 1 +#define DKS_GPU_OPENCL 2 +#define DKS_GPU_CUDA 3 + //------------------------------------------------------------- /** *

typedef to make to code more readable. Definition of a bool vector.