#include "DKSBaseMuSR.h"

/**
 * Construct with no chi-square runtime attached and no cached problem size.
 * chiSquareSize_m == -1 means "kernel parameters have not been loaded for any length yet".
 */
DKSBaseMuSR::DKSBaseMuSR() {
  chiSq = nullptr;
  chiSquareSize_m = -1;
}

/** Release the chi-square runtime, if one was created by initChiSquare(). */
DKSBaseMuSR::~DKSBaseMuSR() {
  freeChiSquare();
}

/**
 * Forward to chiSq->compileProgram(function, mlh) and return its result.
 * chiSq is dereferenced unchecked, so initChiSquare() must have succeeded first.
 */
int DKSBaseMuSR::callCompileProgram(std::string function, bool mlh) {
  return chiSq->compileProgram(function, mlh);
}

/**
 * Launch the chi-square computation through chiSq->launchChiSquare().
 *
 * When auto-tuning is off and @p length differs from the length seen on the previous
 * call, the "NumBlocks" and "BlockSize" entries for "ChiSquare" are looked up in
 * dksconfig and applied with setKernelParams() before launching.
 * When auto-tuning is on, callAutoTuningChiSquare() is run after the launch; its
 * return value and the resulting configuration vector are discarded here.
 *
 * @return the value returned by chiSq->launchChiSquare().
 */
int DKSBaseMuSR::callLaunchChiSquare(int fitType,
                                     void *mem_data, void *mem_err, int length,
                                     int numpar, int numfunc, int nummap,
                                     double timeStart, double timeStep,
                                     double &result)
{
  // If we are not auto tuning and the size of the problem has changed, find the new
  // parameters from the autotuning config file.
  if (!isAutoTuningOn() && length != chiSquareSize_m) {
    // NOTE(review): these stay uninitialised if getConfigParameter() does not write
    // them (e.g. no entry for this device/length) - confirm it always sets a value.
    int numBlocks, blockSize;
    std::string device_name;
    getDeviceName(device_name);
    dksconfig.getConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare", length,
                                 "NumBlocks", numBlocks);
    dksconfig.getConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare", length,
                                 "BlockSize", blockSize);
    chiSq->setKernelParams(numBlocks, blockSize);

    //std::cout << "Parameters set to: " << numBlocks << ", " << blockSize << std::endl;

    // Remember the length so the lookup is skipped while the problem size is unchanged.
    chiSquareSize_m = length;
  }

  int ierr = chiSq->launchChiSquare(fitType, mem_data, mem_err, length,
                                    numpar, numfunc, nummap,
                                    timeStart, timeStep, result);

  if (isAutoTuningOn()) {
    // NOTE(review): element type restored as int (the template argument was missing
    // in the reviewed text) - confirm against the declaration in DKSBaseMuSR.h.
    std::vector<int> config;
    callAutoTuningChiSquare(fitType, mem_data, mem_err, length, numpar, numfunc, nummap,
                            timeStart, timeStep, result, config);
  }

  return ierr;
}

/**
 * Auto-tune the launch parameters of the chi-square kernel for the given problem.
 *
 * Builds a DKSAutoTuning object for the active API/device, registers
 * ChiSquareRuntime::launchChiSquare as the function to evaluate, tunes
 * chiSq->blockSize_m and chiSq->numBlocks_m with a line search, stores the resulting
 * values in dksconfig under "ChiSquare"/@p length and appends them to @p config
 * (block size first, then number of blocks).
 *
 * @return DKS_SUCCESS (unconditionally).
 */
int DKSBaseMuSR::callAutoTuningChiSquare(int fitType, void *mem_data, void *mem_err, int length,
                                         int numpar, int numfunc, int nummap,
                                         double timeStart, double timeStep,
                                         double &result, std::vector<int> &config)
{
  int loops = 100;
  DKSAutoTuning *autoTuning;
  if (apiCuda())
    autoTuning = new DKSAutoTuning(this, API_CUDA, DEVICE_GPU_NEW, loops);
  else if (apiOpenCL() && deviceGPU())
    autoTuning = new DKSAutoTuning(this, API_OPENCL, DEVICE_GPU_NEW, loops);
  else if (apiOpenCL() && deviceCPU())
    autoTuning = new DKSAutoTuning(this, API_OPENCL, DEVICE_CPU_NEW, loops);
  else if (apiOpenCL() && deviceMIC())
    autoTuning = new DKSAutoTuning(this, API_OPENCL, DEVICE_MIC_NEW, loops);
  else
    autoTuning = new DKSAutoTuning(this, API_UNKNOWN, DEVICE_UNKNOWN_NEW, loops);

  // Upper bound for the block-size search; checkMuSRKernels() receives it by reference.
  int maxThreadsPerBlock = 1024;
  checkMuSRKernels(fitType, maxThreadsPerBlock);
  std::cout << "Max threads for autotune " << maxThreadsPerBlock << std::endl;

  // Create the function to be timed. std::bind stores copies of its arguments, so the
  // tuning runs write their chi-square value into the bound copy, not into `result`.
  // NOTE(review): signature restored as int() from launchChiSquare's int return value -
  // confirm against DKSAutoTuning::setFunction.
  std::function<int()> f = std::bind(&ChiSquareRuntime::launchChiSquare, chiSq,
                                     fitType, mem_data, mem_err, length,
                                     numpar, numfunc, nummap,
                                     timeStart, timeStep, result);
  autoTuning->setFunction(f, "launchChiSquare");

  // Create the parameters for auto-tuning.
  autoTuning->addParameter(&chiSq->blockSize_m, 32, maxThreadsPerBlock, 32, "BlockSize");
  autoTuning->addParameter(&chiSq->numBlocks_m, 100, 5000, 100, "NumBlocks");

  autoTuning->lineSearch();

  //autoTuning->hillClimbing(100);

  //autoTuning->simulatedAnnealing(1e-3, 1e-6);

  //autoTuning->exaustiveSearch();

  // Persist the tuned values for this device and problem length.
  std::string device_name;
  getDeviceName(device_name);
  dksconfig.addConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare", length,
                               "NumBlocks", chiSq->numBlocks_m);
  dksconfig.addConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare", length,
                               "BlockSize", chiSq->blockSize_m);

  config.push_back(chiSq->blockSize_m);
  config.push_back(chiSq->numBlocks_m);

  delete autoTuning;

  return DKS_SUCCESS;
}

/**
 * Exercise the auto-tuner on DKSAutoTuningTester::peaksZ with two parameters (x, y),
 * each searched over [-3.0, 3.0] in steps of 0.5, using exhaustive search, hill
 * climbing and simulated annealing in turn.
 *
 * @return DKS_SUCCESS (unconditionally).
 */
int DKSBaseMuSR::testAutoTuning() {
  DKSAutoTuning *autoTuning;
  DKSAutoTuningTester *tester;

  autoTuning = new DKSAutoTuning(this, API_UNKNOWN, DEVICE_UNKNOWN_NEW);
  tester = new DKSAutoTuningTester();

  // NOTE(review): signature restored as double() (the template argument was missing in
  // the reviewed text; the tuned parameters are doubles) - confirm peaksZ's return type.
  std::function<double()> f = std::bind(&DKSAutoTuningTester::peaksZ, tester);
  autoTuning->setFunction(f, "testAutoTuner", false);

  autoTuning->addParameter(&tester->x, -3.0, 3.0, 0.5, "x");
  autoTuning->addParameter(&tester->y, -3.0, 3.0, 0.5, "y");

  autoTuning->exaustiveSearch();

  autoTuning->hillClimbing(10);

  autoTuning->simulatedAnnealing(10, 0.0005);

  // Both objects were previously leaked. The tuner goes first because it holds the
  // function object that points at the tester.
  delete autoTuning;
  delete tester;

  return DKS_SUCCESS;
}

/** Forward to chiSq->setConsts(N0, tau, bkg) and return its result. */
int DKSBaseMuSR::callSetConsts(double N0, double tau, double bkg) {
  return chiSq->setConsts(N0, tau, bkg);
}

/** Forward to chiSq->setConsts(alpha, beta) and return its result. */
int DKSBaseMuSR::callSetConsts(double alpha, double beta) {
  return chiSq->setConsts(alpha, beta);
}

/**
 * Create the chi-square runtime for the active API and initialise it.
 *
 * A CudaChiSquareRuntime is created when apiCuda() is true, otherwise an
 * OpenCLChiSquareRuntime. Any runtime left from an earlier call is released first.
 *
 * @return the result of chiSq->initChiSquare() when the API is available,
 *         DKS_ERROR otherwise.
 */
int DKSBaseMuSR::initChiSquare(int size_data, int size_param, int size_func, int size_map) {
  // Overwriting chiSq without this would leak the runtime created by a previous call.
  freeChiSquare();

  int ierr;

  if (apiCuda()) {
    ierr = CUDA_SAFECALL( DKS_SUCCESS );
    chiSq = CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
  } else {
    ierr = OPENCL_SAFECALL( DKS_SUCCESS );
    // NOTE(review): the CUDA branch uses a *_SAFEINIT macro for the allocation while
    // this branch uses OPENCL_SAFECALL - confirm that is intended.
    chiSq = OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
  }

  if (ierr == DKS_SUCCESS) {
    return chiSq->initChiSquare(size_data, size_param, size_func, size_map);
  } else {
    DEBUG_MSG("DKS API not set, or DKS compiled without sellected API support");
    return DKS_ERROR;
  }
}

/**
 * Free and delete the chi-square runtime, if any.
 *
 * @return the result of chiSq->freeChiSquare(), or DKS_SUCCESS when no runtime exists.
 */
int DKSBaseMuSR::freeChiSquare() {
  int ierr = DKS_SUCCESS;
  if (chiSq != nullptr) {
    ierr = chiSq->freeChiSquare();
    delete chiSq;
    chiSq = nullptr;
    // The kernel parameters lived in the deleted runtime; forget the cached length so
    // callLaunchChiSquare() reloads them for whatever runtime is created next.
    chiSquareSize_m = -1;
  }

  return ierr;
}

/** Forward to chiSq->writeParams(params, numparams) and return its result. */
int DKSBaseMuSR::writeParams(const double *params, int numparams) {
  return chiSq->writeParams(params, numparams);
}

/** Forward to chiSq->writeFunc(func, numfunc) and return its result. */
int DKSBaseMuSR::writeFunctions(const double *func, int numfunc) {
  return chiSq->writeFunc(func, numfunc);
}

/** Forward to chiSq->writeMap(map, numfunc) and return its result. */
int DKSBaseMuSR::writeMaps(const int *map, int numfunc) {
  return chiSq->writeMap(map, numfunc);
}

/**
 * Check the chi-square kernels for @p fitType with a local threads-per-block value
 * initialised to 1; whatever the check writes into it is discarded.
 */
int DKSBaseMuSR::checkMuSRKernels(int fitType) {
  int threadsPerBlock = 1;
  return checkMuSRKernels(fitType, threadsPerBlock);
}

/**
 * Forward to chiSq->checkChiSquareKernels(fitType, threadsPerBlock) and return its
 * result. @p threadsPerBlock is passed by reference and may be updated by the callee.
 */
int DKSBaseMuSR::checkMuSRKernels(int fitType, int &threadsPerBlock) {
  return chiSq->checkChiSquareKernels(fitType, threadsPerBlock);
}

/** Forward to chiSq->getOperations(oper) and return its result. */
int DKSBaseMuSR::getOperations(int &oper) {
  return chiSq->getOperations(oper);
}