197 lines
5.8 KiB
C++
197 lines
5.8 KiB
C++
#include "DKSBaseMuSR.h"

#include <memory>
|
|
|
|
/**
 * Default constructor.
 *
 * Leaves the object without a chi-square runtime (chiSq is null) and with
 * chiSquareSize_m set to -1.
 */
DKSBaseMuSR::DKSBaseMuSR() {
  // callLaunchChiSquare() compares the requested length against this value;
  // -1 presumably stands for "no problem size configured yet".
  chiSquareSize_m = -1;

  // No runtime exists until initChiSquare() creates one.
  chiSq = nullptr;
}
|
|
|
|
/** Destructor: releases the chi-square runtime if one is still attached. */
DKSBaseMuSR::~DKSBaseMuSR() {
  // freeChiSquare() does nothing when chiSq is already null; its status code
  // is discarded here.
  this->freeChiSquare();
}
|
|
|
|
int DKSBaseMuSR::callCompileProgram(std::string function, bool mlh) {
|
|
return chiSq->compileProgram(function, mlh);
|
|
}
|
|
|
|
int DKSBaseMuSR::callLaunchChiSquare(int fitType,
|
|
void *mem_data, void *mem_err, int length,
|
|
int numpar, int numfunc, int nummap,
|
|
double timeStart, double timeStep,
|
|
double &result)
|
|
{
|
|
|
|
|
|
//if we are not auto tuning and the size of the problem has changed find the new parameters
|
|
//from autotuning config file
|
|
if (!isAutoTuningOn() && length != chiSquareSize_m) {
|
|
int numBlocks, blockSize;
|
|
std::string device_name;
|
|
getDeviceName(device_name);
|
|
dksconfig.getConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare",
|
|
length, "NumBlocks", numBlocks);
|
|
dksconfig.getConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare",
|
|
length, "BlockSize", blockSize);
|
|
chiSq->setKernelParams(numBlocks, blockSize);
|
|
|
|
//std::cout << "Parameters set to: " << numBlocks << ", " << blockSize << std::endl;
|
|
|
|
chiSquareSize_m = length;
|
|
}
|
|
|
|
int ierr = chiSq->launchChiSquare(fitType, mem_data, mem_err, length, numpar, numfunc,
|
|
nummap, timeStart, timeStep, result);
|
|
|
|
if ( isAutoTuningOn() ) {
|
|
std::vector<int> config;
|
|
callAutoTuningChiSquare(fitType, mem_data, mem_err, length, numpar, numfunc, nummap, timeStart,
|
|
timeStep, result, config);
|
|
}
|
|
|
|
return ierr;
|
|
}
|
|
|
|
int DKSBaseMuSR::callAutoTuningChiSquare(int fitType, void *mem_data, void *mem_err, int length,
|
|
int numpar, int numfunc, int nummap,
|
|
double timeStart, double timeStep,
|
|
double &result, std::vector<int> &config)
|
|
{
|
|
|
|
int loops = 100;
|
|
DKSAutoTuning *autoTuning;
|
|
if (apiCuda())
|
|
autoTuning = new DKSAutoTuning(this, API_CUDA, DEVICE_GPU_NEW, loops);
|
|
else if (apiOpenCL() && deviceGPU())
|
|
autoTuning = new DKSAutoTuning(this, API_OPENCL, DEVICE_GPU_NEW, loops);
|
|
else if (apiOpenCL() && deviceCPU())
|
|
autoTuning = new DKSAutoTuning(this, API_OPENCL, DEVICE_CPU_NEW, loops);
|
|
else if (apiOpenCL() && deviceMIC())
|
|
autoTuning = new DKSAutoTuning(this, API_OPENCL, DEVICE_MIC_NEW, loops);
|
|
else
|
|
autoTuning = new DKSAutoTuning(this, API_UNKNOWN, DEVICE_UNKNOWN_NEW, loops);
|
|
|
|
|
|
int maxThreadsPerBlock = 1024;
|
|
checkMuSRKernels(fitType, maxThreadsPerBlock);
|
|
std::cout << "Max threads for autotune " << maxThreadsPerBlock << std::endl;
|
|
|
|
//create the function to be timed
|
|
std::function<int()> f = std::bind(&ChiSquareRuntime::launchChiSquare, chiSq,
|
|
fitType, mem_data, mem_err, length, numpar, numfunc, nummap,
|
|
timeStart, timeStep, result);
|
|
autoTuning->setFunction(f, "launchChiSquare");
|
|
|
|
//create the parameters for auto-tuning
|
|
autoTuning->addParameter(&chiSq->blockSize_m, 32, maxThreadsPerBlock, 32, "BlockSize");
|
|
autoTuning->addParameter(&chiSq->numBlocks_m, 100, 5000, 100, "NumBlocks");
|
|
|
|
autoTuning->lineSearch();
|
|
|
|
//autoTuning->hillClimbing(100);
|
|
|
|
//autoTuning->simulatedAnnealing(1e-3, 1e-6);
|
|
|
|
//autoTuning->exaustiveSearch();
|
|
|
|
std::string device_name;
|
|
getDeviceName(device_name);
|
|
dksconfig.addConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare", length,
|
|
"NumBlocks", chiSq->numBlocks_m);
|
|
dksconfig.addConfigParameter(getAPI(), getDevice(), device_name, "ChiSquare", length,
|
|
"BlockSize", chiSq->blockSize_m);
|
|
|
|
|
|
config.push_back(chiSq->blockSize_m);
|
|
config.push_back(chiSq->numBlocks_m);
|
|
|
|
delete autoTuning;
|
|
|
|
return DKS_SUCCESS;
|
|
|
|
}
|
|
|
|
int DKSBaseMuSR::testAutoTuning() {
|
|
|
|
DKSAutoTuning *autoTuning;
|
|
DKSAutoTuningTester *tester;
|
|
|
|
autoTuning = new DKSAutoTuning(this, API_UNKNOWN, DEVICE_UNKNOWN_NEW);
|
|
tester = new DKSAutoTuningTester();
|
|
|
|
std::function<double()> f = std::bind(&DKSAutoTuningTester::peaksZ, tester);
|
|
autoTuning->setFunction(f, "testAutoTuner", false);
|
|
|
|
autoTuning->addParameter(&tester->x, -3.0, 3.0, 0.5, "x");
|
|
autoTuning->addParameter(&tester->y, -3.0, 3.0, 0.5, "y");
|
|
|
|
autoTuning->exaustiveSearch();
|
|
|
|
autoTuning->hillClimbing(10);
|
|
|
|
autoTuning->simulatedAnnealing(10, 0.0005);
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
int DKSBaseMuSR::callSetConsts(double N0, double tau, double bkg) {
|
|
return chiSq->setConsts(N0, tau, bkg);
|
|
}
|
|
|
|
int DKSBaseMuSR::callSetConsts(double alpha, double beta) {
|
|
return chiSq->setConsts(alpha, beta);
|
|
}
|
|
|
|
int DKSBaseMuSR::initChiSquare(int size_data, int size_param, int size_func, int size_map) {
|
|
int ierr;
|
|
|
|
if (apiCuda()) {
|
|
ierr = CUDA_SAFECALL( DKS_SUCCESS );
|
|
chiSq = CUDA_SAFEINIT(new CudaChiSquareRuntime(getCudaBase()));
|
|
} else {
|
|
ierr = OPENCL_SAFECALL( DKS_SUCCESS );
|
|
chiSq = OPENCL_SAFECALL(new OpenCLChiSquareRuntime(getOpenCLBase()));
|
|
}
|
|
|
|
if (ierr == DKS_SUCCESS) {
|
|
return chiSq->initChiSquare(size_data, size_param, size_func, size_map);
|
|
} else {
|
|
DEBUG_MSG("DKS API not set, or DKS compiled without sellected API support");
|
|
return DKS_ERROR;
|
|
}
|
|
}
|
|
|
|
int DKSBaseMuSR::freeChiSquare() {
|
|
int ierr = DKS_SUCCESS;
|
|
if (chiSq != NULL) {
|
|
ierr = chiSq->freeChiSquare();
|
|
delete chiSq;
|
|
chiSq = NULL;
|
|
}
|
|
return ierr;
|
|
}
|
|
|
|
int DKSBaseMuSR::writeParams(const double *params, int numparams) {
|
|
return chiSq->writeParams(params, numparams);
|
|
}
|
|
|
|
int DKSBaseMuSR::writeFunctions(const double *func, int numfunc) {
|
|
return chiSq->writeFunc(func, numfunc);
|
|
}
|
|
|
|
int DKSBaseMuSR::writeMaps(const int *map, int numfunc) {
|
|
return chiSq->writeMap(map, numfunc);;
|
|
|
|
}
|
|
|
|
int DKSBaseMuSR::checkMuSRKernels(int fitType) {
|
|
int threadsPerBlock = 1;
|
|
return chiSq->checkChiSquareKernels(fitType, threadsPerBlock);
|
|
}
|
|
|
|
int DKSBaseMuSR::checkMuSRKernels(int fitType, int &threadsPerBlock) {
|
|
return chiSq->checkChiSquareKernels(fitType, threadsPerBlock);
|
|
}
|
|
|
|
int DKSBaseMuSR::getOperations(int &oper) {
|
|
return chiSq->getOperations(oper);
|
|
}
|