303 lines
6.9 KiB
C++
303 lines
6.9 KiB
C++
#include "DKSAutoTuning.h"
|
|
|
|
DKSAutoTuning::DKSAutoTuning(DKSBase *base, std::string api, std::string device, int loops) {
|
|
|
|
base_m = base;
|
|
api_name_m = api;
|
|
device_name_m = device;
|
|
loops_m = loops;
|
|
|
|
evaluate_time_m = true;
|
|
}
|
|
|
|
DKSAutoTuning::~DKSAutoTuning() {
|
|
params_m.clear();
|
|
}
|
|
|
|
int DKSAutoTuning::setParameterValues(States state) {
|
|
|
|
//if states and params don't match in size something has gone wrong
|
|
if (state.size() != params_m.size()) {
|
|
DEBUG_MSG("Parameters and states don't match!");
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
//set the value pointed by params to value saved in state
|
|
for (unsigned int i = 0; i < params_m.size(); i++)
|
|
params_m[i].setValue(state[i].value);
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
/** TODO: might need a better timing for GPU code */
|
|
int DKSAutoTuning::evaluateFunction(double &value) {
|
|
|
|
int ierr = DKS_ERROR;
|
|
DKSTimer t;
|
|
|
|
t.init(function_name_m);
|
|
|
|
if (evaluate_time_m) {
|
|
//run for "loop" times and return the average time.
|
|
//syncDevice() is used to make sure that nothing is running on the device before the timer starts
|
|
// and to make sure the function has completed on the device before the time stops
|
|
for (int j = 0; j < loops_m; j++) {
|
|
base_m->syncDevice();
|
|
t.start();
|
|
ierr = f_m();
|
|
base_m->syncDevice();
|
|
t.stop();
|
|
if (ierr != DKS_SUCCESS) //exit loop if kernel execution fials
|
|
break;
|
|
}
|
|
|
|
//returns
|
|
value = t.gettime() / loops_m;
|
|
} else {
|
|
value = fd_m();
|
|
ierr = DKS_SUCCESS;
|
|
}
|
|
|
|
return ierr;
|
|
}
|
|
|
|
void DKSAutoTuning::clearParameters() {
|
|
params_m.clear();
|
|
}
|
|
|
|
void DKSAutoTuning::exaustiveSearch() {
|
|
|
|
DKSTimer t;
|
|
t.init("exaustive search");
|
|
t.start();
|
|
|
|
if (params_m.size() < 2)
|
|
return;
|
|
|
|
Parameter p1 = params_m[0];
|
|
Parameter p2 = params_m[1];
|
|
|
|
double time;
|
|
double mint = 1000000.0;
|
|
int minv1 = 0;
|
|
int minv2 = 0;
|
|
|
|
//std::ofstream myfile;
|
|
//std::string filename;
|
|
//filename = "search_" + api_name_m + "_" + device_name_m + ".dat";
|
|
//myfile.open(filename);
|
|
|
|
for (double v1 = p1.min; v1 <= p1.max; v1 += p1.step) {
|
|
for (double v2 = p2.min; v2 <= p2.max; v2 += p2.step) {
|
|
p1.setValue(v1);
|
|
p2.setValue(v2);
|
|
|
|
int ierr = evaluateFunction(time);
|
|
|
|
if (ierr == DKS_SUCCESS && time < mint) {
|
|
mint = time;
|
|
minv1 = v1;
|
|
minv2 = v2;
|
|
}
|
|
if (ierr == DKS_ERROR)
|
|
time = 1;
|
|
|
|
//myfile << time << "\t";
|
|
}
|
|
//myfile << "\n";
|
|
}
|
|
//myfile.close();
|
|
|
|
//std::cout << "Optimal launch parameters:" << std::endl;
|
|
//std::cout << mint << "\t" << minv1 << "\t" << minv2 << std::endl;
|
|
p1.setValue(minv1);
|
|
p2.setValue(minv2);
|
|
|
|
t.stop();
|
|
//std::cout << "exaustive search: " << t.gettime() << std::endl;
|
|
}
|
|
|
|
void DKSAutoTuning::lineSearch() {
|
|
DKSTimer t;
|
|
t.init("line search");
|
|
t.start();
|
|
|
|
double time;
|
|
int ierr = DKS_ERROR;
|
|
|
|
if (params_m.size() < 1) {
|
|
DEBUG_MSG("Need some parameters to autotune!");
|
|
return;
|
|
}
|
|
|
|
double mint = 1000000.0;
|
|
//loop trough parameters one parameter at a time
|
|
for (auto param : params_m) {
|
|
int minv = param.getValue();
|
|
|
|
//go trough all the values of the parameter, while keeping other parameters const
|
|
for (double i = param.min; i <= param.max; i += param.step) {
|
|
//adjust parameters
|
|
param.setValue(i);
|
|
|
|
//run for "loop" times and get average
|
|
ierr = evaluateFunction(time);
|
|
|
|
//if there was no error executing the function and time is better than previou
|
|
//min time, save the parameter configuration
|
|
if (ierr == DKS_SUCCESS && time < mint) {
|
|
mint = time;
|
|
minv = i;
|
|
}
|
|
|
|
} //repeat
|
|
|
|
param.setValue(minv);
|
|
}
|
|
|
|
//DEBUG: print out the found best parameters
|
|
for (auto param : params_m)
|
|
std::cout << "Parameter " << param.name << " set to " << param.getValue() << std::endl;
|
|
|
|
std::cout << "Best time: " << mint << std::endl;
|
|
|
|
t.stop();
|
|
std::cout << "Line search time: " << t.gettime() << std::endl;
|
|
|
|
}
|
|
|
|
void DKSAutoTuning::hillClimbing(int restart_loops) {
|
|
|
|
DKSTimer t;
|
|
t.init("hill climbing");
|
|
t.start();
|
|
|
|
std::cout << "hill climbing" << std::endl;
|
|
|
|
int ierr;
|
|
double time_current;
|
|
double time_next;
|
|
DKSSearchStates search(params_m);
|
|
|
|
std::cout << "start " << restart_loops << std::endl;
|
|
|
|
for (int i = 0; i < restart_loops; i++) {
|
|
|
|
|
|
//init random current state
|
|
search.initCurrentState();
|
|
|
|
//evaluate current state
|
|
setParameterValues(search.getCurrentState());
|
|
ierr = evaluateFunction(time_current);
|
|
|
|
//std::cout << "Start iteration " << i+1 << std::endl;
|
|
//search.printCurrentState(time_current);
|
|
|
|
if (ierr == DKS_ERROR)
|
|
continue;
|
|
|
|
//statr the loop
|
|
bool topReached = false;
|
|
while(!topReached) {
|
|
|
|
search.getNeighbours();
|
|
|
|
//get all the neighbors of the current state
|
|
bool neighbourFound = false;
|
|
while (!neighbourFound && search.nextNeighbourExists()) {
|
|
|
|
//evaluate all the neighbors of the current state
|
|
setParameterValues(search.getNextNeighbour());
|
|
ierr = evaluateFunction(time_next);
|
|
|
|
//search.printNeighbour(time_next);
|
|
|
|
if (ierr == DKS_ERROR)
|
|
std::cout << "Error evaluating function" << std::endl;
|
|
|
|
//move to the first option that improives the solution
|
|
if (ierr == DKS_SUCCESS && time_next < time_current) {
|
|
time_current = time_next;
|
|
search.moveToNeighbour();
|
|
neighbourFound = true;
|
|
}
|
|
|
|
}
|
|
|
|
//if no better option is found save the state and move to step 1
|
|
if (!neighbourFound) {
|
|
search.saveCurrentState(time_current);
|
|
topReached = true;
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
std::cout << std::endl;
|
|
search.printBest();
|
|
|
|
t.stop();
|
|
std::cout << "hill climbing: " << t.gettime() << std::endl;
|
|
}
|
|
|
|
void DKSAutoTuning::simulatedAnnealing(double Tstart, double Tstep) {
|
|
|
|
DKSTimer t;
|
|
t.init("simulated annealing");
|
|
t.start();
|
|
|
|
int ierr;
|
|
double time_current;
|
|
double time_next;
|
|
|
|
DKSSearchStates search(params_m);
|
|
|
|
//make a random guess
|
|
search.initCurrentState();
|
|
|
|
//evaluate current state
|
|
setParameterValues(search.getCurrentState());
|
|
ierr = evaluateFunction(time_current);
|
|
|
|
if (ierr == DKS_ERROR)
|
|
return;
|
|
|
|
for (double Temp = Tstart; Temp > 0; Temp -= Tstep) {
|
|
|
|
search.printCurrentState(time_current);
|
|
|
|
//calucate all the neighbours of current state
|
|
search.getNeighbours(10);
|
|
|
|
//make a move to random neighbour and evaluate the runtime
|
|
setParameterValues(search.getRandomNeighbour());
|
|
ierr = evaluateFunction(time_next);
|
|
|
|
if (ierr == DKS_ERROR)
|
|
return;
|
|
|
|
//if the solution improves move to this point else move to this point with probabily exp(-dE/T)
|
|
if (time_next < time_current) {
|
|
time_current = time_next;
|
|
search.moveToNeighbour();
|
|
} else {
|
|
double p = (double)rand() / RAND_MAX;
|
|
double dE = time_next - time_current;
|
|
double P = exp(-dE/Temp);
|
|
|
|
if (P > p) {
|
|
time_current = time_next;
|
|
search.moveToNeighbour();
|
|
}
|
|
}
|
|
}
|
|
|
|
search.printCurrentState(time_current);
|
|
|
|
t.stop();
|
|
std::cout << "Simulated annealing: " << t.gettime() << std::endl;
|
|
|
|
}
|
|
|