Files
DKS/auto-tuning/testChiSquareRTUQTK.cpp
2016-10-10 14:49:32 +02:00

619 lines
17 KiB
C++

#include <iostream>
#include <cstdlib>
#include <string>
#include <cmath>
#include <fstream>
#include <cstdio>
#include <stddef.h>
#include <fstream>
#include <math.h>
#include <time.h>
#include <getopt.h>
#include <unistd.h>
#include "DKSBaseMuSR.h"
#include "Utility/DKSTimer.h"
#include "Array1D.h"
#include "Array2D.h"
#include "Array3D.h"
#include "error_handlers.h"
#include "PCSet.h"
#include "fast_laplace.h"
#include "uqtktools.h"
#include "lreg.h"
#define PI 3.14159265358979323846
#define TWO_PI 6.283185307179586231996
#define DEG_TO_RAD 1.7453292519943295474371681e-2
//#define N0 0.25
#define N0 1e-10
#define TAU 2.197019
#define BKG 0.05
using namespace std;
typedef std::function<double()> doubleF;
void randData(double *data, int N, int scale = 1) {
for (int i = 0; i < N; i++)
data[i] = ((double)rand() / RAND_MAX ) * scale;
}
/** MusrFit predefined functions.
* Predefined functions from MusrFit that can be used to define the theory function.
* First parameter in all the functions is alwats time - t, rest of the parameters depend
* on the function.
*/
double se(double *t, double *lamda) {
return exp( -*lamda**t );
}
//math func + math oper + memory loads
//1 + 1 + 2
double ge(double *t, double *lamda, double *beta) {
return exp( -pow( (*lamda)*(*t), *beta) );
}
//2 + 1 + 3
double sg(double *t, double *sigma) {
return exp( -0.5 * pow((*sigma)*(*t), 2) );
}
//2 + 2 + 2
double stg(double *t, double *sigma) {
double sigmatsq = pow((*sigma)*(*t),2);
return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatsq) * exp(-0.5 * sigmatsq);
}
double sekt(double *t, double *lambda) {
double lambdat = *lambda*(*t);
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
}
double lgkt(double *t, double *lambda, double *sigma) {
double lambdat = *lambda*(*t);
double sigmatsq = pow(*sigma*(*t), 2.0);
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat - sigmatsq) * exp(-lambdat - 0.5*sigmatsq);
}
double skt(double *t, double *sigma, double *beta) {
if (*beta < 1.0e-3)
return 0.0;
double sigmatb = pow(*sigma*(*t), (*beta));
return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatb) * exp(-sigmatb/(*beta));
}
double spg(double *t, double *lambda, double *gamma, double *q) {
double lam2 = (*lambda)*(*lambda);
double lamt2q = (*t)*(*t)*lam2*(*q);
double rate2 = 4.0*lam2*(1.0-*q)*(*t)/(*gamma);
double rateL = sqrt(fabs(rate2));
double rateT = sqrt(fabs(rate2)+lamt2q);
return (1.0/3.0)*exp(-rateL) + (2.0/3.0)*(1.0 - lamt2q / rateT)*exp(-rateT);
}
double rahf(double *t, double *nu, double *lambda) {
double nut = *nu*(*t);
double nuth = *nu*(*t)/2.0;
double lamt = *lambda*(*t);
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
}
double tf(double *t, double *phi, double *nu) {
double tmp_nu = TWO_PI**nu**t;
double tmp_phi = DEG_TO_RAD * *phi;
return cos(tmp_nu + tmp_phi);
}
double ifld(double *t, double *alpha, double *phi, double *nu, double *lambdaT, double *lambdaL) {
double wt = TWO_PI**nu**t;
double ph = DEG_TO_RAD**phi;
return *alpha*cos(wt+ph)*exp(-*lambdaT**t) + (1.0-*alpha)*exp(-*lambdaL**t);
}
double b(double *t, double *phi, double *nu) {
return j0(TWO_PI**nu**t + DEG_TO_RAD**phi);
}
double ib(double *t, double *alpha, double *phi, double *nu, double *lambdaT, double *lambdaL) {
double wt = TWO_PI * *nu * *t;
double ph = DEG_TO_RAD * *phi;
return *alpha*j0(wt+ph)*exp(-*lambdaT**t) + (1.0-*alpha)*exp(-*lambdaL**t);
}
double ab(double *t, double *sigma, double *gamma) {
double gt = *gamma**t;
return exp(-pow(*sigma/(*gamma),2.0)*(exp(-gt) - 1.0 + gt));
}
double snkzf(double *t, double *Delta0, double *Rb) {
double D0t2 = pow(*Delta0**t, 2.0);
double aa = 1.0/(1.0+pow(*Rb,2.0)*D0t2);
return (1.0/3.0) + (2.0/3.0)*pow(aa,1.5)*(1.0-D0t2*aa)*exp(-0.5*D0t2*aa);
}
double snktf(double *t, double *phi, double *nu, double *Delta0, double *Rb) {
double wt = TWO_PI**nu**t;
double ph = DEG_TO_RAD**phi;
double D0t2 = pow(*Delta0**t, 2.0);
double aa = 1.0/(1.0+pow(*Rb,2.0)*D0t2);
return sqrt(aa)*exp(-0.5*D0t2*aa)*cos(wt+ph);
}
double dnkzf(double *t, double *Delta0, double *Rb, double *nuc) {
double nuct = *nuc**t;
double theta = (exp(-nuct) - 1.0 -nuct)/pow(*nuc, 2.0);
double aa = 1.0/(1.0+4.0*pow(*Rb**Delta0,2.0)*theta);
return sqrt(aa)*exp(-2.0**Delta0**Delta0*theta*aa);
}
double dnktf(double *t, double *phi, double *nu, double *Delta0, double *Rb, double *nuc) {
double wt = TWO_PI**nu**t;
double ph = DEG_TO_RAD**phi;
double nuct = *nuc**t;
double theta = (exp(-nuct) - 1.0 -nuct)/pow(*nuc, 2.0);
double aa = 1.0/(1.0+2.0*pow(*Rb**Delta0,2.0)*theta);
return sqrt(aa)*exp(-*Delta0**Delta0*theta*aa)*cos(wt+ph);
}
double evalf(std::vector< std::pair<int, doubleF> > func) {
double result = 0.0;
for (auto f : func) {
switch (f.first) {
case 0: result += f.second(); break;
case 1: result -= f.second(); break;
default: result += f.second(); break;
}
}
return result;
}
double cpuChiSq(double *data, std::vector< std::pair<int, doubleF> > &func, int ndata, double *t, double dt) {
double result = 0.0;
double ts = *t;
for (int i = 0; i < ndata; i++) {
*t = ts + i*dt;
double d = data[i];
double e = data[i];
double vf = evalf(func);
double theo = N0 * exp(-(*t)/TAU) * (1.0 + vf) + BKG;
if (e != 0.0)
result += ( (theo - d) * (theo - d) ) / (e * e);
else
result += theo * theo;
}
return result;
}
//create a random length from 50 - 1000 array and fill with random values from 0 to 1
void randomParams(double *p, int np) {
for (int i = 0; i < np; i++)
p[i] = (double)rand() / RAND_MAX;
}
//create map array of random size and fill with indexes from 0 to max, max < size of param array
void randomMaps(int *m, int nm, int max) {
for (int i = 0; i < nm; i++)
m[i] = rand() % max;
}
void generateRandomFunction(std::vector< std::pair<int, doubleF> > &func, std::string &sfunc,
double *t, double *p, int *m, int np, int nm, int nfunc)
{
for (int n = 0; n < nfunc; n++) {
std::string sf = "";
doubleF f;
int r = rand() % 18; //randomly choose one of the predefined functions to use
int id1 = rand() % nm; //randomly select parameters to use in the function
int id2 = rand() % nm;
int id3 = rand() % nm;
int id4 = rand() % nm;
int id5 = rand() % nm;
std::string p1 = "p[m[" + to_string(id1) + "]])";
std::string p2 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]])";
std::string p3 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" +
to_string(id3) + "]])";
std::string p4 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" +
to_string(id3) + "]], p[m[" + to_string(id4) + "]])";
std::string p5 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" +
to_string(id3) + "]], p[m[" + to_string(id4) + "]], p[m[" + to_string(id5) + "]])";
//get a random index from maps and use it to get the parameter value, bind function and parameter
//values to f, and create string for gpu in sfunc
switch (r) {
case 0:
f = std::bind(se, t, &p[m[id1]]);
sf = "se(t," + p1;
break;
case 1:
f = std::bind(ge, t, &p[m[id1]], &p[m[id2]]);
sf = "ge(t," + p2;
break;
case 2:
f = std::bind(sg, t, &p[m[id1]]);
sf = "sg(t, " + p1;
break;
case 3:
f = std::bind(stg, t, &p[m[id1]]);
sf = "stg(t, " + p1;
break;
case 4:
f = std::bind(sekt, t, &p[m[id1]]);
sf = "sekt(t, " + p1;
break;
case 5:
f = std::bind(lgkt, t, &p[m[id1]], &p[m[id2]]);
sf = "lgkt(t, " + p2;
break;
case 6:
f = std::bind(skt, t, &p[m[id1]], &p[m[id2]]);
sf = "skt(t, " + p2;
break;
case 7:
f = std::bind(spg, t, &p[m[id1]], &p[m[id2]], &p[m[id3]]);
sf = "spg(t, " + p3;
break;
case 8:
f = std::bind(rahf, t, &p[m[id1]], &p[m[id2]]);
sf = "rahf(t, " + p2;
break;
case 9:
f = std::bind(tf, t, &p[m[id1]], &p[m[id2]]);
sf = "tf(t, " + p2;
break;
case 10:
f = std::bind(ifld, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]);
sf = "ifld(t, " + p5;
break;
case 11:
f = std::bind(b, t, &p[m[id1]], &p[m[id2]]);
sf = "b(t, " + p2;
break;
case 12:
f = std::bind(ib, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]);
sf = "ib(t, " + p5;
break;
case 13:
f = std::bind(ab, t, &p[m[id1]], &p[m[id2]]);
sf = "ab(t, " + p2;
break;
case 14:
f = std::bind(snkzf, t, &p[m[id1]], &p[m[id2]]);
sf = "snkzf(t, " + p2;
break;
case 15:
f = std::bind(snktf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]]);
sf = "snktf(t, " + p4;
break;
case 16:
f = std::bind(dnkzf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]]);
sf = "dnkzf(t, " + p3;
break;
case 17:
f = std::bind(dnktf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]);
sf = "dnktf(t, " + p5;
break;
}
int sign = rand() % 2;
if (n == 0) sign = 0;
func.push_back( std::make_pair(sign, f) );
if (n == 0)
sfunc = sf;
else {
switch(sign) {
case 0: sfunc += " + " + sf; break;
case 1: sfunc += " - " + sf; break;
default: sfunc += " + " + sf; break;
}
}
}
}
int main(int argc, char *argv[]) {
srand(time(NULL));
bool autotune = false;
bool eval = false;
bool test = false;
char *api_name = new char[10];
char *device_name = new char[10];
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
int nord = 15; //the order of the initial, overcomplete basis
int loop = 100;
for (int i = 1; i < argc; ++i) {
if (argv[i] == string("-cuda")) {
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-opencl")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-mic")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-mic");
}
if (argv[i] == string("-cpu")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-cpu");
}
if (argv[i] == string("-autotune")) {
autotune = true;
}
if (argv[i] == string("-eval"))
eval = true;
if (argv[i] == string("-test"))
test = true;
if (argv[i] == string("-nord"))
nord = atoi(argv[i+1]);
if (argv[i] == string("-loop"))
loop = atoi(argv[i+1]);
}
//init dks and set chi^2 constants
DKSBaseMuSR dksbase;
dksbase.setAPI(api_name);
dksbase.setDevice(device_name);
dksbase.initDevice();
if (autotune)
dksbase.setAutoTuningOn();
int nydim = 2; //the dimensionality of input
int nxdim = 5;
//UQTk arrays
Array2D<double> xdata(loop, nxdim, 0.0);
Array2D<double> ydata(loop, nydim, 0.0);
Array2D<double> xdata_pce(loop, nxdim, 0.0);
Array2D<double> ydata_pce(loop, nydim, 0.0);
int size = 10000;
Array2D<double> xtmp(size, nxdim, 0.0);
Array2D<double> ytmp(size, nydim, 0.0);
if (eval || test) {
for (int l = 0; l < loop; l++) {
int ierr;
//create a random number of parameters
int n = rand() % 9 + 1;
int Ndata = n * 100000; //number of data points 100k to 1milj, with 100k incr.
int np = ( rand() % (1000 - 50) ) + 50; //from 50 to 1000 for different shared memory needs
int nm = ( rand() % (50 - 5) ) + 5; //use 5 to 50 of the parameters, for different memory access
int nf = ( rand() % (50 - 5) ) + 5; //not used in the test case, but changes the shared memory
int nfunc = (rand() % (10 - 1) ) + 1; //1 to 10 user defined functions
//allocate storage for parameters, maps and functions
int *m = new int[nm];
double *p = new double[np];
double *f = new double[nf];
//fill with random numbers
randomParams(p, np);
randomMaps(m, nm, np);
randomParams(f, nf);
//create a random user function that can be passed to GPU kernel and evaluated on the host
double dt = 1e-10;
double t = 1e-10;
std::vector< std::pair<int, doubleF> > func;
std::string sfunc;
generateRandomFunction(func, sfunc, &t, p, m, np, nm, nfunc);
//create a data array and fill with random values
double *data = new double[Ndata];
randomParams(data, Ndata);
//allocate device memory for the data and transfer to the GPU
void *data_ptr = dksbase.allocateMemory<double>(Ndata, ierr);
dksbase.writeData<double>(data_ptr, data, Ndata);
//init chi^2
dksbase.initChiSquare(Ndata, np, nf, nm);
dksbase.callSetConsts(N0, TAU, BKG);
//write params to the devic
dksbase.writeParams(p, np);
dksbase.writeFunctions(f, nf);
dksbase.writeMaps(m, nm);
//compile the kernel with the new function
dksbase.callCompileProgram(sfunc);
//run the kernel on the GPU and evaluate the function on the host
double result_dks, result_cpu, tmp_result;
ierr = dksbase.callLaunchChiSquare(1, data_ptr, data_ptr, Ndata, np, nf, nm,
t, dt, result_dks);
if (ierr == DKS_SUCCESS) {
result_cpu = cpuChiSq(data, func, Ndata, &t, dt);
std::vector<int> config;
dksbase.callAutoTuningChiSquare(1, data_ptr, data_ptr, Ndata, np, nf, nm,
t, dt, tmp_result, config);
cout << "DKS: " << result_dks << endl;
cout << "CPU: " << result_cpu << endl;
cout << "Launch parameters: " << config[0] << ", " << config[1] << endl;
cout << sfunc << endl;
cout << "Kernel parameters: " << np << ", " << nm << ", " << nf << ", " << nfunc << endl;
xdata(l,0) = np;
xdata(l,1) = nm;
xdata(l,2) = nf;
xdata(l,3) = nfunc;
xdata(l,4) = Ndata;
ydata(l,0) = config[0];
ydata(l,1) = config[1];
std::cout << std::endl << "Loop " << l + 1 << " finished" << std::endl << std::endl;
} else {
cout << "Created kernel failed! " << np << ", " << nm << ", " << nf << ", " << nfunc << endl;
cout << sfunc << endl;
}
//free temporary resources
delete[] m;
delete[] p;
delete[] f;
delete[] data;
dksbase.freeChiSquare();
dksbase.freeMemory<double>(data_ptr, Ndata);
}
} else {
//read_datafileVS(xdata, "xdata.dat");
//read_datafileVS(ydata, "ydata.dat");
xtmp.SetValue(0.0);
ytmp.SetValue(0.0);
read_datafileVS(xtmp, "xdata_pce.dat");
read_datafileVS(ytmp, "ydata_pce.dat");
for (int i = 0; i < loop; i++) {
for (int j = 0; j < nxdim; j++)
xdata(i,j) = xtmp(i,j);
for (int j = 0; j < nydim; j++)
ydata(i,j) = ytmp(i,j);
}
}
if (eval) {
for (int i = 0; i < nxdim; i++) {
for (int j = 0; j < loop; j++) {
xdata_pce(j,i) = xdata(j,i);
ydata_pce(j,i) = ydata(j,i);
}
}
for (int i = 0; i < nydim; i++) {
for (int j = 0; j < loop; j++) {
xdata_pce(j,i) = xdata(j,i);
ydata_pce(j,i) = ydata(j,i);
}
}
} else {
//read_datafileVS(xdata_pce, "xdata_pce.dat");
//read_datafileVS(ydata_pce, "ydata_pce.dat");
xtmp.SetValue(0.0);
ytmp.SetValue(0.0);
read_datafileVS(xtmp, "xdata_pce.dat");
read_datafileVS(ytmp, "ydata_pce.dat");
for (int i = 0; i < loop; i++) {
for (int j = 0; j < nxdim; j++)
xdata_pce(i,j) = xtmp(i,j);
for (int j = 0; j < nydim; j++)
ydata_pce(i,j) = ytmp(i,j);
}
std::cout << "Built pce with " << xdata_pce.XSize() << " datapoints" << std::endl;
}
//default input settings
string which_chaos="LU"; //PC type
string msc="m";
Lreg* reg;
reg = new PCreg(which_chaos,nord,nxdim);
int nbas = reg->GetNbas();
Array2D<double> ypc_data(xdata.XSize(), nydim, 0.0);
for (int i = 0; i < nydim; i++) {
std::cout << "start dim " << i+1 << std::endl;
Array1D<double> ydata_1d(xdata_pce.XSize(), 0.0);
for (unsigned int j = 0; j < xdata_pce.XSize(); j++)
ydata_1d(j) = ydata_pce(j,i);
std::cout << "setup data" << std::endl;
reg->SetupData(xdata_pce,ydata_1d);
std::cout << "Comput best lambda" << std::endl;
double lambda=reg->LSQ_computeBestLambda();
Array1D<double> lam(nbas,lambda);
reg->SetWeights(lam);
std::cout << "LSQ build regr" << std::endl;
reg->LSQ_BuildRegr();
std::cout << std::endl << "Lambda : " << lambda << std::endl;
Array1D<double> ypc;
Array1D<double> ycheck;
Array2D<double> ycheck_cov;
reg->EvalRegr(xdata,msc,ypc,ycheck,ycheck_cov);
std::cout << std::endl << "Eval" << std::endl;
for (unsigned int j = 0; j < xdata.XSize(); j++)
ypc_data(j,i) = ypc(j);
}
if (eval) {
write_datafile(xdata_pce, "xdata_pce.dat");
write_datafile(ydata_pce, "ydata_pce.dat");
}
write_datafile(xdata, "xdata.dat");
write_datafile(ydata, "ydata.dat");
write_datafile(ypc_data, "ypc_data.dat");
return 0;
}