#include #include #include #include #include #include #include #include #include #include #include #include #include "DKSBaseMuSR.h" #include "Utility/DKSTimer.h" #include "Array1D.h" #include "Array2D.h" #include "Array3D.h" #include "error_handlers.h" #include "PCSet.h" #include "fast_laplace.h" #include "uqtktools.h" #include "lreg.h" #define PI 3.14159265358979323846 #define TWO_PI 6.283185307179586231996 #define DEG_TO_RAD 1.7453292519943295474371681e-2 //#define N0 0.25 #define N0 1e-10 #define TAU 2.197019 #define BKG 0.05 using namespace std; typedef std::function doubleF; void randData(double *data, int N, int scale = 1) { for (int i = 0; i < N; i++) data[i] = ((double)rand() / RAND_MAX ) * scale; } /** MusrFit predefined functions. * Predefined functions from MusrFit that can be used to define the theory function. * First parameter in all the functions is alwats time - t, rest of the parameters depend * on the function. */ double se(double *t, double *lamda) { return exp( -*lamda**t ); } //math func + math oper + memory loads //1 + 1 + 2 double ge(double *t, double *lamda, double *beta) { return exp( -pow( (*lamda)*(*t), *beta) ); } //2 + 1 + 3 double sg(double *t, double *sigma) { return exp( -0.5 * pow((*sigma)*(*t), 2) ); } //2 + 2 + 2 double stg(double *t, double *sigma) { double sigmatsq = pow((*sigma)*(*t),2); return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatsq) * exp(-0.5 * sigmatsq); } double sekt(double *t, double *lambda) { double lambdat = *lambda*(*t); return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat); } double lgkt(double *t, double *lambda, double *sigma) { double lambdat = *lambda*(*t); double sigmatsq = pow(*sigma*(*t), 2.0); return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat - sigmatsq) * exp(-lambdat - 0.5*sigmatsq); } double skt(double *t, double *sigma, double *beta) { if (*beta < 1.0e-3) return 0.0; double sigmatb = pow(*sigma*(*t), (*beta)); return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatb) * exp(-sigmatb/(*beta)); } double spg(double *t, double *lambda, double *gamma, double *q) { double lam2 = (*lambda)*(*lambda); double lamt2q = (*t)*(*t)*lam2*(*q); double rate2 = 4.0*lam2*(1.0-*q)*(*t)/(*gamma); double rateL = sqrt(fabs(rate2)); double rateT = sqrt(fabs(rate2)+lamt2q); return (1.0/3.0)*exp(-rateL) + (2.0/3.0)*(1.0 - lamt2q / rateT)*exp(-rateT); } double rahf(double *t, double *nu, double *lambda) { double nut = *nu*(*t); double nuth = *nu*(*t)/2.0; double lamt = *lambda*(*t); return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt)); } double tf(double *t, double *phi, double *nu) { double tmp_nu = TWO_PI**nu**t; double tmp_phi = DEG_TO_RAD * *phi; return cos(tmp_nu + tmp_phi); } double ifld(double *t, double *alpha, double *phi, double *nu, double *lambdaT, double *lambdaL) { double wt = TWO_PI**nu**t; double ph = DEG_TO_RAD**phi; return *alpha*cos(wt+ph)*exp(-*lambdaT**t) + (1.0-*alpha)*exp(-*lambdaL**t); } double b(double *t, double *phi, double *nu) { return j0(TWO_PI**nu**t + DEG_TO_RAD**phi); } double ib(double *t, double *alpha, double *phi, double *nu, double *lambdaT, double *lambdaL) { double wt = TWO_PI * *nu * *t; double ph = DEG_TO_RAD * *phi; return *alpha*j0(wt+ph)*exp(-*lambdaT**t) + (1.0-*alpha)*exp(-*lambdaL**t); } double ab(double *t, double *sigma, double *gamma) { double gt = *gamma**t; return exp(-pow(*sigma/(*gamma),2.0)*(exp(-gt) - 1.0 + gt)); } double snkzf(double *t, double *Delta0, double *Rb) { double D0t2 = pow(*Delta0**t, 2.0); double aa = 1.0/(1.0+pow(*Rb,2.0)*D0t2); return (1.0/3.0) + (2.0/3.0)*pow(aa,1.5)*(1.0-D0t2*aa)*exp(-0.5*D0t2*aa); } double snktf(double *t, double *phi, double *nu, double *Delta0, double *Rb) { double wt = TWO_PI**nu**t; double ph = DEG_TO_RAD**phi; double D0t2 = pow(*Delta0**t, 2.0); double aa = 1.0/(1.0+pow(*Rb,2.0)*D0t2); return sqrt(aa)*exp(-0.5*D0t2*aa)*cos(wt+ph); } double dnkzf(double *t, double *Delta0, double *Rb, double *nuc) { double nuct = *nuc**t; double theta = (exp(-nuct) - 1.0 -nuct)/pow(*nuc, 2.0); double aa = 1.0/(1.0+4.0*pow(*Rb**Delta0,2.0)*theta); return sqrt(aa)*exp(-2.0**Delta0**Delta0*theta*aa); } double dnktf(double *t, double *phi, double *nu, double *Delta0, double *Rb, double *nuc) { double wt = TWO_PI**nu**t; double ph = DEG_TO_RAD**phi; double nuct = *nuc**t; double theta = (exp(-nuct) - 1.0 -nuct)/pow(*nuc, 2.0); double aa = 1.0/(1.0+2.0*pow(*Rb**Delta0,2.0)*theta); return sqrt(aa)*exp(-*Delta0**Delta0*theta*aa)*cos(wt+ph); } double evalf(std::vector< std::pair > func) { double result = 0.0; for (auto f : func) { switch (f.first) { case 0: result += f.second(); break; case 1: result -= f.second(); break; default: result += f.second(); break; } } return result; } double cpuChiSq(double *data, std::vector< std::pair > &func, int ndata, double *t, double dt) { double result = 0.0; double ts = *t; for (int i = 0; i < ndata; i++) { *t = ts + i*dt; double d = data[i]; double e = data[i]; double vf = evalf(func); double theo = N0 * exp(-(*t)/TAU) * (1.0 + vf) + BKG; if (e != 0.0) result += ( (theo - d) * (theo - d) ) / (e * e); else result += theo * theo; } return result; } //create a random length from 50 - 1000 array and fill with random values from 0 to 1 void randomParams(double *p, int np) { for (int i = 0; i < np; i++) p[i] = (double)rand() / RAND_MAX; } //create map array of random size and fill with indexes from 0 to max, max < size of param array void randomMaps(int *m, int nm, int max) { for (int i = 0; i < nm; i++) m[i] = rand() % max; } void generateRandomFunction(std::vector< std::pair > &func, std::string &sfunc, double *t, double *p, int *m, int np, int nm, int nfunc) { for (int n = 0; n < nfunc; n++) { std::string sf = ""; doubleF f; int r = rand() % 18; //randomly choose one of the predefined functions to use int id1 = rand() % nm; //randomly select parameters to use in the function int id2 = rand() % nm; int id3 = rand() % nm; int id4 = rand() % nm; int id5 = rand() % nm; std::string p1 = "p[m[" + to_string(id1) + "]])"; std::string p2 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]])"; std::string p3 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" + to_string(id3) + "]])"; std::string p4 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" + to_string(id3) + "]], p[m[" + to_string(id4) + "]])"; std::string p5 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" + to_string(id3) + "]], p[m[" + to_string(id4) + "]], p[m[" + to_string(id5) + "]])"; //get a random index from maps and use it to get the parameter value, bind function and parameter //values to f, and create string for gpu in sfunc switch (r) { case 0: f = std::bind(se, t, &p[m[id1]]); sf = "se(t," + p1; break; case 1: f = std::bind(ge, t, &p[m[id1]], &p[m[id2]]); sf = "ge(t," + p2; break; case 2: f = std::bind(sg, t, &p[m[id1]]); sf = "sg(t, " + p1; break; case 3: f = std::bind(stg, t, &p[m[id1]]); sf = "stg(t, " + p1; break; case 4: f = std::bind(sekt, t, &p[m[id1]]); sf = "sekt(t, " + p1; break; case 5: f = std::bind(lgkt, t, &p[m[id1]], &p[m[id2]]); sf = "lgkt(t, " + p2; break; case 6: f = std::bind(skt, t, &p[m[id1]], &p[m[id2]]); sf = "skt(t, " + p2; break; case 7: f = std::bind(spg, t, &p[m[id1]], &p[m[id2]], &p[m[id3]]); sf = "spg(t, " + p3; break; case 8: f = std::bind(rahf, t, &p[m[id1]], &p[m[id2]]); sf = "rahf(t, " + p2; break; case 9: f = std::bind(tf, t, &p[m[id1]], &p[m[id2]]); sf = "tf(t, " + p2; break; case 10: f = std::bind(ifld, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]); sf = "ifld(t, " + p5; break; case 11: f = std::bind(b, t, &p[m[id1]], &p[m[id2]]); sf = "b(t, " + p2; break; case 12: f = std::bind(ib, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]); sf = "ib(t, " + p5; break; case 13: f = std::bind(ab, t, &p[m[id1]], &p[m[id2]]); sf = "ab(t, " + p2; break; case 14: f = std::bind(snkzf, t, &p[m[id1]], &p[m[id2]]); sf = "snkzf(t, " + p2; break; case 15: f = std::bind(snktf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]]); sf = "snktf(t, " + p4; break; case 16: f = std::bind(dnkzf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]]); sf = "dnkzf(t, " + p3; break; case 17: f = std::bind(dnktf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]); sf = "dnktf(t, " + p5; break; } int sign = rand() % 2; if (n == 0) sign = 0; func.push_back( std::make_pair(sign, f) ); if (n == 0) sfunc = sf; else { switch(sign) { case 0: sfunc += " + " + sf; break; case 1: sfunc += " - " + sf; break; default: sfunc += " + " + sf; break; } } } } int main(int argc, char *argv[]) { srand(time(NULL)); bool autotune = false; bool eval = false; bool test = false; char *api_name = new char[10]; char *device_name = new char[10]; strcpy(api_name, "Cuda"); strcpy(device_name, "-gpu"); int nord = 15; //the order of the initial, overcomplete basis int loop = 100; for (int i = 1; i < argc; ++i) { if (argv[i] == string("-cuda")) { strcpy(api_name, "Cuda"); strcpy(device_name, "-gpu"); } if (argv[i] == string("-opencl")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-gpu"); } if (argv[i] == string("-mic")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-mic"); } if (argv[i] == string("-cpu")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-cpu"); } if (argv[i] == string("-autotune")) { autotune = true; } if (argv[i] == string("-eval")) eval = true; if (argv[i] == string("-test")) test = true; if (argv[i] == string("-nord")) nord = atoi(argv[i+1]); if (argv[i] == string("-loop")) loop = atoi(argv[i+1]); } //init dks and set chi^2 constants DKSBaseMuSR dksbase; dksbase.setAPI(api_name); dksbase.setDevice(device_name); dksbase.initDevice(); if (autotune) dksbase.setAutoTuningOn(); int nydim = 2; //the dimensionality of input int nxdim = 5; //UQTk arrays Array2D xdata(loop, nxdim, 0.0); Array2D ydata(loop, nydim, 0.0); Array2D xdata_pce(loop, nxdim, 0.0); Array2D ydata_pce(loop, nydim, 0.0); int size = 10000; Array2D xtmp(size, nxdim, 0.0); Array2D ytmp(size, nydim, 0.0); if (eval || test) { for (int l = 0; l < loop; l++) { int ierr; //create a random number of parameters int n = rand() % 9 + 1; int Ndata = n * 100000; //number of data points 100k to 1milj, with 100k incr. int np = ( rand() % (1000 - 50) ) + 50; //from 50 to 1000 for different shared memory needs int nm = ( rand() % (50 - 5) ) + 5; //use 5 to 50 of the parameters, for different memory access int nf = ( rand() % (50 - 5) ) + 5; //not used in the test case, but changes the shared memory int nfunc = (rand() % (10 - 1) ) + 1; //1 to 10 user defined functions //allocate storage for parameters, maps and functions int *m = new int[nm]; double *p = new double[np]; double *f = new double[nf]; //fill with random numbers randomParams(p, np); randomMaps(m, nm, np); randomParams(f, nf); //create a random user function that can be passed to GPU kernel and evaluated on the host double dt = 1e-10; double t = 1e-10; std::vector< std::pair > func; std::string sfunc; generateRandomFunction(func, sfunc, &t, p, m, np, nm, nfunc); //create a data array and fill with random values double *data = new double[Ndata]; randomParams(data, Ndata); //allocate device memory for the data and transfer to the GPU void *data_ptr = dksbase.allocateMemory(Ndata, ierr); dksbase.writeData(data_ptr, data, Ndata); //init chi^2 dksbase.initChiSquare(Ndata, np, nf, nm); dksbase.callSetConsts(N0, TAU, BKG); //write params to the devic dksbase.writeParams(p, np); dksbase.writeFunctions(f, nf); dksbase.writeMaps(m, nm); //compile the kernel with the new function dksbase.callCompileProgram(sfunc); //run the kernel on the GPU and evaluate the function on the host double result_dks, result_cpu, tmp_result; ierr = dksbase.callLaunchChiSquare(1, data_ptr, data_ptr, Ndata, np, nf, nm, t, dt, result_dks); if (ierr == DKS_SUCCESS) { result_cpu = cpuChiSq(data, func, Ndata, &t, dt); std::vector config; dksbase.callAutoTuningChiSquare(1, data_ptr, data_ptr, Ndata, np, nf, nm, t, dt, tmp_result, config); cout << "DKS: " << result_dks << endl; cout << "CPU: " << result_cpu << endl; cout << "Launch parameters: " << config[0] << ", " << config[1] << endl; cout << sfunc << endl; cout << "Kernel parameters: " << np << ", " << nm << ", " << nf << ", " << nfunc << endl; xdata(l,0) = np; xdata(l,1) = nm; xdata(l,2) = nf; xdata(l,3) = nfunc; xdata(l,4) = Ndata; ydata(l,0) = config[0]; ydata(l,1) = config[1]; std::cout << std::endl << "Loop " << l + 1 << " finished" << std::endl << std::endl; } else { cout << "Created kernel failed! " << np << ", " << nm << ", " << nf << ", " << nfunc << endl; cout << sfunc << endl; } //free temporary resources delete[] m; delete[] p; delete[] f; delete[] data; dksbase.freeChiSquare(); dksbase.freeMemory(data_ptr, Ndata); } } else { //read_datafileVS(xdata, "xdata.dat"); //read_datafileVS(ydata, "ydata.dat"); xtmp.SetValue(0.0); ytmp.SetValue(0.0); read_datafileVS(xtmp, "xdata_pce.dat"); read_datafileVS(ytmp, "ydata_pce.dat"); for (int i = 0; i < loop; i++) { for (int j = 0; j < nxdim; j++) xdata(i,j) = xtmp(i,j); for (int j = 0; j < nydim; j++) ydata(i,j) = ytmp(i,j); } } if (eval) { for (int i = 0; i < nxdim; i++) { for (int j = 0; j < loop; j++) { xdata_pce(j,i) = xdata(j,i); ydata_pce(j,i) = ydata(j,i); } } for (int i = 0; i < nydim; i++) { for (int j = 0; j < loop; j++) { xdata_pce(j,i) = xdata(j,i); ydata_pce(j,i) = ydata(j,i); } } } else { //read_datafileVS(xdata_pce, "xdata_pce.dat"); //read_datafileVS(ydata_pce, "ydata_pce.dat"); xtmp.SetValue(0.0); ytmp.SetValue(0.0); read_datafileVS(xtmp, "xdata_pce.dat"); read_datafileVS(ytmp, "ydata_pce.dat"); for (int i = 0; i < loop; i++) { for (int j = 0; j < nxdim; j++) xdata_pce(i,j) = xtmp(i,j); for (int j = 0; j < nydim; j++) ydata_pce(i,j) = ytmp(i,j); } std::cout << "Built pce with " << xdata_pce.XSize() << " datapoints" << std::endl; } //default input settings string which_chaos="LU"; //PC type string msc="m"; Lreg* reg; reg = new PCreg(which_chaos,nord,nxdim); int nbas = reg->GetNbas(); Array2D ypc_data(xdata.XSize(), nydim, 0.0); for (int i = 0; i < nydim; i++) { std::cout << "start dim " << i+1 << std::endl; Array1D ydata_1d(xdata_pce.XSize(), 0.0); for (unsigned int j = 0; j < xdata_pce.XSize(); j++) ydata_1d(j) = ydata_pce(j,i); std::cout << "setup data" << std::endl; reg->SetupData(xdata_pce,ydata_1d); std::cout << "Comput best lambda" << std::endl; double lambda=reg->LSQ_computeBestLambda(); Array1D lam(nbas,lambda); reg->SetWeights(lam); std::cout << "LSQ build regr" << std::endl; reg->LSQ_BuildRegr(); std::cout << std::endl << "Lambda : " << lambda << std::endl; Array1D ypc; Array1D ycheck; Array2D ycheck_cov; reg->EvalRegr(xdata,msc,ypc,ycheck,ycheck_cov); std::cout << std::endl << "Eval" << std::endl; for (unsigned int j = 0; j < xdata.XSize(); j++) ypc_data(j,i) = ypc(j); } if (eval) { write_datafile(xdata_pce, "xdata_pce.dat"); write_datafile(ydata_pce, "ydata_pce.dat"); } write_datafile(xdata, "xdata.dat"); write_datafile(ydata, "ydata.dat"); write_datafile(ypc_data, "ypc_data.dat"); return 0; }