DKS/auto-tuning/testChiSquareRTRandom.cpp

#include <iostream>
#include <cstdlib>
#include <string>
#include <cmath>
#include <fstream>

#include "DKSBaseMuSR.h"
#include "Utility/DKSTimer.h"

#define PI 3.14159265358979323846
#define TWO_PI 6.283185307179586231996
#define DEG_TO_RAD 1.7453292519943295474371681e-2

//#define N0 0.25
#define N0 1e-10
#define TAU 2.197019
#define BKG 0.05

using namespace std;

typedef std::function<double()> doubleF;

void randData(double *data, int N, int scale = 1) {
  for (int i = 0; i < N; i++)
    data[i] = ((double)rand() / RAND_MAX ) * scale;
}

/** MusrFit predefined functions.
 * Predefined functions from MusrFit that can be used to define the theory function.
 * First parameter in all the functions is alwats time - t, rest of the parameters depend
 * on the function.
 */
double se(double *t, double *lamda) {
  return exp( -*lamda**t );
}

double ge(double *t, double *lamda, double *beta) {
  return exp( -pow( (*lamda)*(*t), *beta) );
}

double sg(double *t, double *sigma) {
  return exp( -0.5 * pow((*sigma)*(*t), 2) );
}

double stg(double *t, double *sigma) {
  double sigmatsq = pow((*sigma)*(*t),2);
  return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatsq) * exp(-0.5 * sigmatsq);
}

double sekt(double *t, double *lambda) {
  double lambdat = *lambda*(*t);

  return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
}

double lgkt(double *t, double *lambda, double *sigma) {
  double lambdat = *lambda*(*t);
  double sigmatsq = pow(*sigma*(*t), 2.0);

  return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat - sigmatsq) * exp(-lambdat - 0.5*sigmatsq);
}

double skt(double *t, double *sigma, double *beta) {
  if (*beta < 1.0e-3)
    return 0.0;
  double sigmatb = pow(*sigma*(*t), (*beta));

  return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatb) * exp(-sigmatb/(*beta));
}

double spg(double *t, double *lambda, double *gamma, double *q) {
  double lam2 = (*lambda)*(*lambda);
  double lamt2q = (*t)*(*t)*lam2*(*q);
  double rate2 = 4.0*lam2*(1.0-*q)*(*t)/(*gamma);
  double rateL = sqrt(fabs(rate2));
  double rateT = sqrt(fabs(rate2)+lamt2q);

  return (1.0/3.0)*exp(-rateL) + (2.0/3.0)*(1.0 - lamt2q / rateT)*exp(-rateT);
}

double rahf(double *t, double *nu, double *lambda) {
  double nut  = *nu*(*t);
  double nuth = *nu*(*t)/2.0;
  double lamt = *lambda*(*t);

  return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
}

double tf(double *t, double *phi, double *nu) {
  double tmp_nu = TWO_PI**nu**t;
  double tmp_phi = DEG_TO_RAD * *phi;

  return cos(tmp_nu + tmp_phi);
}

double ifld(double *t, double *alpha, double *phi, double *nu, double *lambdaT, double *lambdaL) {
  double wt = TWO_PI**nu**t;
  double ph = DEG_TO_RAD**phi;

  return *alpha*cos(wt+ph)*exp(-*lambdaT**t) + (1.0-*alpha)*exp(-*lambdaL**t);
}

double b(double *t, double *phi, double *nu) {
  return j0(TWO_PI**nu**t + DEG_TO_RAD**phi);
}

double ib(double *t, double *alpha, double *phi, double *nu, double *lambdaT, double *lambdaL) {
  double wt = TWO_PI * *nu * *t;
  double ph = DEG_TO_RAD * *phi;

  return *alpha*j0(wt+ph)*exp(-*lambdaT**t) + (1.0-*alpha)*exp(-*lambdaL**t);
}

double ab(double *t, double *sigma, double *gamma) {
  double gt = *gamma**t;

  return exp(-pow(*sigma/(*gamma),2.0)*(exp(-gt) - 1.0 + gt));
}

double snkzf(double *t, double *Delta0, double *Rb) {
  double D0t2 = pow(*Delta0**t, 2.0);
  double aa = 1.0/(1.0+pow(*Rb,2.0)*D0t2);

  return (1.0/3.0) + (2.0/3.0)*pow(aa,1.5)*(1.0-D0t2*aa)*exp(-0.5*D0t2*aa);
}

double snktf(double *t, double *phi, double *nu, double *Delta0, double *Rb) {
  double wt = TWO_PI**nu**t;
  double ph = DEG_TO_RAD**phi;
  double D0t2 = pow(*Delta0**t, 2.0);
  double aa = 1.0/(1.0+pow(*Rb,2.0)*D0t2);

  return sqrt(aa)*exp(-0.5*D0t2*aa)*cos(wt+ph);
}

double dnkzf(double *t, double *Delta0, double *Rb, double *nuc) {
  double nuct = *nuc**t;
  double theta = (exp(-nuct) - 1.0 -nuct)/pow(*nuc, 2.0);
  double aa = 1.0/(1.0+4.0*pow(*Rb**Delta0,2.0)*theta);

  return sqrt(aa)*exp(-2.0**Delta0**Delta0*theta*aa);
}

double dnktf(double *t, double *phi, double *nu, double *Delta0, double *Rb, double *nuc) {
  double wt = TWO_PI**nu**t;
  double ph = DEG_TO_RAD**phi;
  double nuct = *nuc**t;
  double theta = (exp(-nuct) - 1.0 -nuct)/pow(*nuc, 2.0);
  double aa = 1.0/(1.0+2.0*pow(*Rb**Delta0,2.0)*theta);

  return sqrt(aa)*exp(-*Delta0**Delta0*theta*aa)*cos(wt+ph);
}

double evalf(std::vector< std::pair<int, doubleF> > func) {

  double result = 0.0;
  for (auto f : func) {
    switch (f.first) {
    case 0: result += f.second(); break;
    case 1: result -= f.second(); break;
    default: result += f.second(); break;
    }
  }

  return result;
}

double cpuChiSq(double *data, std::vector< std::pair<int, doubleF> > &func, int ndata, double *t, double dt) {

  double result = 0.0;
  double ts = *t;

  for (int i = 0; i < ndata; i++) {

    *t = ts + i*dt;
    double d = data[i];
    double e = data[i];

    double vf = evalf(func);
    double theo = N0 * exp(-(*t)/TAU) * (1.0 + vf) + BKG;

    if (e != 0.0)
      result += ( (theo - d) * (theo - d) ) / (e*e);
    else
      result += theo * theo;

  }
  return result;
}

//create a random length from 50 - 1000 array and fill with random values from 0 to 1
void randomParams(double *p, int np) {
  for (int i = 0; i < np; i++)
    p[i] = (double)rand() / RAND_MAX;
}

//create map array of random size and fill with indexes from 0 to max, max < size of param array
void randomMaps(int *m, int nm, int max) {
  for (int i = 0; i < nm; i++)
    m[i] = rand() % max;
}

int generateRandomFunction(std::vector< std::pair<int, doubleF> > &func, std::string &sfunc,
			   double *t, double *p, int *m, int np, int nm)
{

  //nf defines the number of functions to generate (from 1 to 25)
  int nf = rand() % 25 + 1;

  for (int n = 0; n < nf; n++) {
    std::string sf = "";
    doubleF f;

    int r = rand() % 18; //choose random function to use

    int id1 = rand() % nm;
    int id2 = rand() % nm;
    int id3 = rand() % nm;
    int id4 = rand() % nm;
    int id5 = rand() % nm;

    std::string p1 = "p[m[" + to_string(id1) + "]])";
    std::string p2 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]])";
    std::string p3 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" +
      to_string(id3) + "]])";
    std::string p4 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" +
      to_string(id3) + "]], p[m[" + to_string(id4) + "]])";
    std::string p5 = "p[m[" + to_string(id1) + "]], p[m[" + to_string(id2) + "]], p[m[" +
      to_string(id3) + "]], p[m[" + to_string(id4) + "]], p[m[" + to_string(id5) + "]])";

    //get a random index from maps and use it to get the parameter value, bind function and parameter
    //values to f, and create string for gpu in sfunc
    switch (r) {
    case 0:
      f = std::bind(se, t, &p[m[id1]]);
      sf = "se(t," + p1;
      break;
    case 1:
      f = std::bind(ge, t, &p[m[id1]], &p[m[id2]]);
      sf = "ge(t," + p2;
      break;
    case 2:
      f = std::bind(sg, t, &p[m[id1]]);
      sf = "sg(t, " + p1;
      break;
    case 3:
      f = std::bind(stg, t, &p[m[id1]]);
      sf = "stg(t, " + p1;
      break;
    case 4:
      f = std::bind(sekt, t, &p[m[id1]]);
      sf = "sekt(t, " + p1;
      break;
    case 5:
      f = std::bind(lgkt, t, &p[m[id1]], &p[m[id2]]);
      sf = "lgkt(t, " + p2;
      break;
    case 6:
      f = std::bind(skt, t, &p[m[id1]], &p[m[id2]]);
      sf = "skt(t, " + p2;
      break;
    case 7:
      f = std::bind(spg, t, &p[m[id1]], &p[m[id2]], &p[m[id3]]);
      sf = "spg(t, " + p3;
      break;
    case 8:
      f = std::bind(rahf, t, &p[m[id1]], &p[m[id2]]);
      sf = "rahf(t, " + p2;
      break;
    case 9:
      f = std::bind(tf, t, &p[m[id1]], &p[m[id2]]);
      sf = "tf(t, " + p2;
      break;
    case 10:
      f = std::bind(ifld, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]);
      sf = "ifld(t, " + p5;
      break;
    case 11:
      f = std::bind(b, t, &p[m[id1]], &p[m[id2]]);
      sf = "b(t, " + p2;
      break;
    case 12:
      f = std::bind(ib, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]);
      sf = "ib(t, " + p5;
      break;
    case 13:
      f = std::bind(ab, t, &p[m[id1]], &p[m[id2]]);
      sf = "ab(t, " + p2;
      break;
    case 14:
      f = std::bind(snkzf, t, &p[m[id1]], &p[m[id2]]);
      sf = "snkzf(t, " + p2;
      break;
    case 15:
      f = std::bind(snktf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]]);
      sf = "snktf(t, " + p4;
      break;
    case 16:
      f = std::bind(dnkzf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]]);
      sf = "dnkzf(t, " + p3;
      break;
    case 17:
      f = std::bind(dnktf, t, &p[m[id1]], &p[m[id2]], &p[m[id3]], &p[m[id4]], &p[m[id5]]);
      sf = "dnktf(t, " + p5;
      break;
    }


    int sign = rand() % 2;
    if (n == 0) sign = 0;
    func.push_back( std::make_pair(sign, f) );
    if (n == 0)
      sfunc = sf;
    else {
      switch(sign) {
      case 0: sfunc += " + " + sf; break;
      case 1: sfunc += " - " + sf; break;
      default: sfunc += " + " + sf; break;
      }

    }
  }

  return nf;
}

int main(int argc, char *argv[]) {


  srand(time(NULL));

  int ierr;
  int Ndata = 1e6;

  bool autotune = false;

  char *api_name = new char[10];
  char *device_name = new char[10];

  strcpy(api_name, "Cuda");
  strcpy(device_name, "-gpu");

  for (int i = 1; i < argc; ++i) {

    if (argv[i] == string("-cuda")) {
      strcpy(api_name, "Cuda");
      strcpy(device_name, "-gpu");
    }

    if (argv[i] == string("-opencl")) {
      strcpy(api_name, "OpenCL");
      strcpy(device_name, "-gpu");
    }

    if (argv[i] == string("-mic")) {
      strcpy(api_name, "OpenCL");
      strcpy(device_name, "-mic");
    }

    if (argv[i] == string("-cpu")) {
      strcpy(api_name, "OpenCL");
      strcpy(device_name, "-cpu");
    }

    if (argv[i] == string("-autotune")) {
      autotune = true;
    }

  }

  //create a random number of parameters
  int np = ( rand() % (1000 - 50) ) + 50;
  int nm = ( rand() % (50 - 5) ) + 5;
  int nf = ( rand() % (50 - 5) ) + 5;

  int *m = new int[nm];
  double *p = new double[np];
  double *f = new double[nf];

  randomParams(p, np);
  randomMaps(m, nm, np);
  randomParams(f, nf);

  double dt = 1e-10;
  double t = 1e-10;
  std::vector< std::pair<int, doubleF> > func;
  std::string sfunc;
  int nfunc = generateRandomFunction(func, sfunc, &t, p, m, np, nm);

  //create DKS base object, set and init device / framework
  DKSBaseMuSR dksbase;
  dksbase.setAPI(api_name);
  dksbase.setDevice(device_name);

  dksbase.initDevice();
  dksbase.initChiSquare(Ndata, np, nf, nm);

  dksbase.writeParams(p, np);
  dksbase.writeFunctions(f, nf);
  dksbase.writeMaps(m, nm);

  dksbase.callSetConsts(N0, TAU, BKG);

  dksbase.callCompileProgram(sfunc);

  if (autotune)
    dksbase.setAutoTuningOn();

  int oper = 0;
  dksbase.getOperations(oper);

  cout << "=========================BEGIN TEST=========================" << endl;
  cout << "Use api: " << api_name << "\t" << device_name << endl;
  cout << "Number of params: " << np << endl;
  cout << "Number of maps: " << nm << endl;
  cout << "Number of predefined functions: " << nfunc << endl;
  cout << "Number of ptx instructions: " << oper << endl;
  cout << "------------------------------------------------------------" << endl;
  cout << sfunc << endl;
  cout << "------------------------------------------------------------" << endl;

  //allocate memory on host and device device
  double *data = new double[Ndata];
  randomParams(data, Ndata);
  void *data_ptr = dksbase.allocateMemory<double>(Ndata, ierr);
  dksbase.writeData<double>(data_ptr, data, Ndata);

  for (int N = 1e5; N < Ndata + 1; N += 1e5) {
    double result_dks, result_cpu;

    t = 1e-10;

    dksbase.callLaunchChiSquare(1, data_ptr, data_ptr, N, np, nf, nm, t, dt, result_dks);
    result_cpu = cpuChiSq(data, func, N, &t, dt);

    cout << "Npart: " << N << endl;
    cout << "DKS: " << result_dks << endl;
    cout << "CPU: " << result_cpu << endl;

  }

  dksbase.freeMemory<double>(data_ptr, Ndata);
  dksbase.freeChiSquare();
  delete[] data;
  delete[] p;
  delete[] f;
  delete[] m;

  return 0;
}