Files
DKS/auto-tuning/testChiSquareRT.cpp

389 lines
10 KiB
C++

#include <iostream>
#include <cstdlib>
#include <string>
#include <cmath>
#include <fstream>
#include "DKSBaseMuSR.h"
#include "Utility/DKSTimer.h"
#define PI 3.14159265358979323846
#define TWO_PI 6.283185307179586231996
#define DEG_TO_RAD 1.7453292519943295474371681e-2
#define N0 0.25
#define TAU 2.197019
#define BKG 1.0
#define ALPHA 1.0
#define BETA 1.0
using namespace std;
void randData(double *data, int N, int scale = 1) {
for (int i = 0; i < N; i++)
data[i] = ((double)rand() / RAND_MAX ) * scale;
}
/** MusrFit predefined functions.
* Predefined functions from MusrFit that can be used to define the theory function.
* First parameter in all the functions is alwats time - t, rest of the parameters depend
* on the function.
*/
double se(double t, double lamda) {
return exp( -lamda*t );
}
double ge(double t, double lamda, double beta) {
return exp( -pow(lamda*t, beta) );
}
double sg(double t, double sigma) {
return exp( -0.5 * pow(sigma*t, 2) );
}
double stg(double t, double sigma) {
double sigmatsq = pow(sigma*t,2);
return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatsq) * exp(-0.5 * sigmatsq);
}
double sekt(double t, double lambda) {
double lambdat = lambda*t;
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
}
double lgkt(double t, double lambda, double sigma) {
double lambdat = lambda*t;
double sigmatsq = pow(sigma*t, 2.0);
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat - sigmatsq) * exp(-lambdat - 0.5*sigmatsq);
}
double skt(double t, double sigma, double beta) {
if (beta < 1.0e-3)
return 0.0;
double sigmatb = pow(sigma*t, beta);
return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatb) * exp(-sigmatb/beta);
}
double spg(double t, double lambda, double gamma, double q) {
double lam2 = lambda*lambda;
double lamt2q = t*t*lam2*q;
double rate2 = 4.0*lam2*(1.0-q)*t/gamma;
double rateL = sqrt(fabs(rate2));
double rateT = sqrt(fabs(rate2)+lamt2q);
return (1.0/3.0)*exp(-rateL) + (2.0/3.0)*(1.0 - lamt2q / rateT)*exp(-rateT);
}
double rahf(double t, double nu, double lambda) {
double nut = nu*t;
double nuth = nu*t/2.0;
double lamt = lambda*t;
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
}
double tf(double t, double phi, double nu) {
double tmp_nu = TWO_PI*nu*t;
double tmp_phi = DEG_TO_RAD * phi;
return cos(tmp_nu + tmp_phi);
}
double ifld(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) {
double wt = TWO_PI*nu*t;
double ph = DEG_TO_RAD*phi;
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
}
double b(double t, double phi, double nu) {
return j0(TWO_PI*nu*t + DEG_TO_RAD*phi);
}
double ib(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) {
double wt = TWO_PI * nu * t;
double ph = DEG_TO_RAD * phi;
return alpha*j0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
}
double ab(double t, double sigma, double gamma) {
double gt = gamma*t;
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
}
double snkzf(double t, double Delta0, double Rb) {
double D0t2 = pow(Delta0*t, 2.0);
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
return (1.0/3.0) + (2.0/3.0)*pow(aa,1.5)*(1.0-D0t2*aa)*exp(-0.5*D0t2*aa);
}
double snktf(double t, double phi, double nu, double Delta0, double Rb) {
double wt = TWO_PI*nu*t;
double ph = DEG_TO_RAD*phi;
double D0t2 = pow(Delta0*t, 2.0);
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
return sqrt(aa)*exp(-0.5*D0t2*aa)*cos(wt+ph);
}
double dnkzf(double t, double Delta0, double Rb, double nuc) {
double nuct = nuc*t;
double theta = (exp(-nuct) - 1.0 -nuct)/pow(nuc, 2.0);
double aa = 1.0/(1.0+4.0*pow(Rb*Delta0,2.0)*theta);
return sqrt(aa)*exp(-2.0*Delta0*Delta0*theta*aa);
}
double dnktf(double t, double phi, double nu, double Delta0, double Rb, double nuc) {
double wt = TWO_PI*nu*t;
double ph = DEG_TO_RAD*phi;
double nuct = nuc*t;
double theta = (exp(-nuct) - 1.0 -nuct)/pow(nuc, 2.0);
double aa = 1.0/(1.0+2.0*pow(Rb*Delta0,2.0)*theta);
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
}
double cpuChiSq(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc,
double timeStart, double timeStep, bool mlh = false)
{
double result = 0.0;
for (int i = 0; i < Ndata; i++) {
double t = timeStart + i*timeStep;
double d = data[i];
double e = data[i];
double fTheory = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]);
double theo = N0 * exp(-t/TAU) * (1.0 + fTheory) + BKG;
if (mlh) {
if ((d > 1.0e-9) && (fabs(theo) > 1.0e-9))
result += 2.0 * ((theo - d) + d * log(d / theo));
else
result += 2.0 * (theo - d);
} else {
if (e != 0.0)
result += ( (theo - d) * (theo - d) ) / (e * e);
else
result += theo * theo;
}
}
return result;
}
double cpuChiSqAsym(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc,
double timeStart, double timeStep, bool mlh = false)
{
double result = 0.0;
for (int i = 0; i < Ndata; i++) {
double t = timeStart + i*timeStep;
double d = data[i];
double e = data[i];
double theoVal = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]);
double ab = ALPHA * BETA;
double theo = ((ab+1.0)*theoVal - (ALPHA-1.0))/((ALPHA+1.0) - (ab-1.0)*theoVal);
if (mlh) {
result += 0.0; //log max likelihood not defined here
} else {
if (e != 0.0)
result += ( (theo - d) * (theo - d) ) / (e * e);
else
result += theo * theo;
}
}
return result;
}
int runTest(const char *api_name, const char *device_name, bool autotune, bool mlh, bool asym) {
int ierr;
/*
* Histogram size used in tests. If autotune run kernes with sizes from 1e5 to 1e6.
* If autotune is off just run the test once (used for debuging to test the kernel)
*/
int Nstart = 1e5;
int Nstep = 1e5;
int Nend = (autotune) ? 1e6 : 1e5;
//parameter, function and map sizes used in tests
int Npar = 66;
int Nfnc = 2;
int Nmap = 5;
//print test info
cout << "=========================BEGIN TEST=========================" << endl;
cout << "Use api: " << api_name << "\t" << device_name << endl;
cout << "Max log likelihood: " << std::boolalpha << mlh << endl;
cout << "Asymetry fit: " << std::boolalpha << asym << endl;
DKSBaseMuSR dksbase;
dksbase.setAPI(api_name);
dksbase.setDevice(device_name);
ierr = dksbase.initDevice();
if (ierr != DKS_SUCCESS) {
std::cout << "Device not supported!" << std::endl;
return DKS_ERROR;
}
//get the list of different devices
std::vector<int> devices;
dksbase.getDeviceList(devices);
std::cout << "Unique devices: " << devices.size() << std::endl;
//create the function string to use in test
string sFnc = "p[m[0]] * f[m[1]] * sg(t, p[m[2]]) * tf(t, p[m[3]], f[m[4]])";
int map[5] = {0, 0, 1, 2, 1};
//runt tests from 100k to 1mil data points
for (unsigned int device = 0; device < devices.size(); device++) {
for (int Ndata = Nstart; Ndata <= Nend; Ndata += Nstep) {
dksbase.setDefaultDevice(device);
std::cout << "Ndata: " << Ndata << std::endl;
//init the chi square calculations
dksbase.initChiSquare(Ndata, Npar, Nfnc, Nmap);
//create random arrays for data, parameter and function storage
double *data = new double[Ndata];
double *par = new double[Npar];
double *fnc = new double[Nfnc];
randData(data, Ndata);
randData(par, Npar);
randData(fnc, Nfnc, 100);
//allocate memory on device
void *data_ptr = dksbase.allocateMemory<double>(Ndata, ierr);
//write data, params, functions and maps to the device
dksbase.writeData<double>(data_ptr, data, Ndata);
dksbase.writeParams(par, Npar);
dksbase.writeFunctions(fnc, Nfnc);
dksbase.writeMaps(map, Nmap);
//set musrfit constants
dksbase.callSetConsts(N0, TAU, BKG);
dksbase.callSetConsts(ALPHA, BETA);
//compile the program created with the function string
dksbase.callCompileProgram(sFnc, mlh);
//set autotuning on/off
if (autotune)
dksbase.setAutoTuningOn();
//check kernel
dksbase.checkMuSRKernels(1);
//tmp values to store results and tmp values for time steps and start time
double result_gpu = 0.0;
double result_cpu = 0.0;
double dt = 1e-12;
double ts = 1e-7;
//execute kernel on the GPU and execute the same function on the cpu
if (!asym) {
dksbase.callLaunchChiSquare(1, data_ptr, data_ptr, Ndata, Npar, Nfnc,
Nmap, ts, dt, result_gpu);
result_cpu = cpuChiSq(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh);
} else {
dksbase.callLaunchChiSquare(2, data_ptr, data_ptr, Ndata, Npar, Nfnc,
Nmap, ts, dt, result_gpu);
result_cpu = cpuChiSqAsym(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh);
}
//check the results
cout << "DKS: " << result_gpu << endl;
cout << "CPU: " << result_cpu << endl;
//free CPU and GPU memory
dksbase.freeMemory<double>(data_ptr, Ndata);
dksbase.freeChiSquare();
delete[] data;
delete[] par;
delete[] fnc;
cout << "------------------------------------------------------------" << endl;
}
}
return DKS_SUCCESS;
}
int main(int argc, char* argv[]) {
bool asym = false;
bool mlh = false;
bool autotune = false;
char *api_name = new char[10];
char *device_name = new char[10];
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
for (int i = 1; i < argc; ++i) {
if (argv[i] == string("-cuda")) {
strcpy(api_name, "Cuda");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-opencl")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-gpu");
}
if (argv[i] == string("-mic")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-mic");
}
if (argv[i] == string("-cpu")) {
strcpy(api_name, "OpenCL");
strcpy(device_name, "-cpu");
}
if (argv[i] == string("-mlh"))
mlh = true;
if (argv[i] == string("-asym"))
asym = true;
if (argv[i] == string("-autotune"))
autotune = true;
}
int numPlatforms = 3;
const char *api[] = {"Cuda","OpenCL","OpenCL","OpenCL","OpenMP"};
const char *device[] = {"-gpu","-gpu","-cpu","-mic","-mic"};
for (int i = 2; i < numPlatforms; i++) {
runTest(api[i], device[i], autotune, mlh, asym);
}
return 0;
}