389 lines
10 KiB
C++
389 lines
10 KiB
C++
#include <iostream>
|
|
#include <cstdlib>
|
|
#include <string>
|
|
#include <cmath>
|
|
#include <fstream>
|
|
|
|
#include "DKSBaseMuSR.h"
|
|
#include "Utility/DKSTimer.h"
|
|
|
|
#define PI 3.14159265358979323846
|
|
#define TWO_PI 6.283185307179586231996
|
|
#define DEG_TO_RAD 1.7453292519943295474371681e-2
|
|
|
|
#define N0 0.25
|
|
#define TAU 2.197019
|
|
#define BKG 1.0
|
|
|
|
#define ALPHA 1.0
|
|
#define BETA 1.0
|
|
|
|
using namespace std;
|
|
|
|
void randData(double *data, int N, int scale = 1) {
|
|
for (int i = 0; i < N; i++)
|
|
data[i] = ((double)rand() / RAND_MAX ) * scale;
|
|
}
|
|
|
|
/** MusrFit predefined functions.
|
|
* Predefined functions from MusrFit that can be used to define the theory function.
|
|
* First parameter in all the functions is alwats time - t, rest of the parameters depend
|
|
* on the function.
|
|
*/
|
|
double se(double t, double lamda) {
|
|
return exp( -lamda*t );
|
|
}
|
|
|
|
double ge(double t, double lamda, double beta) {
|
|
return exp( -pow(lamda*t, beta) );
|
|
}
|
|
|
|
double sg(double t, double sigma) {
|
|
return exp( -0.5 * pow(sigma*t, 2) );
|
|
}
|
|
|
|
double stg(double t, double sigma) {
|
|
double sigmatsq = pow(sigma*t,2);
|
|
return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatsq) * exp(-0.5 * sigmatsq);
|
|
}
|
|
|
|
double sekt(double t, double lambda) {
|
|
double lambdat = lambda*t;
|
|
|
|
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat);
|
|
}
|
|
|
|
double lgkt(double t, double lambda, double sigma) {
|
|
double lambdat = lambda*t;
|
|
double sigmatsq = pow(sigma*t, 2.0);
|
|
|
|
return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat - sigmatsq) * exp(-lambdat - 0.5*sigmatsq);
|
|
}
|
|
|
|
double skt(double t, double sigma, double beta) {
|
|
if (beta < 1.0e-3)
|
|
return 0.0;
|
|
double sigmatb = pow(sigma*t, beta);
|
|
|
|
return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatb) * exp(-sigmatb/beta);
|
|
}
|
|
|
|
double spg(double t, double lambda, double gamma, double q) {
|
|
double lam2 = lambda*lambda;
|
|
double lamt2q = t*t*lam2*q;
|
|
double rate2 = 4.0*lam2*(1.0-q)*t/gamma;
|
|
double rateL = sqrt(fabs(rate2));
|
|
double rateT = sqrt(fabs(rate2)+lamt2q);
|
|
|
|
return (1.0/3.0)*exp(-rateL) + (2.0/3.0)*(1.0 - lamt2q / rateT)*exp(-rateT);
|
|
}
|
|
|
|
double rahf(double t, double nu, double lambda) {
|
|
double nut = nu*t;
|
|
double nuth = nu*t/2.0;
|
|
double lamt = lambda*t;
|
|
|
|
return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt));
|
|
}
|
|
|
|
double tf(double t, double phi, double nu) {
|
|
double tmp_nu = TWO_PI*nu*t;
|
|
double tmp_phi = DEG_TO_RAD * phi;
|
|
|
|
return cos(tmp_nu + tmp_phi);
|
|
}
|
|
|
|
double ifld(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) {
|
|
double wt = TWO_PI*nu*t;
|
|
double ph = DEG_TO_RAD*phi;
|
|
|
|
return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
|
}
|
|
|
|
double b(double t, double phi, double nu) {
|
|
return j0(TWO_PI*nu*t + DEG_TO_RAD*phi);
|
|
}
|
|
|
|
double ib(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) {
|
|
double wt = TWO_PI * nu * t;
|
|
double ph = DEG_TO_RAD * phi;
|
|
|
|
return alpha*j0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t);
|
|
}
|
|
|
|
double ab(double t, double sigma, double gamma) {
|
|
double gt = gamma*t;
|
|
|
|
return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt));
|
|
}
|
|
|
|
double snkzf(double t, double Delta0, double Rb) {
|
|
double D0t2 = pow(Delta0*t, 2.0);
|
|
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
|
|
|
return (1.0/3.0) + (2.0/3.0)*pow(aa,1.5)*(1.0-D0t2*aa)*exp(-0.5*D0t2*aa);
|
|
}
|
|
|
|
double snktf(double t, double phi, double nu, double Delta0, double Rb) {
|
|
double wt = TWO_PI*nu*t;
|
|
double ph = DEG_TO_RAD*phi;
|
|
double D0t2 = pow(Delta0*t, 2.0);
|
|
double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2);
|
|
|
|
return sqrt(aa)*exp(-0.5*D0t2*aa)*cos(wt+ph);
|
|
}
|
|
|
|
double dnkzf(double t, double Delta0, double Rb, double nuc) {
|
|
double nuct = nuc*t;
|
|
double theta = (exp(-nuct) - 1.0 -nuct)/pow(nuc, 2.0);
|
|
double aa = 1.0/(1.0+4.0*pow(Rb*Delta0,2.0)*theta);
|
|
|
|
return sqrt(aa)*exp(-2.0*Delta0*Delta0*theta*aa);
|
|
}
|
|
|
|
double dnktf(double t, double phi, double nu, double Delta0, double Rb, double nuc) {
|
|
double wt = TWO_PI*nu*t;
|
|
double ph = DEG_TO_RAD*phi;
|
|
double nuct = nuc*t;
|
|
double theta = (exp(-nuct) - 1.0 -nuct)/pow(nuc, 2.0);
|
|
double aa = 1.0/(1.0+2.0*pow(Rb*Delta0,2.0)*theta);
|
|
|
|
return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph);
|
|
}
|
|
|
|
|
|
double cpuChiSq(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc,
|
|
double timeStart, double timeStep, bool mlh = false)
|
|
{
|
|
|
|
double result = 0.0;
|
|
for (int i = 0; i < Ndata; i++) {
|
|
|
|
double t = timeStart + i*timeStep;
|
|
double d = data[i];
|
|
double e = data[i];
|
|
|
|
double fTheory = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]);
|
|
double theo = N0 * exp(-t/TAU) * (1.0 + fTheory) + BKG;
|
|
|
|
if (mlh) {
|
|
if ((d > 1.0e-9) && (fabs(theo) > 1.0e-9))
|
|
result += 2.0 * ((theo - d) + d * log(d / theo));
|
|
else
|
|
result += 2.0 * (theo - d);
|
|
} else {
|
|
if (e != 0.0)
|
|
result += ( (theo - d) * (theo - d) ) / (e * e);
|
|
else
|
|
result += theo * theo;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
double cpuChiSqAsym(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc,
|
|
double timeStart, double timeStep, bool mlh = false)
|
|
{
|
|
|
|
double result = 0.0;
|
|
for (int i = 0; i < Ndata; i++) {
|
|
|
|
double t = timeStart + i*timeStep;
|
|
double d = data[i];
|
|
double e = data[i];
|
|
|
|
double theoVal = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]);
|
|
double ab = ALPHA * BETA;
|
|
|
|
|
|
double theo = ((ab+1.0)*theoVal - (ALPHA-1.0))/((ALPHA+1.0) - (ab-1.0)*theoVal);
|
|
|
|
if (mlh) {
|
|
result += 0.0; //log max likelihood not defined here
|
|
} else {
|
|
if (e != 0.0)
|
|
result += ( (theo - d) * (theo - d) ) / (e * e);
|
|
else
|
|
result += theo * theo;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
int runTest(const char *api_name, const char *device_name, bool autotune, bool mlh, bool asym) {
|
|
|
|
int ierr;
|
|
|
|
/*
|
|
* Histogram size used in tests. If autotune run kernes with sizes from 1e5 to 1e6.
|
|
* If autotune is off just run the test once (used for debuging to test the kernel)
|
|
*/
|
|
int Nstart = 1e5;
|
|
int Nstep = 1e5;
|
|
int Nend = (autotune) ? 1e6 : 1e5;
|
|
|
|
//parameter, function and map sizes used in tests
|
|
int Npar = 66;
|
|
int Nfnc = 2;
|
|
int Nmap = 5;
|
|
|
|
//print test info
|
|
cout << "=========================BEGIN TEST=========================" << endl;
|
|
cout << "Use api: " << api_name << "\t" << device_name << endl;
|
|
cout << "Max log likelihood: " << std::boolalpha << mlh << endl;
|
|
cout << "Asymetry fit: " << std::boolalpha << asym << endl;
|
|
|
|
DKSBaseMuSR dksbase;
|
|
dksbase.setAPI(api_name);
|
|
dksbase.setDevice(device_name);
|
|
ierr = dksbase.initDevice();
|
|
if (ierr != DKS_SUCCESS) {
|
|
std::cout << "Device not supported!" << std::endl;
|
|
return DKS_ERROR;
|
|
}
|
|
|
|
//get the list of different devices
|
|
std::vector<int> devices;
|
|
dksbase.getDeviceList(devices);
|
|
std::cout << "Unique devices: " << devices.size() << std::endl;
|
|
|
|
//create the function string to use in test
|
|
string sFnc = "p[m[0]] * f[m[1]] * sg(t, p[m[2]]) * tf(t, p[m[3]], f[m[4]])";
|
|
int map[5] = {0, 0, 1, 2, 1};
|
|
|
|
//runt tests from 100k to 1mil data points
|
|
for (unsigned int device = 0; device < devices.size(); device++) {
|
|
for (int Ndata = Nstart; Ndata <= Nend; Ndata += Nstep) {
|
|
|
|
dksbase.setDefaultDevice(device);
|
|
|
|
std::cout << "Ndata: " << Ndata << std::endl;
|
|
|
|
//init the chi square calculations
|
|
dksbase.initChiSquare(Ndata, Npar, Nfnc, Nmap);
|
|
|
|
//create random arrays for data, parameter and function storage
|
|
double *data = new double[Ndata];
|
|
double *par = new double[Npar];
|
|
double *fnc = new double[Nfnc];
|
|
|
|
randData(data, Ndata);
|
|
randData(par, Npar);
|
|
randData(fnc, Nfnc, 100);
|
|
|
|
//allocate memory on device
|
|
void *data_ptr = dksbase.allocateMemory<double>(Ndata, ierr);
|
|
|
|
//write data, params, functions and maps to the device
|
|
dksbase.writeData<double>(data_ptr, data, Ndata);
|
|
dksbase.writeParams(par, Npar);
|
|
dksbase.writeFunctions(fnc, Nfnc);
|
|
dksbase.writeMaps(map, Nmap);
|
|
|
|
//set musrfit constants
|
|
dksbase.callSetConsts(N0, TAU, BKG);
|
|
dksbase.callSetConsts(ALPHA, BETA);
|
|
|
|
//compile the program created with the function string
|
|
dksbase.callCompileProgram(sFnc, mlh);
|
|
|
|
//set autotuning on/off
|
|
if (autotune)
|
|
dksbase.setAutoTuningOn();
|
|
|
|
//check kernel
|
|
dksbase.checkMuSRKernels(1);
|
|
|
|
//tmp values to store results and tmp values for time steps and start time
|
|
double result_gpu = 0.0;
|
|
double result_cpu = 0.0;
|
|
double dt = 1e-12;
|
|
double ts = 1e-7;
|
|
|
|
//execute kernel on the GPU and execute the same function on the cpu
|
|
if (!asym) {
|
|
dksbase.callLaunchChiSquare(1, data_ptr, data_ptr, Ndata, Npar, Nfnc,
|
|
Nmap, ts, dt, result_gpu);
|
|
result_cpu = cpuChiSq(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh);
|
|
} else {
|
|
dksbase.callLaunchChiSquare(2, data_ptr, data_ptr, Ndata, Npar, Nfnc,
|
|
Nmap, ts, dt, result_gpu);
|
|
result_cpu = cpuChiSqAsym(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh);
|
|
}
|
|
|
|
//check the results
|
|
cout << "DKS: " << result_gpu << endl;
|
|
cout << "CPU: " << result_cpu << endl;
|
|
|
|
//free CPU and GPU memory
|
|
dksbase.freeMemory<double>(data_ptr, Ndata);
|
|
dksbase.freeChiSquare();
|
|
|
|
delete[] data;
|
|
delete[] par;
|
|
delete[] fnc;
|
|
cout << "------------------------------------------------------------" << endl;
|
|
}
|
|
}
|
|
|
|
return DKS_SUCCESS;
|
|
}
|
|
|
|
int main(int argc, char* argv[]) {
|
|
|
|
bool asym = false;
|
|
bool mlh = false;
|
|
bool autotune = false;
|
|
|
|
char *api_name = new char[10];
|
|
char *device_name = new char[10];
|
|
|
|
strcpy(api_name, "Cuda");
|
|
strcpy(device_name, "-gpu");
|
|
|
|
for (int i = 1; i < argc; ++i) {
|
|
|
|
if (argv[i] == string("-cuda")) {
|
|
strcpy(api_name, "Cuda");
|
|
strcpy(device_name, "-gpu");
|
|
}
|
|
|
|
if (argv[i] == string("-opencl")) {
|
|
strcpy(api_name, "OpenCL");
|
|
strcpy(device_name, "-gpu");
|
|
}
|
|
|
|
if (argv[i] == string("-mic")) {
|
|
strcpy(api_name, "OpenCL");
|
|
strcpy(device_name, "-mic");
|
|
}
|
|
|
|
if (argv[i] == string("-cpu")) {
|
|
strcpy(api_name, "OpenCL");
|
|
strcpy(device_name, "-cpu");
|
|
}
|
|
|
|
if (argv[i] == string("-mlh"))
|
|
mlh = true;
|
|
|
|
if (argv[i] == string("-asym"))
|
|
asym = true;
|
|
|
|
if (argv[i] == string("-autotune"))
|
|
autotune = true;
|
|
|
|
}
|
|
|
|
int numPlatforms = 3;
|
|
const char *api[] = {"Cuda","OpenCL","OpenCL","OpenCL","OpenMP"};
|
|
const char *device[] = {"-gpu","-gpu","-cpu","-mic","-mic"};
|
|
|
|
for (int i = 2; i < numPlatforms; i++) {
|
|
runTest(api[i], device[i], autotune, mlh, asym);
|
|
}
|
|
|
|
return 0;
|
|
}
|