#include #include #include #include #include #include "DKSBaseMuSR.h" #include "Utility/DKSTimer.h" #define PI 3.14159265358979323846 #define TWO_PI 6.283185307179586231996 #define DEG_TO_RAD 1.7453292519943295474371681e-2 #define N0 0.25 #define TAU 2.197019 #define BKG 1.0 #define ALPHA 1.0 #define BETA 1.0 using namespace std; void randData(double *data, int N, int scale = 1) { for (int i = 0; i < N; i++) data[i] = ((double)rand() / RAND_MAX ) * scale; } /** MusrFit predefined functions. * Predefined functions from MusrFit that can be used to define the theory function. * First parameter in all the functions is alwats time - t, rest of the parameters depend * on the function. */ double se(double t, double lamda) { return exp( -lamda*t ); } double ge(double t, double lamda, double beta) { return exp( -pow(lamda*t, beta) ); } double sg(double t, double sigma) { return exp( -0.5 * pow(sigma*t, 2) ); } double stg(double t, double sigma) { double sigmatsq = pow(sigma*t,2); return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatsq) * exp(-0.5 * sigmatsq); } double sekt(double t, double lambda) { double lambdat = lambda*t; return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat) * exp(-lambdat); } double lgkt(double t, double lambda, double sigma) { double lambdat = lambda*t; double sigmatsq = pow(sigma*t, 2.0); return (1.0/3.0) + (2.0/3.0)*(1.0 - lambdat - sigmatsq) * exp(-lambdat - 0.5*sigmatsq); } double skt(double t, double sigma, double beta) { if (beta < 1.0e-3) return 0.0; double sigmatb = pow(sigma*t, beta); return (1.0/3.0) + (2.0/3.0)*(1.0 - sigmatb) * exp(-sigmatb/beta); } double spg(double t, double lambda, double gamma, double q) { double lam2 = lambda*lambda; double lamt2q = t*t*lam2*q; double rate2 = 4.0*lam2*(1.0-q)*t/gamma; double rateL = sqrt(fabs(rate2)); double rateT = sqrt(fabs(rate2)+lamt2q); return (1.0/3.0)*exp(-rateL) + (2.0/3.0)*(1.0 - lamt2q / rateT)*exp(-rateT); } double rahf(double t, double nu, double lambda) { double nut = nu*t; double nuth = nu*t/2.0; double lamt = lambda*t; return (1.0/6.0)*(1.0-nuth)*exp(-nuth) + (1.0/3.0)*(1.0-nut/4.0)*exp(-0.25*(nut+2.44949*lamt)); } double tf(double t, double phi, double nu) { double tmp_nu = TWO_PI*nu*t; double tmp_phi = DEG_TO_RAD * phi; return cos(tmp_nu + tmp_phi); } double ifld(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) { double wt = TWO_PI*nu*t; double ph = DEG_TO_RAD*phi; return alpha*cos(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t); } double b(double t, double phi, double nu) { return j0(TWO_PI*nu*t + DEG_TO_RAD*phi); } double ib(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) { double wt = TWO_PI * nu * t; double ph = DEG_TO_RAD * phi; return alpha*j0(wt+ph)*exp(-lambdaT*t) + (1.0-alpha)*exp(-lambdaL*t); } double ab(double t, double sigma, double gamma) { double gt = gamma*t; return exp(-pow(sigma/gamma,2.0)*(exp(-gt) - 1.0 + gt)); } double snkzf(double t, double Delta0, double Rb) { double D0t2 = pow(Delta0*t, 2.0); double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2); return (1.0/3.0) + (2.0/3.0)*pow(aa,1.5)*(1.0-D0t2*aa)*exp(-0.5*D0t2*aa); } double snktf(double t, double phi, double nu, double Delta0, double Rb) { double wt = TWO_PI*nu*t; double ph = DEG_TO_RAD*phi; double D0t2 = pow(Delta0*t, 2.0); double aa = 1.0/(1.0+pow(Rb,2.0)*D0t2); return sqrt(aa)*exp(-0.5*D0t2*aa)*cos(wt+ph); } double dnkzf(double t, double Delta0, double Rb, double nuc) { double nuct = nuc*t; double theta = (exp(-nuct) - 1.0 -nuct)/pow(nuc, 2.0); double aa = 1.0/(1.0+4.0*pow(Rb*Delta0,2.0)*theta); return sqrt(aa)*exp(-2.0*Delta0*Delta0*theta*aa); } double dnktf(double t, double phi, double nu, double Delta0, double Rb, double nuc) { double wt = TWO_PI*nu*t; double ph = DEG_TO_RAD*phi; double nuct = nuc*t; double theta = (exp(-nuct) - 1.0 -nuct)/pow(nuc, 2.0); double aa = 1.0/(1.0+2.0*pow(Rb*Delta0,2.0)*theta); return sqrt(aa)*exp(-Delta0*Delta0*theta*aa)*cos(wt+ph); } double cpuChiSq(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc, double timeStart, double timeStep, bool mlh = false) { double result = 0.0; for (int i = 0; i < Ndata; i++) { double t = timeStart + i*timeStep; double d = data[i]; double e = data[i]; double fTheory = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]); double theo = N0 * exp(-t/TAU) * (1.0 + fTheory) + BKG; if (mlh) { if ((d > 1.0e-9) && (fabs(theo) > 1.0e-9)) result += 2.0 * ((theo - d) + d * log(d / theo)); else result += 2.0 * (theo - d); } else { if (e != 0.0) result += ( (theo - d) * (theo - d) ) / (e * e); else result += theo * theo; } } return result; } double cpuChiSqAsym(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc, double timeStart, double timeStep, bool mlh = false) { double result = 0.0; for (int i = 0; i < Ndata; i++) { double t = timeStart + i*timeStep; double d = data[i]; double e = data[i]; double theoVal = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]); double ab = ALPHA * BETA; double theo = ((ab+1.0)*theoVal - (ALPHA-1.0))/((ALPHA+1.0) - (ab-1.0)*theoVal); if (mlh) { result += 0.0; //log max likelihood not defined here } else { if (e != 0.0) result += ( (theo - d) * (theo - d) ) / (e * e); else result += theo * theo; } } return result; } int runTest(const char *api_name, const char *device_name, bool autotune, bool mlh, bool asym) { int ierr; /* * Histogram size used in tests. If autotune run kernes with sizes from 1e5 to 1e6. * If autotune is off just run the test once (used for debuging to test the kernel) */ int Nstart = 1e5; int Nstep = 1e5; int Nend = (autotune) ? 1e6 : 1e5; //parameter, function and map sizes used in tests int Npar = 66; int Nfnc = 2; int Nmap = 5; //print test info cout << "=========================BEGIN TEST=========================" << endl; cout << "Use api: " << api_name << "\t" << device_name << endl; cout << "Max log likelihood: " << std::boolalpha << mlh << endl; cout << "Asymetry fit: " << std::boolalpha << asym << endl; DKSBaseMuSR dksbase; dksbase.setAPI(api_name); dksbase.setDevice(device_name); ierr = dksbase.initDevice(); if (ierr != DKS_SUCCESS) { std::cout << "Device not supported!" << std::endl; return DKS_ERROR; } //get the list of different devices std::vector devices; dksbase.getDeviceList(devices); std::cout << "Unique devices: " << devices.size() << std::endl; //create the function string to use in test string sFnc = "p[m[0]] * f[m[1]] * sg(t, p[m[2]]) * tf(t, p[m[3]], f[m[4]])"; int map[5] = {0, 0, 1, 2, 1}; //runt tests from 100k to 1mil data points for (unsigned int device = 0; device < devices.size(); device++) { for (int Ndata = Nstart; Ndata <= Nend; Ndata += Nstep) { dksbase.setDefaultDevice(device); std::cout << "Ndata: " << Ndata << std::endl; //init the chi square calculations dksbase.initChiSquare(Ndata, Npar, Nfnc, Nmap); //create random arrays for data, parameter and function storage double *data = new double[Ndata]; double *par = new double[Npar]; double *fnc = new double[Nfnc]; randData(data, Ndata); randData(par, Npar); randData(fnc, Nfnc, 100); //allocate memory on device void *data_ptr = dksbase.allocateMemory(Ndata, ierr); //write data, params, functions and maps to the device dksbase.writeData(data_ptr, data, Ndata); dksbase.writeParams(par, Npar); dksbase.writeFunctions(fnc, Nfnc); dksbase.writeMaps(map, Nmap); //set musrfit constants dksbase.callSetConsts(N0, TAU, BKG); dksbase.callSetConsts(ALPHA, BETA); //compile the program created with the function string dksbase.callCompileProgram(sFnc, mlh); //set autotuning on/off if (autotune) dksbase.setAutoTuningOn(); //check kernel dksbase.checkMuSRKernels(1); //tmp values to store results and tmp values for time steps and start time double result_gpu = 0.0; double result_cpu = 0.0; double dt = 1e-12; double ts = 1e-7; //execute kernel on the GPU and execute the same function on the cpu if (!asym) { dksbase.callLaunchChiSquare(1, data_ptr, data_ptr, Ndata, Npar, Nfnc, Nmap, ts, dt, result_gpu); result_cpu = cpuChiSq(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh); } else { dksbase.callLaunchChiSquare(2, data_ptr, data_ptr, Ndata, Npar, Nfnc, Nmap, ts, dt, result_gpu); result_cpu = cpuChiSqAsym(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh); } //check the results cout << "DKS: " << result_gpu << endl; cout << "CPU: " << result_cpu << endl; //free CPU and GPU memory dksbase.freeMemory(data_ptr, Ndata); dksbase.freeChiSquare(); delete[] data; delete[] par; delete[] fnc; cout << "------------------------------------------------------------" << endl; } } return DKS_SUCCESS; } int main(int argc, char* argv[]) { bool asym = false; bool mlh = false; bool autotune = false; char *api_name = new char[10]; char *device_name = new char[10]; strcpy(api_name, "Cuda"); strcpy(device_name, "-gpu"); for (int i = 1; i < argc; ++i) { if (argv[i] == string("-cuda")) { strcpy(api_name, "Cuda"); strcpy(device_name, "-gpu"); } if (argv[i] == string("-opencl")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-gpu"); } if (argv[i] == string("-mic")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-mic"); } if (argv[i] == string("-cpu")) { strcpy(api_name, "OpenCL"); strcpy(device_name, "-cpu"); } if (argv[i] == string("-mlh")) mlh = true; if (argv[i] == string("-asym")) asym = true; if (argv[i] == string("-autotune")) autotune = true; } int numPlatforms = 3; const char *api[] = {"Cuda","OpenCL","OpenCL","OpenCL","OpenMP"}; const char *device[] = {"-gpu","-gpu","-cpu","-mic","-mic"}; for (int i = 2; i < numPlatforms; i++) { runTest(api[i], device[i], autotune, mlh, asym); } return 0; }