194 lines
4.6 KiB
C++
194 lines
4.6 KiB
C++
#include <iostream>
|
|
#include <cstdlib>
|
|
#include <string>
|
|
#include <cmath>
|
|
#include <omp.h>
|
|
|
|
#include "DKSBaseMuSR.h"
|
|
#include "Utility/DKSTimer.h"
|
|
|
|
void initData(double *data, int N, bool ones = false) {
|
|
for (int i = 0; i < N; i++) {
|
|
if (ones)
|
|
data[i] = 1.0;
|
|
else
|
|
data[i] = (double)rand() / RAND_MAX;
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
void printData(T *data, int N) {
|
|
for (int i = 0; i < N; i++)
|
|
std::cout << data[i] << "\t";
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
|
|
const std::string funct = "cos(t*p[0]) - exp(-t*p[m[0]])";
|
|
//std::string funct = "p[m[0]] * se(t, p[m[1]]) * tf(t, f[m[2]], p[m[3]])";
|
|
//const std::string funct = "p[m[0]] * se(t, p[m[1]])";
|
|
//const std::string funct = "p[m[1]] + p[m[0]]";
|
|
|
|
double fTheory(double time, double *par, double *func, int *map) {
|
|
return cos(time*par[0]) - exp(-time*par[map[0]]);
|
|
}
|
|
|
|
double testFunctionSerial(double *data, double *par, double *func, int *map,
|
|
double N0, double tau, double bkg, double timeStep,
|
|
int startTimeBin, int endTimeBin)
|
|
{
|
|
double time, diff, theo;
|
|
double chisq = 0;
|
|
for (int i = startTimeBin; i < endTimeBin; ++i) {
|
|
time = i * timeStep;
|
|
theo = N0 * exp(-time/tau) * (1.0 + fTheory(time, par, func, map)) + bkg;
|
|
diff = data[i] - theo;
|
|
|
|
chisq += diff * diff / data[i];
|
|
}
|
|
|
|
return chisq;
|
|
}
|
|
|
|
double testFunctionParallel(double *data, double *par, double *func, int *map,
|
|
double N0, double tau, double bkg, double timeStep,
|
|
int startTimeBin, int endTimeBin)
|
|
{
|
|
int i, chunk;
|
|
double time, diff, theo;
|
|
double chisq = 0;
|
|
|
|
chunk = (endTimeBin - startTimeBin) / omp_get_num_procs();
|
|
if (chunk < 10)
|
|
chunk = 10;
|
|
#pragma omp parallel for default(shared) private (i,time,diff) firstprivate(N0,tau,bkg,timeStep) schedule(dynamic,chunk) reduction(+:chisq)
|
|
for (i = startTimeBin; i < endTimeBin; ++i) {
|
|
time = i * timeStep;
|
|
theo = N0 * exp(-time/tau) * (1.0 + fTheory(time, par, func, map)) + bkg;
|
|
diff = data[i] - theo;
|
|
|
|
chisq += diff * diff / data[i];
|
|
}
|
|
|
|
return chisq;
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
int Loop = 100;
|
|
|
|
//init test data on the host
|
|
int Ndata = 8;
|
|
if (argc > 1)
|
|
Ndata = atoi(argv[1]);
|
|
|
|
int api = 1;
|
|
if (argc > 2)
|
|
api = atoi(argv[2]);
|
|
|
|
int Npar = 66;
|
|
int Nfunc = 1;
|
|
int Nmap = 4;
|
|
|
|
double *data = new double[Ndata];
|
|
double *par = new double[Npar];
|
|
double *func = new double[Nfunc];
|
|
int *map = new int[Nmap];
|
|
|
|
initData(data, Ndata);
|
|
initData(par, Npar);
|
|
initData(func, Nfunc);
|
|
map[0] = 1;
|
|
map[1] = 2;
|
|
map[2] = 3;
|
|
map[3] = 4;
|
|
|
|
//create timers
|
|
DKSTimer serialTimer;
|
|
DKSTimer cudaTimer;
|
|
DKSTimer ompTimer;
|
|
DKSTimer gpuOverhead;
|
|
serialTimer.init("Serial timer");
|
|
cudaTimer.init("Cuda timer");
|
|
ompTimer.init("OpenMP timer");
|
|
gpuOverhead.init("Overhead for gpu");
|
|
|
|
|
|
//serial version
|
|
double resultSerial;
|
|
|
|
serialTimer.start();
|
|
for (int i = 0; i < Loop; i++)
|
|
resultSerial = testFunctionSerial(data, par, func, map, 1.0, 1.0, 1.0, 0.1, 0, Ndata);
|
|
serialTimer.stop();
|
|
|
|
//openmp version
|
|
double resultOMP = 0.0;
|
|
|
|
ompTimer.start();
|
|
//for (int i = 0; i < Loop; i++)
|
|
// resultOMP = testFunctionParallel(data, par, func, map, 1.0, 1.0, 1.0, 0.1, 0, Ndata);
|
|
ompTimer.stop();
|
|
|
|
|
|
//create and init dkabase
|
|
gpuOverhead.start();
|
|
|
|
DKSBaseMuSR dksbase;
|
|
if (api == 1)
|
|
dksbase.setAPI("Cuda");
|
|
else
|
|
dksbase.setAPI("OpenCL");
|
|
|
|
dksbase.setDevice("-gpu");
|
|
dksbase.initDevice();
|
|
dksbase.initChiSquare(Ndata, Npar, Nfunc, Nmap);
|
|
|
|
//allocate memory on the device
|
|
int ierr;
|
|
void *data_ptr;
|
|
|
|
data_ptr = dksbase.allocateMemory<double>(Ndata, ierr);
|
|
|
|
dksbase.writeData<double>(data_ptr, data, Ndata);
|
|
dksbase.writeFunctions(func, Nfunc);
|
|
dksbase.writeMaps(map, Nmap);
|
|
|
|
dksbase.callCompileProgram(funct);
|
|
gpuOverhead.stop();
|
|
|
|
double resultCuda;
|
|
|
|
cudaTimer.start();
|
|
for (int i = 0; i < Loop; i++) {
|
|
dksbase.writeParams(par, Npar);
|
|
int ierr = dksbase.callLaunchChiSquare(data_ptr, data_ptr, Ndata, Npar, Nfunc, Nmap,
|
|
0.0, 0.1, 0, resultCuda);
|
|
|
|
if (ierr != 0)
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
cudaTimer.stop();
|
|
|
|
std::cout << std::endl;
|
|
std::cout << "=======================Results=======================" << std::endl;
|
|
std::cout << "Result serial = " << resultSerial << std::endl;
|
|
std::cout << "Result prallel = " << resultOMP << std::endl;
|
|
std::cout << "Result cuda = " << resultCuda << std::endl;
|
|
|
|
std::cout << std::endl;
|
|
std::cout << "=======================Timings=======================" << std::endl;
|
|
serialTimer.print();
|
|
ompTimer.print();
|
|
cudaTimer.print();
|
|
gpuOverhead.print();
|
|
std::cout << std::endl;
|
|
|
|
dksbase.freeMemory<double>(data_ptr, Ndata);
|
|
|
|
return 0;
|
|
|
|
|
|
}
|