first work to add GPU support via DKS for Fourier.

This commit is contained in:
2015-04-07 16:44:20 +02:00
parent e4f11aca8c
commit 75578f1977
7 changed files with 545 additions and 67 deletions

View File

@ -37,8 +37,6 @@ using namespace std;
#include "TF1.h"
#include "TAxis.h"
//#include "TFile.h"
#include "PMusr.h"
#include "PFourier.h"
@ -59,7 +57,7 @@ using namespace std;
* \param dcCorrected if true, removed DC offset from signal before Fourier transformation, otherwise not
* \param zeroPaddingPower if set to values > 0, there will be zero padding up to 2^zeroPaddingPower
*/
PFourier::PFourier(TH1F *data, Int_t unitTag, Double_t startTime, Double_t endTime, Bool_t dcCorrected, UInt_t zeroPaddingPower) :
PFourier::PFourier(TH1F *data, Int_t unitTag, Double_t startTime, Double_t endTime, Bool_t dcCorrected, UInt_t zeroPaddingPower, Bool_t useFFTW) :
fData(data), fUnitTag(unitTag), fStartTime(startTime), fEndTime(endTime),
fDCCorrected(dcCorrected), fZeroPaddingPower(zeroPaddingPower)
{
@ -71,8 +69,15 @@ PFourier::PFourier(TH1F *data, Int_t unitTag, Double_t startTime, Double_t endTi
}
fValid = true;
fUseFFTW = true;
fIn = 0;
fOut = 0;
#ifdef HAVE_DKS
fInDKS = 0;
fOutDKS = 0;
#endif
SetUseFFTW(useFFTW);
fApodization = F_APODIZATION_NONE;
@ -81,7 +86,7 @@ PFourier::PFourier(TH1F *data, Int_t unitTag, Double_t startTime, Double_t endTi
// if endTime == 0 set it to the last time slot
if (fEndTime == 0.0) {
Int_t last = fData->GetNbinsX()-1;
Int_t last = fData->GetNbinsX();
fEndTime = fData->GetBinCenter(last);
}
@ -128,21 +133,67 @@ PFourier::PFourier(TH1F *data, Int_t unitTag, Double_t startTime, Double_t endTi
}
// allocate necessary memory
fIn = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*fNoOfBins);
fOut = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*fNoOfBins);
// check if memory allocation has been successful
if ((fIn == 0) || (fOut == 0)) {
fValid = false;
return;
if (fUseFFTW) {
fIn = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*fNoOfBins);
fOut = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*fNoOfBins);
} else { // try DKS
#ifdef HAVE_DKS
fInDKS = new double[fNoOfBins];
unsigned int size=fNoOfBins/2;
if (fNoOfBins % 2 == 1)
size++;
fOutDKS = new complex<double>[size];
#endif
}
// get the FFTW3 plan (see FFTW3 manual)
fFFTwPlan = fftw_plan_dft_1d(fNoOfBins, fIn, fOut, FFTW_FORWARD, FFTW_ESTIMATE);
// check if a valid plan has been generated
if (!fFFTwPlan) {
// check if memory allocation has been successful
if (fUseFFTW) {
if ((fIn == 0) || (fOut == 0)) {
fValid = false;
return;
}
} else { // try DKS
#ifdef HAVE_DKS
if ((fInDKS == 0) || (fOutDKS == 0)) {
fValid = false;
return;
}
#else
fValid = false;
return;
#endif
}
if (fUseFFTW) {
// get the FFTW3 plan (see FFTW3 manual)
fFFTwPlan = fftw_plan_dft_1d(fNoOfBins, fIn, fOut, FFTW_FORWARD, FFTW_ESTIMATE);
// check if a valid plan has been generated
if (!fFFTwPlan) {
fValid = false;
}
} else { // try DKS
#ifdef HAVE_DKS
// init DKSBase
fDks.setAPI("Cuda", 4);
fDks.setDevice("-gpu", 4);
fDks.initDevice();
int dimsize[3] = {fNoOfBins, 1, 1};
fDks.setupFFT(1, dimsize);
// allocate memory on accelerator
int ierr;
unsigned int size=fNoOfBins/2;
if (fNoOfBins % 2 == 1)
size++;
fReal_ptr = 0;
fComp_ptr = 0;
fReal_ptr = fDks.allocateMemory<double>(fNoOfBins, ierr);
fComp_ptr = fDks.allocateMemory< complex<double> >(size, ierr);
if ((fReal_ptr==0) || (fComp_ptr==0))
fValid = false;
#else
fValid = false;
#endif
}
}
@ -154,12 +205,27 @@ PFourier::PFourier(TH1F *data, Int_t unitTag, Double_t startTime, Double_t endTi
*/
PFourier::~PFourier()
{
if (fFFTwPlan)
fftw_destroy_plan(fFFTwPlan);
if (fIn)
fftw_free(fIn);
if (fOut)
fftw_free(fOut);
if (fUseFFTW) {
if (fFFTwPlan)
fftw_destroy_plan(fFFTwPlan);
if (fIn)
fftw_free(fIn);
if (fOut)
fftw_free(fOut);
} else {
#ifdef HAVE_DKS
// free accelerator memory
fDks.freeMemory<double>(fReal_ptr, (int)fNoOfBins);
int size = fNoOfBins/2;
if (fNoOfBins % 2 == 1)
size++;
fDks.freeMemory< complex<double> >(fComp_ptr, size);
if (fIn)
delete [] fInDKS;
if (fOut)
delete [] fOutDKS;
#endif
}
}
//--------------------------------------------------------------------------
@ -178,7 +244,28 @@ void PFourier::Transform(UInt_t apodizationTag)
PrepareFFTwInputData(apodizationTag);
fftw_execute(fFFTwPlan);
if (fUseFFTW) {
fftw_execute(fFFTwPlan);
} else {
#ifdef HAVE_DKS
int dimsize[3] = {fNoOfBins, 1, 1};
int status=0, size=fNoOfBins/2;
if (fNoOfBins % 2 == 1)
size++;
// write data to the accelerator
status=fDks.writeData<double>(fReal_ptr, fInDKS, fNoOfBins);
cout << "debug> status=" << status << ": write" << endl;
// execute the FFT
status = fDks.callR2CFFT(fReal_ptr, fComp_ptr, 1, dimsize);
cout << "debug> status=" << status << ": FFT" << endl;
// read data from accelerator
status = fDks.readData< complex<double> >(fComp_ptr, fOutDKS, size);
cout << "debug> status=" << status << ": read" << endl;
#else
fValid = false;
return;
#endif
}
// correct the phase for tstart != 0.0
// find the first bin >= fStartTime
@ -191,12 +278,42 @@ void PFourier::Transform(UInt_t apodizationTag)
}
Double_t phase, re, im;
for (UInt_t i=0; i<fNoOfBins; i++) {
phase = 2.0*PI/(fTimeResolution*fNoOfBins) * i * shiftTime;
re = fOut[i][0] * cos(phase) + fOut[i][1] * sin(phase);
im = -fOut[i][0] * sin(phase) + fOut[i][1] * cos(phase);
fOut[i][0] = re;
fOut[i][1] = im;
if (fUseFFTW) {
for (UInt_t i=0; i<fNoOfBins; i++) {
phase = 2.0*PI/(fTimeResolution*fNoOfBins) * i * shiftTime;
re = fOut[i][0] * cos(phase) + fOut[i][1] * sin(phase);
im = -fOut[i][0] * sin(phase) + fOut[i][1] * cos(phase);
fOut[i][0] = re;
fOut[i][1] = im;
}
} else { // try DKS
for (UInt_t i=0; i<fNoOfBins; i++) {
phase = 2.0*PI/(fTimeResolution*fNoOfBins) * i * shiftTime;
#ifdef HAVE_DKS
re = fOutDKS[i].real() * cos(phase) + fOutDKS[i].imag() * sin(phase);
im = -fOutDKS[i].real() * sin(phase) + fOutDKS[i].imag() * cos(phase);
fOutDKS[i].real() = re;
fOutDKS[i].imag() = im;
#endif
}
}
}
//--------------------------------------------------------------------------
// SetUseFFTW
//--------------------------------------------------------------------------
/**
* <p>Set fUseFFTW flag. Checks if DKS support is in place before setting the flag.
*/
void PFourier::SetUseFFTW(const Bool_t flag)
{
if (flag == false) {
#ifndef HAVE_DKS
fUseFFTW = true;
cerr << endl << "PFouier::SetUseFFTW: **ERROR** DKS not in use, will fall back to FFTW" << endl << endl;
#else
fUseFFTW = flag;
#endif
}
}
@ -251,9 +368,20 @@ TH1F* PFourier::GetRealFourier(const Double_t scale)
}
// fill realFourier vector
for (UInt_t i=0; i<noOfFourierBins; i++) {
realFourier->SetBinContent(i+1, scale*fOut[i][0]);
realFourier->SetBinError(i+1, 0.0);
if (fUseFFTW) {
for (UInt_t i=0; i<noOfFourierBins; i++) {
realFourier->SetBinContent(i+1, scale*fOut[i][0]);
realFourier->SetBinError(i+1, 0.0);
}
} else {
for (UInt_t i=0; i<noOfFourierBins; i++) {
#ifdef HAVE_DKS
realFourier->SetBinContent(i+1, scale*fOutDKS[i].real());
#else
realFourier->SetBinContent(i+1, PMUSR_UNDEFINED);
#endif
realFourier->SetBinError(i+1, 0.0);
}
}
return realFourier;
}
@ -292,11 +420,21 @@ TH1F* PFourier::GetImaginaryFourier(const Double_t scale)
}
// fill imaginaryFourier vector
for (UInt_t i=0; i<noOfFourierBins; i++) {
imaginaryFourier->SetBinContent(i+1, scale*fOut[i][1]);
imaginaryFourier->SetBinError(i+1, 0.0);
if (fUseFFTW) {
for (UInt_t i=0; i<noOfFourierBins; i++) {
imaginaryFourier->SetBinContent(i+1, scale*fOut[i][1]);
imaginaryFourier->SetBinError(i+1, 0.0);
}
} else {
for (UInt_t i=0; i<noOfFourierBins; i++) {
#ifdef HAVE_DKS
imaginaryFourier->SetBinContent(i+1, scale*fOutDKS[i].imag());
#else
imaginaryFourier->SetBinContent(i+1, PMUSR_UNDEFINED);
#endif
imaginaryFourier->SetBinError(i+1, 0.0);
}
}
return imaginaryFourier;
}
@ -334,11 +472,21 @@ TH1F* PFourier::GetPowerFourier(const Double_t scale)
}
// fill powerFourier vector
for (UInt_t i=0; i<noOfFourierBins; i++) {
pwrFourier->SetBinContent(i+1, scale*sqrt(fOut[i][0]*fOut[i][0]+fOut[i][1]*fOut[i][1]));
pwrFourier->SetBinError(i+1, 0.0);
if (fUseFFTW) {
for (UInt_t i=0; i<noOfFourierBins; i++) {
pwrFourier->SetBinContent(i+1, scale*sqrt(fOut[i][0]*fOut[i][0]+fOut[i][1]*fOut[i][1]));
pwrFourier->SetBinError(i+1, 0.0);
}
} else {
for (UInt_t i=0; i<noOfFourierBins; i++) {
#ifdef HAVE_DKS
pwrFourier->SetBinContent(i+1, scale*sqrt(fOutDKS[i].real()*fOutDKS[i].real()+fOutDKS[i].imag()*fOutDKS[i].imag()));
#else
pwrFourier->SetBinContent(i+1, PMUSR_UNDEFINED);
#endif
pwrFourier->SetBinError(i+1, 0.0);
}
}
return pwrFourier;
}
@ -377,18 +525,31 @@ TH1F* PFourier::GetPhaseFourier(const Double_t scale)
// fill phaseFourier vector
Double_t value = 0.0;
Double_t re, im;
for (UInt_t i=0; i<noOfFourierBins; i++) {
if (fUseFFTW) {
re = fOut[i][0];
im = fOut[i][1];
} else {
#ifdef HAVE_DKS
re = fOutDKS[i].real();
im = fOutDKS[i].imag();
#else
re = 1.0;
im = 0.0;
#endif
}
// calculate the phase
if (fOut[i][0] == 0) {
if (fOut[i][1] >= 0.0)
if (re == 0) {
if (im >= 0.0)
value = PI_HALF;
else
value = -PI_HALF;
} else {
value = atan(fOut[i][1]/fOut[i][0]);
value = atan(re/im);
// check sector
if (fOut[i][0] < 0.0) {
if (fOut[i][1] > 0.0)
if (re < 0.0) {
if (im > 0.0)
value = PI + value;
else
value = PI - value;
@ -437,13 +598,26 @@ void PFourier::PrepareFFTwInputData(UInt_t apodizationTag)
}
// 2nd fill fIn
for (UInt_t i=0; i<fNoOfData; i++) {
fIn[i][0] = fData->GetBinContent(i+start) - mean;
fIn[i][1] = 0.0;
}
for (UInt_t i=fNoOfData; i<fNoOfBins; i++) {
fIn[i][0] = 0.0;
fIn[i][1] = 0.0;
if (fUseFFTW) {
for (UInt_t i=0; i<fNoOfData; i++) {
fIn[i][0] = fData->GetBinContent(i+start) - mean;
fIn[i][1] = 0.0;
}
for (UInt_t i=fNoOfData; i<fNoOfBins; i++) {
fIn[i][0] = 0.0;
fIn[i][1] = 0.0;
}
} else {
for (UInt_t i=0; i<fNoOfData; i++) {
#ifdef HAVE_DKS
fInDKS[i] = fData->GetBinContent(i+start) - mean;
#endif
}
for (UInt_t i=fNoOfData; i<fNoOfBins; i++) {
#ifdef HAVE_DKS
fInDKS[i] = 0.0;
#endif
}
}
// 3rd apodize data (if wished)
@ -503,6 +677,12 @@ void PFourier::ApodizeData(Int_t apodizationTag) {
for (UInt_t j=1; j<5; j++) {
q += c[j] * pow((Double_t)i/(Double_t)fNoOfData, 2.0*(Double_t)j);
}
fIn[i][0] *= q;
if (fUseFFTW) {
fIn[i][0] *= q;
} else {
#ifdef HAVE_DKS
fInDKS[i] *= q;
#endif
}
}
}