musrfit/src/include/PRunNonMusr.h

/***************************************************************************

  PRunNonMusr.h

  Author: Andreas Suter
  e-mail: andreas.suter@psi.ch

***************************************************************************/

/***************************************************************************
 *   Copyright (C) 2007-2025 by Andreas Suter                              *
 *   andreas.suter@psi.ch                                                  *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#ifndef _PRUNNONMUSR_H_
#define _PRUNNONMUSR_H_

#include "PMusr.h"
#include "PRunBase.h"

/**
 * \brief Class for fitting general x-y data sets (non-μSR time series).
 *
 * PRunNonMusr extends the musrfit framework to handle arbitrary x-y data that doesn't
 * originate from μSR experiments. This allows users to leverage musrfit's powerful
 * fitting engine, theory functions, and MINUIT interface for general curve fitting tasks.
 *
 * \section nonmusr_purpose Purpose and Use Cases
 *
 * This fit type is designed for:
 * - <b>General time-series data:</b> Any x vs. y measurements
 * - <b>Non-μSR physics:</b> Other experimental data (e.g., optical, electrical, thermal)
 * - <b>Reusing μSR theory functions:</b> Apply exponentials, oscillations, relaxations to non-μSR data
 * - <b>Custom data analysis:</b> Fit arbitrary functional forms to experimental data
 * - <b>Method validation:</b> Test fitting procedures on simulated or benchmark data
 *
 * \section nonmusr_data Data Structure
 *
 * Unlike μSR fits, non-μSR data is provided as simple x-y pairs:
 * - <b>x-axis:</b> Independent variable (time, temperature, field, voltage, etc.)
 * - <b>y-axis:</b> Dependent variable (signal, counts, current, etc.)
 * - <b>Errors:</b> Optional error bars on y-values
 * - <b>No histograms:</b> Data is already processed, not raw detector counts
 * - <b>No t0:</b> No time-zero concept (not time-differential μSR)
 * - <b>No asymmetry:</b> Direct fitting of y vs. x
 *
 * \section nonmusr_input Data Input Formats
 *
 * Non-μSR data can be loaded from:
 * - <b>ASCII files:</b> Space/tab-separated columns (x, y, err)
 * - <b>DB files:</b> Database format with metadata
 * - <b>MuSonRoot files:</b> Special ROOT structure for non-μSR data
 *
 * ASCII format example:
 * \code
 * # x-axis    y-axis    error
 * 0.0         100.5     3.2
 * 0.1         95.3      3.1
 * 0.2         90.8      3.0
 * ...
 * \endcode
 *
 * \section nonmusr_msr MSR File Configuration
 *
 * Example MSR file RUN block for non-μSR data:
 * \code
 * RUN data/mydata.dat DB PSI MUSR-ROOT   (name beamline)
 *   fittype         12      (NonMusr)
 *   map             1 2     (x-index, y-index in data columns)
 *   xy-data         0 1     (column 0 = x, column 1 = y)
 *   packing         1       (usually 1 for pre-processed data)
 *   fit             0.0 10.0 (fit range in x-axis units)
 * \endcode
 *
 * \section nonmusr_theory Theory Functions
 *
 * All standard musrfit theory functions can be used:
 * - <b>Exponentials:</b> decay, growth, stretched exponentials
 * - <b>Oscillations:</b> cosine, sine, damped oscillations
 * - <b>Relaxations:</b> static/dynamic Gaussian/Lorentzian Kubo-Toyabe
 * - <b>Polynomials:</b> backgrounds, baselines
 * - <b>User functions:</b> Custom C++ functions
 *
 * Theory is evaluated at the x-values from the data file.
 *
 * \section nonmusr_differences Key Differences from μSR Fits
 *
 * <table>
 * <tr><th>Feature</th><th>μSR Fits</th><th>Non-μSR Fits</th></tr>
 * <tr><td>Data type</td><td>Raw histograms</td><td>Processed x-y pairs</td></tr>
 * <tr><td>Time zero (t0)</td><td>Required</td><td>Not applicable</td></tr>
 * <tr><td>Background</td><td>Estimated/subtracted</td><td>Included in y-data</td></tr>
 * <tr><td>Asymmetry</td><td>Calculated</td><td>Not applicable</td></tr>
 * <tr><td>Packing</td><td>Rebin histograms</td><td>Average x-y points</td></tr>
 * <tr><td>Fit range</td><td>Time (μs)</td><td>x-axis units (arbitrary)</td></tr>
 * <tr><td>ADDRUN</td><td>Supported</td><td>NOT supported</td></tr>
 * </table>
 *
 * \section nonmusr_limitations Limitations
 *
 * - <b>No ADDRUN:</b> Cannot combine multiple non-μSR data sets
 * - <b>No maximum likelihood:</b> Only χ² fitting supported (not implemented for non-μSR)
 * - <b>No expected χ²:</b> Not implemented (unclear definition for general data)
 * - <b>Simple error handling:</b> Assumes independent Gaussian errors
 *
 * \section nonmusr_workflow Analysis Workflow
 *
 * 1. <b>Prepare Data:</b> Format as x-y(-error) columns in ASCII/DB file
 * 2. <b>Create MSR File:</b> Specify file, fit type (12), xy-data indices, fit range
 * 3. <b>Define Theory:</b> Use standard THEORY block functions
 * 4. <b>Run Fit:</b> musrfit performs χ² minimization via MINUIT
 * 5. <b>Analyze Results:</b> Standard parameter extraction, error analysis, plotting
 *
 * \see PRunSingleHisto for standard μSR single histogram fits
 * \see PRunBase for base class interface and common functionality
 */
class PRunNonMusr : public PRunBase
{
  public:
    /**
     * \brief Default constructor creating an empty, invalid non-μSR run object.
     *
     * Initializes all member variables to default values:
     * - Bin counts set to 0
     * - Packing set to 1 (no packing for x-y data)
     * - Handle tag set to kEmpty
     * - Raw data pointer set to nullptr
     *
     * This constructor is needed for creating vectors of PRunNonMusr objects.
     * The resulting object cannot be used until properly initialized via the main constructor.
     */
    PRunNonMusr();

    /**
     * \brief Main constructor initializing a non-μSR run from MSR file and data.
     *
     * Performs initialization for general x-y data fitting:
     *
     * 1. <b>Base Class Initialization:</b>
     *    - Calls PRunBase constructor with MSR/data handlers
     *    - Initializes theory engine
     *
     * 2. <b>Raw Data Loading:</b>
     *    - Retrieves raw data using run name from MSR file
     *    - Validates data was successfully loaded
     *    - Marks run invalid if data cannot be loaded
     *
     * 3. <b>Data Preparation:</b>
     *    - Calls PrepareData() to process x-y data
     *    - Extracts x-y columns based on map/xy-data specification
     *    - Applies packing if specified
     *    - Sets up fit range in x-axis units
     *
     * The object is marked as invalid (fValid=false) if:
     * - Raw data file cannot be loaded
     * - PrepareData() fails (invalid column indices, missing data, etc.)
     *
     * \param msrInfo Pointer to MSR file handler (must remain valid)
     * \param rawData Pointer to raw data handler for loading data files
     * \param runNo Run number (0-based index in MSR file RUN blocks)
     * \param tag Operation mode: kFit (fitting), kView (display/plotting)
     * \param theoAsData Theory mode: true = at data points, false = high-resolution
     *
     * \warning Check IsValid() after construction before using for fitting
     *
     * \see PrepareData() for data processing details
     */
    PRunNonMusr(PMsrHandler *msrInfo, PRunDataHandler *rawData, UInt_t runNo, EPMusrHandleTag tag, Bool_t theoAsData);

    /**
     * \brief Virtual destructor (no cleanup needed for this class).
     *
     * Raw data pointer (fRawRunData) is not owned by this class and is not deleted.
     * Base class destructor handles cleanup of theory objects and other shared resources.
     */
    virtual ~PRunNonMusr();

    /**
     * \brief Calculates χ² between non-μSR data and theory.
     *
     * Computes the chi-squared statistic for x-y data:
     * \f[ \chi^2 = \sum_{i={\rm start}}^{\rm end} \frac{(y_i^{\rm data} - y_i^{\rm theory})^2}{\sigma_i^2} \f]
     *
     * where:
     * - y_i^data is the measured y-value at x_i
     * - y_i^theory is the theory function evaluated at x_i
     * - σ_i is the error on y_i (from data file or assumed)
     * - Sum runs over all points within fit range (fFitStartTime ≤ x ≤ fFitEndTime)
     *
     * Algorithm:
     * 1. Evaluate FUNCTIONS block for user-defined functions
     * 2. Loop over data points from fStartTimeBin to fEndTimeBin
     * 3. For each point:
     *    - Get x-value from fData.GetX()
     *    - Evaluate theory at that x-value: fTheory->Func(x, par, fFuncValues)
     *    - Compute weighted squared difference
     * 4. Sum contributions to get total χ²
     *
     * Unlike μSR fits, this operates on x-y data directly (not histograms).
     * The x-axis can represent any independent variable (time, temperature, field, etc.),
     * not just time in microseconds.
     *
     * \param par Parameter vector from MINUIT with current parameter values
     * \return Chi-squared value (sum of weighted squared residuals)
     *
     * \note No OpenMP parallelization in this implementation (typically smaller data sets)
     *
     * \see CalcMaxLikelihood() - NOT IMPLEMENTED for non-μSR
     */
    virtual Double_t CalcChiSquare(const std::vector<Double_t>& par);

    /**
     * \brief Calculates expected χ² (NOT IMPLEMENTED for non-μSR).
     *
     * This method is not implemented for general x-y data because the concept
     * of "expected χ²" depends on the underlying statistical distribution,
     * which is not well-defined for arbitrary non-μSR data (unlike Poisson-distributed
     * histogram counts in μSR).
     *
     * Calling this method prints a warning message and returns 0.0.
     *
     * \param par Parameter vector from MINUIT
     * \return Always returns 0.0
     *
     * \note Implementation would require assumptions about data statistics
     */
    virtual Double_t CalcChiSquareExpected(const std::vector<Double_t>& par);

    /**
     * \brief Calculates maximum likelihood (NOT IMPLEMENTED for non-μSR).
     *
     * Maximum likelihood fitting is not implemented for general x-y data because:
     * - The appropriate likelihood function depends on the data's statistical distribution
     * - Unlike μSR histograms (Poisson), non-μSR data can have various error models
     * - Gaussian errors → equivalent to χ² (no advantage)
     * - Other distributions require user-specified likelihood functions
     *
     * Calling this method prints a warning message and returns 1.0.
     *
     * \param par Parameter vector from MINUIT
     * \return Always returns 1.0
     *
     * \note Only χ² fitting is supported for non-μSR data
     */
    virtual Double_t CalcMaxLikelihood(const std::vector<Double_t>& par);

    /**
     * \brief Evaluates theory function (empty implementation for non-μSR).
     *
     * For non-μSR data, theory calculation is performed directly in CalcChiSquare()
     * rather than pre-calculating and storing theory values. This is because:
     * - Theory is evaluated at arbitrary x-values (from data file)
     * - No high-resolution grid needed (data already defines x-points)
     * - Simpler to evaluate on-demand during χ² calculation
     *
     * This method exists to satisfy the PRunBase interface but does nothing.
     *
     * \see CalcChiSquare() for on-demand theory evaluation
     */
    virtual void CalcTheory();

    /**
     * \brief Returns the number of x-y points within the fit range.
     *
     * Counts data points where fFitStartTime ≤ x ≤ fFitEndTime.
     * This count is used for:
     * - Degrees of freedom: ν = N_points - N_params
     * - Reduced χ²: χ²_red = χ² / ν
     * - Statistical quality assessment
     *
     * The method loops through all x-values in fData.GetX() and counts
     * those within the specified fit range.
     *
     * \return Number of data points within fit range
     *
     * \see GetNoOfFitBins() implementation for counting algorithm
     */
    virtual UInt_t GetNoOfFitBins();

    /**
     * \brief Sets fit range in bin units (NOT SUPPORTED for non-μSR).
     *
     * This method is not meaningful for non-μSR data because:
     * - Fit range is specified in x-axis units (not bins)
     * - No "first good bin" or "last good bin" concept (no t0)
     * - x-values can be non-uniform (arbitrary spacing)
     *
     * This empty implementation exists to satisfy the PRunBase interface.
     * Use SetFitRange(PDoublePairVector) in the base class instead.
     *
     * \param fitRange Fit range string (ignored)
     *
     * \see PRunBase::SetFitRange() for proper fit range specification
     */
    virtual void SetFitRangeBin(const TString fitRange) {}

    /**
     * \brief Returns the x-axis column index from MSR file specification.
     *
     * Extracts the x-data column index from:
     * - "xy-data" entry in RUN block (preferred)
     * - "map" entry first value (fallback)
     * - Default: 0 (first column)
     *
     * The index specifies which column in the data file contains the
     * independent variable (x-axis values).
     *
     * \return Column index for x-axis data (0-based)
     *
     * \see GetYIndex() for y-axis column
     */
    virtual UInt_t GetXIndex();

    /**
     * \brief Returns the y-axis column index from MSR file specification.
     *
     * Extracts the y-data column index from:
     * - "xy-data" entry in RUN block (preferred)
     * - "map" entry second value (fallback)
     * - Default: 1 (second column)
     *
     * The index specifies which column in the data file contains the
     * dependent variable (y-axis values to be fitted).
     *
     * \return Column index for y-axis data (0-based)
     *
     * \see GetXIndex() for x-axis column
     */
    virtual UInt_t GetYIndex();

  protected:
    /**
     * \brief Main data preparation orchestrator for non-μSR data.
     *
     * Coordinates the data loading and preprocessing pipeline:
     * 1. Checks for ADDRUN (warns if present - not supported for non-μSR)
     * 2. Retrieves packing parameter from RUN or GLOBAL block
     * 3. Retrieves fit range from RUN or GLOBAL block
     * 4. Dispatches to PrepareFitData() or PrepareViewData() based on tag
     *
     * Key validation:
     * - ADDRUN presence → warning (ignored)
     * - Missing packing → error (required)
     * - Missing fit range → acceptable (will use all data)
     *
     * \return True if data preparation succeeds, false on error
     *
     * \see PrepareFitData() for fitting mode processing
     * \see PrepareViewData() for viewing mode processing
     */
    virtual Bool_t PrepareData();

    /**
     * \brief Prepares x-y data for fitting.
     *
     * Performs data processing for curve fitting:
     * 1. Determines x-y column indices via GetXIndex() and GetYIndex()
     * 2. Loads x-y(-error) data from file
     * 3. Applies packing if specified:
     *    - packing = 1: Use data as-is (no averaging)
     *    - packing > 1: Average consecutive points in groups of packing
     * 4. Packing algorithm for x-values: midpoint of packed range
     * 5. Packing algorithm for y-values: sum of packed points
     * 6. Packing algorithm for errors: sqrt(sum of squared errors)
     * 7. Counts bins within fit range (fFitStartTime ≤ x ≤ fFitEndTime)
     * 8. Determines fStartTimeBin and fEndTimeBin indices
     *
     * Packing example (packing=3):
     * - Raw: x=[0, 1, 2, 3, 4, 5], y=[10, 12, 11, 15, 14, 13]
     * - Packed: x=[1, 4], y=[33, 42] (midpoint, sum)
     *
     * \return True on success, false if data loading fails
     *
     * \see PrepareViewData() for viewing mode (may differ in processing)
     */
    virtual Bool_t PrepareFitData();

    /**
     * \brief Prepares x-y data for viewing/plotting.
     *
     * Similar to PrepareFitData() but optimized for visualization:
     * - May use different packing for display
     * - May extend beyond fit range for context
     * - Less stringent validation
     *
     * The exact implementation mirrors PrepareFitData() for non-μSR,
     * but the separation allows future customization for viewing.
     *
     * \return True on success, false if data loading fails
     *
     * \see PrepareFitData() for fitting mode processing
     */
    virtual Bool_t PrepareViewData();

  private:
    /**
     * \brief Pointer to raw run data handler (not owned).
     *
     * Provides access to loaded x-y data from file. Retrieved via
     * fRawData->GetRunData(*runName) during construction.
     *
     * Contains the fDataNonMusr structure with:
     * - GetData(): Vector of vectors for data columns (x, y, etc.)
     * - GetErrData(): Vector of vectors for error columns
     *
     * \warning Not owned by this class - do not delete
     */
    PRawRunData *fRawRunData;

    UInt_t fNoOfFitBins;    ///< Number of x-y points within fit range (fFitStartTime ≤ x ≤ fFitEndTime)

    /**
     * \brief Data point averaging/grouping factor.
     *
     * Number of consecutive raw data points to average together:
     * - 1: No packing (use all points as-is)
     * - > 1: Average groups of packing points
     *
     * Packing reduces the number of data points, improving:
     * - Signal-to-noise ratio (averaging reduces noise)
     * - Fit speed (fewer points to evaluate)
     *
     * But decreases:
     * - Resolution (fewer x-values)
     *
     * Source priority:
     * 1. RUN block "packing" entry
     * 2. GLOBAL block "packing" entry
     * 3. ERROR if neither specified
     */
    Int_t  fPacking;

    /**
     * \brief Theory calculation mode flag.
     *
     * Controls theory grid resolution (currently not used for non-μSR):
     * - true: Theory at data x-values only (efficient)
     * - false: Theory on high-resolution grid (smooth plotting)
     *
     * For non-μSR, theory is always evaluated at data x-values
     * during CalcChiSquare(), so this flag has minimal effect.
     */
    Bool_t fTheoAsData;

    /**
     * \brief Index of first data point in fit range.
     *
     * Points to the first x-y pair where x ≥ fFitStartTime.
     * Used as loop start in CalcChiSquare().
     */
    Int_t fStartTimeBin;

    /**
     * \brief Index of last data point in fit range (inclusive).
     *
     * Points to the last x-y pair where x ≤ fFitEndTime.
     * Used as loop end in CalcChiSquare() (inclusive: i <= fEndTimeBin).
     *
     * \note Unlike μSR fits, this is INCLUSIVE (≤), not exclusive (<)
     */
    Int_t fEndTimeBin;
};

#endif // _PRUNNONMUSR_H_