Improve the Doxygen documentation of PRunDataHandler.h

This commit is contained in:
2025-11-15 08:16:15 +01:00
parent 38decd8b58
commit 26b87e0659

View File

@@ -37,198 +37,391 @@
//-------------------------------------------------------------
/**
* \defgroup FileFormats Data File Format Identifiers
* \brief Constants identifying μSR data file formats for the any2many converter.
*
* These constants identify the μSR data file formats supported by musrfit for
* reading and conversion. They are used to specify input and output formats when
* converting between different μSR data file formats. Each format has specific
* characteristics:
*
* - <b>ROOT-based formats:</b> Binary, fast, support complex data structures
* - <b>NeXus (HDF5):</b> Self-describing, hierarchical, cross-platform standard
* - <b>Binary formats:</b> Compact, fast, but platform/facility specific
* - <b>ASCII formats:</b> Human-readable, portable, but larger file size
* @{
*/
/// Undefined or unknown format (used for error indication)
#define A2M_UNDEFINED 0
/// Generic ROOT file (minimal structure, basic histograms only)
#define A2M_ROOT 1
/// MusrRoot format (PSI-specific ROOT with complete metadata and run info)
#define A2M_MUSR_ROOT 2
/// MusrRoot with directory structure (organized by run number, year, etc.)
#define A2M_MUSR_ROOT_DIR 3
/// PSI binary format (legacy PSI format, platform-specific byte ordering)
#define A2M_PSIBIN 4
/// PSI MDU ASCII format (ASCII variant of PSI format with metadata)
#define A2M_PSIMDU 5
/// TRIUMF MUD (Muon Data) format (TRIUMF's standard binary format)
#define A2M_MUD 6
/// NeXus HDF5 format (ISIS, JPARC standard - self-describing, hierarchical)
#define A2M_NEXUS 7
/// WKM format (older PSI format, deprecated but still supported for legacy data)
#define A2M_WKM 8
/// Generic ASCII format (two-column time-value data for non-μSR applications)
#define A2M_ASCII 9
/** @} */
//-------------------------------------------------------------
/**
* \brief Raw data file reader and format converter for μSR data.
*
* PRunDataHandler is the comprehensive I/O layer for musrfit, serving as the gateway
* between raw experimental data files and the fitting framework. It provides unified
* access to data from different μSR facilities worldwide, handling the complexity of
* various file formats transparently.
*
* \section responsibilities Core Responsibilities
* - <b>Data Loading:</b> Reading raw histogram data from multiple file formats
* - <b>Format Conversion:</b> Converting between different μSR data formats (any2many utility)
* - <b>Path Management:</b> Searching multiple directories for data files
* - <b>Metadata Extraction:</b> Reading experimental parameters (field, temperature, beam energy, time)
* - <b>Template Expansion:</b> Resolving run name templates (e.g., "%r" → run number)
* - <b>Run Management:</b> Managing collections of runs for global/simultaneous fits
* - <b>Data Validation:</b> Verifying data integrity and consistency
*
* \section formats Supported File Formats
*
* <b>ROOT-based formats:</b>
* - <b>MusrRoot:</b> PSI standard format with complete metadata, run info, and histograms
* - <b>Generic ROOT:</b> Basic ROOT files with minimal structure
*
* <b>HDF5-based formats:</b>
* - <b>NeXus:</b> International standard (ISIS, JPARC) - self-describing, hierarchical
*
* <b>Binary formats:</b>
* - <b>MUD:</b> TRIUMF Muon Data format (binary, platform-specific)
* - <b>PSI-BIN:</b> Legacy PSI binary format
* - <b>WKM:</b> Older PSI format (deprecated, but still supported for legacy data)
*
* <b>Text formats:</b>
* - <b>MDU ASCII:</b> PSI ASCII format with metadata
* - <b>ASCII:</b> Generic two-column data (time, counts)
* - <b>DB/DAT:</b> Database and data table formats for x-y data
*
* \section features Key Features
*
* <b>Intelligent file searching:</b>
* - Multi-path search (environment variables, default paths)
* - Run name template expansion: %r (run), %y (year), %p (path)
* - Automatic format detection based on file extension
* - Year-based subdirectory organization (e.g., 2024/run2425.root)
*
* <b>Data management:</b>
* - Caching: Avoid re-reading identical runs in global fits
* - Validation: Check file existence before attempting reads
* - Error handling: Graceful failure with informative messages
* - Batch processing: Handle multiple runs efficiently
*
* <b>Metadata extraction:</b>
* - Magnetic field (from data file or MSR file)
* - Sample temperature
* - Beam energy
* - Run date/time (ISO 8601 format)
* - Detector configuration
* - Time resolution
* - Time-zero (t0) estimates
*
* \section usage Usage Examples
*
* <b>Reading data for fitting:</b>
* \code
* // Constructor reads MSR file and searches default paths
* PRunDataHandler handler(msrInfo);
* handler.ReadData(); // Loads all runs from MSR RUN blocks
*
* // Access individual run data
* PRawRunData *run0 = handler.GetRunData(0);
* PDoubleVector *histogram = run0->GetDataBin(0); // First detector
* \endcode
*
* <b>Reading data with custom search paths:</b>
* \code
* PStringVector paths;
* paths.push_back("/data/musr/2024");
* paths.push_back("/archive/musr");
*
* PRunDataHandler handler(msrInfo, paths);
* handler.ReadData();
* \endcode
*
* <b>Converting data formats (any2many):</b>
* \code
* PAny2ManyInfo convInfo;
* convInfo.inputFormat = "nexus";
* convInfo.outputFormat = "root";
* // ... configure convInfo ...
*
* PRunDataHandler handler(&convInfo);
* handler.ReadData(); // Read from NeXus
* handler.ConvertData(); // Convert to ROOT
* handler.WriteData(); // Write ROOT file
* \endcode
*
* \section templates Run Name Templates
*
* Template placeholders in run names are expanded automatically:
* - <b>%r</b> → Run number (e.g., 2425)
* - <b>%y</b> → 4-digit year (e.g., 2024)
* - <b>%Y</b> → 2-digit year (e.g., 24)
* - <b>%p</b> → Data path from search list
*
* Example: "run%r.root" with run 2425 → "run2425.root"
*
* \section thread_safety Thread Safety
* PRunDataHandler objects are NOT thread-safe. Each thread should create
* its own instance or use external synchronization.
*
* \see PRawRunData for the raw data structure
* \see PMsrHandler for MSR file parsing
* \see PAny2ManyInfo for format conversion configuration
*/
class PRunDataHandler
{
public:
/**
* \brief Default constructor creating an uninitialized handler.
*
* Creates an empty handler with no data loaded. Not typically used directly;
* prefer constructors that specify data source (MSR file or conversion config).
*/
PRunDataHandler();
/**
* \brief Constructor for reading a single file with explicit format.
*
* Reads one data file in the specified format. Use this when the format
* cannot be auto-detected or when you want to override detection.
*
* \param fileName Path to data file (absolute or relative)
* \param fileFormat Format identifier ("root", "nexus", "mud", "wkm", "psibin", "ascii")
*
* Example: PRunDataHandler("run2425.root", "root")
*/
PRunDataHandler(TString fileName, const TString fileFormat);
/**
* \brief Constructor for single file with search paths.
*
* Searches multiple directories for the specified file. Useful when data
* files may be in various locations (current dir, archive, network mount).
*
* \param fileName File name or template (e.g., "run%r.root")
* \param fileFormat Format identifier string
* \param dataPath Vector of directory paths to search sequentially
*
* The handler searches paths in order and uses the first match found.
*/
PRunDataHandler(TString fileName, const TString fileFormat, const PStringVector dataPath);
/**
* \brief Constructor for reading single file into pre-allocated structure.
*
* Reads a data file directly into a provided PRawRunData object. Useful
* for reading auxiliary data or when you want to manage memory explicitly.
*
* \param fileName Name of file to read
* \param fileFormat Format identifier
* \param dataPath Single directory path to search (a TString, not a vector)
* \param runData Reference to PRawRunData object to fill with data
*/
PRunDataHandler(TString fileName, const TString fileFormat, const TString dataPath, PRawRunData &runData);
/**
* \brief Constructor for format conversion (any2many utility).
*
* Initializes handler for converting between data formats. The any2ManyInfo
* structure specifies input/output formats, file lists, and conversion options.
*
* \param any2ManyInfo Pointer to conversion configuration structure
*
* Usage: Create PAny2ManyInfo, set input/output formats and file lists,
* then call ReadData() → ConvertData() → WriteData()
*/
PRunDataHandler(PAny2ManyInfo *any2ManyInfo);
/**
* \brief Constructor for format conversion with custom search paths.
*
* Like the any2many constructor, but allows specifying additional directories
* to search for input files.
*
* \param any2ManyInfo Pointer to conversion configuration
* \param dataPath Vector of directories to search for input files
*/
PRunDataHandler(PAny2ManyInfo *any2ManyInfo, const PStringVector dataPath);
/**
* \brief Constructor for MSR-based data loading (standard fitting workflow).
*
* Primary constructor used by musrfit for loading data specified in an MSR file.
* Reads the RUN blocks from the MSR file and loads all referenced data files.
* Uses default search paths from environment or MSR file settings.
*
* \param msrInfo Pointer to MSR file handler containing run specifications
*        (provides run list and paths)
*
* After construction, call ReadData() to actually load the histogram data.
*/
PRunDataHandler(PMsrHandler *msrInfo);
/**
* \brief Constructor for MSR-based loading with custom search paths.
*
* Like the MSR constructor, but allows overriding the default search paths.
* Useful for accessing data in non-standard locations or network mounts.
*
* \param msrInfo Pointer to MSR file handler
* \param dataPath Vector of directory paths to search for data files
*
* Paths are searched in order; the first matching file is used.
*/
PRunDataHandler(PMsrHandler *msrInfo, const PStringVector dataPath);
/**
* \brief Virtual destructor that frees all loaded data.
*
* Cleans up:
* - All PRawRunData objects in fData
* - File path vectors
* - Temporary buffers
*
* Pointers to fMsrInfo and fAny2ManyInfo are NOT deleted (owned externally).
*/
virtual ~PRunDataHandler();
/**
* \brief Reads all data files specified in MSR file or configuration.
*
* Main data loading method that:
* 1. Iterates through run specifications (from MSR or any2many config)
* 2. Searches data paths for each file using template expansion
* 3. Detects file format (from extension or explicit specification)
* 4. Calls appropriate format-specific reader (ReadRootFile, ReadNexusFile, etc.)
* 5. Extracts metadata (field, temperature, time resolution)
* 6. Validates histogram data integrity
* 7. Caches data to avoid re-reading in global fits
*
* Must be called after construction and before accessing data via GetRunData().
* Sets fAllDataAvailable flag based on success/failure.
*
* \post fData contains loaded run data, fAllDataAvailable indicates success
*/
virtual void ReadData();
/**
* \brief Performs format conversion (for any2many utility).
*
* Converts data already loaded by ReadData() to the output format specified
* in fAny2ManyInfo. This is an internal preprocessing step before WriteData().
*
* Conversion operations may include:
* - Data structure reorganization
* - Metadata format translation
* - Histogram rebinning or resampling
* - Endianness conversion (for binary formats)
*
* \pre ReadData() must have been called to load source data
* \pre fAny2ManyInfo must specify valid output format
*/
virtual void ConvertData();
/**
* \brief Writes data to file in the specified format.
*
* Writes loaded and (optionally) converted data to an output file. The format
* is determined by fAny2ManyInfo (for any2many) or fFileFormat.
*
* Supported output formats:
* - ROOT (generic or MusrRoot structure)
* - NeXus (HDF5)
* - WKM, PSI-BIN (legacy formats)
* - MUD (TRIUMF)
* - ASCII (text export)
*
* \param fileName Output file name (empty string = auto-generate from input name)
* \return True on successful write, false on error
*
* \pre Data must be loaded (ReadData called)
*/
virtual Bool_t WriteData(TString fileName="");
/**
* \brief Reports whether every required data file was loaded successfully.
*
* Simple accessor for the fAllDataAvailable flag. Useful for validating
* data availability before starting a fit.
*
* \return Current value of fAllDataAvailable (true if all data loaded
*         successfully, false if any file was missing or failed to load)
*/
virtual Bool_t IsAllDataAvailable() const
{
  return fAllDataAvailable;
}
/**
* \brief Retrieves run data by run name.
*
* Searches fData for a run with matching name. Run names are extracted from
* data files or MSR RUN blocks.
*
* \param runName Name of run to retrieve (e.g., "2425", "run2425")
* \return Pointer to PRawRunData if found, nullptr otherwise
*
* \note Pointer is valid until handler destruction or data reload
*/
virtual PRawRunData* GetRunData(const TString &runName);
/**
* \brief Retrieves run data by index.
*
* Returns run data from the internal list by position. Index corresponds to
* the order in the MSR file RUN blocks or any2many file list.
*
* \param idx Run index (0-based, default=0 for first run)
* \return Pointer to PRawRunData if index valid, nullptr if out of range
*
* \note Pointer is valid until handler destruction or data reload
*/
virtual PRawRunData* GetRunData(const UInt_t idx=0);
/**
* \brief Returns the number of loaded run data sets.
*
* \return Number of runs currently held in fData
*/
// fData.size() yields an unsigned size type; the cast makes the narrowing
// to the signed Int_t return type explicit instead of implicit.
virtual Int_t GetNoOfRunData() { return static_cast<Int_t>(fData.size()); }
/**
* \brief Sets or replaces run data at specified index.
*
* Allows external modification or replacement of run data. Used in special
* cases like manual data manipulation or run merging.
*
* \param data Pointer to PRawRunData to store (handler takes ownership)
* \param idx Index where to store data (default=0)
* \return True on success, false if index out of range or data invalid
*
* \warning Handler takes ownership and will delete data on destruction
*/
virtual Bool_t SetRunData(PRawRunData *data, UInt_t idx=0);
private:
// Each data member is declared exactly once; a second declaration of the same
// member inside the class would be a redefinition error.
PMsrHandler *fMsrInfo; ///< Pointer to MSR file handler (not owned, don't delete)
PAny2ManyInfo *fAny2ManyInfo; ///< Pointer to any2many conversion configuration (not owned, don't delete)
PStringVector fDataPath; ///< Search paths for data files (checked sequentially until file found)
Bool_t fAllDataAvailable; ///< Flag: true if all requested data files loaded successfully, false if any failed
TString fFileFormat; ///< Explicitly specified file format (overrides auto-detection)
TString fRunName; ///< Current run name being processed (used during file reading)
TString fRunPathName; ///< Full path to current data file being read
PRawRunDataList fData; ///< List of all loaded raw run data (histograms + metadata)
virtual void Init(const Int_t tag=0);
virtual Bool_t ReadFilesMsr();