diff --git a/src/include/PRunDataHandler.h b/src/include/PRunDataHandler.h index d55663ab4..8653b75dd 100644 --- a/src/include/PRunDataHandler.h +++ b/src/include/PRunDataHandler.h @@ -37,198 +37,391 @@ //------------------------------------------------------------- /** - *
Data file format identifiers for any2many converter. + * \defgroup FileFormats Data File Format Identifiers + * \brief Constants identifying μSR data file formats for the any2many converter. * - *
These constants identify different μSR data file formats supported - * by musrfit for reading and conversion. + * These constants are used to specify input and output formats when converting + * between different μSR data file formats. Each format has specific characteristics: + * + * - ROOT-based formats: Binary, fast, support complex data structures + * - NeXus (HDF5): Self-describing, hierarchical, cross-platform standard + * - Binary formats: Compact, fast, but platform/facility specific + * - ASCII formats: Human-readable, portable, but larger file size + * @{ */ -/// Undefined or unknown format +/// Undefined or unknown format (used for error indication) #define A2M_UNDEFINED 0 -/// Generic ROOT file +/// Generic ROOT file (minimal structure, basic histograms only) #define A2M_ROOT 1 -/// MusrRoot format (PSI-specific ROOT structure) +/// MusrRoot format (PSI-specific ROOT with complete metadata and run info) #define A2M_MUSR_ROOT 2 -/// MusrRoot with directory structure +/// MusrRoot with directory structure (organized by run number, year, etc.) #define A2M_MUSR_ROOT_DIR 3 -/// PSI binary format +/// PSI binary format (legacy PSI format, platform-specific byte ordering) #define A2M_PSIBIN 4 -/// PSI MDU ASCII format +/// PSI MDU ASCII format (ASCII variant of PSI format with metadata) #define A2M_PSIMDU 5 -/// TRIUMF MUD (Muon Data) format +/// TRIUMF MUD (Muon Data) format (TRIUMF's standard binary format) #define A2M_MUD 6 -/// NeXus HDF5 format (ISIS, JPARC) +/// NeXus HDF5 format (ISIS, JPARC standard - self-describing, hierarchical) #define A2M_NEXUS 7 -/// WKM format (older PSI format) +/// WKM format (older PSI format, deprecated but still supported for legacy data) #define A2M_WKM 8 -/// Generic ASCII format +/// Generic ASCII format (two-column time-value data for non-μSR applications) #define A2M_ASCII 9 +/** @} */ //------------------------------------------------------------- /** - *
Raw data file reader and format converter. + * \brief Raw data file reader and format converter for μSR data. * - *
PRunDataHandler is the I/O layer for musrfit, responsible for: - * - Loading raw histogram data from multiple file formats - * - Converting between different μSR data formats - * - Searching multiple data paths for run files - * - Reading run metadata (field, temperature, time, etc.) - * - Managing collections of runs for simultaneous fits + * PRunDataHandler is the comprehensive I/O layer for musrfit, serving as the gateway + * between raw experimental data files and the fitting framework. It provides unified + * access to data from different μSR facilities worldwide, handling the complexity of + * various file formats transparently. * - *
Supported file formats: - * - MusrRoot: PSI ROOT-based format with comprehensive metadata - * - NeXus: HDF5-based format (ISIS, JPARC) - * - MUD: TRIUMF's binary format + * \section responsibilities Core Responsibilities + * - Data Loading: Reading raw histogram data from multiple file formats + * - Format Conversion: Converting between different μSR data formats (any2many utility) + * - Path Management: Searching multiple directories for data files + * - Metadata Extraction: Reading experimental parameters (field, temperature, beam energy, time) + * - Template Expansion: Resolving run name templates (e.g., "%r" → run number) + * - Run Management: Managing collections of runs for global/simultaneous fits + * - Data Validation: Verifying data integrity and consistency + * + * \section formats Supported File Formats + * + * ROOT-based formats: + * - MusrRoot: PSI standard format with complete metadata, run info, and histograms + * - Generic ROOT: Basic ROOT files with minimal structure + * + * HDF5-based formats: + * - NeXus: International standard (ISIS, JPARC) - self-describing, hierarchical + * + * Binary formats: + * - MUD: TRIUMF Muon Data format (binary, platform-specific) * - PSI-BIN: Legacy PSI binary format - * - WKM: Older PSI format - * - ASCII: Text-based formats (for non-μSR data) - * - DB/DAT: Database formats for general x-y data + * - WKM: Older PSI format (deprecated, still supported for legacy data) * - *
Key features: - * - Automatic format detection - * - Run name template expansion (e.g., "run%r.root" → "run2425.root") - * - Multi-path search (search common data directories) - * - Batch file conversion (any2many tool) - * - Metadata extraction and validation + * Text formats: + * - MDU ASCII: PSI ASCII format with metadata + * - ASCII: Generic two-column data (time, counts) + * - DB/DAT: Database and data table formats for x-y data * - *
Example usage: - * @code - * PRunDataHandler handler(msrInfo, dataPaths); - * handler.ReadData(); // Reads all runs specified in MSR file - * PRawRunData *data = handler.GetRunData(0); - * @endcode + * \section features Key Features + * + * Intelligent file searching: + * - Multi-path search (environment variables, default paths) + * - Run name template expansion: %r (run), %y (year), %p (path) + * - Automatic format detection based on file extension + * - Year-based subdirectory organization (e.g., 2024/run2425.root) + * + * Data management: + * - Caching: Avoid re-reading identical runs in global fits + * - Validation: Check file existence before attempting reads + * - Error handling: Graceful failure with informative messages + * - Batch processing: Handle multiple runs efficiently + * + * Metadata extraction: + * - Magnetic field (from data file or MSR file) + * - Sample temperature + * - Beam energy + * - Run date/time (ISO 8601 format) + * - Detector configuration + * - Time resolution + * - Time-zero (t0) estimates + * + * \section usage Usage Examples + * + * Reading data for fitting: + * \code + * // Constructor reads MSR file and searches default paths + * PRunDataHandler handler(msrInfo); + * handler.ReadData(); // Loads all runs from MSR RUN blocks + * + * // Access individual run data + * PRawRunData *run0 = handler.GetRunData(0); + * PDoubleVector *histogram = run0->GetDataBin(0); // First detector + * \endcode + * + * Reading data with custom search paths: + * \code + * PStringVector paths; + * paths.push_back("/data/musr/2024"); + * paths.push_back("/archive/musr"); + * + * PRunDataHandler handler(msrInfo, paths); + * handler.ReadData(); + * \endcode + * + * Converting data formats (any2many): + * \code + * PAny2ManyInfo convInfo; + * convInfo.inputFormat = "nexus"; + * convInfo.outputFormat = "root"; + * // ... configure convInfo ... 
+ * + * PRunDataHandler handler(&convInfo); + * handler.ReadData(); // Read from NeXus + * handler.ConvertData(); // Convert to ROOT + * handler.WriteData(); // Write ROOT file + * \endcode + * + * \section templates Run Name Templates + * + * Template placeholders in run names are expanded automatically: + * - %r → Run number (e.g., 2425) + * - %y → 4-digit year (e.g., 2024) + * - %Y → 2-digit year (e.g., 24) + * - %p → Data path from search list + * + * Example: "run%r.root" with run 2425 → "run2425.root" + * + * \section thread_safety Thread Safety + * PRunDataHandler objects are NOT thread-safe. Each thread should create + * its own instance or use external synchronization. + * + * \see PRawRunData for the raw data structure + * \see PMsrHandler for MSR file parsing + * \see PAny2ManyInfo for format conversion configuration */ class PRunDataHandler { public: - /// Default constructor + /** + * \brief Default constructor creating an uninitialized handler. + * + * Creates an empty handler with no data loaded. Not typically used directly; + * prefer constructors that specify data source (MSR file or conversion config). + */ PRunDataHandler(); /** - *
Constructor for single file with explicit format. + * \brief Constructor for reading a single file with explicit format. * - * @param fileName Path to data file - * @param fileFormat Format string ("root", "nexus", "mud", etc.) + * Reads one data file in the specified format. Use this when the format + * cannot be auto-detected or when you want to override detection. + * + * \param fileName Path to data file (absolute or relative) + * \param fileFormat Format identifier ("root", "nexus", "mud", "wkm", "psibin", "ascii") + * + * Example: PRunDataHandler("run2425.root", "root") */ PRunDataHandler(TString fileName, const TString fileFormat); /** - *
Constructor with search paths. + * \brief Constructor for single file with search paths. * - * @param fileName File name or template - * @param fileFormat Format string - * @param dataPath Vector of directories to search for data files + * Searches multiple directories for the specified file. Useful when data + * files may be in various locations (current dir, archive, network mount). + * + * \param fileName File name or template (e.g., "run%r.root") + * \param fileFormat Format identifier string + * \param dataPath Vector of directory paths to search sequentially + * + * The handler searches paths in order and uses the first match found. */ PRunDataHandler(TString fileName, const TString fileFormat, const PStringVector dataPath); /** - *
Constructor for reading single file into provided structure. + * \brief Constructor for reading single file into pre-allocated structure. * - * @param fileName File name - * @param fileFormat Format string - * @param dataPath Data search path - * @param runData Reference to PRawRunData to fill + * Reads a data file directly into a provided PRawRunData object. Useful + * for reading auxiliary data or when you want to manage memory explicitly. + * + * \param fileName Name of file to read + * \param fileFormat Format identifier + * \param dataPath Single directory path to search (not a vector) + * \param runData Reference to PRawRunData object to fill with data */ PRunDataHandler(TString fileName, const TString fileFormat, const TString dataPath, PRawRunData &runData); /** - *
Constructor for format conversion (any2many). + * \brief Constructor for format conversion (any2many utility). * - * @param any2ManyInfo Conversion configuration structure + * Initializes handler for converting between data formats. The any2ManyInfo + * structure specifies input/output formats, file lists, and conversion options. + * + * \param any2ManyInfo Pointer to conversion configuration structure + * + * Usage: Create PAny2ManyInfo, set input/output formats and file lists, + * then call ReadData() → ConvertData() → WriteData() */ PRunDataHandler(PAny2ManyInfo *any2ManyInfo); /** - *
Constructor for format conversion with search paths. + * \brief Constructor for format conversion with custom search paths. * - * @param any2ManyInfo Conversion configuration - * @param dataPath Vector of search directories + * Like the any2many constructor, but allows specifying additional directories + * to search for input files. + * + * \param any2ManyInfo Pointer to conversion configuration + * \param dataPath Vector of directories to search for input files */ PRunDataHandler(PAny2ManyInfo *any2ManyInfo, const PStringVector dataPath); /** - *
Constructor for MSR-based data loading. + * \brief Constructor for MSR-based data loading (standard fitting workflow). * - * @param msrInfo MSR file handler (provides run list and paths) + * Primary constructor used by musrfit for loading data specified in an MSR file. + * The RUN blocks of the MSR file specify which data files ReadData() will load. + * Uses default search paths from environment or MSR file settings. + * + * \param msrInfo Pointer to MSR file handler containing run specifications + * + * After construction, call ReadData() to actually load the histogram data. */ PRunDataHandler(PMsrHandler *msrInfo); /** - *
Constructor for MSR-based loading with custom search paths. + * \brief Constructor for MSR-based loading with custom search paths. * - * @param msrInfo MSR file handler - * @param dataPath Vector of directories to search + * Like the MSR constructor, but allows overriding the default search paths. + * Useful for accessing data in non-standard locations or network mounts. + * + * \param msrInfo Pointer to MSR file handler + * \param dataPath Vector of directory paths to search for data files + * + * Paths are searched in order; the first matching file is used. */ PRunDataHandler(PMsrHandler *msrInfo, const PStringVector dataPath); + /** + * \brief Virtual destructor that frees all loaded data. + * + * Cleans up: + * - All PRawRunData objects in fData + * - File path vectors + * - Temporary buffers + * + * Pointers to fMsrInfo and fAny2ManyInfo are NOT deleted (owned externally). + */ virtual ~PRunDataHandler(); /** - *
Reads all data files specified in MSR file or configuration. + * \brief Reads all data files specified in MSR file or configuration. * - *
Searches data paths, detects formats, loads histograms and - * metadata. Call this before attempting to access run data. + * Main data loading method that: + * 1. Iterates through run specifications (from MSR or any2many config) + * 2. Searches data paths for each file using template expansion + * 3. Detects file format (from extension or explicit specification) + * 4. Calls appropriate format-specific reader (ReadRootFile, ReadNexusFile, etc.) + * 5. Extracts metadata (field, temperature, time resolution) + * 6. Validates histogram data integrity + * 7. Caches data to avoid re-reading in global fits + * + * Must be called after construction and before accessing data via GetRunData(). + * Sets fAllDataAvailable flag based on success/failure. + * + * \post fData contains loaded run data, fAllDataAvailable indicates success */ virtual void ReadData(); /** - *
Performs format conversion (for any2many utility). + * \brief Performs format conversion (for any2many utility). * - *
Converts loaded data to the target format specified in - * any2many configuration. + * Converts data already loaded by ReadData() to the output format specified + * in fAny2ManyInfo. This is an internal preprocessing step before WriteData(). + * + * Conversion operations may include: + * - Data structure reorganization + * - Metadata format translation + * - Histogram rebinning or resampling + * - Endianness conversion (for binary formats) + * + * \pre ReadData() must have been called to load source data + * \pre fAny2ManyInfo must specify valid output format */ virtual void ConvertData(); /** - *
Writes data to file in specified format. + * \brief Writes data to file in the specified format. * - * @param fileName Output file name (empty = use default) - * @return true on success, false on error + * Writes loaded and (optionally) converted data to an output file. The format + * is determined by fAny2ManyInfo (for any2many) or fFileFormat. + * + * Supported output formats: + * - ROOT (generic or MusrRoot structure) + * - NeXus (HDF5) + * - WKM, PSI-BIN (legacy formats) + * - MUD (TRIUMF) + * - ASCII (text export) + * + * \param fileName Output file name (empty string = auto-generate from input name) + * \return True on successful write, false on error + * + * \pre Data must be loaded (ReadData called) */ virtual Bool_t WriteData(TString fileName=""); - /// Returns true if all required data files were successfully loaded - /// @return Data availability status + /** + * \brief Checks if all required data files were successfully loaded. + * + * Returns false if any file specified in the MSR file or configuration + * could not be found or loaded. Useful for validating data availability + * before starting a fit. + * + * \return True if all data loaded successfully, false if any file missing or failed + */ virtual Bool_t IsAllDataAvailable() const { return fAllDataAvailable; } /** - *
Gets run data by run name. + * \brief Retrieves run data by run name. * - * @param runName Name of run to retrieve - * @return Pointer to PRawRunData, or nullptr if not found + * Searches fData for a run with matching name. Run names are extracted from + * data files or MSR RUN blocks. + * + * \param runName Name of run to retrieve (e.g., "2425", "run2425") + * \return Pointer to PRawRunData if found, nullptr otherwise + * + * \note Pointer is valid until handler destruction or data reload */ virtual PRawRunData* GetRunData(const TString &runName); /** - *
Gets run data by index. + * \brief Retrieves run data by index. * - * @param idx Run index (0-based) - * @return Pointer to PRawRunData, or nullptr if out of range + * Returns run data from the internal list by position. Index corresponds to + * the order in the MSR file RUN blocks or any2many file list. + * + * \param idx Run index (0-based, default=0 for first run) + * \return Pointer to PRawRunData if index valid, nullptr if out of range + * + * \note Pointer is valid until handler destruction or data reload */ virtual PRawRunData* GetRunData(const UInt_t idx=0); - /// Returns the number of loaded run data sets - /// @return Number of runs + /** + * \brief Returns the number of loaded run data sets. + * + * \return Number of runs currently loaded in fData + */ virtual Int_t GetNoOfRunData() {return fData.size(); } /** - *
Sets or replaces run data at specified index. + * \brief Sets or replaces run data at specified index. * - * @param data Pointer to PRawRunData to store - * @param idx Index where to store data (default=0) - * @return true on success + * Allows external modification or replacement of run data. Used in special + * cases like manual data manipulation or run merging. + * + * \param data Pointer to PRawRunData to store (handler takes ownership) + * \param idx Index where to store data (default=0) + * \return True on success, false if index out of range or data invalid + * + * \warning Handler takes ownership and will delete data on destruction */ virtual Bool_t SetRunData(PRawRunData *data, UInt_t idx=0); private: - PMsrHandler *fMsrInfo; ///< pointer to the msr-file handler - PAny2ManyInfo *fAny2ManyInfo; ///< pointer to the any2many data structure - PStringVector fDataPath; ///< vector containing all the search paths where to look for data files + PMsrHandler *fMsrInfo; ///< Pointer to MSR file handler (not owned, don't delete) + PAny2ManyInfo *fAny2ManyInfo; ///< Pointer to any2many conversion configuration (not owned, don't delete) + PStringVector fDataPath; ///< Search paths for data files (checked sequentially until file found) - Bool_t fAllDataAvailable; ///< flag indicating if all data sets could be read - TString fFileFormat; ///< keeps the file format if explicitly given - TString fRunName; ///< current run name - TString fRunPathName; ///< current path file name - PRawRunDataList fData; ///< keeping all the raw data + Bool_t fAllDataAvailable; ///< Flag: true if all requested data files loaded successfully, false if any failed + TString fFileFormat; ///< Explicitly specified file format (overrides auto-detection) + TString fRunName; ///< Current run name being processed (used during file reading) + TString fRunPathName; ///< Full path to current data file being read + PRawRunDataList fData; ///< List of all loaded raw run data (histograms + metadata) virtual 
void Init(const Int_t tag=0); virtual Bool_t ReadFilesMsr();