/*************************************************************************** PRunDataHandler.h Author: Andreas Suter e-mail: andreas.suter@psi.ch ***************************************************************************/ /*************************************************************************** * Copyright (C) 2007-2026 by Andreas Suter * * andreas.suter@psi.ch * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef _PRUNDATAHANDLER_H_ #define _PRUNDATAHANDLER_H_ #include #include "PMusr.h" #include "PMsrHandler.h" //------------------------------------------------------------- /** * \defgroup FileFormats Data File Format Identifiers * \brief Constants identifying μSR data file formats for the any2many converter. * * These constants are used to specify input and output formats when converting * between different μSR data file formats. Each format has specific characteristics: * * - ROOT-based formats: Binary, fast, support complex data structures * - NeXus (HDF5): Self-describing, hierarchical, cross-platform standard * - Binary formats: Compact, fast, but platform/facility specific * - ASCII formats: Human-readable, portable, but larger file size * @{ */ /// Undefined or unknown format (used for error indication) #define A2M_UNDEFINED 0 /// Generic ROOT file (minimal structure, basic histograms only) #define A2M_ROOT 1 /// MusrRoot format (PSI-specific ROOT with complete metadata and run info) #define A2M_MUSR_ROOT 2 /// MusrRoot with directory structure (organized by run number, year, etc.) #define A2M_MUSR_ROOT_DIR 3 /// PSI binary format (legacy PSI format, platform-specific byte ordering) #define A2M_PSIBIN 4 /// PSI MDU ASCII format (ASCII variant of PSI format with metadata) #define A2M_PSIMDU 5 /// TRIUMF MUD (Muon Data) format (TRIUMF's standard binary format) #define A2M_MUD 6 /// NeXus HDF5 format (ISIS, JPARC standard - self-describing, hierarchical) #define A2M_NEXUS 7 /// WKM format (older PSI format, deprecated but still supported for legacy data) #define A2M_WKM 8 /// Generic ASCII format (two-column time-value data for non-μSR applications) #define A2M_ASCII 9 /** @} */ //------------------------------------------------------------- /** * \brief Raw data file reader and format converter for μSR data. * * PRunDataHandler is the comprehensive I/O layer for musrfit, serving as the gateway * between raw experimental data files and the fitting framework. It provides unified * access to data from different μSR facilities worldwide, handling the complexity of * various file formats transparently. * * \section responsibilities Core Responsibilities * - Data Loading: Reading raw histogram data from multiple file formats * - Format Conversion: Converting between different μSR data formats (any2many utility) * - Path Management: Searching multiple directories for data files * - Metadata Extraction: Reading experimental parameters (field, temperature, beam energy, time) * - Template Expansion: Resolving run name templates (e.g., "%r" → run number) * - Run Management: Managing collections of runs for global/simultaneous fits * - Data Validation: Verifying data integrity and consistency * * \section formats Supported File Formats * * ROOT-based formats: * - MusrRoot: PSI standard format with complete metadata, run info, and histograms * - Generic ROOT: Basic ROOT files with minimal structure * * HDF5-based formats: * - NeXus: International standard (ISIS, JPARC) - self-describing, hierarchical * * Binary formats: * - MUD: TRIUMF Muon Data format (binary, platform-specific) * - PSI-BIN: Legacy PSI binary format * - WKM: Older PSI format (deprecated, read-only support) * * Text formats: * - MDU ASCII: PSI ASCII format with metadata * - ASCII: Generic two-column data (time, counts) * - DB/DAT: Database and data table formats for x-y data * * \section features Key Features * * Intelligent file searching: * - Multi-path search (environment variables, default paths) * - Run name template expansion: %r (run), %y (year), %p (path) * - Automatic format detection based on file extension * - Year-based subdirectory organization (e.g., 2024/run2425.root) * * Data management: * - Caching: Avoid re-reading identical runs in global fits * - Validation: Check file existence before attempting reads * - Error handling: Graceful failure with informative messages * - Batch processing: Handle multiple runs efficiently * * Metadata extraction: * - Magnetic field (from data file or MSR file) * - Sample temperature * - Beam energy * - Run date/time (ISO 8601 format) * - Detector configuration * - Time resolution * - Time-zero (t0) estimates * * \section usage Usage Examples * * Reading data for fitting: * \code * // Constructor reads MSR file and searches default paths * PRunDataHandler handler(msrInfo); * handler.ReadData(); // Loads all runs from MSR RUN blocks * * // Access individual run data * PRawRunData *run0 = handler.GetRunData(0); * PDoubleVector *histogram = run0->GetDataBin(0); // First detector * \endcode * * Reading data with custom search paths: * \code * PStringVector paths; * paths.push_back("/data/musr/2024"); * paths.push_back("/archive/musr"); * * PRunDataHandler handler(msrInfo, paths); * handler.ReadData(); * \endcode * * Converting data formats (any2many): * \code * PAny2ManyInfo convInfo; * convInfo.inputFormat = "nexus"; * convInfo.outputFormat = "root"; * // ... configure convInfo ... * * PRunDataHandler handler(&convInfo); * handler.ReadData(); // Read from NeXus * handler.ConvertData(); // Convert to ROOT * handler.WriteData(); // Write ROOT file * \endcode * * \section templates Run Name Templates * * Template placeholders in run names are expanded automatically: * - %r → Run number (e.g., 2425) * - %y → 4-digit year (e.g., 2024) * - %Y → 2-digit year (e.g., 24) * - %p → Data path from search list * * Example: "run%r.root" with run 2425 → "run2425.root" * * \section thread_safety Thread Safety * PRunDataHandler objects are NOT thread-safe. Each thread should create * its own instance or use external synchronization. * * \see PRawRunData for the raw data structure * \see PMsrHandler for MSR file parsing * \see PAny2ManyInfo for format conversion configuration */ class PRunDataHandler { public: /** * \brief Default constructor creating an uninitialized handler. * * Creates an empty handler with no data loaded. Not typically used directly; * prefer constructors that specify data source (MSR file or conversion config). */ PRunDataHandler(); /** * \brief Constructor for reading a single file with explicit format. * * Reads one data file in the specified format. Use this when the format * cannot be auto-detected or when you want to override detection. * * \param fileName Path to data file (absolute or relative) * \param fileFormat Format identifier ("root", "nexus", "mud", "wkm", "psibin", "ascii") * * Example: PRunDataHandler("run2425.root", "root") */ PRunDataHandler(TString fileName, const TString fileFormat); /** * \brief Constructor for single file with search paths. * * Searches multiple directories for the specified file. Useful when data * files may be in various locations (current dir, archive, network mount). * * \param fileName File name or template (e.g., "run%r.root") * \param fileFormat Format identifier string * \param dataPath Vector of directory paths to search sequentially * * The handler searches paths in order and uses the first match found. */ PRunDataHandler(TString fileName, const TString fileFormat, const PStringVector dataPath); /** * \brief Constructor for reading single file into pre-allocated structure. * * Reads a data file directly into a provided PRawRunData object. Useful * for reading auxiliary data or when you want to manage memory explicitly. * * \param fileName Name of file to read * \param fileFormat Format identifier * \param dataPath Single directory path to search (not a vector) * \param runData Reference to PRawRunData object to fill with data */ PRunDataHandler(TString fileName, const TString fileFormat, const TString dataPath, PRawRunData &runData); /** * \brief Constructor for format conversion (any2many utility). * * Initializes handler for converting between data formats. The any2ManyInfo * structure specifies input/output formats, file lists, and conversion options. * * \param any2ManyInfo Pointer to conversion configuration structure * * Usage: Create PAny2ManyInfo, set input/output formats and file lists, * then call ReadData() → ConvertData() → WriteData() */ PRunDataHandler(PAny2ManyInfo *any2ManyInfo); /** * \brief Constructor for format conversion with custom search paths. * * Like the any2many constructor, but allows specifying additional directories * to search for input files. * * \param any2ManyInfo Pointer to conversion configuration * \param dataPath Vector of directories to search for input files */ PRunDataHandler(PAny2ManyInfo *any2ManyInfo, const PStringVector dataPath); /** * \brief Constructor for MSR-based data loading (standard fitting workflow). * * Primary constructor used by musrfit for loading data specified in an MSR file. * Reads the RUN blocks from the MSR file and loads all referenced data files. * Uses default search paths from environment or MSR file settings. * * \param msrInfo Pointer to MSR file handler containing run specifications * * After construction, call ReadData() to actually load the histogram data. */ PRunDataHandler(PMsrHandler *msrInfo); /** * \brief Constructor for MSR-based loading with custom search paths. * * Like the MSR constructor, but allows overriding the default search paths. * Useful for accessing data in non-standard locations or network mounts. * * \param msrInfo Pointer to MSR file handler * \param dataPath Vector of directory paths to search for data files * * Paths are searched in order; the first matching file is used. */ PRunDataHandler(PMsrHandler *msrInfo, const PStringVector dataPath); /** * \brief Virtual destructor that frees all loaded data. * * Cleans up: * - All PRawRunData objects in fData * - File path vectors * - Temporary buffers * * Pointers to fMsrInfo and fAny2ManyInfo are NOT deleted (owned externally). */ virtual ~PRunDataHandler(); /** * \brief Reads all data files specified in MSR file or configuration. * * Main data loading method that: * 1. Iterates through run specifications (from MSR or any2many config) * 2. Searches data paths for each file using template expansion * 3. Detects file format (from extension or explicit specification) * 4. Calls appropriate format-specific reader (ReadRootFile, ReadNexusFile, etc.) * 5. Extracts metadata (field, temperature, time resolution) * 6. Validates histogram data integrity * 7. Caches data to avoid re-reading in global fits * * Must be called after construction and before accessing data via GetRunData(). * Sets fAllDataAvailable flag based on success/failure. * * \post fData contains loaded run data, fAllDataAvailable indicates success */ virtual void ReadData(); /** * \brief Performs format conversion (for any2many utility). * * Converts data already loaded by ReadData() to the output format specified * in fAny2ManyInfo. This is an internal preprocessing step before WriteData(). * * Conversion operations may include: * - Data structure reorganization * - Metadata format translation * - Histogram rebinning or resampling * - Endianness conversion (for binary formats) * * \pre ReadData() must have been called to load source data * \pre fAny2ManyInfo must specify valid output format */ virtual void ConvertData(); /** * \brief Writes data to file in the specified format. * * Writes loaded and (optionally) converted data to an output file. The format * is determined by fAny2ManyInfo (for any2many) or fFileFormat. * * Supported output formats: * - ROOT (generic or MusrRoot structure) * - NeXus (HDF5) * - WKM, PSI-BIN (legacy formats) * - MUD (TRIUMF) * - ASCII (text export) * * \param fileName Output file name (empty string = auto-generate from input name) * \return True on successful write, false on error * * \pre Data must be loaded (ReadData called) */ virtual Bool_t WriteData(TString fileName=""); /** * \brief Checks if all required data files were successfully loaded. * * Returns false if any file specified in the MSR file or configuration * could not be found or loaded. Useful for validating data availability * before starting a fit. * * \return True if all data loaded successfully, false if any file missing or failed */ virtual Bool_t IsAllDataAvailable() const { return fAllDataAvailable; } /** * \brief Retrieves run data by run name. * * Searches fData for a run with matching name. Run names are extracted from * data files or MSR RUN blocks. * * \param runName Name of run to retrieve (e.g., "2425", "run2425") * \return Pointer to PRawRunData if found, nullptr otherwise * * \note Pointer is valid until handler destruction or data reload */ virtual PRawRunData* GetRunData(const TString &runName); /** * \brief Retrieves run data by index. * * Returns run data from the internal list by position. Index corresponds to * the order in the MSR file RUN blocks or any2many file list. * * \param idx Run index (0-based, default=0 for first run) * \return Pointer to PRawRunData if index valid, nullptr if out of range * * \note Pointer is valid until handler destruction or data reload */ virtual PRawRunData* GetRunData(const UInt_t idx=0); /** * \brief Returns the number of loaded run data sets. * * \return Number of runs currently loaded in fData */ virtual Int_t GetNoOfRunData() {return fData.size(); } /** * \brief Sets or replaces run data at specified index. * * Allows external modification or replacement of run data. Used in special * cases like manual data manipulation or run merging. * * \param data Pointer to PRawRunData to store (handler takes ownership) * \param idx Index where to store data (default=0) * \return True on success, false if index out of range or data invalid * * \warning Handler takes ownership and will delete data on destruction */ virtual Bool_t SetRunData(PRawRunData *data, UInt_t idx=0); private: PMsrHandler *fMsrInfo; ///< Pointer to MSR file handler (not owned, don't delete) PAny2ManyInfo *fAny2ManyInfo; ///< Pointer to any2many conversion configuration (not owned, don't delete) PStringVector fDataPath; ///< Search paths for data files (checked sequentially until file found) Bool_t fAllDataAvailable; ///< Flag: true if all requested data files loaded successfully, false if any failed TString fFileFormat; ///< Explicitly specified file format (overrides auto-detection) TString fRunName; ///< Current run name being processed (used during file reading) TString fRunPathName; ///< Full path to current data file being read PRawRunDataList fData; ///< List of all loaded raw run data (histograms + metadata) virtual void Init(const Int_t tag=0); virtual Bool_t ReadFilesMsr(); virtual Bool_t ReadWriteFilesList(); virtual Bool_t FileAlreadyRead(TString runName); virtual void TestFileName(TString &runName, const TString &ext); virtual Bool_t FileExistsCheck(PMsrRunBlock &runInfo, const UInt_t idx); virtual Bool_t FileExistsCheck(const Bool_t fileName, const Int_t idx); virtual Bool_t FileExistsCheck(const TString fileName); virtual Bool_t ReadRootFile(); virtual Bool_t ReadNexusFile(); virtual Bool_t ReadWkmFile(); virtual Bool_t ReadPsiBinFile(); virtual Bool_t ReadMudFile(); virtual Bool_t ReadMduAsciiFile(); virtual Bool_t ReadAsciiFile(); virtual Bool_t ReadDBFile(); virtual Bool_t ReadDatFile(); virtual Bool_t WriteMusrRootFile(Int_t tag=A2M_MUSR_ROOT_DIR, TString fln=""); virtual Bool_t WriteRootFile(TString fln=""); virtual Bool_t WriteNexusFile(TString fln=""); virtual Bool_t WriteWkmFile(TString fln=""); virtual Bool_t WritePsiBinFile(TString fln=""); virtual Bool_t WriteMudFile(TString fln=""); virtual Bool_t WriteAsciiFile(TString fln=""); virtual Bool_t StripWhitespace(TString &str); virtual Bool_t IsWhitespace(const Char_t *str); virtual Double_t ToDouble(TString &str, Bool_t &ok); virtual Int_t ToInt(TString &str, Bool_t &ok); virtual Int_t GetDataTagIndex(TString &str, const PStringVector* fLabels); virtual TString GenerateOutputFileName(const TString fileName, const TString extension, Bool_t &ok); virtual TString GetFileName(const TString extension, Bool_t &ok); virtual TString FileNameFromTemplate(TString &fileNameTemplate, Int_t run, TString &year, Bool_t &ok); virtual bool DateToISO8601(std::string inDate, std::string &iso8601Date); virtual void SplitTimeDate(TString timeDate, TString &time, TString &date, Bool_t &ok); virtual TString GetMonth(Int_t month); virtual TString GetYear(Int_t month); }; #endif // _PRUNDATAHANDLER_H_