PMsrHandler: replace ROOT tokenizer machinery with C++17 PStringUtils
Reduce the ROOT footprint of the MSR parser by removing the pervasive TString::Tokenize / TObjArray / TObjString / dynamic_cast pattern (28 tokenize sites, 14 TObjArray, 106 TObjString) used to split lines into tokens, together with the manual `delete tokens` cleanup. Add a new dependency-free C++17 utility class PStringUtils (Split, IsInt, IsFloat, ToInt, ToDouble, IsEqualNoCase, ContainsNoCase, BeginsWithNoCase) that replicates the relevant TString semantics exactly, so it can be reused elsewhere in the suite. IsInt/IsFloat tolerate surrounding whitespace to match TString::IsDigit/IsFloat (needed for tokens split on ',' / ';' only). The public API and the PMusr.h data structures keep TString unchanged; only the internal tokenizing logic is rewritten. Net -451 lines in PMsrHandler.cpp. All 85 integration tests pass. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -106,6 +106,7 @@ add_library(PMusr SHARED
|
||||
PMsgBoxDict.cxx
|
||||
PMsr2Data.cpp
|
||||
PMsrHandler.cpp
|
||||
PStringUtils.cpp
|
||||
PMusrCanvas.cpp
|
||||
PMusrCanvasDict.cxx
|
||||
PMusr.cpp
|
||||
@@ -270,6 +271,7 @@ install(
|
||||
${MUSRFIT_INC}/PRunSingleHisto.h
|
||||
${MUSRFIT_INC}/PRunSingleHistoRRF.h
|
||||
${MUSRFIT_INC}/PStartupHandler.h
|
||||
${MUSRFIT_INC}/PStringUtils.h
|
||||
${MUSRFIT_INC}/PTheory.h
|
||||
${MUSRFIT_INC}/PUserFcnBase.h
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
|
||||
+374
-827
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,225 @@
|
||||
/***************************************************************************
|
||||
|
||||
PStringUtils.cpp
|
||||
|
||||
Author: Andreas Suter
|
||||
e-mail: andreas.suter@psi.ch
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* Copyright (C) 2007-2026 by Andreas Suter *
|
||||
* andreas.suter@psi.ch *
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU General Public License *
|
||||
* along with this program; if not, write to the *
|
||||
* Free Software Foundation, Inc., *
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
|
||||
***************************************************************************/
|
||||
|
||||
#include <cctype>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "PStringUtils.h"
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Split (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Splits a string into tokens on any character contained in delimiters,
|
||||
* skipping empty tokens. Mirrors the semantics of TString::Tokenize().
|
||||
*
|
||||
* \param str input string to be tokenized
|
||||
* \param delimiters set of delimiter characters
|
||||
* \return vector of tokens (without the delimiters)
|
||||
*/
|
||||
std::vector<std::string> PStringUtils::Split(const std::string &str, const std::string &delimiters)
|
||||
{
|
||||
std::vector<std::string> tokens;
|
||||
std::string::size_type start = str.find_first_not_of(delimiters);
|
||||
while (start != std::string::npos) {
|
||||
std::string::size_type end = str.find_first_of(delimiters, start);
|
||||
if (end == std::string::npos) {
|
||||
tokens.push_back(str.substr(start));
|
||||
break;
|
||||
}
|
||||
tokens.push_back(str.substr(start, end - start));
|
||||
start = str.find_first_not_of(delimiters, end);
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// IsInt (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Returns true if the string is a non-empty sequence of decimal digits
|
||||
* only. Mirrors the semantics of TString::IsDigit().
|
||||
*
|
||||
* \param str string to be checked
|
||||
* \return true if str consists of digits only
|
||||
*/
|
||||
bool PStringUtils::IsInt(const std::string &str)
|
||||
{
|
||||
// mirror TString::IsDigit(): all characters must be digits or whitespace,
|
||||
// and there must be at least one digit (surrounding/embedded whitespace is
|
||||
// tolerated, e.g. for tokens split on ',' or ';' only).
|
||||
bool hasDigit = false;
|
||||
for (char c : str) {
|
||||
if (std::isdigit(static_cast<unsigned char>(c)))
|
||||
hasDigit = true;
|
||||
else if (!std::isspace(static_cast<unsigned char>(c)))
|
||||
return false;
|
||||
}
|
||||
return hasDigit;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// IsFloat (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Returns true if the string is a complete integer or floating point
|
||||
* literal (optionally signed, with decimal point and/or exponent). Mirrors
|
||||
* the semantics of TString::IsFloat() for the relevant cases.
|
||||
*
|
||||
* \param str string to be checked
|
||||
* \return true if str is a valid number
|
||||
*/
|
||||
bool PStringUtils::IsFloat(const std::string &str)
|
||||
{
|
||||
// mirror TString::IsFloat(): surrounding whitespace is ignored (e.g. for
|
||||
// tokens split on ',' or ';' only), then a complete number is required.
|
||||
const std::string ws(" \t\n\r\f\v");
|
||||
std::string::size_type b = str.find_first_not_of(ws);
|
||||
if (b == std::string::npos)
|
||||
return false;
|
||||
std::string::size_type e = str.find_last_not_of(ws);
|
||||
const std::string t = str.substr(b, e - b + 1);
|
||||
|
||||
std::string::size_type i = 0;
|
||||
if (t[i] == '+' || t[i] == '-')
|
||||
++i;
|
||||
// reject things like "inf"/"nan" which strtod would otherwise accept
|
||||
if (i >= t.size() || !(std::isdigit(static_cast<unsigned char>(t[i])) || t[i] == '.'))
|
||||
return false;
|
||||
const char *begin = t.c_str();
|
||||
char *end = nullptr;
|
||||
std::strtod(begin, &end);
|
||||
return end == begin + t.size();
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// ToInt (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Converts the leading part of the string to an int (base 10), mirroring
|
||||
* TString::Atoi(). Returns 0 if no conversion is possible.
|
||||
*
|
||||
* \param str string to be converted
|
||||
* \return converted integer value
|
||||
*/
|
||||
int PStringUtils::ToInt(const std::string &str)
|
||||
{
|
||||
return static_cast<int>(std::strtol(str.c_str(), nullptr, 10));
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// ToDouble (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Converts the leading part of the string to a double, mirroring
|
||||
* TString::Atof(). Returns 0.0 if no conversion is possible.
|
||||
*
|
||||
* \param str string to be converted
|
||||
* \return converted double value
|
||||
*/
|
||||
double PStringUtils::ToDouble(const std::string &str)
|
||||
{
|
||||
return std::strtod(str.c_str(), nullptr);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// IsEqualNoCase (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Case-insensitive full-string equality, mirroring
|
||||
* TString::CompareTo(..., TString::kIgnoreCase) == 0.
|
||||
*
|
||||
* \param a first string
|
||||
* \param b second string
|
||||
* \return true if a and b are equal ignoring case
|
||||
*/
|
||||
bool PStringUtils::IsEqualNoCase(const std::string &a, const std::string &b)
|
||||
{
|
||||
if (a.size() != b.size())
|
||||
return false;
|
||||
for (std::string::size_type i = 0; i < a.size(); ++i) {
|
||||
if (std::tolower(static_cast<unsigned char>(a[i])) !=
|
||||
std::tolower(static_cast<unsigned char>(b[i])))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// ContainsNoCase (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Case-insensitive substring search, mirroring
|
||||
* TString::Contains(..., TString::kIgnoreCase).
|
||||
*
|
||||
* \param haystack string to be searched in
|
||||
* \param needle substring to be searched for
|
||||
* \return true if needle is contained in haystack ignoring case
|
||||
*/
|
||||
bool PStringUtils::ContainsNoCase(const std::string &haystack, const std::string &needle)
|
||||
{
|
||||
if (needle.empty())
|
||||
return true;
|
||||
if (needle.size() > haystack.size())
|
||||
return false;
|
||||
auto toLower = [](unsigned char c) { return std::tolower(c); };
|
||||
for (std::string::size_type i = 0; i + needle.size() <= haystack.size(); ++i) {
|
||||
std::string::size_type j = 0;
|
||||
for (; j < needle.size(); ++j) {
|
||||
if (toLower(haystack[i+j]) != toLower(needle[j]))
|
||||
break;
|
||||
}
|
||||
if (j == needle.size())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// BeginsWithNoCase (static)
|
||||
//--------------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Case-insensitive prefix test, mirroring
|
||||
* TString::BeginsWith(..., TString::kIgnoreCase).
|
||||
*
|
||||
* \param str string to be tested
|
||||
* \param prefix prefix to be searched for
|
||||
* \return true if str starts with prefix ignoring case
|
||||
*/
|
||||
bool PStringUtils::BeginsWithNoCase(const std::string &str, const std::string &prefix)
|
||||
{
|
||||
if (prefix.size() > str.size())
|
||||
return false;
|
||||
for (std::string::size_type i = 0; i < prefix.size(); ++i) {
|
||||
if (std::tolower(static_cast<unsigned char>(str[i])) !=
|
||||
std::tolower(static_cast<unsigned char>(prefix[i])))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,137 @@
|
||||
/***************************************************************************
|
||||
|
||||
PStringUtils.h
|
||||
|
||||
Author: Andreas Suter
|
||||
e-mail: andreas.suter@psi.ch
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* Copyright (C) 2007-2026 by Andreas Suter *
|
||||
* andreas.suter@psi.ch *
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU General Public License *
|
||||
* along with this program; if not, write to the *
|
||||
* Free Software Foundation, Inc., *
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef _PSTRINGUTILS_H_
|
||||
#define _PSTRINGUTILS_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
//-------------------------------------------------------------
|
||||
/**
 * \brief Lightweight, dependency-free string utilities (pure C++17).
 *
 * PStringUtils bundles the small string helpers needed throughout the
 * musrfit suite, first and foremost for tokenizing and parsing the
 * plain-text MSR file format. Only the C++ standard library is used
 * (no ROOT), hence the helpers can be reused freely.
 *
 * Each helper stands in for a ROOT TString method which was used before:
 * - Split            replaces TString::Tokenize() (+ TObjArray/TObjString)
 * - IsInt            replaces TString::IsDigit()
 * - IsFloat          replaces TString::IsFloat()
 * - ToInt            replaces TString::Atoi()
 * - ToDouble         replaces TString::Atof()
 * - IsEqualNoCase    replaces TString::CompareTo(..., TString::kIgnoreCase)
 * - ContainsNoCase   replaces TString::Contains(..., TString::kIgnoreCase)
 * - BeginsWithNoCase replaces TString::BeginsWith(..., TString::kIgnoreCase)
 *
 * All methods are static; the class merely serves as a namespace.
 */
class PStringUtils
{
  public:
    /**
     * <p>Breaks a string into tokens. Every character contained in
     * delimiters acts as a separator; empty tokens are skipped.
     * Replacement for TString::Tokenize().
     *
     * @param str string to be split
     * @param delimiters set of characters which separate tokens
     * @return the tokens in order of appearance, without the delimiters
     */
    static std::vector<std::string> Split(const std::string &str, const std::string &delimiters);

    /**
     * <p>Checks whether a string consists of decimal digits and whitespace
     * only, with at least one digit present. Replacement for
     * TString::IsDigit().
     *
     * @param str string to be checked
     * @return true if str holds at least one digit and nothing but digits
     *         and whitespace
     */
    static bool IsInt(const std::string &str);

    /**
     * <p>Checks whether a string, surrounding whitespace ignored, is a
     * complete integer or floating point literal (optionally signed, with
     * decimal point and/or exponent). Replacement for TString::IsFloat()
     * for the relevant cases.
     *
     * @param str string to be checked
     * @return true if str is a valid number
     */
    static bool IsFloat(const std::string &str);

    /**
     * <p>Converts a string to an int (base 10). Replacement for
     * TString::Atoi(). Returns 0 if no conversion is possible.
     *
     * @param str string to be converted
     * @return converted integer value
     */
    static int ToInt(const std::string &str);

    /**
     * <p>Converts the leading part of a string to a double. Replacement for
     * TString::Atof(). Returns 0.0 if no conversion is possible.
     *
     * @param str string to be converted
     * @return converted double value
     */
    static double ToDouble(const std::string &str);

    /**
     * <p>Compares two strings over their full length while ignoring case.
     * Replacement for TString::CompareTo(..., TString::kIgnoreCase) == 0.
     *
     * @param a first string
     * @param b second string
     * @return true if a and b are equal ignoring case
     */
    static bool IsEqualNoCase(const std::string &a, const std::string &b);

    /**
     * <p>Searches for a substring while ignoring case. Replacement for
     * TString::Contains(..., TString::kIgnoreCase).
     *
     * @param haystack string to be searched in
     * @param needle substring to be searched for
     * @return true if needle is contained in haystack ignoring case
     */
    static bool ContainsNoCase(const std::string &haystack, const std::string &needle);

    /**
     * <p>Tests whether a string starts with a prefix while ignoring case.
     * Replacement for TString::BeginsWith(..., TString::kIgnoreCase).
     *
     * @param str string to be tested
     * @param prefix prefix to be searched for
     * @return true if str starts with prefix ignoring case
     */
    static bool BeginsWithNoCase(const std::string &str, const std::string &prefix);
};
|
||||
|
||||
#endif // _PSTRINGUTILS_H_
|
||||
Reference in New Issue
Block a user