PMsrHandler: replace ROOT tokenizer machinery with C++17 PStringUtils

Reduce the ROOT footprint of the MSR parser by removing the pervasive
TString::Tokenize / TObjArray / TObjString / dynamic_cast pattern (28
tokenize sites, 14 TObjArray, 106 TObjString) used to split lines into
tokens, together with the manual `delete tokens` cleanup.

Add a new dependency-free C++17 utility class PStringUtils (Split, IsInt,
IsFloat, ToInt, ToDouble, IsEqualNoCase, ContainsNoCase, BeginsWithNoCase)
that replicates the relevant TString semantics exactly, so it can be reused
elsewhere in the suite. IsInt/IsFloat tolerate surrounding whitespace to
match TString::IsDigit/IsFloat (needed for tokens split on ',' / ';' only).

The public API and the PMusr.h data structures keep TString unchanged; only
the internal tokenizing logic is rewritten. Net -451 lines in
PMsrHandler.cpp. All 85 integration tests pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-06 11:15:02 +02:00
parent dd604d4bf6
commit b072a481ba
4 changed files with 738 additions and 827 deletions
+2
View File
@@ -106,6 +106,7 @@ add_library(PMusr SHARED
PMsgBoxDict.cxx
PMsr2Data.cpp
PMsrHandler.cpp
PStringUtils.cpp
PMusrCanvas.cpp
PMusrCanvasDict.cxx
PMusr.cpp
@@ -270,6 +271,7 @@ install(
${MUSRFIT_INC}/PRunSingleHisto.h
${MUSRFIT_INC}/PRunSingleHistoRRF.h
${MUSRFIT_INC}/PStartupHandler.h
${MUSRFIT_INC}/PStringUtils.h
${MUSRFIT_INC}/PTheory.h
${MUSRFIT_INC}/PUserFcnBase.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
File diff suppressed because it is too large Load Diff
+225
View File
@@ -0,0 +1,225 @@
/***************************************************************************
PStringUtils.cpp
Author: Andreas Suter
e-mail: andreas.suter@psi.ch
***************************************************************************/
/***************************************************************************
* Copyright (C) 2007-2026 by Andreas Suter *
* andreas.suter@psi.ch *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include <cctype>
#include <cstdlib>
#include "PStringUtils.h"
//--------------------------------------------------------------------------
// Split (static)
//--------------------------------------------------------------------------
/**
 * <p>Breaks str into the maximal runs of characters that are not contained in
 * delimiters. Leading, trailing and consecutive delimiters never produce an
 * empty token. Intended as the replacement for TString::Tokenize().
 *
 * \param str input string to be tokenized
 * \param delimiters set of characters, each of which separates two tokens
 * \return the tokens in order of appearance (the delimiters are not included)
 */
std::vector<std::string> PStringUtils::Split(const std::string &str, const std::string &delimiters)
{
  std::vector<std::string> result;
  std::string::size_type pos = 0;

  while (true) {
    // skip the delimiters in front of the next token
    const std::string::size_type first = str.find_first_not_of(delimiters, pos);
    if (first == std::string::npos)
      break; // nothing but delimiters left

    // the token runs up to the next delimiter, or to the end of str
    const std::string::size_type last = str.find_first_of(delimiters, first);
    const bool isFinalToken = (last == std::string::npos);
    result.push_back(isFinalToken ? str.substr(first) : str.substr(first, last - first));
    if (isFinalToken)
      break;

    pos = last;
  }

  return result;
}
//--------------------------------------------------------------------------
// IsInt (static)
//--------------------------------------------------------------------------
/**
 * <p>Returns true if the string contains at least one decimal digit and
 * otherwise nothing but whitespace. Whitespace is accepted at any position
 * (leading, trailing or between digits), which is needed for tokens that were
 * split on ',' or ';' only. A sign character makes the check fail. Intended as
 * the replacement for TString::IsDigit().
 *
 * \param str string to be checked
 * \return true if str consists of digits and whitespace only and holds at
 *         least one digit
 */
bool PStringUtils::IsInt(const std::string &str)
{
  std::string::size_type digitCount = 0;

  for (std::string::size_type idx = 0; idx < str.size(); ++idx) {
    const unsigned char ch = static_cast<unsigned char>(str[idx]);
    if (std::isdigit(ch)) {
      ++digitCount;
      continue;
    }
    // anything which is neither a digit nor whitespace disqualifies the string
    if (!std::isspace(ch))
      return false;
  }

  // an empty or whitespace-only string is not a number
  return digitCount > 0;
}
//--------------------------------------------------------------------------
// IsFloat (static)
//--------------------------------------------------------------------------
/**
* <p>Returns true if the string is a complete integer or floating point
* literal (optionally signed, with decimal point and/or exponent). Mirrors
* the semantics of TString::IsFloat() for the relevant cases.
*
* \param str string to be checked
* \return true if str is a valid number
*/
bool PStringUtils::IsFloat(const std::string &str)
{
  // Surrounding whitespace is ignored (e.g. for tokens split on ',' or ';'
  // only); what remains has to be one complete decimal number: an optional
  // sign, digits with an optional decimal point, and an optional exponent.
  // The final validation is delegated to std::strtod(), which interprets the
  // decimal point according to the current C locale.
  const std::string ws(" \t\n\r\f\v");
  const std::string::size_type b = str.find_first_not_of(ws);
  if (b == std::string::npos)
    return false; // empty or whitespace only
  const std::string::size_type e = str.find_last_not_of(ws);
  const std::string t = str.substr(b, e - b + 1);

  std::string::size_type i = 0;
  if (t[i] == '+' || t[i] == '-')
    ++i;

  // after the optional sign a digit or a decimal point has to follow; this
  // rejects "inf"/"nan", which strtod would otherwise accept
  if (i >= t.size() || !(std::isdigit(static_cast<unsigned char>(t[i])) || t[i] == '.'))
    return false;

  // strtod also accepts hexadecimal floats such as "0x1A" or "0x1p3"; they are
  // not decimal literals. A decimal literal never contains 'x'/'X', hence its
  // presence is sufficient to reject the candidate.
  if (t.find_first_of("xX") != std::string::npos)
    return false;

  // valid only if strtod consumes the whole candidate
  const char *begin = t.c_str();
  char *end = nullptr;
  std::strtod(begin, &end);
  return end == begin + t.size();
}
//--------------------------------------------------------------------------
// ToInt (static)
//--------------------------------------------------------------------------
/**
 * <p>Converts the leading part of the string to an int (base 10) by means of
 * std::strtol(): leading whitespace is skipped, an optional sign is accepted
 * and the conversion stops at the first character which cannot be part of the
 * number. Intended as the replacement for TString::Atoi().
 *
 * <p>The long value delivered by std::strtol() is narrowed to int without any
 * range check.
 *
 * \param str string to be converted
 * \return converted integer value, 0 if no conversion is possible
 */
int PStringUtils::ToInt(const std::string &str)
{
  const long parsed = std::strtol(str.c_str(), nullptr, 10);
  return static_cast<int>(parsed);
}
//--------------------------------------------------------------------------
// ToDouble (static)
//--------------------------------------------------------------------------
/**
 * <p>Converts the leading part of the string to a double by means of
 * std::strtod(): leading whitespace is skipped and the conversion stops at the
 * first character which cannot be part of the number. Intended as the
 * replacement for TString::Atof().
 *
 * \param str string to be converted
 * \return converted double value, 0.0 if no conversion is possible
 */
double PStringUtils::ToDouble(const std::string &str)
{
  const char *text = str.c_str();
  const double parsed = std::strtod(text, nullptr);
  return parsed;
}
//--------------------------------------------------------------------------
// IsEqualNoCase (static)
//--------------------------------------------------------------------------
/**
 * <p>Compares two complete strings character by character after folding each
 * character to lower case with std::tolower(). Strings of different length are
 * never equal. Intended as the replacement for
 * TString::CompareTo(..., TString::kIgnoreCase) == 0.
 *
 * \param a first string
 * \param b second string
 * \return true if a and b are equal ignoring case
 */
bool PStringUtils::IsEqualNoCase(const std::string &a, const std::string &b)
{
  const std::string::size_type length = a.size();
  if (length != b.size())
    return false;

  std::string::size_type idx = 0;
  while (idx < length) {
    const int lhs = std::tolower(static_cast<unsigned char>(a[idx]));
    const int rhs = std::tolower(static_cast<unsigned char>(b[idx]));
    if (lhs != rhs)
      return false;
    ++idx;
  }

  return true;
}
//--------------------------------------------------------------------------
// ContainsNoCase (static)
//--------------------------------------------------------------------------
/**
 * <p>Checks whether needle occurs anywhere in haystack, comparing characters
 * after folding them to lower case with std::tolower(). An empty needle is
 * contained in every haystack. Intended as the replacement for
 * TString::Contains(..., TString::kIgnoreCase).
 *
 * \param haystack string to be searched in
 * \param needle substring to be searched for
 * \return true if needle is contained in haystack ignoring case
 */
bool PStringUtils::ContainsNoCase(const std::string &haystack, const std::string &needle)
{
  const std::string::size_type needleLen = needle.size();
  const std::string::size_type hayLen = haystack.size();

  if (needleLen == 0)
    return true;
  if (needleLen > hayLen)
    return false;

  // last offset in haystack at which needle still fits completely
  const std::string::size_type lastStart = hayLen - needleLen;

  for (std::string::size_type start = 0; start <= lastStart; ++start) {
    bool matches = true;
    for (std::string::size_type k = 0; matches && k < needleLen; ++k) {
      const int hayChar = std::tolower(static_cast<unsigned char>(haystack[start + k]));
      const int needleChar = std::tolower(static_cast<unsigned char>(needle[k]));
      matches = (hayChar == needleChar);
    }
    if (matches)
      return true;
  }

  return false;
}
//--------------------------------------------------------------------------
// BeginsWithNoCase (static)
//--------------------------------------------------------------------------
/**
 * <p>Checks whether str starts with prefix, comparing characters after folding
 * them to lower case with std::tolower(). An empty prefix matches every
 * string. Intended as the replacement for
 * TString::BeginsWith(..., TString::kIgnoreCase).
 *
 * \param str string to be tested
 * \param prefix prefix to be searched for
 * \return true if str starts with prefix ignoring case
 */
bool PStringUtils::BeginsWithNoCase(const std::string &str, const std::string &prefix)
{
  const std::string::size_type prefixLen = prefix.size();
  if (str.size() < prefixLen)
    return false;

  // count how many leading characters agree (ignoring case)
  std::string::size_type matched = 0;
  while (matched < prefixLen &&
         std::tolower(static_cast<unsigned char>(str[matched])) ==
         std::tolower(static_cast<unsigned char>(prefix[matched]))) {
    ++matched;
  }

  return matched == prefixLen;
}
+137
View File
@@ -0,0 +1,137 @@
/***************************************************************************
PStringUtils.h
Author: Andreas Suter
e-mail: andreas.suter@psi.ch
***************************************************************************/
/***************************************************************************
* Copyright (C) 2007-2026 by Andreas Suter *
* andreas.suter@psi.ch *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#ifndef _PSTRINGUTILS_H_
#define _PSTRINGUTILS_H_
#include <string>
#include <vector>
//-------------------------------------------------------------
/**
* \brief Lightweight, dependency-free string utilities (pure C++17).
*
* PStringUtils collects small string helpers used throughout the musrfit
* suite, in particular for tokenizing and parsing the plain-text MSR file
* format. The implementation deliberately relies only on the C++ standard
* library (no ROOT) so that it can be reused freely.
*
* The provided helpers are meant as replacements for the corresponding
* ROOT TString methods that were previously used:
* - Split replaces TString::Tokenize() (+ TObjArray/TObjString)
* - IsInt replaces TString::IsDigit()
* - IsFloat replaces TString::IsFloat()
* - ToInt replaces TString::Atoi()
* - ToDouble replaces TString::Atof()
* - IsEqualNoCase replaces TString::CompareTo(..., TString::kIgnoreCase)
* - ContainsNoCase replaces TString::Contains(..., TString::kIgnoreCase)
* - BeginsWithNoCase replaces TString::BeginsWith(..., TString::kIgnoreCase)
*
* All methods are static; the class is a pure namespace-like utility.
*/
class PStringUtils
{
public:
/**
* <p>Splits a string into tokens on any character contained in
* delimiters, skipping empty tokens (leading, trailing and consecutive
* delimiters do not produce tokens). Replaces TString::Tokenize().
*
* @param str input string to be tokenized
* @param delimiters set of delimiter characters
* @return vector of tokens (without the delimiters)
*/
static std::vector<std::string> Split(const std::string &str, const std::string &delimiters);
/**
* <p>Returns true if the string contains at least one decimal digit and
* otherwise nothing but whitespace. Whitespace is tolerated at any
* position (e.g. for tokens split on ',' or ';' only); a sign character
* is not accepted. Replaces TString::IsDigit().
*
* @param str string to be checked
* @return true if str consists of digits and whitespace only and holds
*         at least one digit
*/
static bool IsInt(const std::string &str);
/**
* <p>Returns true if the string, ignoring leading and trailing
* whitespace, is a complete integer or floating point literal (optionally
* signed, with decimal point and/or exponent). "inf" and "nan" are
* rejected. Replaces TString::IsFloat() for the relevant cases.
*
* @param str string to be checked
* @return true if str is a valid number
*/
static bool IsFloat(const std::string &str);
/**
* <p>Converts the leading part of the string to an int (base 10); the
* conversion stops at the first character which cannot be part of the
* number. Replaces TString::Atoi(). Returns 0 if no conversion is possible.
*
* @param str string to be converted
* @return converted integer value
*/
static int ToInt(const std::string &str);
/**
* <p>Converts the leading part of the string to a double; the conversion
* stops at the first character which cannot be part of the number.
* Replaces TString::Atof(). Returns 0.0 if no conversion is possible.
*
* @param str string to be converted
* @return converted double value
*/
static double ToDouble(const std::string &str);
/**
* <p>Case-insensitive full-string equality; strings of different length
* are never equal.
* Replaces TString::CompareTo(..., TString::kIgnoreCase) == 0.
*
* @param a first string
* @param b second string
* @return true if a and b are equal ignoring case
*/
static bool IsEqualNoCase(const std::string &a, const std::string &b);
/**
* <p>Case-insensitive substring search; an empty needle is contained in
* every haystack.
* Replaces TString::Contains(..., TString::kIgnoreCase).
*
* @param haystack string to be searched in
* @param needle substring to be searched for
* @return true if needle is contained in haystack ignoring case
*/
static bool ContainsNoCase(const std::string &haystack, const std::string &needle);
/**
* <p>Case-insensitive prefix test; an empty prefix matches every string.
* Replaces TString::BeginsWith(..., TString::kIgnoreCase).
*
* @param str string to be tested
* @param prefix prefix to be searched for
* @return true if str starts with prefix ignoring case
*/
static bool BeginsWithNoCase(const std::string &str, const std::string &prefix);
};
#endif // _PSTRINGUTILS_H_