316 lines
9.7 KiB
C++
316 lines
9.7 KiB
C++
// Copyright 2017 Global Phasing Ltd.
|
|
//
|
|
// Utilities. Mostly for working with strings and vectors.
|
|
|
|
#ifndef GEMMI_UTIL_HPP_
|
|
#define GEMMI_UTIL_HPP_
|
|
|
|
#include <cassert>
|
|
#include <cctype> // for isspace
|
|
#include <cstring> // for strncmp
|
|
#include <algorithm> // for equal, find, remove_if
|
|
#include <iterator> // for begin, end, make_move_iterator
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
namespace gemmi {
|
|
|
|
// ##### string helpers #####
|
|
|
|
inline void append_to_str(std::string& out, int v) { out += std::to_string(v); }
|
|
inline void append_to_str(std::string& out, size_t v) { out += std::to_string(v); }
|
|
void append_to_str(std::string& out, double) = delete;
|
|
template<typename T>
|
|
void append_to_str(std::string& out, const T& v) { out += v; }
|
|
|
|
inline void cat_to(std::string&) {}
|
|
template <typename T, typename... Args>
|
|
void cat_to(std::string& out, const T& value, Args const&... args) {
|
|
append_to_str(out, value);
|
|
cat_to(out, args...);
|
|
}
|
|
template <class... Args>
|
|
std::string cat(Args const&... args) {
|
|
std::string out;
|
|
cat_to(out, args...);
|
|
return out;
|
|
}
|
|
|
|
inline bool starts_with(const std::string& str, const std::string& prefix) {
|
|
size_t sl = prefix.length();
|
|
return str.length() >= sl && str.compare(0, sl, prefix) == 0;
|
|
}
|
|
|
|
template<size_t N> bool starts_with(const char* a, const char (&b)[N]) {
|
|
return std::strncmp(a, b, N-1) == 0;
|
|
}
|
|
|
|
inline bool ends_with(const std::string& str, const std::string& suffix) {
|
|
size_t sl = suffix.length();
|
|
return str.length() >= sl && str.compare(str.length() - sl, sl, suffix) == 0;
|
|
}
|
|
|
|
// can be faster than std::tolower() b/c it takes char not int
|
|
inline char lower(char c) {
|
|
if (c >= 'A' && c <= 'Z')
|
|
return c | 0x20;
|
|
return c;
|
|
}
|
|
|
|
// works as expected only for a-zA-Z
|
|
inline char alpha_up(char c) { return c & ~0x20; }
|
|
|
|
inline std::string to_lower(std::string str) {
|
|
for (char& c : str)
|
|
if (c >= 'A' && c <= 'Z')
|
|
c |= 0x20;
|
|
return str;
|
|
}
|
|
|
|
inline std::string to_upper(std::string str) {
|
|
for (char& c : str)
|
|
if (c >= 'a' && c <= 'z')
|
|
c &= ~0x20;
|
|
return str;
|
|
}
|
|
|
|
// case-insensitive character comparison
|
|
inline bool isame(char a, char b) {
|
|
return a == b || ((a^b) == 0x20 && (a|0x20) >= 'a' && (a|0x20) <= 'z');
|
|
}
|
|
|
|
// Case-insensitive comparisons. The second arg must be lowercase.
|
|
|
|
inline bool iequal_from(const std::string& str, size_t offset, const std::string& low) {
|
|
return str.length() == low.length() + offset &&
|
|
std::equal(std::begin(low), std::end(low), str.begin() + offset,
|
|
[](char c1, char c2) { return c1 == lower(c2); });
|
|
}
|
|
|
|
inline bool iequal(const std::string& str, const std::string& low) {
|
|
return iequal_from(str, 0, low);
|
|
}
|
|
|
|
inline bool istarts_with(const std::string& str, const std::string& prefix) {
|
|
return str.length() >= prefix.length() &&
|
|
std::equal(std::begin(prefix), std::end(prefix), str.begin(),
|
|
[](char c1, char c2) { return c1 == lower(c2); });
|
|
}
|
|
inline bool iends_with(const std::string& str, const std::string& suffix) {
|
|
size_t sl = suffix.length();
|
|
return str.length() >= sl &&
|
|
std::equal(std::begin(suffix), std::end(suffix), str.end() - sl,
|
|
[](char c1, char c2) { return c1 == lower(c2); });
|
|
}
|
|
|
|
inline bool giends_with(const std::string& str, const std::string& suffix) {
|
|
return iends_with(str, suffix) || iends_with(str, suffix + ".gz");
|
|
}
|
|
|
|
inline std::string trim_str(const std::string& str) {
|
|
const std::string ws = " \r\n\t";
|
|
std::string::size_type first = str.find_first_not_of(ws);
|
|
if (first == std::string::npos)
|
|
return std::string{};
|
|
std::string::size_type last = str.find_last_not_of(ws);
|
|
return str.substr(first, last - first + 1);
|
|
}
|
|
|
|
inline std::string rtrim_str(const std::string& str) {
|
|
std::string::size_type last = str.find_last_not_of(" \r\n\t");
|
|
return str.substr(0, last == std::string::npos ? 0 : last + 1);
|
|
}
|
|
|
|
// end is after the last character of the string (typically \0)
|
|
inline const char* rtrim_cstr(const char* start, const char* end=nullptr) {
|
|
if (!start)
|
|
return nullptr;
|
|
if (!end) {
|
|
end = start;
|
|
while (*end != '\0')
|
|
++end;
|
|
}
|
|
while (end > start && std::isspace(end[-1]))
|
|
--end;
|
|
return end;
|
|
}
|
|
|
|
namespace impl {
|
|
inline size_t length(char) { return 1; }
|
|
inline size_t length(const std::string& s) { return s.length(); }
|
|
}
|
|
|
|
// takes a single separator (usually char or string);
|
|
// may return empty fields
|
|
template<typename S>
|
|
void split_str_into(const std::string& str, S sep,
|
|
std::vector<std::string>& result) {
|
|
std::size_t start = 0, end;
|
|
while ((end = str.find(sep, start)) != std::string::npos) {
|
|
result.emplace_back(str, start, end - start);
|
|
start = end + impl::length(sep);
|
|
}
|
|
result.emplace_back(str, start);
|
|
}
|
|
|
|
template<typename S>
|
|
std::vector<std::string> split_str(const std::string& str, S sep) {
|
|
std::vector<std::string> result;
|
|
split_str_into(str, sep, result);
|
|
return result;
|
|
}
|
|
|
|
// _multi variants takes multiple 1-char separators as a string;
|
|
// discards empty fields
|
|
inline void split_str_into_multi(const std::string& str, const char* seps,
|
|
std::vector<std::string>& result) {
|
|
std::size_t start = str.find_first_not_of(seps);
|
|
while (start != std::string::npos) {
|
|
std::size_t end = str.find_first_of(seps, start);
|
|
result.emplace_back(str, start, end - start);
|
|
start = str.find_first_not_of(seps, end);
|
|
}
|
|
}
|
|
|
|
inline std::vector<std::string> split_str_multi(const std::string& str,
|
|
const char* seps=" \t") {
|
|
std::vector<std::string> result;
|
|
split_str_into_multi(str, seps, result);
|
|
return result;
|
|
}
|
|
|
|
template<typename T, typename S, typename F>
|
|
std::string join_str(T begin, T end, const S& sep, const F& getter) {
|
|
std::string r;
|
|
bool first = true;
|
|
for (T i = begin; i != end; ++i) {
|
|
if (!first)
|
|
r += sep;
|
|
r += getter(*i);
|
|
first = false;
|
|
}
|
|
return r;
|
|
}
|
|
|
|
template<typename T, typename S>
|
|
std::string join_str(T begin, T end, const S& sep) {
|
|
return join_str(begin, end, sep, [](const std::string& t) { return t; });
|
|
}
|
|
|
|
template<typename T, typename S, typename F>
|
|
std::string join_str(const T& iterable, const S& sep, const F& getter) {
|
|
return join_str(iterable.begin(), iterable.end(), sep, getter);
|
|
}
|
|
|
|
template<typename T, typename S>
|
|
std::string join_str(const T& iterable, const S& sep) {
|
|
return join_str(iterable.begin(), iterable.end(), sep);
|
|
}
|
|
|
|
template<typename T, typename S>
|
|
void string_append_sep(std::string& str, S sep, const T& item) {
|
|
if (!str.empty())
|
|
str += sep;
|
|
str += item;
|
|
}
|
|
|
|
inline void replace_all(std::string &s,
|
|
const std::string &old, const std::string &new_) {
|
|
std::string::size_type pos = 0;
|
|
while ((pos = s.find(old, pos)) != std::string::npos) {
|
|
s.replace(pos, old.size(), new_);
|
|
pos += new_.size();
|
|
}
|
|
}
|
|
|
|
// list is a comma separated string
|
|
inline bool is_in_list(const std::string& name, const std::string& list,
|
|
char sep=',') {
|
|
if (name.length() >= list.length())
|
|
return name == list;
|
|
for (size_t start=0, end=0; end != std::string::npos; start=end+1) {
|
|
end = list.find(sep, start);
|
|
if (list.compare(start, end - start, name) == 0)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// ##### vector helpers #####
|
|
|
|
template <class T>
|
|
bool in_vector(const T& x, const std::vector<T>& v) {
|
|
return std::find(v.begin(), v.end(), x) != v.end();
|
|
}
|
|
|
|
template <typename F, typename T>
|
|
bool in_vector_f(F f, const std::vector<T>& v) {
|
|
return std::find_if(v.begin(), v.end(), f) != v.end();
|
|
}
|
|
|
|
template <class T>
|
|
T* vector_end_ptr(std::vector<T>& v) { return v.data() + v.size(); }
|
|
template <class T>
|
|
const T* vector_end_ptr(const std::vector<T>& v) { return v.data() + v.size(); }
|
|
|
|
template <class T>
|
|
void vector_move_extend(std::vector<T>& dst, std::vector<T>&& src) {
|
|
if (dst.empty())
|
|
dst = std::move(src);
|
|
else
|
|
dst.insert(dst.end(), std::make_move_iterator(src.begin()),
|
|
std::make_move_iterator(src.end()));
|
|
}
|
|
|
|
// wrapper around the erase-remove idiom
|
|
template <class T, typename F>
|
|
void vector_remove_if(std::vector<T>& v, F&& condition) {
|
|
v.erase(std::remove_if(v.begin(), v.end(), condition), v.end());
|
|
}
|
|
|
|
/// \par data - 2d array (old_width x length) in a vector
|
|
/// Insert \par n new columns at position pos.
|
|
template <class T>
|
|
void vector_insert_columns(std::vector<T>& data, size_t old_width,
|
|
size_t length, size_t n, size_t pos, const T& new_value) {
|
|
assert(data.size() == old_width * length);
|
|
assert(pos <= old_width);
|
|
data.resize(data.size() + n * length);
|
|
typename std::vector<T>::iterator dst = data.end();
|
|
for (size_t i = length; i-- != 0; ) {
|
|
for (size_t j = old_width; j-- != pos; )
|
|
*--dst = data[i * old_width + j];
|
|
for (size_t j = n; j-- != 0; )
|
|
*--dst = new_value;
|
|
for (size_t j = pos; j-- != 0; )
|
|
*--dst = data[i * old_width + j];
|
|
}
|
|
assert(dst == data.begin());
|
|
}
|
|
/// \par data - 2d array with new_width+1 columns, in a vector
|
|
/// Remove column at position pos.
|
|
template <class T>
|
|
void vector_remove_column(std::vector<T>& data, size_t new_width, size_t pos) {
|
|
assert(pos <= new_width);
|
|
for (size_t source = pos + 1; source < data.size(); ++source)
|
|
for (size_t i = 0; i < new_width && source < data.size(); ++i)
|
|
data[pos++] = data[source++];
|
|
data.resize(pos);
|
|
}
|
|
|
|
|
|
// ##### other helpers #####
|
|
|
|
// Numeric ID used for case-insensitive comparison of 4 letters.
|
|
// s must have 4 chars or 3 chars + NUL, ' ' and NUL are equivalent in s.
|
|
constexpr int ialpha4_id(const char* s) {
|
|
return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) & ~0x20202020;
|
|
}
|
|
// Numeric ID used for case-insensitive comparison of 3 letters.
|
|
constexpr int ialpha3_id(const char* s) {
|
|
return (s[0] << 16 | s[1] << 8 | s[2]) & ~0x20202020;
|
|
}
|
|
|
|
} // namespace gemmi
|
|
#endif
|