moved functions to NumpyHelpers.cpp and added a few tests

2025-12-31 17:31:25 +01:00 · 2024-03-12 16:52:36 +01:00
parent 3fee6b792d
commit 609c754b23
8 changed files with 234 additions and 152 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -89,7 +89,13 @@
        "__tree": "cpp",
        "queue": "cpp",
        "stack": "cpp",
-        "shared_mutex": "cpp"
+        "shared_mutex": "cpp",
        "cfenv": "cpp",
        "complex": "cpp",
        "regex": "cpp",
        "source_location": "cpp",
        "future": "cpp",
        "typeindex": "cpp"
    },
    "C_Cpp.errorSquiggles": "enabled"
 }
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -13,3 +13,10 @@ target_link_libraries(core PUBLIC fmt::fmt PRIVATE aare_compiler_flags)
 set_property(TARGET core PROPERTY POSITION_INDEPENDENT_CODE ON)
 # When AARE_TESTS is set, add core's unit-test sources to the `tests` target
 # (which is not created in the lines shown here) and link that target
 # against core.
 if(AARE_TESTS)
    set(TestSources
            ${CMAKE_CURRENT_SOURCE_DIR}/src/defs.test.cpp
        )
    target_sources(tests PRIVATE ${TestSources} )
    target_link_libraries(tests PRIVATE core)
 endif()
--- a/file_io/CMakeLists.txt
+++ b/file_io/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SourceFiles
    ${CMAKE_CURRENT_SOURCE_DIR}/src/SubFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFileFactory.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.cpp
 )
 add_library(file_io STATIC ${SourceFiles})
@@ -20,3 +21,10 @@ target_include_directories(file_io PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 target_link_libraries(file_io PRIVATE fmt::fmt core nlohmann_json::nlohmann_json aare_compiler_flags)
 set_property(TARGET file_io PROPERTY POSITION_INDEPENDENT_CODE ON)
 # When AARE_TESTS is set, add file_io's unit-test sources to the `tests`
 # target (which is not created in the lines shown here) and link that target
 # against both core and file_io.
 if(AARE_TESTS)
    set(TestSources
            ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp
        )
    target_sources(tests PRIVATE ${TestSources} )
    target_link_libraries(tests PRIVATE core file_io)
 endif()
--- a/file_io/include/aare/NumpyFile.hpp
+++ b/file_io/include/aare/NumpyFile.hpp
@@ -1,36 +1,11 @@
 #pragma once
 #include "aare/File.hpp"
 #include "aare/defs.hpp"
 #include "aare/NumpyHelpers.hpp"
 #include <iostream>
 #include <numeric>
 using shape_t = std::vector<uint64_t>;
 /**
  Element type description made of a byte-order character, a kind character
  and an item size (e.g. '<', 'u', 2).
  */
 struct dtype_t {
     char byteorder;
     char kind;
     unsigned int itemsize;

     /**
      Returns byteorder, kind and itemsize concatenated, e.g. "<u2".
      Declared const so it can be called on const objects and references.
      */
     std::string to_string() const {
         std::ostringstream out;
         out << byteorder << kind << itemsize;
         return out.str();
     }
 };
 /**
  Parsed array header: element type, fortran_order flag and shape.
  */
 struct header_t {
     dtype_t dtype;
     bool fortran_order;
     shape_t shape;

     /**
      Returns a one-line description such as
      "dtype: <u2, fortran_order: 0 shape: (3,4,)".
      Declared const so it can be called on const objects and references.
      */
     std::string to_string() const {
         std::ostringstream out;
         // The dtype fields are streamed directly (same text as
         // dtype_t::to_string produces) so that this method does not depend
         // on the const-qualification of dtype_t::to_string.
         out << "dtype: " << dtype.byteorder << dtype.kind << dtype.itemsize
             << ", fortran_order: " << fortran_order << ' ';
         out << "shape: (";
         // Every extent is followed by a comma, including the last one.
         for (const auto extent : shape)
             out << extent << ',';
         out << ')';
         return out.str();
     }
 };
 template <DetectorType detector, typename DataType> class NumpyFile : public File<detector, DataType> {
    FILE *fp = nullptr;
--- a/file_io/include/aare/NumpyHelpers.hpp
+++ b/file_io/include/aare/NumpyHelpers.hpp
@@ -1,4 +1,5 @@
 #pragma once
 #include <algorithm>
 #include <array>
 #include <filesystem>
@@ -7,140 +8,57 @@
 #include <unordered_map>
 #include <vector>
 #include <sstream>
 #include <numeric>
 #include <iostream>
-inline std::string parse_str(const std::string &in) {
+#include "aare/defs.hpp"
    if ((in.front() == '\'') && (in.back() == '\''))
        return in.substr(1, in.length() - 2);
-    throw std::runtime_error("Invalid python string.");
+using shape_t = std::vector<uint64_t>;
-}
+
 /**
  Element type description made of a byte-order character, a kind character
  and an item size (e.g. '<', 'u', 2).
  */
 struct dtype_t {
     char byteorder;
     char kind;
     unsigned int itemsize;

     /**
      Returns byteorder, kind and itemsize concatenated, e.g. "<u2".
      Declared const so it can be called on const objects and references.
      */
     std::string to_string() const {
         std::ostringstream out;
         out << byteorder << kind << itemsize;
         return out.str();
     }
 };
 /**
  Parsed array header: element type, fortran_order flag and shape.
  */
 struct header_t {
     dtype_t dtype;
     bool fortran_order;
     shape_t shape;

     /**
      Returns a one-line description such as
      "dtype: <u2, fortran_order: 0 shape: (3,4,)".
      Declared const so it can be called on const objects and references.
      */
     std::string to_string() const {
         std::ostringstream out;
         // The dtype fields are streamed directly (same text as
         // dtype_t::to_string produces) so that this method does not depend
         // on the const-qualification of dtype_t::to_string.
         out << "dtype: " << dtype.byteorder << dtype.kind << dtype.itemsize
             << ", fortran_order: " << fortran_order << ' ';
         out << "shape: (";
         // Every extent is followed by a comma, including the last one.
         for (const auto extent : shape)
             out << extent << ',';
         out << ')';
         return out.str();
     }
 };
 std::string parse_str(const std::string &in);
 /**
  Removes leading and trailing whitespace (spaces, tabs and newlines).
  */
-inline std::string trim(const std::string &str) {
+std::string trim(const std::string &str);
    const std::string whitespace = " \t\n";
    auto begin = str.find_first_not_of(whitespace);
-    if (begin == std::string::npos)
+std::vector<std::string> parse_tuple(std::string in);
        return "";
-    auto end = str.find_last_not_of(whitespace);
+bool parse_bool(const std::string &in);
-    return str.substr(begin, end - begin + 1);
+std::string get_value_from_map(const std::string &mapstr);
 }
 inline std::vector<std::string> parse_tuple(std::string in) {
    std::vector<std::string> v;
    const char seperator = ',';
-    in = trim(in);
+std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys);
    if ((in.front() == '(') && (in.back() == ')'))
        in = in.substr(1, in.length() - 2);
    else
        throw std::runtime_error("Invalid Python tuple.");
    std::istringstream iss(in);
    for (std::string token; std::getline(iss, token, seperator);) {
        v.push_back(token);
    }
    return v;
 }
 /**
  Converts the Python literals "True" and "False" to a bool.
  The comparison is exact: no trimming and no case folding is done.
  Throws std::runtime_error for any other input.
  */
 inline bool parse_bool(const std::string &in) {
     if (in == "True")
         return true;
     if (in == "False")
         return false;
     throw std::runtime_error("Invalid python boolean.");
 }
 /**
  Returns the trimmed text after the first ':' of a "key: value" entry,
  or an empty string when the entry contains no ':'.
  */
 inline std::string get_value_from_map(const std::string &mapstr) {
     const auto colon = mapstr.find(':');
     if (colon != std::string::npos)
         return trim(mapstr.substr(colon + 1));
     return "";
 }
 /**
  Extracts the raw value strings for the requested keys from a flat Python
  dictionary literal such as "{'descr': '<u2', 'fortran_order': False, }".

  Returns an empty map when keys is empty. Throws std::runtime_error when the
  input is not wrapped in braces or when one of the keys is missing.

  Declared inline because the definition lives in a header; without it every
  translation unit including the header would emit its own definition.
  */
 inline std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys) {
     std::unordered_map<std::string, std::string> map;
     if (keys.empty())
         return map;

     in = trim(in);
     // The size check keeps front()/back() away from an empty string.
     if (in.size() < 2 || in.front() != '{' || in.back() != '}')
         throw std::runtime_error("Not a Python dictionary.");
     // unwrap dictionary
     in = in.substr(1, in.size() - 2);

     std::vector<std::pair<size_t, std::string>> positions;
     positions.reserve(keys.size());
     for (const auto &key : keys) {
         const size_t pos = in.find("'" + key + "'");
         if (pos == std::string::npos)
             throw std::runtime_error("Missing '" + key + "' key.");
         positions.emplace_back(pos, key);
     }

     // sort by position in dict so that each entry ends where the next begins
     std::sort(positions.begin(), positions.end());

     for (size_t i = 0; i < positions.size(); ++i) {
         const size_t begin = positions[i].first;
         const size_t end = (i + 1 < positions.size()) ? positions[i + 1].first : std::string::npos;
         std::string raw_value = trim(in.substr(begin, end - begin));
         // The slice is empty when the same key was requested twice.
         if (!raw_value.empty() && raw_value.back() == ',')
             raw_value.pop_back();
         map[positions[i].second] = get_value_from_map(raw_value);
     }
     return map;
 }
 /**
  Returns true when val compares equal to at least one element of arr.
  */
 template <typename T, size_t N> inline bool in_array(T val, const std::array<T, N> &arr) {
     return std::any_of(arr.begin(), arr.end(), [&val](const T &element) { return element == val; });
 }
-inline bool is_digits(const std::string &str) { return std::all_of(str.begin(), str.end(), ::isdigit); }
+bool is_digits(const std::string &str);
-inline dtype_t parse_descr(std::string typestring) {
+dtype_t parse_descr(std::string typestring);
    if (typestring.length() < 3) {
        throw std::runtime_error("invalid typestring (length)");
    }
    char byteorder_c = typestring.at(0);
    char kind_c = typestring.at(1);
    std::string itemsize_s = typestring.substr(2);
    if (!in_array(byteorder_c, endian_chars)) {
        throw std::runtime_error("invalid typestring (byteorder)");
    }
    if (!in_array(kind_c, numtype_chars)) {
        throw std::runtime_error("invalid typestring (kind)");
    }
    if (!is_digits(itemsize_s)) {
        throw std::runtime_error("invalid typestring (itemsize)");
    }
    unsigned int itemsize = std::stoul(itemsize_s);
    return {byteorder_c, kind_c, itemsize};
 }
--- a/file_io/src/NumpyHelpers.cpp
+++ b/file_io/src/NumpyHelpers.cpp
@@ -0,0 +1,137 @@
 #include "aare/NumpyHelpers.hpp"
 /**
  Extracts the raw value strings for the requested keys from a flat Python
  dictionary literal such as "{'descr': '<u2', 'fortran_order': False, }".

  Returns an empty map when keys is empty. Throws std::runtime_error when the
  input is not wrapped in braces or when one of the keys is missing.
  */
 std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys) {
     std::unordered_map<std::string, std::string> map;
     if (keys.empty())
         return map;

     in = trim(in);
     // The size check keeps front()/back() away from an empty string.
     if (in.size() < 2 || in.front() != '{' || in.back() != '}')
         throw std::runtime_error("Not a Python dictionary.");
     // unwrap dictionary
     in = in.substr(1, in.size() - 2);

     std::vector<std::pair<size_t, std::string>> positions;
     positions.reserve(keys.size());
     for (const auto &key : keys) {
         const size_t pos = in.find("'" + key + "'");
         if (pos == std::string::npos)
             throw std::runtime_error("Missing '" + key + "' key.");
         positions.emplace_back(pos, key);
     }

     // sort by position in dict so that each entry ends where the next begins
     std::sort(positions.begin(), positions.end());

     for (size_t i = 0; i < positions.size(); ++i) {
         const size_t begin = positions[i].first;
         const size_t end = (i + 1 < positions.size()) ? positions[i + 1].first : std::string::npos;
         std::string raw_value = trim(in.substr(begin, end - begin));
         // The slice is empty when the same key was requested twice.
         if (!raw_value.empty() && raw_value.back() == ',')
             raw_value.pop_back();
         map[positions[i].second] = get_value_from_map(raw_value);
     }
     return map;
 }
 /**
  Splits a type string such as "<u2" into byte order, kind and item size.

  Throws std::runtime_error when the string is shorter than three characters,
  when the first character is not in endian_chars, when the second is not in
  numtype_chars, or when the remainder is not made of digits only.
  */
 dtype_t parse_descr(std::string typestring) {
     if (typestring.size() < 3)
         throw std::runtime_error("invalid typestring (length)");

     // Indexing is safe: the length was checked above.
     const char order = typestring[0];
     const char kind = typestring[1];
     const std::string digits = typestring.substr(2);

     if (!in_array(order, endian_chars))
         throw std::runtime_error("invalid typestring (byteorder)");
     if (!in_array(kind, numtype_chars))
         throw std::runtime_error("invalid typestring (kind)");
     if (!is_digits(digits))
         throw std::runtime_error("invalid typestring (itemsize)");

     const auto size = static_cast<unsigned int>(std::stoul(digits));
     return {order, kind, size};
 }
 /**
  Converts the Python literals "True" and "False" to a bool.
  The comparison is exact: no trimming and no case folding is done.
  Throws std::runtime_error for any other input.
  */
 bool parse_bool(const std::string &in) {
     if (in == "True")
         return true;
     if (in == "False")
         return false;
     throw std::runtime_error("Invalid python boolean.");
 }
 /**
  Returns the trimmed text after the first ':' of a "key: value" entry,
  or an empty string when the entry contains no ':'.
  */
 std::string get_value_from_map(const std::string &mapstr) {
     const auto colon = mapstr.find(':');
     if (colon != std::string::npos)
         return trim(mapstr.substr(colon + 1));
     return "";
 }
 /**
  Returns true when every character of str is a decimal digit.
  An empty string yields true because there is no non-digit character.
  */
 bool is_digits(const std::string &str) {
     // isdigit must be given a value representable as unsigned char; handing
     // it a plain (possibly negative) char is undefined for non-ASCII bytes.
     return std::all_of(str.begin(), str.end(), [](unsigned char c) { return ::isdigit(c) != 0; });
 }
 /**
  Splits a Python tuple literal such as "(3, 4)" into its comma separated
  elements. The elements are returned as written, i.e. they are not trimmed.

  Throws std::runtime_error when the trimmed input is not wrapped in
  parentheses; this includes empty and whitespace-only input.
  */
 std::vector<std::string> parse_tuple(std::string in) {
     in = trim(in);
     // The size check keeps front()/back() away from an empty string.
     if (in.size() < 2 || in.front() != '(' || in.back() != ')')
         throw std::runtime_error("Invalid Python tuple.");

     std::istringstream content(in.substr(1, in.size() - 2));
     std::vector<std::string> elements;
     for (std::string token; std::getline(content, token, ',');)
         elements.push_back(token);
     return elements;
 }
 /**
  Returns a copy of str without leading and trailing spaces, tabs and
  newlines. A string made only of those characters yields "".
  */
 std::string trim(const std::string &str) {
     const char *const blanks = " \t\n";
     const auto first = str.find_first_not_of(blanks);
     if (first == std::string::npos)
         return "";
     const auto last = str.find_last_not_of(blanks);
     const auto count = last - first + 1;
     return str.substr(first, count);
 }
 /**
  Strips the surrounding single quotes from a Python string literal,
  e.g. "'hej'" becomes "hej".

  Throws std::runtime_error when the input is not enclosed in a pair of single
  quotes. Empty input and a lone "'" are rejected as well: the former would
  otherwise call front() on an empty string and the latter has no closing quote.
  */
 std::string parse_str(const std::string &in) {
     const bool quoted = in.size() >= 2 && in.front() == '\'' && in.back() == '\'';
     if (!quoted)
         throw std::runtime_error("Invalid python string.");
     return in.substr(1, in.size() - 2);
 }
--- a/file_io/src/NumpyHelpers.test.cpp
+++ b/file_io/src/NumpyHelpers.test.cpp
@@ -0,0 +1,31 @@
 #include <catch2/catch_test_macros.hpp>
 #include "aare/NumpyHelpers.hpp" //Is this really a public header?
 TEST_CASE("is_digits with a few standard cases"){
     // An empty string has no non-digit character and is therefore accepted.
     REQUIRE(is_digits(""));
     REQUIRE(is_digits("123"));
     REQUIRE(is_digits("0"));
     REQUIRE_FALSE(is_digits("hej123"));
     REQUIRE_FALSE(is_digits("a"));
     REQUIRE_FALSE(is_digits(" "));
     REQUIRE_FALSE(is_digits("abcdef"));
     // Signs, decimal points and embedded blanks are not digits.
     REQUIRE_FALSE(is_digits("-1"));
     REQUIRE_FALSE(is_digits("1.5"));
     REQUIRE_FALSE(is_digits("12 3"));
 }
 TEST_CASE("Check for quotes and return stripped string"){
     REQUIRE(parse_str("'hej'") == "hej");
     REQUIRE(parse_str("'hej hej'") == "hej hej");
     REQUIRE(parse_str("''") == "");
     REQUIRE(parse_str("'a'") == "a");
     // Only the outermost quotes are removed; inner quotes are kept.
     REQUIRE(parse_str("'it's'") == "it's");
 }
 TEST_CASE("parsing a string without quotes throws"){
     REQUIRE_THROWS(parse_str("hej"));
     // A quote on one side only is rejected too.
     REQUIRE_THROWS(parse_str("'hej"));
     REQUIRE_THROWS(parse_str("hej'"));
     // Double quotes are not accepted as delimiters.
     REQUIRE_THROWS(parse_str("\"hej\""));
 }
 TEST_CASE("trim whitespace"){
     REQUIRE(trim(" hej ") == "hej");
     REQUIRE(trim("hej") == "hej");
     REQUIRE(trim(" hej") == "hej");
     REQUIRE(trim("hej ") == "hej");
     REQUIRE(trim(" ") == "");
     REQUIRE(trim(" \thej hej ") == "hej hej");
     // Empty input and mixed tab/newline padding.
     REQUIRE(trim("") == "");
     REQUIRE(trim("\n\thej\t\n") == "hej");
     // Whitespace inside the string is left untouched.
     REQUIRE(trim("hej\thej") == "hej\thej");
 }
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -19,9 +19,9 @@ include(CTest)
 include(Catch)
 catch_discover_tests(tests)
-if(AARE_BUILD_TESTS)
+if(AARE_TESTS)
    set(TestSources
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/defs.test.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/test.cpp
        )
    target_sources(tests PRIVATE ${TestSources} )