Merge pull request #16 from slsdetectorgroup/numpy_tests

Moved functions to NumpyHelpers.cpp and added a few tests
2025-12-30 17:01:26 +01:00 · 2024-03-12 17:21:32 +01:00
parent 3fee6b792d 9380fd1be8
commit 954db79c36
13 changed files with 245 additions and 164 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -89,7 +89,13 @@
        "__tree": "cpp",
        "queue": "cpp",
        "stack": "cpp",
-        "shared_mutex": "cpp"
+        "shared_mutex": "cpp",
+        "cfenv": "cpp",
+        "complex": "cpp",
+        "regex": "cpp",
+        "source_location": "cpp",
+        "future": "cpp",
+        "typeindex": "cpp"
    },
    "C_Cpp.errorSquiggles": "enabled"
 }
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -13,3 +13,10 @@ target_link_libraries(core PUBLIC fmt::fmt PRIVATE aare_compiler_flags)
 set_property(TARGET core PROPERTY POSITION_INDEPENDENT_CODE ON)


+if(AARE_TESTS)
+    set(TestSources
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/defs.test.cpp
+        )
+    target_sources(tests PRIVATE ${TestSources} )
+    target_link_libraries(tests PRIVATE core)
+endif()
--- a/file_io/CMakeLists.txt
+++ b/file_io/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SourceFiles
    ${CMAKE_CURRENT_SOURCE_DIR}/src/SubFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFileFactory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.cpp
 )

 add_library(file_io STATIC ${SourceFiles})
@@ -20,3 +21,10 @@ target_include_directories(file_io PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 target_link_libraries(file_io PRIVATE fmt::fmt core nlohmann_json::nlohmann_json aare_compiler_flags)
 set_property(TARGET file_io PROPERTY POSITION_INDEPENDENT_CODE ON)

+if(AARE_TESTS)
+    set(TestSources
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp
+        )
+    target_sources(tests PRIVATE ${TestSources} )
+    target_link_libraries(tests PRIVATE core file_io)
+endif()
--- a/file_io/include/aare/File.hpp
+++ b/file_io/include/aare/File.hpp
@@ -12,7 +12,7 @@ template <DetectorType detector, typename DataType>
 class File {

  public:
-    virtual Frame<DataType>* get_frame(int frame_number) = 0;
+    virtual Frame<DataType>* get_frame(size_t frame_number) = 0;

  private:
  //comment
--- a/file_io/include/aare/JsonFile.hpp
+++ b/file_io/include/aare/JsonFile.hpp
@@ -9,7 +9,7 @@ template <DetectorType detector, typename DataType> class JsonFile : public File
        using config = RawFileConfig;
                public:

-    Frame<DataType> *get_frame(int frame_number);
+    Frame<DataType> *get_frame(size_t frame_number);
    int n_subfiles;
    std::vector<SubFile *> subfiles;
    int subfile_rows, subfile_cols;
--- a/file_io/include/aare/NumpyFile.hpp
+++ b/file_io/include/aare/NumpyFile.hpp
@@ -1,42 +1,17 @@
 #pragma once
 #include "aare/File.hpp"
 #include "aare/defs.hpp"
+#include "aare/NumpyHelpers.hpp"
 #include <iostream>
 #include <numeric>

-using shape_t = std::vector<uint64_t>;

-struct dtype_t {
-    char byteorder;
-    char kind;
-    unsigned int itemsize;
-    std::string to_string() {
-        std::stringstream sstm;
-        sstm << byteorder << kind << itemsize;
-        return sstm.str();
-    }
-};
-struct header_t {
-    dtype_t dtype;
-    bool fortran_order;
-    shape_t shape;
-    std::string to_string() {
-        std::stringstream sstm;
-        sstm << "dtype: " << dtype.to_string() << ", fortran_order: " << fortran_order << ' ';
-
-        sstm << "shape: (";
-        for (auto item : shape)
-            sstm << item << ',';
-        sstm << ')';
-        return sstm.str();
-    }
-};
 template <DetectorType detector, typename DataType> class NumpyFile : public File<detector, DataType> {
    FILE *fp = nullptr;
    
  public:
    NumpyFile(std::filesystem::path fname);
-    Frame<DataType> *get_frame(int frame_number) override;
+    Frame<DataType> *get_frame(size_t frame_number) override;
    header_t header{};
    static constexpr std::array<char, 6> magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'};
    uint8_t major_ver_{};
--- a/file_io/include/aare/NumpyHelpers.hpp
+++ b/file_io/include/aare/NumpyHelpers.hpp
@@ -1,4 +1,5 @@

+#pragma once
 #include <algorithm>
 #include <array>
 #include <filesystem>
@@ -7,140 +8,57 @@
 #include <unordered_map>
 #include <vector>
 #include <sstream>
+#include <numeric>
+#include <iostream>

-inline std::string parse_str(const std::string &in) {
-    if ((in.front() == '\'') && (in.back() == '\''))
-        return in.substr(1, in.length() - 2);
+#include "aare/defs.hpp"

-    throw std::runtime_error("Invalid python string.");
-}
+using shape_t = std::vector<uint64_t>;
+
+struct dtype_t {
+    char byteorder;
+    char kind;
+    unsigned int itemsize;
+    std::string to_string() {
+        std::stringstream sstm;
+        sstm << byteorder << kind << itemsize;
+        return sstm.str();
+    }
+};
+struct header_t {
+    dtype_t dtype;
+    bool fortran_order;
+    shape_t shape;
+    std::string to_string() {
+        std::stringstream sstm;
+        sstm << "dtype: " << dtype.to_string() << ", fortran_order: " << fortran_order << ' ';
+
+        sstm << "shape: (";
+        for (auto item : shape)
+            sstm << item << ',';
+        sstm << ')';
+        return sstm.str();
+    }
+};
+
+
+std::string parse_str(const std::string &in);
 /**
  Removes leading and trailing whitespaces
  */
-inline std::string trim(const std::string &str) {
-    const std::string whitespace = " \t\n";
-    auto begin = str.find_first_not_of(whitespace);
+std::string trim(const std::string &str);

-    if (begin == std::string::npos)
-        return "";
+std::vector<std::string> parse_tuple(std::string in);

-    auto end = str.find_last_not_of(whitespace);
+bool parse_bool(const std::string &in);

-    return str.substr(begin, end - begin + 1);
-}
-inline std::vector<std::string> parse_tuple(std::string in) {
-    std::vector<std::string> v;
-    const char seperator = ',';
+std::string get_value_from_map(const std::string &mapstr);

-    in = trim(in);
-
-    if ((in.front() == '(') && (in.back() == ')'))
-        in = in.substr(1, in.length() - 2);
-    else
-        throw std::runtime_error("Invalid Python tuple.");
-
-    std::istringstream iss(in);
-
-    for (std::string token; std::getline(iss, token, seperator);) {
-        v.push_back(token);
-    }
-
-    return v;
-}
-inline bool parse_bool(const std::string &in) {
-    if (in == "True")
-        return true;
-    if (in == "False")
-        return false;
-
-    throw std::runtime_error("Invalid python boolan.");
-}
-
-inline std::string get_value_from_map(const std::string &mapstr) {
-    size_t sep_pos = mapstr.find_first_of(":");
-    if (sep_pos == std::string::npos)
-        return "";
-
-    std::string tmp = mapstr.substr(sep_pos + 1);
-    return trim(tmp);
-}
-std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys) {
-    std::unordered_map<std::string, std::string> map;
-    if (keys.size() == 0)
-        return map;
-
-    in = trim(in);
-
-    // unwrap dictionary
-    if ((in.front() == '{') && (in.back() == '}'))
-        in = in.substr(1, in.length() - 2);
-    else
-        throw std::runtime_error("Not a Python dictionary.");
-
-    std::vector<std::pair<size_t, std::string>> positions;
-
-    for (auto const &value : keys) {
-        size_t pos = in.find("'" + value + "'");
-
-        if (pos == std::string::npos)
-            throw std::runtime_error("Missing '" + value + "' key.");
-
-        std::pair<size_t, std::string> position_pair{pos, value};
-        positions.push_back(position_pair);
-    }
-
-    // sort by position in dict
-    std::sort(positions.begin(), positions.end());
-
-    for (size_t i = 0; i < positions.size(); ++i) {
-        std::string raw_value;
-        size_t begin{positions[i].first};
-        size_t end{std::string::npos};
-
-        std::string key = positions[i].second;
-
-        if (i + 1 < positions.size())
-            end = positions[i + 1].first;
-
-        raw_value = in.substr(begin, end - begin);
-
-        raw_value = trim(raw_value);
-
-        if (raw_value.back() == ',')
-            raw_value.pop_back();
-
-        map[key] = get_value_from_map(raw_value);
-    }
-
-    return map;
-}
+std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys);

 template <typename T, size_t N> inline bool in_array(T val, const std::array<T, N> &arr) {
    return std::find(std::begin(arr), std::end(arr), val) != std::end(arr);
 }
-inline bool is_digits(const std::string &str) { return std::all_of(str.begin(), str.end(), ::isdigit); }
+bool is_digits(const std::string &str);

-inline dtype_t parse_descr(std::string typestring) {
-    if (typestring.length() < 3) {
-        throw std::runtime_error("invalid typestring (length)");
-    }
-
-    char byteorder_c = typestring.at(0);
-    char kind_c = typestring.at(1);
-    std::string itemsize_s = typestring.substr(2);
-
-    if (!in_array(byteorder_c, endian_chars)) {
-        throw std::runtime_error("invalid typestring (byteorder)");
-    }
-
-    if (!in_array(kind_c, numtype_chars)) {
-        throw std::runtime_error("invalid typestring (kind)");
-    }
-
-    if (!is_digits(itemsize_s)) {
-        throw std::runtime_error("invalid typestring (itemsize)");
-    }
-    unsigned int itemsize = std::stoul(itemsize_s);
-
-    return {byteorder_c, kind_c, itemsize};
-}
+dtype_t parse_descr(std::string typestring);
--- a/file_io/src/JsonFile.cpp
+++ b/file_io/src/JsonFile.cpp
@@ -1,7 +1,7 @@
 #include "aare/JsonFile.hpp"

 template <DetectorType detector, typename DataType>
-Frame<DataType> *JsonFile<detector, DataType>::get_frame(int frame_number) {
+Frame<DataType> *JsonFile<detector, DataType>::get_frame(size_t frame_number) {
    if (frame_number > this->total_frames) {
        throw std::runtime_error("Frame number out of range");
    }
--- a/file_io/src/NumpyFile.cpp
+++ b/file_io/src/NumpyFile.cpp
@@ -2,13 +2,13 @@
 #include "aare/NumpyFile.hpp"

 template <DetectorType detector, typename DataType>
-NumpyFile<detector, DataType>::NumpyFile(std::filesystem::path fname){
-    this->fname = fname;
-    fp = fopen(fname.c_str(), "rb");
+NumpyFile<detector, DataType>::NumpyFile(std::filesystem::path fname_){
+    this->fname = fname_;
+    fp = fopen(this->fname.c_str(), "rb");
 }

 template <DetectorType detector, typename DataType>
-Frame<DataType> *NumpyFile<detector, DataType>::get_frame(int frame_number) {
+Frame<DataType> *NumpyFile<detector, DataType>::get_frame(size_t frame_number) {
    if (fp == nullptr) {
        throw std::runtime_error("File not open");
    }
--- a/file_io/src/NumpyFileFactory.cpp
+++ b/file_io/src/NumpyFileFactory.cpp
@@ -78,12 +78,11 @@ template <DetectorType detector, typename DataType>
 File<detector, DataType>* NumpyFileFactory<detector, DataType>::load_file() {
    NumpyFile<detector, DataType> *file = new NumpyFile<detector, DataType>(this->m_fpath);
    parse_metadata(file);
-    NumpyFile<detector, DataType> *f = dynamic_cast<NumpyFile<detector, DataType> *>(file);
-    std::cout << "parsed header: " << f->header.to_string() << std::endl;
+    std::cout << "parsed header: " << file->header.to_string() << std::endl;

-    if(sizeof(DataType) != f->header.dtype.itemsize){
+    if(sizeof(DataType) != file->header.dtype.itemsize){
        std::stringstream  s;
-        s << "Data type size mismatch: " << sizeof(DataType) << " != " << f->header.dtype.itemsize;
+        s << "Data type size mismatch: " << sizeof(DataType) << " != " << file->header.dtype.itemsize;
        throw std::runtime_error(s.str());
    }       
    return file;
--- a/file_io/src/NumpyHelpers.cpp
+++ b/file_io/src/NumpyHelpers.cpp
@@ -0,0 +1,137 @@
+#include "aare/NumpyHelpers.hpp"
+
+std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys) {
+    std::unordered_map<std::string, std::string> map;
+    if (keys.size() == 0)
+        return map;
+
+    in = trim(in);
+
+    // unwrap dictionary
+    if ((in.front() == '{') && (in.back() == '}'))
+        in = in.substr(1, in.length() - 2);
+    else
+        throw std::runtime_error("Not a Python dictionary.");
+
+    std::vector<std::pair<size_t, std::string>> positions;
+
+    for (auto const &value : keys) {
+        size_t pos = in.find("'" + value + "'");
+
+        if (pos == std::string::npos)
+            throw std::runtime_error("Missing '" + value + "' key.");
+
+        std::pair<size_t, std::string> position_pair{pos, value};
+        positions.push_back(position_pair);
+    }
+
+    // sort by position in dict
+    std::sort(positions.begin(), positions.end());
+
+    for (size_t i = 0; i < positions.size(); ++i) {
+        std::string raw_value;
+        size_t begin{positions[i].first};
+        size_t end{std::string::npos};
+
+        std::string key = positions[i].second;
+
+        if (i + 1 < positions.size())
+            end = positions[i + 1].first;
+
+        raw_value = in.substr(begin, end - begin);
+
+        raw_value = trim(raw_value);
+
+        if (raw_value.back() == ',')
+            raw_value.pop_back();
+
+        map[key] = get_value_from_map(raw_value);
+    }
+
+    return map;
+}
+
+
+dtype_t parse_descr(std::string typestring) {
+    if (typestring.length() < 3) {
+        throw std::runtime_error("invalid typestring (length)");
+    }
+
+    char byteorder_c = typestring.at(0);
+    char kind_c = typestring.at(1);
+    std::string itemsize_s = typestring.substr(2);
+
+    if (!in_array(byteorder_c, endian_chars)) {
+        throw std::runtime_error("invalid typestring (byteorder)");
+    }
+
+    if (!in_array(kind_c, numtype_chars)) {
+        throw std::runtime_error("invalid typestring (kind)");
+    }
+
+    if (!is_digits(itemsize_s)) {
+        throw std::runtime_error("invalid typestring (itemsize)");
+    }
+    unsigned int itemsize = std::stoul(itemsize_s);
+
+    return {byteorder_c, kind_c, itemsize};
+}
+
+bool parse_bool(const std::string &in) {
+    if (in == "True")
+        return true;
+    if (in == "False")
+        return false;
+
+    throw std::runtime_error("Invalid python boolan.");
+}
+
+std::string get_value_from_map(const std::string &mapstr) {
+    size_t sep_pos = mapstr.find_first_of(":");
+    if (sep_pos == std::string::npos)
+        return "";
+
+    std::string tmp = mapstr.substr(sep_pos + 1);
+    return trim(tmp);
+}
+
+bool is_digits(const std::string &str) { return std::all_of(str.begin(), str.end(), ::isdigit); }
+
+std::vector<std::string> parse_tuple(std::string in) {
+    std::vector<std::string> v;
+    const char seperator = ',';
+
+    in = trim(in);
+
+    if ((in.front() == '(') && (in.back() == ')'))
+        in = in.substr(1, in.length() - 2);
+    else
+        throw std::runtime_error("Invalid Python tuple.");
+
+    std::istringstream iss(in);
+
+    for (std::string token; std::getline(iss, token, seperator);) {
+        v.push_back(token);
+    }
+
+    return v;
+}
+
+std::string trim(const std::string &str) {
+    const std::string whitespace = " \t\n";
+    auto begin = str.find_first_not_of(whitespace);
+
+    if (begin == std::string::npos)
+        return "";
+
+    auto end = str.find_last_not_of(whitespace);
+
+    return str.substr(begin, end - begin + 1);
+}
+
+std::string parse_str(const std::string &in) {
+    if ((in.front() == '\'') && (in.back() == '\''))
+        return in.substr(1, in.length() - 2);
+
+    throw std::runtime_error("Invalid python string.");
+}
--- a/file_io/src/NumpyHelpers.test.cpp
+++ b/file_io/src/NumpyHelpers.test.cpp
@@ -0,0 +1,31 @@
+#include <catch2/catch_test_macros.hpp>
+#include "aare/NumpyHelpers.hpp" //Is this really a public header?
+
+TEST_CASE("is_digits with a few standard cases"){
+    REQUIRE(is_digits(""));
+    REQUIRE(is_digits("123"));
+    REQUIRE(is_digits("0"));
+    REQUIRE_FALSE(is_digits("hej123"));
+    REQUIRE_FALSE(is_digits("a"));
+    REQUIRE_FALSE(is_digits(" "));
+    REQUIRE_FALSE(is_digits("abcdef"));
+}
+
+TEST_CASE("Check for quotes and return stripped string"){
+    REQUIRE(parse_str("'hej'") == "hej");    
+    REQUIRE(parse_str("'hej hej'") == "hej hej");    
+    REQUIRE(parse_str("''") == "");    
+}
+
+TEST_CASE("parsing a string without quotes throws"){
+    REQUIRE_THROWS(parse_str("hej"));
+}
+
+TEST_CASE("trim whitespace"){
+    REQUIRE(trim(" hej ") == "hej");
+    REQUIRE(trim("hej") == "hej");
+    REQUIRE(trim(" hej") == "hej");
+    REQUIRE(trim("hej ") == "hej");
+    REQUIRE(trim(" ") == "");
+    REQUIRE(trim(" \thej hej ") == "hej hej");
+}
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -19,9 +19,9 @@ include(CTest)
 include(Catch)
 catch_discover_tests(tests)

-if(AARE_BUILD_TESTS)
+if(AARE_TESTS)
    set(TestSources
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/defs.test.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/test.cpp
        )
    target_sources(tests PRIVATE ${TestSources} )