From 609c754b239475814771bf94ef92c52bf5a5f345 Mon Sep 17 00:00:00 2001 From: Erik Frojdh Date: Tue, 12 Mar 2024 16:52:36 +0100 Subject: [PATCH 1/2] moved functions to NumpyHelper.cpp and added a few tests --- .vscode/settings.json | 8 +- core/CMakeLists.txt | 7 ++ file_io/CMakeLists.txt | 8 ++ file_io/include/aare/NumpyFile.hpp | 27 +---- file_io/include/aare/NumpyHelpers.hpp | 164 +++++++------------------- file_io/src/NumpyHelpers.cpp | 137 +++++++++++++++++++++ file_io/src/NumpyHelpers.test.cpp | 31 +++++ tests/CMakeLists.txt | 4 +- 8 files changed, 234 insertions(+), 152 deletions(-) create mode 100644 file_io/src/NumpyHelpers.cpp create mode 100644 file_io/src/NumpyHelpers.test.cpp diff --git a/.vscode/settings.json b/.vscode/settings.json index 24c4aa5..cebeef1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -89,7 +89,13 @@ "__tree": "cpp", "queue": "cpp", "stack": "cpp", - "shared_mutex": "cpp" + "shared_mutex": "cpp", + "cfenv": "cpp", + "complex": "cpp", + "regex": "cpp", + "source_location": "cpp", + "future": "cpp", + "typeindex": "cpp" }, "C_Cpp.errorSquiggles": "enabled" } \ No newline at end of file diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index b79651f..58dbd38 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -13,3 +13,10 @@ target_link_libraries(core PUBLIC fmt::fmt PRIVATE aare_compiler_flags) set_property(TARGET core PROPERTY POSITION_INDEPENDENT_CODE ON) +if(AARE_TESTS) + set(TestSources + ${CMAKE_CURRENT_SOURCE_DIR}/src/defs.test.cpp + ) + target_sources(tests PRIVATE ${TestSources} ) + target_link_libraries(tests PRIVATE core) +endif() \ No newline at end of file diff --git a/file_io/CMakeLists.txt b/file_io/CMakeLists.txt index abccbbb..5452079 100644 --- a/file_io/CMakeLists.txt +++ b/file_io/CMakeLists.txt @@ -13,6 +13,7 @@ set(SourceFiles ${CMAKE_CURRENT_SOURCE_DIR}/src/SubFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFileFactory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.cpp ) add_library(file_io STATIC ${SourceFiles}) @@ -20,3 +21,10 @@ target_include_directories(file_io PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) target_link_libraries(file_io PRIVATE fmt::fmt core nlohmann_json::nlohmann_json aare_compiler_flags) set_property(TARGET file_io PROPERTY POSITION_INDEPENDENT_CODE ON) +if(AARE_TESTS) + set(TestSources + ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp + ) + target_sources(tests PRIVATE ${TestSources} ) + target_link_libraries(tests PRIVATE core file_io) +endif() \ No newline at end of file diff --git a/file_io/include/aare/NumpyFile.hpp b/file_io/include/aare/NumpyFile.hpp index 33ab769..a5a57b2 100644 --- a/file_io/include/aare/NumpyFile.hpp +++ b/file_io/include/aare/NumpyFile.hpp @@ -1,36 +1,11 @@ #pragma once #include "aare/File.hpp" #include "aare/defs.hpp" +#include "aare/NumpyHelpers.hpp" #include #include -using shape_t = std::vector; -struct dtype_t { - char byteorder; - char kind; - unsigned int itemsize; - std::string to_string() { - std::stringstream sstm; - sstm << byteorder << kind << itemsize; - return sstm.str(); - } -}; -struct header_t { - dtype_t dtype; - bool fortran_order; - shape_t shape; - std::string to_string() { - std::stringstream sstm; - sstm << "dtype: " << dtype.to_string() << ", fortran_order: " << fortran_order << ' '; - - sstm << "shape: ("; - for (auto item : shape) - sstm << item << ','; - sstm << ')'; - return sstm.str(); - } -}; template class NumpyFile : public File { FILE *fp = nullptr; diff --git a/file_io/include/aare/NumpyHelpers.hpp b/file_io/include/aare/NumpyHelpers.hpp index 21b08d4..d81147a 100644 --- a/file_io/include/aare/NumpyHelpers.hpp +++ b/file_io/include/aare/NumpyHelpers.hpp @@ -1,4 +1,5 @@ +#pragma once #include #include #include @@ -7,140 +8,57 @@ #include #include #include +#include +#include -inline std::string parse_str(const std::string &in) { - if ((in.front() == '\'') && (in.back() == '\'')) - return in.substr(1, in.length() - 2); +#include "aare/defs.hpp" - throw std::runtime_error("Invalid python string."); -} +using shape_t = std::vector; + +struct dtype_t { + char byteorder; + char kind; + unsigned int itemsize; + std::string to_string() { + std::stringstream sstm; + sstm << byteorder << kind << itemsize; + return sstm.str(); + } +}; +struct header_t { + dtype_t dtype; + bool fortran_order; + shape_t shape; + std::string to_string() { + std::stringstream sstm; + sstm << "dtype: " << dtype.to_string() << ", fortran_order: " << fortran_order << ' '; + + sstm << "shape: ("; + for (auto item : shape) + sstm << item << ','; + sstm << ')'; + return sstm.str(); + } +}; + + +std::string parse_str(const std::string &in); /** Removes leading and trailing whitespaces */ -inline std::string trim(const std::string &str) { - const std::string whitespace = " \t\n"; - auto begin = str.find_first_not_of(whitespace); +std::string trim(const std::string &str); - if (begin == std::string::npos) - return ""; +std::vector parse_tuple(std::string in); - auto end = str.find_last_not_of(whitespace); +bool parse_bool(const std::string &in); - return str.substr(begin, end - begin + 1); -} -inline std::vector parse_tuple(std::string in) { - std::vector v; - const char seperator = ','; +std::string get_value_from_map(const std::string &mapstr); - in = trim(in); - - if ((in.front() == '(') && (in.back() == ')')) - in = in.substr(1, in.length() - 2); - else - throw std::runtime_error("Invalid Python tuple."); - - std::istringstream iss(in); - - for (std::string token; std::getline(iss, token, seperator);) { - v.push_back(token); - } - - return v; -} -inline bool parse_bool(const std::string &in) { - if (in == "True") - return true; - if (in == "False") - return false; - - throw std::runtime_error("Invalid python boolan."); -} - -inline std::string get_value_from_map(const std::string &mapstr) { - size_t sep_pos = mapstr.find_first_of(":"); - if (sep_pos == std::string::npos) - return ""; - - std::string tmp = mapstr.substr(sep_pos + 1); - return trim(tmp); -} -std::unordered_map parse_dict(std::string in, const std::vector &keys) { - std::unordered_map map; - if (keys.size() == 0) - return map; - - in = trim(in); - - // unwrap dictionary - if ((in.front() == '{') && (in.back() == '}')) - in = in.substr(1, in.length() - 2); - else - throw std::runtime_error("Not a Python dictionary."); - - std::vector> positions; - - for (auto const &value : keys) { - size_t pos = in.find("'" + value + "'"); - - if (pos == std::string::npos) - throw std::runtime_error("Missing '" + value + "' key."); - - std::pair position_pair{pos, value}; - positions.push_back(position_pair); - } - - // sort by position in dict - std::sort(positions.begin(), positions.end()); - - for (size_t i = 0; i < positions.size(); ++i) { - std::string raw_value; - size_t begin{positions[i].first}; - size_t end{std::string::npos}; - - std::string key = positions[i].second; - - if (i + 1 < positions.size()) - end = positions[i + 1].first; - - raw_value = in.substr(begin, end - begin); - - raw_value = trim(raw_value); - - if (raw_value.back() == ',') - raw_value.pop_back(); - - map[key] = get_value_from_map(raw_value); - } - - return map; -} +std::unordered_map parse_dict(std::string in, const std::vector &keys); template inline bool in_array(T val, const std::array &arr) { return std::find(std::begin(arr), std::end(arr), val) != std::end(arr); } -inline bool is_digits(const std::string &str) { return std::all_of(str.begin(), str.end(), ::isdigit); } +bool is_digits(const std::string &str); -inline dtype_t parse_descr(std::string typestring) { - if (typestring.length() < 3) { - throw std::runtime_error("invalid typestring (length)"); - } - - char byteorder_c = typestring.at(0); - char kind_c = typestring.at(1); - std::string itemsize_s = typestring.substr(2); - - if (!in_array(byteorder_c, endian_chars)) { - throw std::runtime_error("invalid typestring (byteorder)"); - } - - if (!in_array(kind_c, numtype_chars)) { - throw std::runtime_error("invalid typestring (kind)"); - } - - if (!is_digits(itemsize_s)) { - throw std::runtime_error("invalid typestring (itemsize)"); - } - unsigned int itemsize = std::stoul(itemsize_s); - - return {byteorder_c, kind_c, itemsize}; -} +dtype_t parse_descr(std::string typestring); diff --git a/file_io/src/NumpyHelpers.cpp b/file_io/src/NumpyHelpers.cpp new file mode 100644 index 0000000..d96df5e --- /dev/null +++ b/file_io/src/NumpyHelpers.cpp @@ -0,0 +1,137 @@ +#include "aare/NumpyHelpers.hpp" + +std::unordered_map parse_dict(std::string in, const std::vector &keys) { + std::unordered_map map; + if (keys.size() == 0) + return map; + + in = trim(in); + + // unwrap dictionary + if ((in.front() == '{') && (in.back() == '}')) + in = in.substr(1, in.length() - 2); + else + throw std::runtime_error("Not a Python dictionary."); + + std::vector> positions; + + for (auto const &value : keys) { + size_t pos = in.find("'" + value + "'"); + + if (pos == std::string::npos) + throw std::runtime_error("Missing '" + value + "' key."); + + std::pair position_pair{pos, value}; + positions.push_back(position_pair); + } + + // sort by position in dict + std::sort(positions.begin(), positions.end()); + + for (size_t i = 0; i < positions.size(); ++i) { + std::string raw_value; + size_t begin{positions[i].first}; + size_t end{std::string::npos}; + + std::string key = positions[i].second; + + if (i + 1 < positions.size()) + end = positions[i + 1].first; + + raw_value = in.substr(begin, end - begin); + + raw_value = trim(raw_value); + + if (raw_value.back() == ',') + raw_value.pop_back(); + + map[key] = get_value_from_map(raw_value); + } + + return map; +} + + +dtype_t parse_descr(std::string typestring) { + if (typestring.length() < 3) { + throw std::runtime_error("invalid typestring (length)"); + } + + char byteorder_c = typestring.at(0); + char kind_c = typestring.at(1); + std::string itemsize_s = typestring.substr(2); + + if (!in_array(byteorder_c, endian_chars)) { + throw std::runtime_error("invalid typestring (byteorder)"); + } + + if (!in_array(kind_c, numtype_chars)) { + throw std::runtime_error("invalid typestring (kind)"); + } + + if (!is_digits(itemsize_s)) { + throw std::runtime_error("invalid typestring (itemsize)"); + } + unsigned int itemsize = std::stoul(itemsize_s); + + return {byteorder_c, kind_c, itemsize}; +} + +bool parse_bool(const std::string &in) { + if (in == "True") + return true; + if (in == "False") + return false; + + throw std::runtime_error("Invalid python boolan."); +} + +std::string get_value_from_map(const std::string &mapstr) { + size_t sep_pos = mapstr.find_first_of(":"); + if (sep_pos == std::string::npos) + return ""; + + std::string tmp = mapstr.substr(sep_pos + 1); + return trim(tmp); +} + +bool is_digits(const std::string &str) { return std::all_of(str.begin(), str.end(), ::isdigit); } + +std::vector parse_tuple(std::string in) { + std::vector v; + const char seperator = ','; + + in = trim(in); + + if ((in.front() == '(') && (in.back() == ')')) + in = in.substr(1, in.length() - 2); + else + throw std::runtime_error("Invalid Python tuple."); + + std::istringstream iss(in); + + for (std::string token; std::getline(iss, token, seperator);) { + v.push_back(token); + } + + return v; +} + +std::string trim(const std::string &str) { + const std::string whitespace = " \t\n"; + auto begin = str.find_first_not_of(whitespace); + + if (begin == std::string::npos) + return ""; + + auto end = str.find_last_not_of(whitespace); + + return str.substr(begin, end - begin + 1); +} + +std::string parse_str(const std::string &in) { + if ((in.front() == '\'') && (in.back() == '\'')) + return in.substr(1, in.length() - 2); + + throw std::runtime_error("Invalid python string."); +} \ No newline at end of file diff --git a/file_io/src/NumpyHelpers.test.cpp b/file_io/src/NumpyHelpers.test.cpp new file mode 100644 index 0000000..dbdf2b7 --- /dev/null +++ b/file_io/src/NumpyHelpers.test.cpp @@ -0,0 +1,31 @@ +#include +#include "aare/NumpyHelpers.hpp" //Is this really a public header? + +TEST_CASE("is_digits with a few standard cases"){ + REQUIRE(is_digits("")); + REQUIRE(is_digits("123")); + REQUIRE(is_digits("0")); + REQUIRE_FALSE(is_digits("hej123")); + REQUIRE_FALSE(is_digits("a")); + REQUIRE_FALSE(is_digits(" ")); + REQUIRE_FALSE(is_digits("abcdef")); +} + +TEST_CASE("Check for quotes and return stripped string"){ + REQUIRE(parse_str("'hej'") == "hej"); + REQUIRE(parse_str("'hej hej'") == "hej hej"); + REQUIRE(parse_str("''") == ""); +} + +TEST_CASE("parsing a string without quotes throws"){ + REQUIRE_THROWS(parse_str("hej")); +} + +TEST_CASE("trim whitespace"){ + REQUIRE(trim(" hej ") == "hej"); + REQUIRE(trim("hej") == "hej"); + REQUIRE(trim(" hej") == "hej"); + REQUIRE(trim("hej ") == "hej"); + REQUIRE(trim(" ") == ""); + REQUIRE(trim(" \thej hej ") == "hej hej"); +} \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 159e7ae..b066915 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -19,9 +19,9 @@ include(CTest) include(Catch) catch_discover_tests(tests) -if(AARE_BUILD_TESTS) +if(AARE_TESTS) set(TestSources - ${CMAKE_CURRENT_SOURCE_DIR}/src/defs.test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test.cpp ) target_sources(tests PRIVATE ${TestSources} ) From 9380fd1be8a932ca7ca90f2c2c062d1cfe328d26 Mon Sep 17 00:00:00 2001 From: Bechir Braham Date: Tue, 12 Mar 2024 17:21:05 +0100 Subject: [PATCH 2/2] fix warnings --- file_io/include/aare/File.hpp | 2 +- file_io/include/aare/JsonFile.hpp | 2 +- file_io/include/aare/NumpyFile.hpp | 2 +- file_io/src/JsonFile.cpp | 2 +- file_io/src/NumpyFile.cpp | 8 ++++---- file_io/src/NumpyFileFactory.cpp | 7 +++---- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/file_io/include/aare/File.hpp b/file_io/include/aare/File.hpp index 611c98c..a490598 100644 --- a/file_io/include/aare/File.hpp +++ b/file_io/include/aare/File.hpp @@ -12,7 +12,7 @@ template class File { public: - virtual Frame* get_frame(int frame_number) = 0; + virtual Frame* get_frame(size_t frame_number) = 0; private: //comment diff --git a/file_io/include/aare/JsonFile.hpp b/file_io/include/aare/JsonFile.hpp index a878ebc..9dc0565 100644 --- a/file_io/include/aare/JsonFile.hpp +++ b/file_io/include/aare/JsonFile.hpp @@ -9,7 +9,7 @@ template class JsonFile : public File using config = RawFileConfig; public: - Frame *get_frame(int frame_number); + Frame *get_frame(size_t frame_number); int n_subfiles; std::vector subfiles; int subfile_rows, subfile_cols; diff --git a/file_io/include/aare/NumpyFile.hpp b/file_io/include/aare/NumpyFile.hpp index a5a57b2..80c8d62 100644 --- a/file_io/include/aare/NumpyFile.hpp +++ b/file_io/include/aare/NumpyFile.hpp @@ -11,7 +11,7 @@ template class NumpyFile : public Fil public: NumpyFile(std::filesystem::path fname); - Frame *get_frame(int frame_number) override; + Frame *get_frame(size_t frame_number) override; header_t header{}; static constexpr std::array magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'}; uint8_t major_ver_{}; diff --git a/file_io/src/JsonFile.cpp b/file_io/src/JsonFile.cpp index 503b6fa..0b7844d 100644 --- a/file_io/src/JsonFile.cpp +++ b/file_io/src/JsonFile.cpp @@ -1,7 +1,7 @@ #include "aare/JsonFile.hpp" template -Frame *JsonFile::get_frame(int frame_number) { +Frame *JsonFile::get_frame(size_t frame_number) { if (frame_number > this->total_frames) { throw std::runtime_error("Frame number out of range"); } diff --git a/file_io/src/NumpyFile.cpp b/file_io/src/NumpyFile.cpp index 245942e..15f06ff 100644 --- a/file_io/src/NumpyFile.cpp +++ b/file_io/src/NumpyFile.cpp @@ -2,13 +2,13 @@ #include "aare/NumpyFile.hpp" template -NumpyFile::NumpyFile(std::filesystem::path fname){ - this->fname = fname; - fp = fopen(fname.c_str(), "rb"); +NumpyFile::NumpyFile(std::filesystem::path fname_){ + this->fname = fname_; + fp = fopen(this->fname.c_str(), "rb"); } template -Frame *NumpyFile::get_frame(int frame_number) { +Frame *NumpyFile::get_frame(size_t frame_number) { if (fp == nullptr) { throw std::runtime_error("File not open"); } diff --git a/file_io/src/NumpyFileFactory.cpp b/file_io/src/NumpyFileFactory.cpp index 42e2056..1f912eb 100644 --- a/file_io/src/NumpyFileFactory.cpp +++ b/file_io/src/NumpyFileFactory.cpp @@ -78,12 +78,11 @@ template File* NumpyFileFactory::load_file() { NumpyFile *file = new NumpyFile(this->m_fpath); parse_metadata(file); - NumpyFile *f = dynamic_cast *>(file); - std::cout << "parsed header: " << f->header.to_string() << std::endl; + std::cout << "parsed header: " << file->header.to_string() << std::endl; - if(sizeof(DataType) != f->header.dtype.itemsize){ + if(sizeof(DataType) != file->header.dtype.itemsize){ std::stringstream s; - s << "Data type size mismatch: " << sizeof(DataType) << " != " << f->header.dtype.itemsize; + s << "Data type size mismatch: " << sizeof(DataType) << " != " << file->header.dtype.itemsize; throw std::runtime_error(s.str()); } return file;