From 47d381d299474799a1f110cc27b15218fa141072 Mon Sep 17 00:00:00 2001 From: Bechir Date: Fri, 8 Mar 2024 11:47:27 +0100 Subject: [PATCH] merge numpy changes with project structure --- core/include/aare/defs.hpp | 20 ++ file_io/CMakeLists.txt | 2 + file_io/include/aare/File.hpp | 43 +---- file_io/include/aare/FileFactory.hpp | 8 +- file_io/include/aare/JsonFile.hpp | 39 +++- file_io/include/aare/JsonFileFactory.hpp | 7 +- file_io/include/aare/NumpyFile.hpp | 9 + file_io/include/aare/NumpyFileFactory.hpp | 29 +++ file_io/src/File.cpp | 12 +- file_io/src/FileFactory.cpp | 46 +---- file_io/src/JsonFile.cpp | 11 +- file_io/src/JsonFileFactory.cpp | 59 +++++- file_io/src/NumpyFile.cpp | 0 file_io/src/NumpyFileFactory.cpp | 225 ++++++++++++++++++++++ 14 files changed, 396 insertions(+), 114 deletions(-) create mode 100644 file_io/include/aare/NumpyFile.hpp create mode 100644 file_io/include/aare/NumpyFileFactory.hpp create mode 100644 file_io/src/NumpyFile.cpp create mode 100644 file_io/src/NumpyFileFactory.cpp diff --git a/core/include/aare/defs.hpp b/core/include/aare/defs.hpp index 1d34db2..c5f60c5 100644 --- a/core/include/aare/defs.hpp +++ b/core/include/aare/defs.hpp @@ -56,3 +56,23 @@ template <> TimingMode StringTo(std::string); using DataTypeVariants = std::variant; + +struct RawFileConfig { + int module_gap_row{}; + int module_gap_col{}; + + bool operator==(const RawFileConfig &other) const { + if (module_gap_col != other.module_gap_col) + return false; + if (module_gap_row != other.module_gap_row) + return false; + return true; + } +}; + +const char little_endian_char = '<'; +const char big_endian_char = '>'; +const char no_endian_char = '|'; + +const std::array endian_chars = {little_endian_char, big_endian_char, no_endian_char}; +const std::array numtype_chars = {'f', 'i', 'u', 'c'}; \ No newline at end of file diff --git a/file_io/CMakeLists.txt b/file_io/CMakeLists.txt index ff9bc56..b223605 100644 --- a/file_io/CMakeLists.txt +++ b/file_io/CMakeLists.txt @@ -7,6 +7,8 @@ set(SourceFiles ${CMAKE_CURRENT_SOURCE_DIR}/src/JsonFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/JsonFileFactory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/SubFile.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFileFactory.cpp ) add_library(file_io STATIC ${SourceFiles}) diff --git a/file_io/include/aare/File.hpp b/file_io/include/aare/File.hpp index a30e7c8..0a4f628 100644 --- a/file_io/include/aare/File.hpp +++ b/file_io/include/aare/File.hpp @@ -3,23 +3,11 @@ #include "aare/defs.hpp" #include "aare/Frame.hpp" #include "SubFile.hpp" - #include #include #include -struct RawFileConfig { - int module_gap_row{}; - int module_gap_col{}; - bool operator==(const RawFileConfig &other) const { - if (module_gap_col != other.module_gap_col) - return false; - if (module_gap_row != other.module_gap_row) - return false; - return true; - } -}; template class File { @@ -27,54 +15,29 @@ class File { virtual Frame* get_frame(int frame_number) = 0; private: - using config = RawFileConfig; public: - std::vector subfiles; std::filesystem::path fname; std::filesystem::path base_path; std::string base_name, ext; - int findex, n_subfiles; + int findex; size_t total_frames{}; size_t max_frames_per_file{}; std::string version; DetectorType type; TimingMode timing_mode; - int subfile_rows, subfile_cols; bool quad{false}; ssize_t rows{}; ssize_t cols{}; uint8_t bitdepth{}; - using data_type = uint16_t; - std::vector positions; - - config cfg{0, 0}; // File(); - ~File(); + inline size_t bytes_per_frame() const { return rows * cols * bitdepth / 8; } inline size_t pixels() const { return rows * cols; } - inline void set_config(int row, int col) { - cfg.module_gap_row = row; - cfg.module_gap_col = col; - } - // TODO! Deal with fast quad and missing files - void find_number_of_subfiles() { - int n_mod = 0; - while (std::filesystem::exists(data_fname(++n_mod, 0))) - ; - n_subfiles = n_mod; - } - - inline std::filesystem::path master_fname() { - return base_path / fmt::format("{}_master_{}{}", base_name, findex, ext); - } - inline std::filesystem::path data_fname(int mod_id, int file_id) { - return base_path / fmt::format("{}_d{}_f{}_{}.raw", base_name, file_id, mod_id, findex); - } // size_t total_frames(); -}; +}; \ No newline at end of file diff --git a/file_io/include/aare/FileFactory.hpp b/file_io/include/aare/FileFactory.hpp index abb583d..d7a8f62 100644 --- a/file_io/include/aare/FileFactory.hpp +++ b/file_io/include/aare/FileFactory.hpp @@ -1,7 +1,6 @@ #pragma once #include #include "aare/File.hpp" - template class FileFactory{ // Class that will be used to create File objects @@ -13,14 +12,9 @@ public: // virtual int deleteFile() = 0; virtual File* load_file()=0;//TODO: add option to load all file to memory or keep it on disk virtual void parse_metadata(File*)=0; - - - void find_geometry(File*); - void parse_fname(File*); + virtual void parse_fname(File*)=0; - sls_detector_header read_header(const std::filesystem::path &fname); }; - diff --git a/file_io/include/aare/JsonFile.hpp b/file_io/include/aare/JsonFile.hpp index b82aebc..a878ebc 100644 --- a/file_io/include/aare/JsonFile.hpp +++ b/file_io/include/aare/JsonFile.hpp @@ -1,9 +1,40 @@ #pragma once -#include "aare/File.hpp" -#include "aare/Frame.hpp" #include "aare/defs.hpp" -template -class JsonFile : public File { +#include "aare/Frame.hpp" +#include "aare/File.hpp" + + +template class JsonFile : public File { + + using config = RawFileConfig; + public: Frame *get_frame(int frame_number); + int n_subfiles; + std::vector subfiles; + int subfile_rows, subfile_cols; + std::vector positions; + config cfg{0, 0}; + + inline void set_config(int row, int col) { + cfg.module_gap_row = row; + cfg.module_gap_col = col; + } + // TODO! Deal with fast quad and missing files + + void find_number_of_subfiles() { + int n_mod = 0; + while (std::filesystem::exists(data_fname(++n_mod, 0))) + ; + n_subfiles = n_mod; + } + + inline std::filesystem::path master_fname() { + return this->base_path / fmt::format("{}_master_{}{}", this->base_name, this->findex, this->ext); + } + inline std::filesystem::path data_fname(int mod_id, int file_id) { + return this->base_path / fmt::format("{}_d{}_f{}_{}.raw", this->base_name, file_id, mod_id, this->findex); + } + + ~JsonFile(); }; \ No newline at end of file diff --git a/file_io/include/aare/JsonFileFactory.hpp b/file_io/include/aare/JsonFileFactory.hpp index cdbfb95..a65453c 100644 --- a/file_io/include/aare/JsonFileFactory.hpp +++ b/file_io/include/aare/JsonFileFactory.hpp @@ -7,10 +7,13 @@ private: public: File* load_file() override; void parse_metadata(File*) override; + void parse_fname(File*) override; + JsonFileFactory(std::filesystem::path fpath); void open_subfiles(File*); - + sls_detector_header read_header(const std::filesystem::path &fname); + void find_geometry(File*); + }; - diff --git a/file_io/include/aare/NumpyFile.hpp b/file_io/include/aare/NumpyFile.hpp new file mode 100644 index 0000000..1230af9 --- /dev/null +++ b/file_io/include/aare/NumpyFile.hpp @@ -0,0 +1,9 @@ +#include "aare/File.hpp" +#include "aare/defs.hpp" + +template +class NumpyFile : public File +{ + + +}; \ No newline at end of file diff --git a/file_io/include/aare/NumpyFileFactory.hpp b/file_io/include/aare/NumpyFileFactory.hpp new file mode 100644 index 0000000..3ba3903 --- /dev/null +++ b/file_io/include/aare/NumpyFileFactory.hpp @@ -0,0 +1,29 @@ +#include "aare/defs.hpp" +#include "aare/FileFactory.hpp" +#include "aare/NumpyFile.hpp" +#include +#include +#include +#include +#include +#include +#include + +template class NumpyFileFactory : public FileFactory { + public: + NumpyFileFactory(std::filesystem::path fpath); + void parse_metadata(File *_file) override; + void open_subfiles(File *_file) override; + File *load_file() override; + + uint8_t major_ver() const noexcept { return major_ver_; } + uint8_t minor_ver() const noexcept { return minor_ver_; } + + private: + static constexpr std::array magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'}; + uint8_t major_ver_{}; + uint8_t minor_ver_{}; + uint32_t header_len{}; + uint8_t header_len_size{}; + const uint8_t magic_string_length{6}; +}; \ No newline at end of file diff --git a/file_io/src/File.cpp b/file_io/src/File.cpp index 9cebbcc..68e2378 100644 --- a/file_io/src/File.cpp +++ b/file_io/src/File.cpp @@ -1,10 +1,2 @@ -#include "aare/File.hpp" - -template -File::~File() { - for (auto& subfile : subfiles) { - delete subfile; - } -} - -template class File; +// #include "aare/File.hpp" +// template class File; diff --git a/file_io/src/FileFactory.cpp b/file_io/src/FileFactory.cpp index d56e14c..9e7ed47 100644 --- a/file_io/src/FileFactory.cpp +++ b/file_io/src/FileFactory.cpp @@ -26,50 +26,6 @@ FileFactory *FileFactory::get_factory(st throw std::runtime_error("Unsupported file type"); } -template -void FileFactory::parse_fname(File *file) { - file->base_path = fpath.parent_path(); - file->base_name = fpath.stem(); - file->ext = fpath.extension(); - auto pos = file->base_name.rfind("_"); - file->findex = std::stoi(file->base_name.substr(pos + 1)); - pos = file->base_name.find("_master_"); - file->base_name.erase(pos); -} -template -sls_detector_header FileFactory::read_header(const std::filesystem::path &fname) { - sls_detector_header h{}; - FILE *fp = fopen(fname.c_str(), "r"); - if (!fp) - throw std::runtime_error(fmt::format("Could not open: {} for reading", fname.c_str())); - - size_t rc = fread(reinterpret_cast(&h), sizeof(h), 1, fp); - fclose(fp); - if (rc != 1) - throw std::runtime_error("Could not read header from file"); - return h; -} - -template -void FileFactory::find_geometry(File *file) { - uint16_t r{}; - uint16_t c{}; - for (int i = 0; i != file->n_subfiles; ++i) { - auto h = this->read_header(file->data_fname(i, 0)); - r = std::max(r, h.row); - c = std::max(c, h.column); - - file->positions.push_back({h.row, h.column}); - } - r++; - c++; - - file->rows = r * file->subfile_rows; - file->cols = c * file->subfile_cols; - - file->rows += (r - 1) * file->cfg.module_gap_row; -} - -template class FileFactory; +template class FileFactory; \ No newline at end of file diff --git a/file_io/src/JsonFile.cpp b/file_io/src/JsonFile.cpp index 0d848d9..c900e74 100644 --- a/file_io/src/JsonFile.cpp +++ b/file_io/src/JsonFile.cpp @@ -1,5 +1,4 @@ #include "aare/JsonFile.hpp" -#include template Frame *JsonFile::get_frame(int frame_number) { @@ -16,4 +15,12 @@ Frame *JsonFile::get_frame(int frame_number) { return f; } -template class JsonFile; +template +JsonFile::~JsonFile() { + for (auto& subfile : subfiles) { + delete subfile; + } +} + + +template class JsonFile; \ No newline at end of file diff --git a/file_io/src/JsonFileFactory.cpp b/file_io/src/JsonFileFactory.cpp index c8a6f61..cb19c68 100644 --- a/file_io/src/JsonFileFactory.cpp +++ b/file_io/src/JsonFileFactory.cpp @@ -17,8 +17,8 @@ JsonFileFactory::JsonFileFactory(std::filesystem::path fpath) } template -void JsonFileFactory::parse_metadata(File *file) { - std::cout << "Parsing metadata" << std::endl; +void JsonFileFactory::parse_metadata(File *_file) { + auto file = dynamic_cast *>(_file); std::ifstream ifs(file->master_fname()); json j; ifs >> j; @@ -51,7 +51,8 @@ void JsonFileFactory::parse_metadata(File } template -void JsonFileFactory::open_subfiles(File *file) { +void JsonFileFactory::open_subfiles(File *_file) { + auto file = dynamic_cast *>(_file); for (int i = 0; i != file->n_subfiles; ++i) { file->subfiles.push_back( @@ -61,7 +62,6 @@ void JsonFileFactory::open_subfiles(File * template File *JsonFileFactory::load_file() { - std::cout << "Loading json file" << std::endl; JsonFile *file = new JsonFile(); file->fname = this->fpath; this->parse_fname(file); @@ -73,4 +73,55 @@ File *JsonFileFactory::load_file() { return file; } + +template +sls_detector_header JsonFileFactory::read_header(const std::filesystem::path &fname) { + sls_detector_header h{}; + FILE *fp = fopen(fname.c_str(), "r"); + if (!fp) + throw std::runtime_error(fmt::format("Could not open: {} for reading", fname.c_str())); + + size_t rc = fread(reinterpret_cast(&h), sizeof(h), 1, fp); + fclose(fp); + if (rc != 1) + throw std::runtime_error("Could not read header from file"); + return h; +} + + +template +void JsonFileFactory::find_geometry(File *_file) { + auto file = dynamic_cast *>(_file); + uint16_t r{}; + uint16_t c{}; + for (int i = 0; i != file->n_subfiles; ++i) { + auto h = this->read_header(file->data_fname(i, 0)); + r = std::max(r, h.row); + c = std::max(c, h.column); + + file->positions.push_back({h.row, h.column}); + } + r++; + c++; + + file->rows = r * file->subfile_rows; + file->cols = c * file->subfile_cols; + + file->rows += (r - 1) * file->cfg.module_gap_row; +} + +template +void JsonFileFactory::parse_fname(File *file) { + + file->base_path = this->fpath.parent_path(); + file->base_name = this->fpath.stem(); + file->ext = this->fpath.extension(); + + auto pos = file->base_name.rfind("_"); + file->findex = std::stoi(file->base_name.substr(pos + 1)); + pos = file->base_name.find("_master_"); + file->base_name.erase(pos); +} + + template class JsonFileFactory; \ No newline at end of file diff --git a/file_io/src/NumpyFile.cpp b/file_io/src/NumpyFile.cpp new file mode 100644 index 0000000..e69de29 diff --git a/file_io/src/NumpyFileFactory.cpp b/file_io/src/NumpyFileFactory.cpp new file mode 100644 index 0000000..fc5d470 --- /dev/null +++ b/file_io/src/NumpyFileFactory.cpp @@ -0,0 +1,225 @@ +#include "aare/NumpyFileFactory.hpp" + +template +NumpyFileFactory::NumpyFileFactory(std::filesystem::path fpath) { + this->fpath = fpath; +} +inline std::string parse_str(const std::string &in) { + if ((in.front() == '\'') && (in.back() == '\'')) + return in.substr(1, in.length() - 2); + + throw std::runtime_error("Invalid python string."); +} +/** + Removes leading and trailing whitespaces + */ +inline std::string trim(const std::string& str) { + const std::string whitespace = " \t"; + auto begin = str.find_first_not_of(whitespace); + + if (begin == std::string::npos) + return ""; + + auto end = str.find_last_not_of(whitespace); + + return str.substr(begin, end - begin + 1); +} +inline std::vector parse_tuple(std::string in) { + std::vector v; + const char seperator = ','; + + in = trim(in); + + if ((in.front() == '(') && (in.back() == ')')) + in = in.substr(1, in.length() - 2); + else + throw std::runtime_error("Invalid Python tuple."); + + std::istringstream iss(in); + + for (std::string token; std::getline(iss, token, seperator);) { + v.push_back(token); + } + + return v; +} +inline bool parse_bool(const std::string &in) { + if (in == "True") + return true; + if (in == "False") + return false; + + throw std::runtime_error("Invalid python boolan."); +} + + +inline std::string get_value_from_map(const std::string &mapstr) { + size_t sep_pos = mapstr.find_first_of(":"); + if (sep_pos == std::string::npos) + return ""; + + std::string tmp = mapstr.substr(sep_pos + 1); + return trim(tmp); +} +std::unordered_map parse_dict(std::string in, const std::vector &keys) { + std::unordered_map map; + + if (keys.size() == 0) + return map; + + in = trim(in); + + // unwrap dictionary + if ((in.front() == '{') && (in.back() == '}')) + in = in.substr(1, in.length() - 2); + else + throw std::runtime_error("Not a Python dictionary."); + + std::vector> positions; + + for (auto const &value : keys) { + size_t pos = in.find("'" + value + "'"); + + if (pos == std::string::npos) + throw std::runtime_error("Missing '" + value + "' key."); + + std::pair position_pair{pos, value}; + positions.push_back(position_pair); + } + + // sort by position in dict + std::sort(positions.begin(), positions.end()); + + for (size_t i = 0; i < positions.size(); ++i) { + std::string raw_value; + size_t begin{positions[i].first}; + size_t end{std::string::npos}; + + std::string key = positions[i].second; + + if (i + 1 < positions.size()) + end = positions[i + 1].first; + + raw_value = in.substr(begin, end - begin); + + raw_value = trim(raw_value); + + if (raw_value.back() == ',') + raw_value.pop_back(); + + map[key] = get_value_from_map(raw_value); + } + + return map; +} + + +using shape_t = std::vector; + +struct dtype_t { + char byteorder; + char kind; + unsigned int itemsize; +}; +struct header_t { + dtype_t dtype; + bool fortran_order; + shape_t shape; +}; +template inline bool in_array(T val, const std::array &arr) { + return std::find(std::begin(arr), std::end(arr), val) != std::end(arr); +} +inline bool is_digits(const std::string &str) { return std::all_of(str.begin(), str.end(), ::isdigit); } + +inline dtype_t parse_descr(std::string typestring) { + if (typestring.length() < 3) { + throw std::runtime_error("invalid typestring (length)"); + } + + char byteorder_c = typestring.at(0); + char kind_c = typestring.at(1); + std::string itemsize_s = typestring.substr(2); + + if (!in_array(byteorder_c, endian_chars)) { + throw std::runtime_error("invalid typestring (byteorder)"); + } + + if (!in_array(kind_c, numtype_chars)) { + throw std::runtime_error("invalid typestring (kind)"); + } + + if (!is_digits(itemsize_s)) { + throw std::runtime_error("invalid typestring (itemsize)"); + } + unsigned int itemsize = std::stoul(itemsize_s); + + return {byteorder_c, kind_c, itemsize}; +} + +template +void NumpyFileFactory::parse_metadata(File *_file) { + auto file = dynamic_cast *>(_file); + // open ifsteam to file + std::ifstream f(file->fname, std::ios::binary); + if (!f.is_open()) { + throw std::runtime_error(fmt::format("Could not open: {} for reading", file->fname.c_str())); + } + // read magic number + std::array tmp{}; + f.read(tmp.data(), tmp.size()); + if (tmp != NumpyFileFactory::magic_str) { + for (auto item : tmp) + fmt::print("{}, ", int(item)); + fmt::print("\n"); + throw std::runtime_error("Not a numpy file"); + } + + // read version + f.read(reinterpret_cast(&major_ver_), 1); + f.read(reinterpret_cast(&minor_ver_), 1); + + if (major_ver_ == 1) { + header_len_size = 2; + } else if (major_ver_ == 2) { + header_len_size = 4; + } else { + throw std::runtime_error("Unsupported numpy version"); + } + // read header length + f.read(reinterpret_cast(&header_len), header_len_size); + if ((magic_string_length + 2 + header_len_size + header_len) % 16 != 0) { + fmt::print("Warning: header length is not a multiple of 16\n"); + } + // read header + auto buf_v = std::vector(header_len); + f.read(buf_v.data(), header_len); + std::string header(buf_v.data(), header_len); + + // parse header + + std::vector keys{"descr", "fortran_order", "shape"}; + + auto dict_map = parse_dict(header, keys); + if (dict_map.size() == 0) + throw std::runtime_error("invalid dictionary in header"); + + std::string descr_s = dict_map["descr"]; + std::string fortran_s = dict_map["fortran_order"]; + std::string shape_s = dict_map["shape"]; + + std::string descr = parse_str(descr_s); + dtype_t dtype = parse_descr(descr); + + // convert literal Python bool to C++ bool + bool fortran_order = parse_bool(fortran_s); + + // parse the shape tuple + auto shape_v = parse_tuple(shape_s); + shape_t shape; + for (auto item : shape_v) { + auto dim = static_cast(std::stoul(item)); + shape.push_back(dim); + } + + // {dtype, fortran_order, shape}; +}; \ No newline at end of file