From 449c5119a4fc39fc8b78a44021167cf216768f86 Mon Sep 17 00:00:00 2001 From: Bechir Braham Date: Tue, 2 Apr 2024 11:35:58 +0200 Subject: [PATCH] save numpy files frame by frame (#37) Co-authored-by: Bechir --- core/CMakeLists.txt | 2 +- core/include/aare/DType.hpp | 25 ++-- core/include/aare/Frame.hpp | 1 + core/src/DType.cpp | 27 ++++ core/src/Frame.cpp | 1 + examples/CMakeLists.txt | 3 +- ...mpy_example.cpp => numpy_read_example.cpp} | 0 examples/numpy_write_example.cpp | 27 ++++ file_io/include/aare/File.hpp | 5 +- file_io/include/aare/FileFactory.hpp | 39 ++++-- file_io/include/aare/FileInterface.hpp | 12 +- file_io/include/aare/NumpyFile.hpp | 16 +-- file_io/include/aare/NumpyFileFactory.hpp | 5 +- file_io/include/aare/NumpyHelpers.hpp | 34 ++--- file_io/include/aare/RawFile.hpp | 13 +- file_io/include/aare/RawFileFactory.hpp | 4 +- file_io/src/File.cpp | 18 +-- file_io/src/FileFactory.cpp | 5 - file_io/src/NumpyFile.cpp | 65 ++++++++- file_io/src/NumpyFileFactory.cpp | 27 ++-- file_io/src/NumpyHelpers.cpp | 125 +++++++++++++++++- file_io/src/RawFileFactory.cpp | 2 +- file_io/test/NumpyHelpers.test.cpp | 2 + 23 files changed, 357 insertions(+), 101 deletions(-) rename examples/{numpy_example.cpp => numpy_read_example.cpp} (100%) create mode 100644 examples/numpy_write_example.cpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 40f60b5..9db8c47 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -29,5 +29,5 @@ if(AARE_TESTS) ) target_sources(tests PRIVATE ${TestSources} ) - target_link_libraries(tests PRIVATE core ) + target_link_libraries(tests PRIVATE core utils) endif() \ No newline at end of file diff --git a/core/include/aare/DType.hpp b/core/include/aare/DType.hpp index 02ca63b..018047f 100644 --- a/core/include/aare/DType.hpp +++ b/core/include/aare/DType.hpp @@ -1,45 +1,48 @@ #pragma once +#include "aare/utils/logger.hpp" #include #include #include namespace aare { -enum class endian -{ +enum class endian { #ifdef _WIN32 little = 0, - big = 1, + big = 1, native = little #else little = __ORDER_LITTLE_ENDIAN__, - big = __ORDER_BIG_ENDIAN__, + big = __ORDER_BIG_ENDIAN__, native = __BYTE_ORDER__ #endif }; class DType { - static_assert(sizeof(long)==sizeof(int64_t), "long should be 64bits"); + static_assert(sizeof(long) == sizeof(int64_t), "long should be 64bits"); + public: - enum TypeIndex { INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT, DOUBLE, ERROR }; - + enum TypeIndex { INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT, DOUBLE, ERROR }; + + uint8_t bitdepth() const; + explicit DType(const std::type_info &t); explicit DType(std::string_view sv); - //not explicit to allow conversions form enum to DType - DType(DType::TypeIndex ti); + + // not explicit to allow conversions form enum to DType + DType(DType::TypeIndex ti); bool operator==(const DType &other) const noexcept; bool operator!=(const DType &other) const noexcept; bool operator==(const std::type_info &t) const; bool operator!=(const std::type_info &t) const; + // bool operator==(DType::TypeIndex ti) const; // bool operator!=(DType::TypeIndex ti) const; std::string str() const; - private: TypeIndex m_type{TypeIndex::ERROR}; - }; } // namespace aare \ No newline at end of file diff --git a/core/include/aare/Frame.hpp b/core/include/aare/Frame.hpp index 20048f7..4c3149f 100644 --- a/core/include/aare/Frame.hpp +++ b/core/include/aare/Frame.hpp @@ -29,6 +29,7 @@ class Frame { ssize_t rows() const { return m_rows; } ssize_t cols() const { return m_cols; } ssize_t bitdepth() const { return m_bitdepth; } + inline ssize_t size() const { return m_rows * m_cols * m_bitdepth / 8; } std::byte *_get_data() { return m_data; } Frame &operator=(Frame &other) { m_rows = other.rows(); diff --git a/core/src/DType.cpp b/core/src/DType.cpp index cde3b30..39e8702 100644 --- a/core/src/DType.cpp +++ b/core/src/DType.cpp @@ -4,6 +4,7 @@ namespace aare { + DType::DType(const std::type_info &t) { if (t == typeid(int8_t)) m_type = TypeIndex::INT8; @@ -32,6 +33,32 @@ DType::DType(const std::type_info &t) { "Could not construct data type. Type not supported."); } +uint8_t DType::bitdepth()const { + switch (m_type) { + case TypeIndex::INT8: + case TypeIndex::UINT8: + return 8; + case TypeIndex::INT16: + case TypeIndex::UINT16: + return 16; + case TypeIndex::INT32: + case TypeIndex::UINT32: + return 32; + case TypeIndex::INT64: + case TypeIndex::UINT64: + return 64; + case TypeIndex::FLOAT: + return 32; + case TypeIndex::DOUBLE: + return 64; + case TypeIndex::ERROR: + return 0; + default: + throw std::runtime_error(LOCATION+"Could not get bitdepth. Type not supported."); + } + +} + DType::DType(DType::TypeIndex ti):m_type(ti){} DType::DType(std::string_view sv) { diff --git a/core/src/Frame.cpp b/core/src/Frame.cpp index 4ff3797..018f454 100644 --- a/core/src/Frame.cpp +++ b/core/src/Frame.cpp @@ -34,6 +34,7 @@ void Frame::set(int row, int col, T data) { } template void Frame::set(int row, int col, uint16_t data); +template void Frame::set(int row, int col, uint32_t data); // std::vector> Frame::get_array() { diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 558c87f..7d324b5 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,5 +1,6 @@ -set(EXAMPLE_LIST "json_example;logger_example;numpy_example;multiport_example;raw_example;mythen_example") +set(EXAMPLE_LIST "json_example;logger_example;numpy_read_example;multiport_example;raw_example") +set(EXAMPLE_LIST "${EXAMPLE_LIST};mythen_example;numpy_write_example") foreach(example ${EXAMPLE_LIST}) add_executable(${example} ${example}.cpp) target_link_libraries(${example} PUBLIC aare PRIVATE aare_compiler_flags) diff --git a/examples/numpy_example.cpp b/examples/numpy_read_example.cpp similarity index 100% rename from examples/numpy_example.cpp rename to examples/numpy_read_example.cpp diff --git a/examples/numpy_write_example.cpp b/examples/numpy_write_example.cpp new file mode 100644 index 0000000..27aa77c --- /dev/null +++ b/examples/numpy_write_example.cpp @@ -0,0 +1,27 @@ +// Your First C++ Program +#include "aare/File.hpp" +#include "aare/Frame.hpp" +#include + +#define AARE_ROOT_DIR_VAR "PROJECT_ROOT_DIR" + + +int main() { + auto path = std::filesystem::path("/tmp/test.npy"); + auto dtype = aare::DType(typeid(uint32_t)); + FileConfig cfg = {path, dtype, 100, 100}; + File npy(path, "w",cfg); + Frame f(100, 100, dtype.bitdepth()); + for (int i = 0; i < 10000; i++) { + f.set(i/100, i%100,i); + } + + npy.write(f); + f.set(0,0,77); + npy.write(f); + npy.write(f); + return 0; + + + +} diff --git a/file_io/include/aare/File.hpp b/file_io/include/aare/File.hpp index 2ed9a03..03314ed 100644 --- a/file_io/include/aare/File.hpp +++ b/file_io/include/aare/File.hpp @@ -1,4 +1,6 @@ #include "aare/FileInterface.hpp" + + class File { private: FileInterface *file_impl; @@ -9,7 +11,8 @@ class File { // - w writing (overwrites existing file) // - a appending (appends to existing file) // TODO! do we need to support w+, r+ and a+? - File(std::filesystem::path fname, std::string mode); + File(std::filesystem::path fname, std::string mode,FileConfig cfg = {}); + void write(Frame& frame); Frame read(); Frame iread(size_t frame_number); std::vector read(size_t n_frames); diff --git a/file_io/include/aare/FileFactory.hpp b/file_io/include/aare/FileFactory.hpp index 235de32..50baf9c 100644 --- a/file_io/include/aare/FileFactory.hpp +++ b/file_io/include/aare/FileFactory.hpp @@ -1,26 +1,37 @@ #pragma once -#include +#include "aare/DType.hpp" #include "aare/FileInterface.hpp" -class FileFactory{ +#include "aare/utils/logger.hpp" +#include + +class FileFactory { // Class that will be used to create FileInterface objects // follows the factory pattern - protected: + protected: std::filesystem::path m_fpath; -public: - static FileFactory* get_factory(std::filesystem::path); + + public: + static FileFactory *get_factory(std::filesystem::path); // virtual int deleteFile() = 0; - static FileInterface* load_file(std::filesystem::path p){ + static FileInterface *load_file(std::filesystem::path p, std::string mode, FileConfig cfg = {}) { + if ((mode == "r" or mode == "a") and not std::filesystem::exists(p)) { + throw std::runtime_error(LOCATION+"File does not exist"); + } + auto factory = get_factory(p); - FileInterface* tmp= factory->load_file(); + + FileInterface *tmp = nullptr; + if (mode == "r") { + tmp = factory->load_file_read(); + } else if (mode == "w") { + tmp = factory->load_file_write(cfg); + } delete factory; return tmp; }; - virtual FileInterface* load_file()=0;//TODO: add option to load all file to memory or keep it on disk - virtual void parse_metadata(FileInterface*)=0; - virtual void parse_fname(FileInterface*)=0; + virtual FileInterface *load_file_read() = 0; // TODO: add option to load all file to memory or keep it on disk + virtual FileInterface *load_file_write(FileConfig) = 0; + virtual void parse_metadata(FileInterface *) = 0; + virtual void parse_fname(FileInterface *) = 0; virtual ~FileFactory() = default; - - - - }; diff --git a/file_io/include/aare/FileInterface.hpp b/file_io/include/aare/FileInterface.hpp index 5a3c351..bb07414 100644 --- a/file_io/include/aare/FileInterface.hpp +++ b/file_io/include/aare/FileInterface.hpp @@ -1,14 +1,23 @@ #pragma once #include "aare/Frame.hpp" #include "aare/defs.hpp" +#include "aare/DType.hpp" #include "aare/utils/logger.hpp" #include #include + +struct FileConfig { + std::filesystem::path fname; + aare::DType dtype = aare::DType(typeid(uint16_t)); + uint64_t rows; + uint64_t cols; + xy geometry{1, 1}; +}; class FileInterface { public: friend class FileFactory; // write one frame - // virtual void write(Frame &frame) = 0; + virtual void write(Frame &frame) = 0; // write n_frames frames // virtual void write(std::vector &frames) = 0; @@ -71,6 +80,7 @@ class FileInterface { }; public: + std::string mode; std::filesystem::path m_fname; std::filesystem::path m_base_path; std::string m_base_name, m_ext; diff --git a/file_io/include/aare/NumpyFile.hpp b/file_io/include/aare/NumpyFile.hpp index f4cb06c..6d72537 100644 --- a/file_io/include/aare/NumpyFile.hpp +++ b/file_io/include/aare/NumpyFile.hpp @@ -7,8 +7,11 @@ class NumpyFile : public FileInterface { FILE *fp = nullptr; + size_t initial_header_len = 0; + public: + void write(Frame &frame) override; Frame read() override { return get_frame(this->current_frame++); } std::vector read(size_t n_frames) override; @@ -22,23 +25,18 @@ class NumpyFile : public FileInterface { size_t total_frames() const override { return header.shape[0]; } ssize_t rows() const override { return header.shape[1]; } ssize_t cols() const override { return header.shape[2]; } - ssize_t bitdepth() const override { return header.dtype.itemsize; } + ssize_t bitdepth() const override { return header.dtype.bitdepth(); } NumpyFile(std::filesystem::path fname); - header_t header{}; - static constexpr std::array magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'}; + NumpyFile(FileConfig, header_t); + header_t header; uint8_t major_ver_{}; uint8_t minor_ver_{}; uint32_t header_len{}; uint8_t header_len_size{}; - const uint8_t magic_string_length{6}; ssize_t header_size{}; - ~NumpyFile() { - if (fp != nullptr) { - fclose(fp); - } - } + ~NumpyFile(); private: size_t current_frame{}; diff --git a/file_io/include/aare/NumpyFileFactory.hpp b/file_io/include/aare/NumpyFileFactory.hpp index f013cdc..bd9a11b 100644 --- a/file_io/include/aare/NumpyFileFactory.hpp +++ b/file_io/include/aare/NumpyFileFactory.hpp @@ -14,7 +14,8 @@ class NumpyFileFactory : public FileFactory { public: NumpyFileFactory(std::filesystem::path fpath); void parse_metadata(FileInterface *_file) override; - NumpyFile* load_file() override; - void parse_fname(FileInterface*){}; + NumpyFile* load_file_read() override; + NumpyFile* load_file_write(FileConfig) override; + void parse_fname(FileInterface*)override{}; }; \ No newline at end of file diff --git a/file_io/include/aare/NumpyHelpers.hpp b/file_io/include/aare/NumpyHelpers.hpp index d81147a..389b751 100644 --- a/file_io/include/aare/NumpyHelpers.hpp +++ b/file_io/include/aare/NumpyHelpers.hpp @@ -4,34 +4,28 @@ #include #include #include +#include +#include +#include #include #include #include -#include -#include -#include +#include "aare/DType.hpp" #include "aare/defs.hpp" using shape_t = std::vector; -struct dtype_t { - char byteorder; - char kind; - unsigned int itemsize; - std::string to_string() { - std::stringstream sstm; - sstm << byteorder << kind << itemsize; - return sstm.str(); - } -}; struct header_t { - dtype_t dtype; + header_t() : dtype(aare::DType(aare::DType::ERROR)), fortran_order(false), shape(shape_t()){}; + header_t(aare::DType dtype, bool fortran_order, shape_t shape) + : dtype(dtype), fortran_order(fortran_order), shape(shape){}; + aare::DType dtype; bool fortran_order; shape_t shape; std::string to_string() { std::stringstream sstm; - sstm << "dtype: " << dtype.to_string() << ", fortran_order: " << fortran_order << ' '; + sstm << "dtype: " << dtype.str() << ", fortran_order: " << fortran_order << ' '; sstm << "shape: ("; for (auto item : shape) @@ -41,6 +35,9 @@ struct header_t { } }; +namespace aare::NumpyHelpers { +const constexpr std::array magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'}; +const uint8_t magic_string_length{6}; std::string parse_str(const std::string &in); /** @@ -61,4 +58,9 @@ template inline bool in_array(T val, const std::arraycurrent_frame++); }; std::vector read(size_t n_frames) override; void read_into(std::byte *image_buf) override { return get_frame_into(this->current_frame++, image_buf); }; @@ -22,10 +23,10 @@ class RawFile : public FileInterface { size_t pixels() override { return m_rows * m_cols; } // goto frame number - void seek(size_t frame_number) { this->current_frame = frame_number; }; + void seek(size_t frame_number) override{ this->current_frame = frame_number; }; // return the position of the file pointer (in number of frames) - size_t tell() { return this->current_frame; }; + size_t tell() override{ return this->current_frame; }; size_t n_subfiles; size_t n_subfile_parts; @@ -59,10 +60,10 @@ class RawFile : public FileInterface { ~RawFile(); - size_t total_frames() const { return m_total_frames; } - ssize_t rows() const { return m_rows; } - ssize_t cols() const { return m_cols; } - ssize_t bitdepth() const { return m_bitdepth; } + size_t total_frames() const override { return m_total_frames; } + ssize_t rows() const override { return m_rows; } + ssize_t cols() const override{ return m_cols; } + ssize_t bitdepth() const override{ return m_bitdepth; } private: size_t current_frame{}; diff --git a/file_io/include/aare/RawFileFactory.hpp b/file_io/include/aare/RawFileFactory.hpp index af0aa1b..634731e 100644 --- a/file_io/include/aare/RawFileFactory.hpp +++ b/file_io/include/aare/RawFileFactory.hpp @@ -6,8 +6,10 @@ class RawFileFactory : public FileFactory { void parse_raw_metadata(RawFile *file); public: + RawFileFactory(std::filesystem::path fpath); - virtual RawFile *load_file() override; + RawFile *load_file_read() override; + RawFile *load_file_write(FileConfig) override{return new RawFile();}; void parse_metadata(FileInterface *) override; void parse_fname(FileInterface *) override; void open_subfiles(FileInterface *); diff --git a/file_io/src/File.cpp b/file_io/src/File.cpp index f5b4adb..73655ce 100644 --- a/file_io/src/File.cpp +++ b/file_io/src/File.cpp @@ -2,13 +2,11 @@ #include "aare/FileFactory.hpp" #include "aare/utils/logger.hpp" -File::File(std::filesystem::path fname, std::string mode) { - if (mode != "r") { - throw std::runtime_error(LOCATION + " Only read mode is supported"); - } - file_impl = FileFactory::load_file(fname); +File::File(std::filesystem::path fname, std::string mode, FileConfig cfg) { + file_impl = FileFactory::load_file(fname, mode, cfg); } +void File::write(Frame& frame) { file_impl->write(frame); } Frame File::read() { return file_impl->read(); } size_t File::total_frames() const { return file_impl->total_frames(); } std::vector File::read(size_t n_frames) { return file_impl->read(n_frames); } @@ -18,17 +16,13 @@ size_t File::frame_number(size_t frame_index) { return file_impl->frame_number(f size_t File::bytes_per_frame() { return file_impl->bytes_per_frame(); } size_t File::pixels() { return file_impl->pixels(); } void File::seek(size_t frame_number) { file_impl->seek(frame_number); } -size_t File::tell() const{ return file_impl->tell(); } +size_t File::tell() const { return file_impl->tell(); } ssize_t File::rows() const { return file_impl->rows(); } ssize_t File::cols() const { return file_impl->cols(); } ssize_t File::bitdepth() const { return file_impl->bitdepth(); } -File::~File() { - delete file_impl; -} +File::~File() { delete file_impl; } -Frame File::iread(size_t frame_number) { - return file_impl->iread(frame_number); -} +Frame File::iread(size_t frame_number) { return file_impl->iread(frame_number); } File::File(File &&other) { file_impl = other.file_impl; diff --git a/file_io/src/FileFactory.cpp b/file_io/src/FileFactory.cpp index 7912c1d..b48cc6a 100644 --- a/file_io/src/FileFactory.cpp +++ b/file_io/src/FileFactory.cpp @@ -7,11 +7,6 @@ #include FileFactory *FileFactory::get_factory(std::filesystem::path fpath) { - // check if file exists - if (!std::filesystem::exists(fpath)) { - throw std::runtime_error("File does not exist"); - } - if (fpath.extension() == ".raw" || fpath.extension() == ".json"){ aare::logger::debug("Loading",fpath.extension(),"file"); return new RawFileFactory(fpath); diff --git a/file_io/src/NumpyFile.cpp b/file_io/src/NumpyFile.cpp index d57f98a..85da8a4 100644 --- a/file_io/src/NumpyFile.cpp +++ b/file_io/src/NumpyFile.cpp @@ -1,17 +1,45 @@ #include "aare/NumpyFile.hpp" +void NumpyFile::write(Frame &frame) { + if (fp == nullptr) { + throw std::runtime_error("File not open"); + } + if (not(mode == "w" or mode == "a")) { + throw std::runtime_error("File not open for writing"); + } + fseek(fp, 0, SEEK_END); + fwrite(frame._get_data(), frame.size(), 1, fp); +} + NumpyFile::NumpyFile(std::filesystem::path fname_) { this->m_fname = fname_; fp = fopen(this->m_fname.c_str(), "rb"); } +NumpyFile::NumpyFile(FileConfig config, header_t header) { + this->mode = "w"; + this->m_fname = config.fname; + this->m_bitdepth = config.dtype.bitdepth(); + this->m_rows = config.rows; + this->m_cols = config.cols; + this->header = header; + this->header.shape = {0, config.rows, config.cols}; + + fp = fopen(this->m_fname.c_str(), "wb"); + if (!fp) { + throw std::runtime_error(fmt::format("Could not open: {} for reading", this->m_fname.c_str())); + } + + this->initial_header_len = + aare::NumpyHelpers::write_header(std::filesystem::path(this->m_fname.c_str()), this->header); +} Frame NumpyFile::get_frame(size_t frame_number) { - Frame frame(header.shape[1], header.shape[2], header.dtype.itemsize*8); - get_frame_into(frame_number,frame._get_data()); + Frame frame(header.shape[1], header.shape[2], header.dtype.bitdepth()); + get_frame_into(frame_number, frame._get_data()); return frame; } -void NumpyFile::get_frame_into( size_t frame_number,std::byte* image_buf) { +void NumpyFile::get_frame_into(size_t frame_number, std::byte *image_buf) { if (fp == nullptr) { throw std::runtime_error("File not open"); } @@ -22,13 +50,10 @@ void NumpyFile::get_frame_into( size_t frame_number,std::byte* image_buf) { fread(image_buf, bytes_per_frame(), 1, fp); } - - size_t NumpyFile::pixels() { return std::accumulate(header.shape.begin() + 1, header.shape.end(), 1, std::multiplies()); }; -size_t NumpyFile::bytes_per_frame() { return header.dtype.itemsize * pixels(); }; - +size_t NumpyFile::bytes_per_frame() { return header.dtype.bitdepth() / 8 * pixels(); }; std::vector NumpyFile::read(size_t n_frames) { // TODO: implement this in a more efficient way @@ -45,4 +70,30 @@ void NumpyFile::read_into(std::byte *image_buf, size_t n_frames) { this->get_frame_into(this->current_frame++, image_buf); image_buf += this->bytes_per_frame(); } +} + +NumpyFile::~NumpyFile() { + if (mode == "w" or mode == "a") { + // determine number of frames + fseek(fp, 0, SEEK_END); + size_t file_size = ftell(fp); + size_t data_size = file_size - initial_header_len; + size_t n_frames = data_size / bytes_per_frame(); + // update number of frames in header (first element of shape) + this->header.shape[0] = n_frames; + fseek(fp, 0, SEEK_SET); + // create string stream to contain header + std::stringstream ss; + aare::NumpyHelpers::write_header(ss, this->header); + std::string header_str = ss.str(); + // write header + fwrite(header_str.c_str(), header_str.size(), 1, fp); + + + } + + if (fp != nullptr) { + fclose(fp); + + } } \ No newline at end of file diff --git a/file_io/src/NumpyFileFactory.cpp b/file_io/src/NumpyFileFactory.cpp index 3e46c10..57d11f8 100644 --- a/file_io/src/NumpyFileFactory.cpp +++ b/file_io/src/NumpyFileFactory.cpp @@ -1,5 +1,8 @@ #include "aare/NumpyFileFactory.hpp" #include "aare/NumpyHelpers.hpp" + +using namespace aare; + NumpyFileFactory::NumpyFileFactory(std::filesystem::path fpath) { this->m_fpath = fpath; } void NumpyFileFactory::parse_metadata(FileInterface *_file) { auto file = dynamic_cast(_file); @@ -12,7 +15,7 @@ void NumpyFileFactory::parse_metadata(FileInterface *_file) { // read magic number std::array tmp{}; f.read(tmp.data(), tmp.size()); - if (tmp != NumpyFile::magic_str) { + if (tmp != aare::NumpyHelpers::magic_str) { for (auto item : tmp) fmt::print("{}, ", int(item)); fmt::print("\n"); @@ -32,7 +35,7 @@ void NumpyFileFactory::parse_metadata(FileInterface *_file) { } // read header length f.read(reinterpret_cast(&file->header_len), file->header_len_size); - file->header_size = file->magic_string_length + 2 + file->header_len_size + file->header_len; + file->header_size = aare::NumpyHelpers::magic_string_length + 2 + file->header_len_size + file->header_len; if (file->header_size % 16 != 0) { fmt::print("Warning: header length is not a multiple of 16\n"); } @@ -46,7 +49,7 @@ void NumpyFileFactory::parse_metadata(FileInterface *_file) { std::vector keys{"descr", "fortran_order", "shape"}; aare::logger::debug("original header: \"header\""); - auto dict_map = parse_dict(header, keys); + auto dict_map = aare::NumpyHelpers::parse_dict(header, keys); if (dict_map.size() == 0) throw std::runtime_error("invalid dictionary in header"); @@ -54,14 +57,14 @@ void NumpyFileFactory::parse_metadata(FileInterface *_file) { std::string fortran_s = dict_map["fortran_order"]; std::string shape_s = dict_map["shape"]; - std::string descr = parse_str(descr_s); - dtype_t dtype = parse_descr(descr); + std::string descr = aare::NumpyHelpers::parse_str(descr_s); + aare::DType dtype = aare::NumpyHelpers::parse_descr(descr); // convert literal Python bool to C++ bool - bool fortran_order = parse_bool(fortran_s); + bool fortran_order = aare::NumpyHelpers::parse_bool(fortran_s); // parse the shape tuple - auto shape_v = parse_tuple(shape_s); + auto shape_v = aare::NumpyHelpers::parse_tuple(shape_s); shape_t shape; for (auto item : shape_v) { auto dim = static_cast(std::stoul(item)); @@ -70,8 +73,16 @@ void NumpyFileFactory::parse_metadata(FileInterface *_file) { file->header = {dtype, fortran_order, shape}; } -NumpyFile *NumpyFileFactory::load_file() { +NumpyFile *NumpyFileFactory::load_file_read() { NumpyFile *file = new NumpyFile(this->m_fpath); parse_metadata(file); + return file; +}; + +NumpyFile *NumpyFileFactory::load_file_write(FileConfig config) { + NumpyFile *file = new NumpyFile(config, {config.dtype, false, {config.rows, config.cols}}); + + + return file; }; diff --git a/file_io/src/NumpyHelpers.cpp b/file_io/src/NumpyHelpers.cpp index d96df5e..59a85b9 100644 --- a/file_io/src/NumpyHelpers.cpp +++ b/file_io/src/NumpyHelpers.cpp @@ -1,5 +1,31 @@ +/* + 28-03-2024 modified by: Bechir Braham + + Copyright 2017-2023 Leon Merten Lohse + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + #include "aare/NumpyHelpers.hpp" +namespace aare::NumpyHelpers { + std::unordered_map parse_dict(std::string in, const std::vector &keys) { std::unordered_map map; if (keys.size() == 0) @@ -51,8 +77,7 @@ std::unordered_map parse_dict(std::string in, const st return map; } - -dtype_t parse_descr(std::string typestring) { +aare::DType parse_descr(std::string typestring) { if (typestring.length() < 3) { throw std::runtime_error("invalid typestring (length)"); } @@ -72,9 +97,9 @@ dtype_t parse_descr(std::string typestring) { if (!is_digits(itemsize_s)) { throw std::runtime_error("invalid typestring (itemsize)"); } - unsigned int itemsize = std::stoul(itemsize_s); + // unsigned int itemsize = std::stoul(itemsize_s); - return {byteorder_c, kind_c, itemsize}; + return aare::DType(typestring); } bool parse_bool(const std::string &in) { @@ -134,4 +159,94 @@ std::string parse_str(const std::string &in) { return in.substr(1, in.length() - 2); throw std::runtime_error("Invalid python string."); -} \ No newline at end of file +} + +void write_magic(std::ostream &ostream, int version_major, int version_minor) { + ostream.write(magic_str.data(), magic_string_length); + ostream.put(version_major); + ostream.put(version_minor); +} +template inline std::string write_tuple(const std::vector &v) { + + if (v.size() == 0) + return "()"; + std::ostringstream ss; + ss.imbue(std::locale("C")); + + if (v.size() == 1) { + ss << "(" << v.front() << ",)"; + } else { + const std::string delimiter = ", "; + // v.size() > 1 + ss << "("; + // for (size_t i = 0; i < v.size() - 1; ++i) { + // ss << v[i] << delimiter; + // } + // ss << v.back(); + std::copy(v.begin(), v.end()-1, std::ostream_iterator(ss, ", ")); + ss << v.back(); + ss << ")"; + } + + return ss.str(); +} + +inline std::string write_boolean(bool b) { + if (b) + return "True"; + else + return "False"; +} + +inline std::string write_header_dict(const std::string &descr, bool fortran_order, const shape_t &shape) { + std::string s_fortran_order = write_boolean(fortran_order); + std::string shape_s = write_tuple(shape); + + return "{'descr': '" + descr + "', 'fortran_order': " + s_fortran_order + ", 'shape': " + shape_s + ", }"; +} + +size_t write_header(std::filesystem::path fname, const header_t &header) { + std::ofstream out(fname, std::ios::binary | std::ios::out); + return write_header(out, header); +} + + +size_t write_header(std::ostream &out, const header_t &header) { + std::string header_dict = write_header_dict(header.dtype.str(), header.fortran_order, header.shape); + + size_t length = magic_string_length + 2 + 2 + header_dict.length() + 1; + + int version_major = 1; + int version_minor = 0; + if (length >= 255 * 255) { + length = magic_string_length + 2 + 4 + header_dict.length() + 1; + version_major = 2; + version_minor = 0; + } + size_t padding_len = 16 - length % 16; + std::string padding(padding_len, ' '); + + // write magic + write_magic(out, version_major, version_minor); + + // write header length + if (version_major == 1 and version_minor == 0) { + auto header_len = static_cast(header_dict.length() + padding.length() + 1); + + std::array header_len_le16{static_cast((header_len >> 0) & 0xff), + static_cast((header_len >> 8) & 0xff)}; + out.write(reinterpret_cast(header_len_le16.data()), 2); + } else { + auto header_len = static_cast(header_dict.length() + padding.length() + 1); + + std::array header_len_le32{ + static_cast((header_len >> 0) & 0xff), static_cast((header_len >> 8) & 0xff), + static_cast((header_len >> 16) & 0xff), static_cast((header_len >> 24) & 0xff)}; + out.write(reinterpret_cast(header_len_le32.data()), 4); + } + + out << header_dict << padding << '\n'; + return length; +} + +} // namespace aare::NumpyHelpers \ No newline at end of file diff --git a/file_io/src/RawFileFactory.cpp b/file_io/src/RawFileFactory.cpp index cd625a2..1361f27 100644 --- a/file_io/src/RawFileFactory.cpp +++ b/file_io/src/RawFileFactory.cpp @@ -119,7 +119,7 @@ void RawFileFactory::open_subfiles(FileInterface *_file) { } } -RawFile *RawFileFactory::load_file() { +RawFile *RawFileFactory::load_file_read() { RawFile *file = new RawFile(); file->m_fname = this->m_fpath; this->parse_fname(file); diff --git a/file_io/test/NumpyHelpers.test.cpp b/file_io/test/NumpyHelpers.test.cpp index dbdf2b7..16da447 100644 --- a/file_io/test/NumpyHelpers.test.cpp +++ b/file_io/test/NumpyHelpers.test.cpp @@ -1,6 +1,8 @@ #include #include "aare/NumpyHelpers.hpp" //Is this really a public header? +using namespace aare::NumpyHelpers; + TEST_CASE("is_digits with a few standard cases"){ REQUIRE(is_digits("")); REQUIRE(is_digits("123"));