moved reading of metadata into NumpyFile

This commit is contained in:
Erik Frojdh 2024-04-02 15:01:56 +02:00
parent 390ceb02d8
commit a6c0879de7
5 changed files with 82 additions and 126 deletions

View File

@ -1,56 +0,0 @@
#pragma once
#include "aare/Frame.hpp"
#include <filesystem>
namespace aare {
// Abstract interface for frame-based file access. Apart from the constructor,
// every member is pure virtual and has to be supplied by a concrete file class.
class FileInterface {
public:
// fname: path of the file, opts: access mode. Documented options:
// - r reading
// - w writing (overwrites existing file)
// - a appending (appends to existing file)
// TODO! do we need to support w+, r+ and a+?
// NOTE(review): the constructor body is empty, so fname and opts are not used here.
FileInterface(const std::filesystem::path& fname, const char* opts="r" ){};
// write one frame
virtual void write(Frame& frame) = 0;
// write several frames, passed as a vector
virtual void write(std::vector<Frame>& frames) = 0;
// read one frame
virtual Frame read() = 0;
// read n_frames frames
virtual std::vector<Frame> read(size_t n_frames) = 0; //Is this the right interface?
// read one frame into the provided buffer
// (presumably the buffer must hold at least bytes_per_frame() bytes — TODO confirm)
virtual void read_into(std::byte* image_buf) = 0;
// read n_frames frames into the provided buffer
virtual void read_into(std::byte* image_buf, size_t n_frames) = 0;
// read the frame number on position frame_index
virtual size_t frame_number(size_t frame_index) = 0;
// size of one frame, important for the read_into functions
virtual size_t bytes_per_frame() const = 0;
// go to frame number
virtual void seek(size_t frame_number) = 0;
// return the position of the file pointer (in number of frames)
virtual size_t tell() = 0;
// total number of frames in the file
virtual size_t total_frames() = 0;
// dimensions of a frame (presumably in pixels — TODO confirm)
virtual size_t rows() const = 0;
virtual size_t cols() const = 0;
// function to query the data type of the file
/*virtual DataType dtype = 0; */
// NOTE(review): a pure virtual destructor still needs a definition somewhere,
// otherwise derived classes fail to link; none is visible here — confirm one exists.
virtual ~FileInterface() = 0;
};
} // namespace aare

View File

@ -15,9 +15,11 @@ class NumpyFile : public FileInterface {
void get_frame_into(size_t, std::byte *);
Frame get_frame(size_t frame_number);
void load_metadata();
std::filesystem::path m_fname;
public:
std::filesystem::path m_fname; //TO be made private!
NumpyFile(const std::filesystem::path& fname);
NumpyFile(FileConfig, header_t);

View File

@ -13,7 +13,7 @@ class NumpyFileFactory : public FileFactory {
public:
NumpyFileFactory(std::filesystem::path fpath);
void parse_metadata(FileInterface *_file) override;
// Empty override kept until the refactor is finished (see TODO); it does nothing with _file.
void parse_metadata(FileInterface *_file) override{/*TODO! remove after refactor*/};
NumpyFile* load_file_read() override;
NumpyFile* load_file_write(FileConfig) override;
// Empty override: the passed file is ignored and nothing is parsed.
void parse_fname(FileInterface*)override{};

View File

@ -4,8 +4,13 @@
namespace aare{
/**
 * @brief Open a numpy file for reading and load its header metadata.
 *
 * @param fname path of the file to open (binary read mode)
 * @throws std::runtime_error if the file cannot be opened; any exception
 *         raised by load_metadata() is propagated after the file is closed.
 */
NumpyFile::NumpyFile(const std::filesystem::path& fname) {
    // TODO! add opts to constructor
    m_fname = fname;
    fp = fopen(m_fname.c_str(), "rb");
    if (!fp) {
        throw std::runtime_error(fmt::format("Could not open: {} for reading", m_fname.c_str()));
    }
    try {
        load_metadata();
    } catch (...) {
        // The destructor is not run for an object whose constructor throws,
        // so the file handle has to be released here to avoid leaking it.
        fclose(fp);
        fp = nullptr;
        throw;
    }
}
NumpyFile::NumpyFile(FileConfig config, header_t header) {
mode = "w";
@ -102,4 +107,77 @@ NumpyFile::~NumpyFile() {
}
}
void NumpyFile::load_metadata(){
// auto file = dynamic_cast<NumpyFile *>(_file);
// // open ifsteam to file
// f = std::ifstream(file->m_fname, std::ios::binary);
// // check if file exists
// if (!f.is_open()) {
// throw std::runtime_error(fmt::format("Could not open: \"{}\" for reading", file->m_fname.c_str()));
// }
// read magic number
std::array<char, 6> tmp{};
fread(tmp.data(), tmp.size(), 1, fp);
if (tmp != aare::NumpyHelpers::magic_str) {
for (auto item : tmp)
fmt::print("{}, ", int(item));
fmt::print("\n");
throw std::runtime_error("Not a numpy file");
}
// read version
fread(reinterpret_cast<char *>(&major_ver_),sizeof(major_ver_), 1,fp);
fread(reinterpret_cast<char *>(&minor_ver_), sizeof(minor_ver_),1,fp);
if (major_ver_ == 1) {
header_len_size = 2;
} else if (major_ver_ == 2) {
header_len_size = 4;
} else {
throw std::runtime_error("Unsupported numpy version");
}
// read header length
fread(reinterpret_cast<char *>(&header_len), header_len_size,1, fp);
header_size = aare::NumpyHelpers::magic_string_length + 2 + header_len_size + header_len;
if (header_size % 16 != 0) {
fmt::print("Warning: header length is not a multiple of 16\n");
}
// read header
auto buf_v = std::vector<char>(header_len);
fread(buf_v.data(), header_len,1,fp);
std::string header(buf_v.data(), header_len);
// parse header
std::vector<std::string> keys{"descr", "fortran_order", "shape"};
aare::logger::debug("original header: \"header\"");
auto dict_map = aare::NumpyHelpers::parse_dict(header, keys);
if (dict_map.size() == 0)
throw std::runtime_error("invalid dictionary in header");
std::string descr_s = dict_map["descr"];
std::string fortran_s = dict_map["fortran_order"];
std::string shape_s = dict_map["shape"];
std::string descr = aare::NumpyHelpers::parse_str(descr_s);
aare::DType dtype = aare::NumpyHelpers::parse_descr(descr);
// convert literal Python bool to C++ bool
bool fortran_order = aare::NumpyHelpers::parse_bool(fortran_s);
// parse the shape tuple
auto shape_v = aare::NumpyHelpers::parse_tuple(shape_s);
shape_t shape;
for (auto item : shape_v) {
auto dim = static_cast<unsigned long>(std::stoul(item));
shape.push_back(dim);
}
m_header = {dtype, fortran_order, shape};
}
} // namespace aare

View File

@ -4,78 +4,10 @@
namespace aare {
/// Create a factory for the numpy file located at `fpath`; the path is stored in m_fpath.
NumpyFileFactory::NumpyFileFactory(std::filesystem::path fpath) {
    m_fpath = fpath;
}
/**
 * @brief Parse the numpy header of the file behind `_file` and store it in that object.
 *
 * Opens `m_fname` of the NumpyFile through the factory's stream `f`, reads the
 * magic string, version, header length and header dictionary, and fills
 * major_ver_, minor_ver_, header_len_size, header_len, header_size and
 * m_header of the NumpyFile.
 *
 * @param _file file object to fill; must point to a NumpyFile
 * @throws std::runtime_error if `_file` is not a NumpyFile, the file cannot be
 *         opened, the magic string is missing or wrong, the major version is
 *         neither 1 nor 2, a part of the header cannot be read completely, or
 *         the header dictionary cannot be parsed.
 */
void NumpyFileFactory::parse_metadata(FileInterface *_file) {
    auto *file = dynamic_cast<NumpyFile *>(_file);
    // dynamic_cast yields nullptr for a null or non-NumpyFile argument
    if (file == nullptr) {
        throw std::runtime_error("parse_metadata requires a NumpyFile");
    }

    // open ifstream to file
    f = std::ifstream(file->m_fname, std::ios::binary);
    // check if file exists
    if (!f.is_open()) {
        throw std::runtime_error(fmt::format("Could not open: {} for reading", file->m_fname.c_str()));
    }

    // A failed or short read only sets the stream state; turn it into an error
    // instead of continuing with partially filled buffers.
    const auto read_exact = [this](char *dst, std::streamsize n_bytes, const char *what) {
        if (!f.read(dst, n_bytes)) {
            throw std::runtime_error(fmt::format("Could not read {} from numpy file", what));
        }
    };

    // read magic number; a file too short to contain it is reported the same
    // way as a file with the wrong magic string
    std::array<char, 6> tmp{};
    f.read(tmp.data(), tmp.size());
    if (!f || tmp != aare::NumpyHelpers::magic_str) {
        for (auto item : tmp)
            fmt::print("{}, ", static_cast<int>(item));
        fmt::print("\n");
        throw std::runtime_error("Not a numpy file");
    }

    // read version
    read_exact(reinterpret_cast<char *>(&file->major_ver_), 1, "major version");
    read_exact(reinterpret_cast<char *>(&file->minor_ver_), 1, "minor version");

    if (file->major_ver_ == 1) {
        file->header_len_size = 2;
    } else if (file->major_ver_ == 2) {
        file->header_len_size = 4;
    } else {
        throw std::runtime_error("Unsupported numpy version");
    }

    // read header length
    // Only header_len_size bytes are overwritten by the read, so clear the
    // field first to keep any remaining bytes at zero.
    // NOTE(review): copying the raw bytes into the integer assumes a little-endian host.
    file->header_len = 0;
    read_exact(reinterpret_cast<char *>(&file->header_len), file->header_len_size, "header length");
    file->header_size = aare::NumpyHelpers::magic_string_length + 2 + file->header_len_size + file->header_len;
    if (file->header_size % 16 != 0) {
        fmt::print("Warning: header length is not a multiple of 16\n");
    }

    // read header
    std::string header(file->header_len, '\0');
    read_exact(header.data(), static_cast<std::streamsize>(header.size()), "header");

    // parse header
    std::vector<std::string> keys{"descr", "fortran_order", "shape"};
    // NOTE(review): this logs the literal word "header", not the header contents — confirm intent.
    aare::logger::debug("original header: \"header\"");

    auto dict_map = aare::NumpyHelpers::parse_dict(header, keys);
    if (dict_map.size() == 0)
        throw std::runtime_error("invalid dictionary in header");

    std::string descr_s = dict_map["descr"];
    std::string fortran_s = dict_map["fortran_order"];
    std::string shape_s = dict_map["shape"];

    std::string descr = aare::NumpyHelpers::parse_str(descr_s);
    aare::DType dtype = aare::NumpyHelpers::parse_descr(descr);

    // convert literal Python bool to C++ bool
    bool fortran_order = aare::NumpyHelpers::parse_bool(fortran_s);

    // parse the shape tuple
    auto shape_v = aare::NumpyHelpers::parse_tuple(shape_s);
    shape_t shape;
    for (const auto &item : shape_v) {
        shape.push_back(std::stoul(item));
    }
    file->m_header = {dtype, fortran_order, shape};
}
/**
 * @brief Create a NumpyFile for the factory's path and parse its metadata.
 *
 * @return pointer to a NumpyFile allocated with `new`
 * @throws whatever the NumpyFile constructor or parse_metadata() throws; the
 *         allocated object is deleted before the exception is propagated.
 */
NumpyFile *NumpyFileFactory::load_file_read() {
    NumpyFile *file = new NumpyFile(this->m_fpath);
    try {
        parse_metadata(file);
    } catch (...) {
        // nobody else holds the pointer yet, so free it here to avoid a leak
        delete file;
        throw;
    }
    return file;
}