Merge pull request #12 from slsdetectorgroup/numpy2

Numpy2 (PR#1)
This commit is contained in:
Erik Fröjdh 2024-03-08 14:24:08 +01:00 committed by GitHub
commit bc5c40d13b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 396 additions and 114 deletions

View File

@ -56,3 +56,23 @@ template <> TimingMode StringTo(std::string);
using DataTypeVariants = std::variant<uint16_t, uint32_t>;
/// Per-file configuration holding the row and column gap values between modules.
/// Both values default to zero.
struct RawFileConfig {
    int module_gap_row{};
    int module_gap_col{};

    /// Two configurations compare equal when both gap values are identical.
    bool operator==(const RawFileConfig &other) const {
        return module_gap_col == other.module_gap_col && module_gap_row == other.module_gap_row;
    }
};
// Byte-order prefix characters accepted as the first character of a dtype
// typestring (see parse_descr). Declared inline constexpr so every translation
// unit shares one definition and the values are usable in constant expressions.
inline constexpr char little_endian_char = '<';
inline constexpr char big_endian_char = '>';
inline constexpr char no_endian_char = '|';

// All accepted byte-order prefixes.
inline constexpr std::array<char, 3> endian_chars = {little_endian_char, big_endian_char, no_endian_char};
// All accepted "kind" characters (second character of a typestring);
// presumably NumPy's float / signed int / unsigned int / complex kinds.
inline constexpr std::array<char, 4> numtype_chars = {'f', 'i', 'u', 'c'};

View File

@ -7,6 +7,8 @@ set(SourceFiles
${CMAKE_CURRENT_SOURCE_DIR}/src/JsonFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/JsonFileFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/SubFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFileFactory.cpp
)
add_library(file_io STATIC ${SourceFiles})

View File

@ -3,23 +3,11 @@
#include "aare/defs.hpp"
#include "aare/Frame.hpp"
#include "SubFile.hpp"
#include <filesystem>
#include <fmt/core.h>
#include <iostream>
/// Row and column gap values between modules; both default to zero.
struct RawFileConfig {
    int module_gap_row{};
    int module_gap_col{};

    /// Equality holds only when both the column gap and the row gap match.
    bool operator==(const RawFileConfig &other) const {
        const bool same_col = (module_gap_col == other.module_gap_col);
        const bool same_row = (module_gap_row == other.module_gap_row);
        return same_col && same_row;
    }
};
template <DetectorType detector, typename DataType>
class File {
@ -27,54 +15,29 @@ class File {
virtual Frame<DataType>* get_frame(int frame_number) = 0;
private:
using config = RawFileConfig;
public:
std::vector<SubFile*> subfiles;
std::filesystem::path fname;
std::filesystem::path base_path;
std::string base_name, ext;
int findex, n_subfiles;
int findex;
size_t total_frames{};
size_t max_frames_per_file{};
std::string version;
DetectorType type;
TimingMode timing_mode;
int subfile_rows, subfile_cols;
bool quad{false};
ssize_t rows{};
ssize_t cols{};
uint8_t bitdepth{};
using data_type = uint16_t;
std::vector<xy> positions;
config cfg{0, 0};
// File();
~File();
/// Size of one frame in bytes: rows * cols * bitdepth, divided by 8.
inline size_t bytes_per_frame() const {
    const auto bits_per_frame = rows * cols * bitdepth;
    return bits_per_frame / 8;
}
/// Number of pixels in one frame (rows * cols).
inline size_t pixels() const {
    const auto pixel_count = rows * cols;
    return pixel_count;
}
/// Store the module gap values in cfg.
/// @param row value written to cfg.module_gap_row
/// @param col value written to cfg.module_gap_col
inline void set_config(int row, int col) {
    cfg.module_gap_col = col;
    cfg.module_gap_row = row;
}
// TODO! Deal with fast quad and missing files
/// Probe data_fname(i, 0) for i = 1, 2, ... and store in n_subfiles the first
/// index whose file does not exist. Index 0 is never probed, so the result is
/// at least 1.
void find_number_of_subfiles() {
    int candidate = 1;
    while (std::filesystem::exists(data_fname(candidate, 0))) {
        ++candidate;
    }
    n_subfiles = candidate;
}
/// Path of the master file: base_path / "<base_name>_master_<findex><ext>".
inline std::filesystem::path master_fname() {
    const auto leaf = fmt::format("{}_master_{}{}", base_name, findex, ext);
    return base_path / leaf;
}
/// Path of a raw data file below base_path.
/// NOTE(review): the format arguments are passed as (file_id, mod_id), so the
/// "_d" field receives file_id and the "_f" field receives mod_id — confirm
/// this matches the on-disk naming scheme.
inline std::filesystem::path data_fname(int mod_id, int file_id) {
    const auto leaf = fmt::format("{}_d{}_f{}_{}.raw", base_name, file_id, mod_id, findex);
    return base_path / leaf;
}
// size_t total_frames();
};
};

View File

@ -1,7 +1,6 @@
#pragma once
#include <filesystem>
#include "aare/File.hpp"
template <DetectorType detector,typename DataType>
class FileFactory{
// Class that will be used to create File objects
@ -13,14 +12,9 @@ public:
// virtual int deleteFile() = 0;
virtual File<detector,DataType>* load_file()=0;//TODO: add option to load all file to memory or keep it on disk
virtual void parse_metadata(File<detector,DataType>*)=0;
void find_geometry(File<detector,DataType>*);
void parse_fname(File<detector,DataType>*);
virtual void parse_fname(File<detector,DataType>*)=0;
sls_detector_header read_header(const std::filesystem::path &fname);
};

View File

@ -1,9 +1,40 @@
#pragma once
#include "aare/File.hpp"
#include "aare/Frame.hpp"
#include "aare/defs.hpp"
template <DetectorType detector, typename DataType>
class JsonFile : public File<detector, DataType> {
#include "aare/Frame.hpp"
#include "aare/File.hpp"
/**
 * File specialisation for acquisitions made of a master file plus one raw
 * sub-file per module.
 *
 * Owns the SubFile objects stored in `subfiles`; the destructor (defined in the
 * .cpp) deletes them.
 * NOTE(review): instances are handed out as File* by the factory — confirm the
 * File base class has a virtual destructor, otherwise deleting through the base
 * pointer will not run ~JsonFile.
 */
template <DetectorType detector, typename DataType> class JsonFile : public File<detector, DataType> {
    using config = RawFileConfig;

  public:
    /// Assemble and return the frame with the given number (defined in the .cpp).
    Frame<DataType> *get_frame(int frame_number) override;

    // Value-initialised so a freshly constructed object never exposes
    // indeterminate values before the factory fills them in.
    int n_subfiles{};
    std::vector<SubFile *> subfiles;
    int subfile_rows{};
    int subfile_cols{};

    std::vector<xy> positions;
    config cfg{0, 0};

    /// Store the module gap values in cfg.
    inline void set_config(int row, int col) {
        cfg.module_gap_row = row;
        cfg.module_gap_col = col;
    }

    // TODO! Deal with fast quad and missing files
    /// Probe data_fname(i, 0) for i = 1, 2, ... and store in n_subfiles the
    /// first index whose file does not exist (index 0 is never probed).
    void find_number_of_subfiles() {
        int n_mod = 0;
        while (std::filesystem::exists(data_fname(++n_mod, 0)))
            ;
        n_subfiles = n_mod;
    }

    /// Path of the master file: base_path / "<base_name>_master_<findex><ext>".
    inline std::filesystem::path master_fname() const {
        return this->base_path / fmt::format("{}_master_{}{}", this->base_name, this->findex, this->ext);
    }

    /// Path of a raw data file below base_path.
    /// NOTE(review): the format arguments are passed as (file_id, mod_id), so
    /// "_d" receives file_id and "_f" receives mod_id — confirm this matches
    /// the on-disk naming scheme.
    inline std::filesystem::path data_fname(int mod_id, int file_id) const {
        return this->base_path / fmt::format("{}_d{}_f{}_{}.raw", this->base_name, file_id, mod_id, this->findex);
    }

    ~JsonFile();
};

View File

@ -7,10 +7,13 @@ private:
public:
File<detector,DataType>* load_file() override;
void parse_metadata(File<detector,DataType>*) override;
void parse_fname(File<detector,DataType>*) override;
JsonFileFactory<detector,DataType>(std::filesystem::path fpath);
void open_subfiles(File<detector,DataType>*);
sls_detector_header read_header(const std::filesystem::path &fname);
void find_geometry(File<detector,DataType>*);
};

View File

@ -0,0 +1,9 @@
#include "aare/File.hpp"
#include "aare/defs.hpp"
/// File type used by NumpyFileFactory. It currently adds no members of its own
/// on top of File<detector, DataType>.
template <DetectorType detector, typename DataType>
class NumpyFile : public File<detector, DataType> {};

View File

@ -0,0 +1,29 @@
#include "aare/defs.hpp"
#include "aare/FileFactory.hpp"
#include "aare/NumpyFile.hpp"
#include <algorithm>
#include <array>
#include <filesystem>
#include <fstream>
#include <string>
#include <unordered_map>
#include <vector>
/**
 * Factory that reads the header of a NumPy .npy file.
 *
 * parse_metadata() fills in the format version and header length members.
 * NOTE(review): FileFactory declares parse_fname() as pure virtual and no
 * override is declared here — confirm whether this class is meant to be
 * instantiable yet. Likewise confirm that FileFactory declares a virtual
 * open_subfiles(), which the `override` below requires.
 */
template <DetectorType detector, typename DataType> class NumpyFileFactory : public FileFactory<detector, DataType> {
  public:
    NumpyFileFactory(std::filesystem::path fpath);
    void parse_metadata(File<detector, DataType> *_file) override;
    void open_subfiles(File<detector, DataType> *_file) override;
    File<detector, DataType> *load_file() override;

    /// Major format version read by the last parse_metadata() call (0 before that).
    uint8_t major_ver() const noexcept { return major_ver_; }
    /// Minor format version read by the last parse_metadata() call (0 before that).
    uint8_t minor_ver() const noexcept { return minor_ver_; }

  private:
    /// Magic bytes every .npy file must start with.
    static constexpr std::array<char, 6> magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'};
    /// Length of the magic string. Derived from magic_str so the two cannot
    /// drift apart, and static so it neither occupies per-object storage nor
    /// disables assignment of the factory.
    static constexpr uint8_t magic_string_length{static_cast<uint8_t>(magic_str.size())};

    uint8_t major_ver_{};
    uint8_t minor_ver_{};
    /// Length in bytes of the header dictionary that follows the length field.
    uint32_t header_len{};
    /// Width in bytes of the header length field (2 for version 1, 4 for version 2).
    uint8_t header_len_size{};
};

View File

@ -1,10 +1,2 @@
#include "aare/File.hpp"
/// Release every SubFile owned through the subfiles vector.
template <DetectorType detector, typename DataType>
File<detector, DataType>::~File() {
    for (auto it = subfiles.begin(); it != subfiles.end(); ++it) {
        delete *it;
    }
}
template class File<DetectorType::Jungfrau, uint16_t>;
// #include "aare/File.hpp"
// template class File<DetectorType::Jungfrau, uint16_t>;

View File

@ -26,50 +26,6 @@ FileFactory<detector, DataType> *FileFactory<detector, DataType>::get_factory(st
throw std::runtime_error("Unsupported file type");
}
/// Split fpath into the pieces stored on the file object: directory, extension,
/// file index (the number after the last '_' of the stem) and base name (the
/// stem truncated at the first "_master_").
template <DetectorType detector, typename DataType>
void FileFactory<detector, DataType>::parse_fname(File<detector, DataType> *file) {
    file->ext = fpath.extension();
    file->base_name = fpath.stem();
    file->base_path = fpath.parent_path();

    const auto last_underscore = file->base_name.rfind("_");
    const auto index_text = file->base_name.substr(last_underscore + 1);
    file->findex = std::stoi(index_text);

    const auto master_tag = file->base_name.find("_master_");
    file->base_name.erase(master_tag);
}
/// Read one sls_detector_header from the start of the given file.
/// @throws std::runtime_error if the file cannot be opened or a full header
///         cannot be read.
template <DetectorType detector, typename DataType>
sls_detector_header FileFactory<detector, DataType>::read_header(const std::filesystem::path &fname) {
    FILE *stream = fopen(fname.c_str(), "r");
    if (stream == nullptr)
        throw std::runtime_error(fmt::format("Could not open: {} for reading", fname.c_str()));

    sls_detector_header header{};
    const size_t items_read = fread(reinterpret_cast<char *>(&header), sizeof(header), 1, stream);
    // Close before checking the result so the handle is released on both paths.
    fclose(stream);
    if (items_read != 1)
        throw std::runtime_error("Could not read header from file");
    return header;
}
/// Read the header of every sub-file, record each module position and derive
/// the full frame size from the largest row/column index found.
/// NOTE(review): only the row gap is added to the total; cfg.module_gap_col is
/// not applied to cols — confirm whether that is intended.
template <DetectorType detector, typename DataType>
void FileFactory<detector, DataType>::find_geometry(File<detector, DataType> *file) {
    uint16_t max_row{};
    uint16_t max_col{};

    for (int i = 0; i != file->n_subfiles; ++i) {
        auto header = this->read_header(file->data_fname(i, 0));
        max_row = std::max(max_row, header.row);
        max_col = std::max(max_col, header.column);
        file->positions.push_back({header.row, header.column});
    }

    // Indices are zero based, so the module count is the largest index plus one.
    max_row++;
    max_col++;

    file->rows = max_row * file->subfile_rows;
    file->cols = max_col * file->subfile_cols;

    file->rows += (max_row - 1) * file->cfg.module_gap_row;
}
template class FileFactory<DetectorType::Jungfrau, uint16_t>;
template class FileFactory<DetectorType::Jungfrau, uint16_t>;

View File

@ -1,5 +1,4 @@
#include "aare/JsonFile.hpp"
#include <typeinfo>
template <DetectorType detector, typename DataType>
Frame<DataType> *JsonFile<detector, DataType>::get_frame(int frame_number) {
@ -16,4 +15,12 @@ Frame<DataType> *JsonFile<detector, DataType>::get_frame(int frame_number) {
return f;
}
template class JsonFile<DetectorType::Jungfrau, uint16_t>;
/// Release every SubFile owned through the subfiles vector.
template <DetectorType detector, typename DataType>
JsonFile<detector, DataType>::~JsonFile() {
    for (auto it = subfiles.begin(); it != subfiles.end(); ++it) {
        delete *it;
    }
}
template class JsonFile<DetectorType::Jungfrau, uint16_t>;

View File

@ -17,8 +17,8 @@ JsonFileFactory<detector,DataType>::JsonFileFactory(std::filesystem::path fpath)
}
template <DetectorType detector,typename DataType>
void JsonFileFactory<detector,DataType>::parse_metadata(File<detector,DataType> *file) {
std::cout << "Parsing metadata" << std::endl;
void JsonFileFactory<detector,DataType>::parse_metadata(File<detector,DataType> *_file) {
auto file = dynamic_cast<JsonFile<detector,DataType> *>(_file);
std::ifstream ifs(file->master_fname());
json j;
ifs >> j;
@ -51,7 +51,8 @@ void JsonFileFactory<detector,DataType>::parse_metadata(File<detector,DataType>
}
template <DetectorType detector,typename DataType>
void JsonFileFactory<detector,DataType>::open_subfiles(File<detector,DataType> *file) {
void JsonFileFactory<detector,DataType>::open_subfiles(File<detector,DataType> *_file) {
auto file = dynamic_cast<JsonFile<detector,DataType> *>(_file);
for (int i = 0; i != file->n_subfiles; ++i) {
file->subfiles.push_back(
@ -61,7 +62,6 @@ void JsonFileFactory<detector,DataType>::open_subfiles(File<detector,DataType> *
template <DetectorType detector,typename DataType>
File<detector,DataType> *JsonFileFactory<detector,DataType>::load_file() {
std::cout << "Loading json file" << std::endl;
JsonFile<detector,DataType> *file = new JsonFile<detector,DataType>();
file->fname = this->fpath;
this->parse_fname(file);
@ -73,4 +73,55 @@ File<detector,DataType> *JsonFileFactory<detector,DataType>::load_file() {
return file;
}
/**
 * Read one sls_detector_header from the start of the given file.
 *
 * @param fname path of the raw data file
 * @return the header, copied byte-for-byte from the file
 * @throws std::runtime_error if the file cannot be opened or does not contain
 *         a complete header
 */
template <DetectorType detector, typename DataType>
sls_detector_header JsonFileFactory<detector, DataType>::read_header(const std::filesystem::path &fname) {
    sls_detector_header h{};
    // The header is raw binary data, so open in binary mode: text mode may
    // translate line endings or stop at an EOF marker on some platforms.
    FILE *fp = fopen(fname.c_str(), "rb");
    if (!fp)
        throw std::runtime_error(fmt::format("Could not open: {} for reading", fname.c_str()));
    size_t rc = fread(reinterpret_cast<char *>(&h), sizeof(h), 1, fp);
    // Close before checking the result so the handle is released on both paths.
    fclose(fp);
    if (rc != 1)
        throw std::runtime_error("Could not read header from file");
    return h;
}
/**
 * Read the header of every sub-file, record each module position and derive
 * the full frame size from the largest row/column index found.
 *
 * @param _file file object to update; must actually be a JsonFile
 * @throws std::runtime_error if _file is not a JsonFile, or if a header
 *         cannot be read (propagated from read_header)
 *
 * NOTE(review): only the row gap is added to the total; cfg.module_gap_col is
 * not applied to cols — confirm whether that is intended.
 */
template <DetectorType detector, typename DataType>
void JsonFileFactory<detector, DataType>::find_geometry(File<detector, DataType> *_file) {
    auto file = dynamic_cast<JsonFile<detector, DataType> *>(_file);
    // dynamic_cast yields nullptr for a null or differently typed file; fail
    // loudly instead of dereferencing it.
    if (file == nullptr)
        throw std::runtime_error("find_geometry: file is not a JsonFile");

    uint16_t r{};
    uint16_t c{};
    for (int i = 0; i != file->n_subfiles; ++i) {
        auto h = this->read_header(file->data_fname(i, 0));
        r = std::max(r, h.row);
        c = std::max(c, h.column);
        file->positions.push_back({h.row, h.column});
    }
    // Indices are zero based, so the module count is the largest index plus one.
    r++;
    c++;
    file->rows = r * file->subfile_rows;
    file->cols = c * file->subfile_cols;
    file->rows += (r - 1) * file->cfg.module_gap_row;
}
/**
 * Split this->fpath into the pieces stored on the file object.
 *
 * For a path such as dir/run_master_3.json this sets base_path = dir,
 * ext = .json, findex = 3 and base_name = run.
 *
 * @param file file object whose base_path, base_name, ext and findex are set
 * @throws std::runtime_error if the file name does not contain "_master_"
 * @throws std::invalid_argument / std::out_of_range (from std::stoi) if the
 *         text after the last '_' is not a valid integer
 */
template <DetectorType detector, typename DataType>
void JsonFileFactory<detector, DataType>::parse_fname(File<detector, DataType> *file) {
    file->base_path = this->fpath.parent_path();
    file->base_name = this->fpath.stem();
    file->ext = this->fpath.extension();

    // Use the last occurrence so a base name that itself contains "_master_"
    // is kept intact. A missing tag would otherwise surface as an opaque
    // std::out_of_range from erase(npos).
    const auto master_pos = file->base_name.rfind("_master_");
    if (master_pos == std::string::npos)
        throw std::runtime_error("Master file name does not contain '_master_': " + this->fpath.string());

    // The tag itself contains '_', so rfind cannot return npos here.
    const auto index_pos = file->base_name.rfind("_");
    file->findex = std::stoi(file->base_name.substr(index_pos + 1));

    file->base_name.erase(master_pos);
}
template class JsonFileFactory<DetectorType::Jungfrau, uint16_t>;

View File

View File

@ -0,0 +1,225 @@
#include "aare/NumpyFileFactory.hpp"
/// Remember the path of the file this factory will load.
template <DetectorType detector, typename DataType>
NumpyFileFactory<detector, DataType>::NumpyFileFactory(std::filesystem::path fpath) {
    FileFactory<detector, DataType>::fpath = fpath;
}
/**
 * Strip the surrounding single quotes from a Python string literal.
 *
 * @param in text expected to have the form 'value'
 * @return the characters between the two quotes (possibly empty)
 * @throws std::runtime_error if in is shorter than two characters or is not
 *         enclosed in single quotes
 */
inline std::string parse_str(const std::string &in) {
    // Require both quote characters. The length check also keeps front()/back()
    // away from an empty string, where calling them is undefined behaviour, and
    // rejects a lone quote that would otherwise count as both ends.
    if (in.length() >= 2 && in.front() == '\'' && in.back() == '\'')
        return in.substr(1, in.length() - 2);
    throw std::runtime_error("Invalid python string.");
}
/**
Removes leading and trailing whitespace (spaces, tabs, carriage returns and
newlines).

Newlines are included because an NPY header is terminated by '\n'; without
stripping it the closing '}' of the header dictionary is never the last
character and parse_dict rejects a valid header.

@param str text to trim
@return copy of str without surrounding whitespace; empty if str is empty or
        consists only of whitespace
*/
inline std::string trim(const std::string &str) {
    const std::string whitespace = " \t\r\n";
    const auto begin = str.find_first_not_of(whitespace);
    if (begin == std::string::npos)
        return "";
    const auto end = str.find_last_not_of(whitespace);
    return str.substr(begin, end - begin + 1);
}
/**
 * Split a Python tuple literal such as "(3, 4)" into its comma-separated items.
 *
 * Items are returned exactly as they appear between the commas (surrounding
 * whitespace is kept). "()" yields an empty vector.
 *
 * @param in tuple text; leading and trailing whitespace is ignored
 * @return the raw item strings in order
 * @throws std::runtime_error if in is not enclosed in parentheses
 */
inline std::vector<std::string> parse_tuple(std::string in) {
    const char separator = ',';

    in = trim(in);
    // The length check keeps front()/back() away from an empty string, where
    // calling them is undefined behaviour.
    if (in.length() < 2 || in.front() != '(' || in.back() != ')')
        throw std::runtime_error("Invalid Python tuple.");
    in = in.substr(1, in.length() - 2);

    std::vector<std::string> v;
    std::istringstream iss(in);
    for (std::string token; std::getline(iss, token, separator);) {
        v.push_back(token);
    }
    return v;
}
/// Convert the Python literals "True" / "False" to a C++ bool.
/// @throws std::runtime_error for any other input
inline bool parse_bool(const std::string &in) {
    const bool is_true = (in == "True");
    if (!is_true && in != "False")
        throw std::runtime_error("Invalid python boolan.");
    return is_true;
}
/// Return the trimmed text after the first ':' of a "key: value" entry, or an
/// empty string when the entry contains no ':'.
inline std::string get_value_from_map(const std::string &mapstr) {
    const auto colon = mapstr.find(':');
    if (colon == std::string::npos)
        return "";
    return trim(mapstr.substr(colon + 1));
}
/**
 * Extract the raw value strings for a known set of keys from a flat Python
 * dictionary literal such as "{'descr': '<u2', 'shape': (3, 4), }".
 *
 * Each key is located by searching for its quoted name. The value is the text
 * between that key's ':' and the start of the next requested key, with
 * surrounding whitespace and one trailing comma removed.
 *
 * @param in   dictionary text; leading and trailing whitespace is ignored
 * @param keys keys that must all be present
 * @return map from key to raw value text; empty when keys is empty
 * @throws std::runtime_error if in is not enclosed in braces or a key is missing
 */
std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys) {
    std::unordered_map<std::string, std::string> map;
    if (keys.empty())
        return map;

    in = trim(in);
    // unwrap dictionary; the length check keeps front()/back() away from an
    // empty string, where calling them is undefined behaviour
    if (in.length() < 2 || in.front() != '{' || in.back() != '}')
        throw std::runtime_error("Not a Python dictionary.");
    in = in.substr(1, in.length() - 2);

    std::vector<std::pair<size_t, std::string>> positions;
    positions.reserve(keys.size());
    for (const auto &key : keys) {
        const size_t pos = in.find("'" + key + "'");
        if (pos == std::string::npos)
            throw std::runtime_error("Missing '" + key + "' key.");
        positions.emplace_back(pos, key);
    }

    // sort by position in dict so each entry ends where the next one starts
    std::sort(positions.begin(), positions.end());

    for (size_t i = 0; i < positions.size(); ++i) {
        const size_t begin = positions[i].first;
        // The last entry runs to the end of the text (npos - begin is clamped
        // by substr).
        const size_t end = (i + 1 < positions.size()) ? positions[i + 1].first : std::string::npos;

        std::string raw_value = trim(in.substr(begin, end - begin));
        // The slice is empty when the same key was requested twice; guard
        // back() so that case does not invoke undefined behaviour.
        if (!raw_value.empty() && raw_value.back() == ',')
            raw_value.pop_back();

        map[positions[i].second] = get_value_from_map(raw_value);
    }
    return map;
}
/// Array dimensions, one entry per axis.
using shape_t = std::vector<uint64_t>;

/// Decoded dtype typestring (see parse_descr): byte-order prefix, kind
/// character and item size. All members are zero-initialised so a
/// default-constructed value never holds indeterminate data.
struct dtype_t {
    char byteorder{};
    char kind{};
    unsigned int itemsize{};
};

/// Parsed contents of a header dictionary: the 'descr', 'fortran_order' and
/// 'shape' entries.
struct header_t {
    dtype_t dtype{};
    bool fortran_order{false};
    shape_t shape{};
};
/// True when val compares equal to at least one element of arr.
template <typename T, size_t N> inline bool in_array(T val, const std::array<T, N> &arr) {
    for (const auto &candidate : arr) {
        if (candidate == val)
            return true;
    }
    return false;
}
/// True when every character of str is an ASCII decimal digit ('0'-'9').
/// An empty string yields true, since there is no non-digit character.
inline bool is_digits(const std::string &str) {
    // Compare against the digit range directly: passing a plain char with a
    // negative value (any byte >= 0x80 where char is signed) to ::isdigit is
    // undefined behaviour.
    return std::all_of(str.begin(), str.end(), [](char c) { return c >= '0' && c <= '9'; });
}
/// Decode a dtype typestring such as "<u2" into byte order, kind and item size.
/// The first character must be one of endian_chars, the second one of
/// numtype_chars, and the remainder must consist of digits only.
/// @throws std::runtime_error naming the part of the typestring that is invalid
inline dtype_t parse_descr(std::string typestring) {
    if (typestring.length() < 3)
        throw std::runtime_error("invalid typestring (length)");

    const char order = typestring[0];
    const char kind = typestring[1];
    const std::string size_digits = typestring.substr(2);

    const bool order_ok = in_array(order, endian_chars);
    if (!order_ok)
        throw std::runtime_error("invalid typestring (byteorder)");

    const bool kind_ok = in_array(kind, numtype_chars);
    if (!kind_ok)
        throw std::runtime_error("invalid typestring (kind)");

    if (!is_digits(size_digits))
        throw std::runtime_error("invalid typestring (itemsize)");

    const auto itemsize = static_cast<unsigned int>(std::stoul(size_digits));
    return dtype_t{order, kind, itemsize};
}
/**
 * Read and validate the header of the .npy file referenced by _file->fname.
 *
 * Layout read here: 6 magic bytes, one byte major version, one byte minor
 * version, the header length (2 bytes for version 1, 4 bytes for version 2,
 * stored little-endian), then the header dictionary text.
 *
 * Sets major_ver_, minor_ver_, header_len_size and header_len. The parsed
 * dtype, fortran_order and shape are validated but not yet stored anywhere.
 *
 * @param _file file object to read; must actually be a NumpyFile
 * @throws std::runtime_error if _file is not a NumpyFile, the file cannot be
 *         opened or is truncated, the magic string or version is wrong, or
 *         the header dictionary is malformed
 * @throws std::invalid_argument / std::out_of_range (from std::stoul) if a
 *         shape entry is not a valid number
 */
template <DetectorType detector, typename DataType>
void NumpyFileFactory<detector, DataType>::parse_metadata(File<detector, DataType> *_file) {
    auto file = dynamic_cast<NumpyFile<detector, DataType> *>(_file);
    // dynamic_cast yields nullptr for a null or differently typed file; fail
    // loudly instead of dereferencing it.
    if (file == nullptr)
        throw std::runtime_error("parse_metadata: file is not a NumpyFile");

    // open ifstream to file
    std::ifstream f(file->fname, std::ios::binary);
    if (!f.is_open()) {
        throw std::runtime_error(fmt::format("Could not open: {} for reading", file->fname.c_str()));
    }

    // read magic number (a short read leaves zeros in tmp and fails the comparison)
    std::array<char, 6> tmp{};
    f.read(tmp.data(), tmp.size());
    if (tmp != NumpyFileFactory<detector, DataType>::magic_str) {
        for (auto item : tmp)
            fmt::print("{}, ", int(item));
        fmt::print("\n");
        throw std::runtime_error("Not a numpy file");
    }

    // read version
    f.read(reinterpret_cast<char *>(&major_ver_), 1);
    f.read(reinterpret_cast<char *>(&minor_ver_), 1);
    if (!f)
        throw std::runtime_error("Could not read numpy version");

    if (major_ver_ == 1) {
        header_len_size = 2;
    } else if (major_ver_ == 2) {
        header_len_size = 4;
    } else {
        throw std::runtime_error("Unsupported numpy version");
    }

    // read header length: decode the little-endian bytes explicitly so the
    // result does not depend on host byte order, and so a 2-byte length never
    // keeps stale upper bytes from an earlier call
    std::array<unsigned char, 4> len_bytes{};
    f.read(reinterpret_cast<char *>(len_bytes.data()), header_len_size);
    if (!f)
        throw std::runtime_error("Could not read numpy header length");
    header_len = 0;
    for (size_t i = header_len_size; i-- > 0;) {
        header_len = (header_len << 8) | len_bytes[i];
    }

    if ((magic_string_length + 2 + header_len_size + header_len) % 16 != 0) {
        fmt::print("Warning: header length is not a multiple of 16\n");
    }

    // read header
    std::string header(header_len, '\0');
    if (header_len > 0)
        f.read(&header[0], header_len);
    if (!f)
        throw std::runtime_error("Could not read numpy header");

    // The header text is padded with spaces and terminated by a newline; drop
    // that padding so the dictionary's closing brace is the last character.
    while (!header.empty() && (header.back() == '\n' || header.back() == ' ')) {
        header.pop_back();
    }

    // parse header
    std::vector<std::string> keys{"descr", "fortran_order", "shape"};
    auto dict_map = parse_dict(header, keys);
    if (dict_map.size() == 0)
        throw std::runtime_error("invalid dictionary in header");

    std::string descr_s = dict_map["descr"];
    std::string fortran_s = dict_map["fortran_order"];
    std::string shape_s = dict_map["shape"];

    std::string descr = parse_str(descr_s);
    dtype_t dtype = parse_descr(descr);

    // convert literal Python bool to C++ bool
    bool fortran_order = parse_bool(fortran_s);

    // parse the shape tuple
    auto shape_v = parse_tuple(shape_s);
    shape_t shape;
    shape.reserve(shape_v.size());
    for (const auto &item : shape_v) {
        auto dim = static_cast<unsigned long>(std::stoul(item));
        shape.push_back(dim);
    }

    // TODO: dtype, fortran_order and shape are parsed (and thereby validated)
    // but not stored yet.
    // {dtype, fortran_order, shape};
    (void)dtype;
    (void)fortran_order;
}