read numpy file: works

This commit is contained in:
Bechir 2024-03-08 18:26:02 +01:00
parent 146d2aed19
commit 765bbdc295
14 changed files with 154 additions and 67 deletions

View File

@ -3,7 +3,10 @@
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/file_io/**",
"${workspaceFolder}/core/**",
"${workspaceFolder}/tests/**",
"${workspaceFolder}/tests/**",
"/usr/include"
],
"defines": [],

View File

@ -17,7 +17,7 @@ include(GNUInstallDirs)
include(FetchContent)
option(AARE_USE_WARNINGS "Eable warnings" ON)
option(AARE_USE_WARNINGS "Eable warnings" OFF)
option(AARE_PYTHON_BINDINGS "Build python bindings" ON)
option(AARE_TESTS "Build tests" ON)
option(AARE_EXAMPLES "Build examples" ON)

View File

@ -22,6 +22,7 @@ template <class DataType> class Frame{
ssize_t cols;
DataType* data;
ssize_t bitdepth = sizeof(DataType)*8;
Frame(ssize_t rows, ssize_t cols);
Frame(std::byte* fp, ssize_t rows, ssize_t cols);
DataType get(int row, int col);

View File

@ -8,13 +8,18 @@ Frame<DataType>::Frame(std::byte* bytes, ssize_t rows, ssize_t cols):
std::memcpy(data, bytes, rows*cols*sizeof(DataType));
}
/**
Builds a frame of rows x cols pixels on top of a freshly allocated buffer.
The buffer is only allocated here (default-initialised by new[]); nothing in
this constructor writes pixel values into it.
*/
template <typename DataType>
Frame<DataType>::Frame(ssize_t rows, ssize_t cols) : rows(rows), cols(cols) {
    // Inside the body the parameter names shadow the members, same values either way.
    const ssize_t pixel_count = rows * cols;
    this->data = new DataType[pixel_count];
}
/**
Returns the pixel stored at (row, col).

@param row zero-based row index, valid range 0 <= row < rows
@param col zero-based column index, valid range 0 <= col < cols
@return the value held at data[row * cols + col]
@throws std::runtime_error when either index lies outside the frame
*/
template <typename DataType>
DataType Frame<DataType>::get(int row, int col) {
    const bool row_ok = row >= 0 && row < rows;
    const bool col_ok = col >= 0 && col < cols;
    if (!row_ok || !col_ok) {
        // Previously this path logged and returned 0, which is indistinguishable
        // from a real pixel value and left the throw below it unreachable.
        throw std::runtime_error("Invalid row or column index");
    }
    return data[row * cols + col];
}

BIN
data/test_numpy_file.npy Normal file

Binary file not shown.

View File

@ -1,25 +1,30 @@
// Your First C++ Program
#include <iostream>
#include "aare/FileHandler.hpp"
#include <iostream>
using JFileHandler = FileHandler<DetectorType::Jungfrau,uint16_t>;
using JFile = File<DetectorType::Jungfrau,uint16_t>;
using JFileHandler = FileHandler<DetectorType::Jungfrau, uint16_t>;
using JFile = File<DetectorType::Jungfrau, uint16_t>;
using JFrame = Frame<uint16_t>;
void test(JFileHandler* f,int frame_number){
void test(JFileHandler *f, int frame_number) {
std::cout << "frame number: " << frame_number << std::endl;
JFrame* frame = f->get_frame(frame_number);
std::cout << frame->get(0,0) << ' ';
std::cout << frame->get(0,1) << ' ';
std::cout << frame->get(1,0) << ' ';
std::cout << frame->get(511,1023) << std::endl;
JFrame *frame = f->get_frame(frame_number);
std::cout << frame->get(0, 0) << std::endl;
std::cout << frame->get(0, 1) << std::endl;
std::cout << frame->get(1, 0) << std::endl;
std::cout << frame->get(49, 49) << std::endl;
delete frame;
}
int main() {
std::filesystem::path fpath("/home/bb/github/aare/data/jungfrau_single_master_0.json");
auto fileHandler = new JFileHandler (fpath);
// std::filesystem::path fpath("/home/bb/github/aare/data/jungfrau_single_master_0.json");
std::filesystem::path fpath("/home/bb/github/aare/data/test_numpy_file.npy");
auto fileHandler = new JFileHandler(fpath);
test(fileHandler, 0);
test(fileHandler, 24);
delete fileHandler;
}

View File

@ -15,6 +15,8 @@ class File {
virtual Frame<DataType>* get_frame(int frame_number) = 0;
private:
//comment
public:
std::filesystem::path fname;

View File

@ -5,11 +5,10 @@ class JsonFileFactory: public FileFactory<detector,DataType>
private:
/* data */
public:
JsonFileFactory(std::filesystem::path fpath);
File<detector,DataType>* load_file() override;
void parse_metadata(File<detector,DataType>*) override;
void parse_fname(File<detector,DataType>*) override;
JsonFileFactory<detector,DataType>(std::filesystem::path fpath);
void open_subfiles(File<detector,DataType>*);
sls_detector_header read_header(const std::filesystem::path &fname);
void find_geometry(File<detector,DataType>*);

View File

@ -1,9 +1,52 @@
#pragma once
#include "aare/File.hpp"
#include "aare/defs.hpp"
#include <iostream>
#include <numeric>
template <DetectorType detector, typename DataType>
class NumpyFile : public File<detector, DataType>
{
/** Extent of every array axis, in the order it is stored in the header. */
using shape_t = std::vector<uint64_t>;

/**
Element type description of the stored array.
NOTE(review): byteorder and kind look like the two leading characters of a numpy
"descr" string (e.g. '<' and 'u') - confirm against the parser, which is not
visible here.
*/
struct dtype_t {
    char byteorder;
    char kind;
    unsigned int itemsize; // size of one element; multiplied by the pixel count to get bytes per frame

    /** Concatenates byteorder, kind and itemsize, e.g. "<u2". */
    std::string to_string() const {
        std::stringstream sstm;
        sstm << byteorder << kind << itemsize;
        return sstm.str();
    }
};

/** Parsed header fields: element type, memory order flag and shape. */
struct header_t {
    dtype_t dtype;
    bool fortran_order;
    shape_t shape;

    /**
    Human readable one-line summary used for logging.
    The shape is printed with a trailing comma after every entry, e.g. "(10,50,50,)".
    */
    std::string to_string() const {
        std::stringstream sstm;
        sstm << "dtype: " << dtype.to_string() << ", fortran_order: " << fortran_order << ' ';
        sstm << "shape: (";
        for (const auto item : shape)
            sstm << item << ',';
        sstm << ')';
        return sstm.str();
    }
};
/**
File implementation that serves frames out of a single numpy (.npy) file.

The header related members are public because NumpyFileFactory::parse_metadata
fills them in directly after construction.
*/
template <DetectorType detector, typename DataType> class NumpyFile : public File<detector, DataType> {
    // Handle opened by the constructor and read by get_frame.
    // NOTE(review): no fclose(fp) is visible in this chunk - confirm the handle is released somewhere.
    FILE *fp = nullptr;

  public:
    NumpyFile(std::filesystem::path fname);
    Frame<DataType> *get_frame(int frame_number) override;

    header_t header{};
    // Six byte signature the factory compares the start of the file against.
    static constexpr std::array<char, 6> magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'};
    uint8_t major_ver_{};
    uint8_t minor_ver_{};
    uint32_t header_len{};
    uint8_t header_len_size{}; // set to 2 for major version 1 and 4 for major version 2 by the factory
    const uint8_t magic_string_length{6};
    // magic_string_length + 2 + header_len_size + header_len; used as the offset of frame 0.
    ssize_t header_size{};

    /**
    Number of elements in one frame: the product of every shape entry after the first.
    Returns 0 while the shape is still empty (header not parsed yet) instead of
    stepping past the end of the vector.
    */
    inline ssize_t pixels_per_frame() {
        if (header.shape.empty()) {
            return 0;
        }
        // The seed fixes the accumulator type; an int seed would truncate the
        // 64-bit product back to int after every multiplication.
        return std::accumulate(header.shape.begin() + 1, header.shape.end(), uint64_t{1},
                               std::multiplies<uint64_t>());
    }

    /** Size of one frame in bytes: itemsize times pixels_per_frame(). */
    inline ssize_t bytes_per_frame() { return header.dtype.itemsize * pixels_per_frame(); }
};

View File

@ -1,6 +1,7 @@
#include "aare/defs.hpp"
#pragma once
#include "aare/FileFactory.hpp"
#include "aare/NumpyFile.hpp"
#include "aare/defs.hpp"
#include <algorithm>
#include <array>
#include <filesystem>
@ -8,22 +9,17 @@
#include <string>
#include <unordered_map>
#include <vector>
#include <sstream>
/**
Factory that produces File objects for numpy (.npy) input.
load_file() allocates a NumpyFile and parse_metadata() fills its header from
the bytes at the start of the file.
*/
template <DetectorType detector, typename DataType> class NumpyFileFactory : public FileFactory<detector, DataType> {
private:
// Input stream; parse_metadata assigns it a binary stream opened on the file's path.
std::ifstream f;
// Definition not visible in this chunk.
void read_data(File<detector, DataType> *_file);
public:
NumpyFileFactory(std::filesystem::path fpath);
// Reads magic string, version, header length and the header dictionary.
void parse_metadata(File<detector, DataType> *_file) override;
// Definition not visible in this chunk.
void open_subfiles(File<detector, DataType> *_file) override;
// Allocates the NumpyFile, parses its metadata and checks the item size against sizeof(DataType).
File<detector, DataType> *load_file() override;
// Deliberately empty: nothing is derived from the file name here.
void parse_fname(File<detector, DataType> *){};
// Accessors for the version bytes stored in this factory.
uint8_t major_ver() const noexcept { return major_ver_; }
uint8_t minor_ver() const noexcept { return minor_ver_; }
private:
// NOTE(review): NumpyFile declares members with these same names and
// parse_metadata in this chunk also writes through the file pointer -
// confirm which copy is authoritative.
// Six byte signature expected at the very start of the file.
static constexpr std::array<char, 6> magic_str{'\x93', 'N', 'U', 'M', 'P', 'Y'};
uint8_t major_ver_{};
uint8_t minor_ver_{};
uint32_t header_len{};
// Width in bytes of the header length field (2 for major version 1, 4 for major version 2).
uint8_t header_len_size{};
const uint8_t magic_string_length{6};
};

View File

@ -1,6 +1,7 @@
#include "aare/FileFactory.hpp"
#include "aare/File.hpp"
#include "aare/JsonFileFactory.hpp"
#include "aare/NumpyFileFactory.hpp"
#include <iostream>
template <DetectorType detector, typename DataType>
@ -20,7 +21,7 @@ FileFactory<detector, DataType> *FileFactory<detector, DataType>::get_factory(st
// check if extension is numpy
else if (fpath.extension() == ".npy") {
std::cout << "Loading numpy file" << std::endl;
throw std::runtime_error("Numpy file not implemented");
return new NumpyFileFactory<detector, DataType>(fpath);
}
throw std::runtime_error("Unsupported file type");

View File

@ -2,6 +2,9 @@
template <DetectorType detector, typename DataType>
Frame<DataType> *JsonFile<detector, DataType>::get_frame(int frame_number) {
if (frame_number > this->total_frames) {
throw std::runtime_error("Frame number out of range");
}
int subfile_id = frame_number / this->max_frames_per_file;
std::byte *buffer;
size_t frame_size = this->subfiles[subfile_id]->bytes_per_frame();

View File

@ -0,0 +1,24 @@
#include "aare/NumpyFile.hpp"
/**
Stores the path on the base class and opens the file for binary reading.
The fopen result is not checked here; get_frame refuses to run while fp is null.
*/
template <DetectorType detector, typename DataType>
NumpyFile<detector, DataType>::NumpyFile(std::filesystem::path fname) {
    this->fname = fname;
    const auto *native_path = fname.c_str();
    this->fp = fopen(native_path, "rb");
}
/**
Reads one frame from the file into a newly allocated Frame.

shape[0] is treated as the number of frames and shape[1] x shape[2] as the
frame dimensions, so the array must have exactly three axes; otherwise the
amount read would not match the size of the Frame buffer.

@param frame_number zero-based index, valid range 0 <= frame_number < shape[0]
@return frame allocated with new; it is not freed here on success
@throws std::runtime_error if the file is not open, the shape is not
        three-dimensional, the index is out of range, or the seek/read fails
*/
template <DetectorType detector, typename DataType>
Frame<DataType> *NumpyFile<detector, DataType>::get_frame(int frame_number) {
    if (fp == nullptr) {
        throw std::runtime_error("File not open");
    }
    if (header.shape.size() != 3) {
        throw std::runtime_error("Expected a 3 dimensional array (frames, rows, cols)");
    }
    // Valid indices are 0 .. shape[0]-1; check the sign first so the unsigned
    // comparison below cannot be fooled by a negative value.
    if (frame_number < 0 || static_cast<uint64_t>(frame_number) >= header.shape[0]) {
        throw std::runtime_error("Frame number out of range");
    }

    const ssize_t pixel_count = pixels_per_frame();
    const ssize_t offset = header_size + frame_number * bytes_per_frame();
    if (fseek(fp, static_cast<long>(offset), SEEK_SET) != 0) {
        throw std::runtime_error("Could not seek to frame");
    }

    Frame<DataType> *frame = new Frame<DataType>(header.shape[1], header.shape[2]);
    const size_t pixels_read = fread(frame->data, sizeof(DataType), pixel_count, fp);
    if (pixels_read != static_cast<size_t>(pixel_count)) {
        // Do not hand out a partially filled frame.
        delete frame;
        throw std::runtime_error("Could not read frame");
    }
    return frame;
}
template class NumpyFile<DetectorType::Jungfrau, uint16_t>;

View File

@ -13,8 +13,8 @@ inline std::string parse_str(const std::string &in) {
/**
Removes leading and trailing whitespaces
*/
inline std::string trim(const std::string& str) {
const std::string whitespace = " \t";
inline std::string trim(const std::string &str) {
const std::string whitespace = " \t\n";
auto begin = str.find_first_not_of(whitespace);
if (begin == std::string::npos)
@ -52,7 +52,6 @@ inline bool parse_bool(const std::string &in) {
throw std::runtime_error("Invalid python boolan.");
}
inline std::string get_value_from_map(const std::string &mapstr) {
size_t sep_pos = mapstr.find_first_of(":");
if (sep_pos == std::string::npos)
@ -63,7 +62,6 @@ inline std::string get_value_from_map(const std::string &mapstr) {
}
std::unordered_map<std::string, std::string> parse_dict(std::string in, const std::vector<std::string> &keys) {
std::unordered_map<std::string, std::string> map;
if (keys.size() == 0)
return map;
@ -113,19 +111,6 @@ std::unordered_map<std::string, std::string> parse_dict(std::string in, const st
return map;
}
// Extent of every array axis.
// NOTE(review): the same three types are also declared in NumpyFile.hpp in this
// change set - confirm only one set of definitions remains.
using shape_t = std::vector<uint64_t>;
// Element type description of the stored array.
struct dtype_t {
char byteorder;
char kind;
// Size of one element; load_file compares it against sizeof(DataType).
unsigned int itemsize;
};
// Parsed header fields: element type, memory order flag and shape.
struct header_t {
dtype_t dtype;
bool fortran_order;
shape_t shape;
};
/**
Tells whether val occurs in arr.

@return true if some element of arr compares equal to val, false otherwise
*/
template <typename T, size_t N> inline bool in_array(T val, const std::array<T, N> &arr) {
    for (const T &candidate : arr) {
        if (candidate == val) {
            return true;
        }
    }
    return false;
}
@ -160,14 +145,15 @@ template <DetectorType detector, typename DataType>
void NumpyFileFactory<detector, DataType>::parse_metadata(File<detector, DataType> *_file) {
auto file = dynamic_cast<NumpyFile<detector, DataType> *>(_file);
// open ifstream to file
std::ifstream f(file->fname, std::ios::binary);
f = std::ifstream(file->fname, std::ios::binary);
// check if file exists
if (!f.is_open()) {
throw std::runtime_error(fmt::format("Could not open: {} for reading", file->fname.c_str()));
}
// read magic number
std::array<char, 6> tmp{};
f.read(tmp.data(), tmp.size());
if (tmp != NumpyFileFactory<detector, DataType>::magic_str) {
if (tmp != NumpyFile<detector, DataType>::magic_str) {
for (auto item : tmp)
fmt::print("{}, ", int(item));
fmt::print("\n");
@ -175,29 +161,31 @@ void NumpyFileFactory<detector, DataType>::parse_metadata(File<detector, DataTyp
}
// read version
f.read(reinterpret_cast<char *>(&major_ver_), 1);
f.read(reinterpret_cast<char *>(&minor_ver_), 1);
f.read(reinterpret_cast<char *>(&file->major_ver_), 1);
f.read(reinterpret_cast<char *>(&file->minor_ver_), 1);
if (major_ver_ == 1) {
header_len_size = 2;
} else if (major_ver_ == 2) {
header_len_size = 4;
if (file->major_ver_ == 1) {
file->header_len_size = 2;
} else if (file->major_ver_ == 2) {
file->header_len_size = 4;
} else {
throw std::runtime_error("Unsupported numpy version");
}
// read header length
f.read(reinterpret_cast<char *>(&header_len), header_len_size);
if ((magic_string_length + 2 + header_len_size + header_len) % 16 != 0) {
f.read(reinterpret_cast<char *>(&file->header_len), file->header_len_size);
file->header_size = file->magic_string_length + 2 + file->header_len_size + file->header_len;
if (file->header_size % 16 != 0) {
fmt::print("Warning: header length is not a multiple of 16\n");
}
// read header
auto buf_v = std::vector<char>(header_len);
f.read(buf_v.data(), header_len);
std::string header(buf_v.data(), header_len);
auto buf_v = std::vector<char>(file->header_len);
f.read(buf_v.data(), file->header_len);
std::string header(buf_v.data(), file->header_len);
// parse header
std::vector<std::string> keys{"descr", "fortran_order", "shape"};
std::cout << "original header: " << '"' << header << '"' << std::endl;
auto dict_map = parse_dict(header, keys);
if (dict_map.size() == 0)
@ -220,6 +208,23 @@ void NumpyFileFactory<detector, DataType>::parse_metadata(File<detector, DataTyp
auto dim = static_cast<unsigned long>(std::stoul(item));
shape.push_back(dim);
}
file->header = {dtype, fortran_order, shape};
}
/**
Creates a NumpyFile for this factory's path and fills it from the file header.

@return file allocated with new; it is not freed here on success
@throws std::runtime_error if sizeof(DataType) differs from the item size in the
        header; whatever parse_metadata throws is passed on unchanged
*/
template <DetectorType detector, typename DataType>
File<detector, DataType> *NumpyFileFactory<detector, DataType>::load_file() {
    auto *file = new NumpyFile<detector, DataType>(this->fpath);
    try {
        parse_metadata(file);
        std::cout << "parsed header: " << file->header.to_string() << std::endl;

        if (sizeof(DataType) != file->header.dtype.itemsize) {
            std::stringstream s;
            s << "Data type size mismatch: " << sizeof(DataType) << " != " << file->header.dtype.itemsize;
            throw std::runtime_error(s.str());
        }
    } catch (...) {
        // Nobody else holds the pointer yet, so release it before passing the error on.
        delete file;
        throw;
    }
    return file;
}
template class NumpyFileFactory<DetectorType::Jungfrau, uint16_t>;
// {dtype, fortran_order, shape};
};