looping over files

This commit is contained in:
froejdh_e
2025-04-04 16:34:10 +02:00
parent 19855e6403
commit 88a5c2e0d8
8 changed files with 365 additions and 96 deletions

View File

@ -342,6 +342,7 @@ set(PUBLICHEADERS
include/aare/File.hpp
include/aare/Fit.hpp
include/aare/FileInterface.hpp
include/aare/FiltPtr.hpp
include/aare/Frame.hpp
include/aare/geo_helpers.hpp
include/aare/JungfrauDataFile.hpp
@ -368,6 +369,7 @@ set(SourceFiles
${CMAKE_CURRENT_SOURCE_DIR}/src/decode.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Frame.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/File.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/FilePtr.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Fit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/geo_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/JungfrauDataFile.cpp

30
include/aare/FilePtr.hpp Normal file
View File

@ -0,0 +1,30 @@
#pragma once
#include <cstdio>
#include <filesystem>
namespace aare {
/**
* \brief RAII wrapper for FILE pointer
*/
class FilePtr {
FILE *fp_{nullptr};
public:
FilePtr() = default;
FilePtr(const std::filesystem::path& fname, const std::string& mode);
FilePtr(const FilePtr &) = delete; // we don't want a copy
FilePtr &operator=(const FilePtr &) = delete; // since we handle a resource
FilePtr(FilePtr &&other);
FilePtr &operator=(FilePtr &&other);
FILE *get();
int64_t tell();
void seek(int64_t offset, int whence = SEEK_SET) {
if (fseek(fp_, offset, whence) != 0)
throw std::runtime_error("Error seeking in file");
}
std::string error_msg();
~FilePtr();
};
} // namespace aare

View File

@ -1,6 +1,11 @@
#pragma once
#include <cstdint>
#include <filesystem>
#include <vector>
#include "aare/FilePtr.hpp"
#include "aare/defs.hpp"
#include "aare/NDArray.hpp"
namespace aare {
struct JungfrauDataHeader{
@ -12,27 +17,31 @@ class JungfrauDataFile {
size_t m_rows{};
size_t m_cols{};
size_t m_bytes_per_frame{};
size_t m_total_frames{};
size_t m_first_frame_index{};
size_t m_offset{}; // file index of the first file, allow starting at non zero file
size_t m_current_file_index{};
size_t m_current_frame{};
std::vector<size_t> m_frames_in_file{};
std::vector<size_t> m_frame_index{};
std::filesystem::path m_base_path;
std::filesystem::path m_path;
std::string m_base_name;
//Data sizes to guess the frame size
using value_type = uint16_t;
static constexpr size_t header_size = sizeof(JungfrauDataHeader);
static constexpr size_t module_data_size = header_size + sizeof(value_type) * 512 * 1024;
static constexpr size_t half_data_size = header_size + sizeof(value_type) * 256 * 1024;
static constexpr size_t chip_data_size = header_size + sizeof(value_type) * 256 * 256;
FilePtr m_fp;
static constexpr size_t n_digits_in_file_index = 6;
//Data sizes to guess the frame size
using pixel_type = uint16_t;
static constexpr size_t header_size = sizeof(JungfrauDataHeader);
static constexpr size_t module_data_size = header_size + sizeof(pixel_type) * 512 * 1024;
static constexpr size_t half_data_size = header_size + sizeof(pixel_type) * 256 * 1024;
static constexpr size_t chip_data_size = header_size + sizeof(pixel_type) * 256 * 256;
static constexpr size_t n_digits_in_file_index = 6; // number of digits in the file index
public:
JungfrauDataFile(const std::filesystem::path &fname);
std::string base_name() const; //!< get the base name of the file (without path and extension)
std::string base_name() const; //!< get the base name of the file (without path and extension)
size_t bytes_per_frame() const;
size_t pixels_per_frame() const;
size_t bytes_per_pixel() const;
@ -42,11 +51,38 @@ class JungfrauDataFile {
size_t total_frames() const;
size_t rows() const;
size_t cols() const;
size_t n_files() const { return m_frame_index.size(); } //!< get the number of files
void read_into(std::byte *image_buf, JungfrauDataHeader *header);
void read_into(std::byte *image_buf, size_t n_frames, JungfrauDataHeader *header);
void read_into(NDArray<uint16_t> &image, JungfrauDataHeader &header) {
if(!(rows() == image.shape(0) && cols() == image.shape(1))){
throw std::runtime_error(LOCATION +
"Image shape does not match file size: " + std::to_string(rows()) + "x" + std::to_string(cols()));
}
read_into(reinterpret_cast<std::byte *>(image.data()), &header);
}
NDArray<uint16_t> read_frame(JungfrauDataHeader& header) {
Shape<2> shape{rows(), cols()};
NDArray<uint16_t> image(shape);
static size_t guess_frame_size(const std::filesystem::path &fname);
static std::filesystem::path get_frame_path(const std::filesystem::path &path, const std::string& base_name, size_t frame_index);
};
read_into(reinterpret_cast<std::byte *>(image.data()),
&header);
return image;
}
std::filesystem::path current_file() const { return fpath(m_current_file_index+m_offset); }
private:
void find_frame_size(const std::filesystem::path &fname);
void parse_fname(const std::filesystem::path &fname);
void scan_files();
void open_file(size_t file_index);
std::filesystem::path fpath(size_t frame_index) const;
};
} // namespace aare

View File

@ -25,6 +25,25 @@ size_t last_smaller(const NDArray<T, 1>& arr, T val) {
return last_smaller(arr.begin(), arr.end(), val);
}
template <typename T>
size_t first_larger(const T* first, const T* last, T val) {
for (auto iter = first; iter != last; ++iter) {
if (*iter > val) {
return std::distance(first, iter);
}
}
return std::distance(first, last-1);
}
template <typename T>
size_t first_larger(const NDArray<T, 1>& arr, T val) {
return first_larger(arr.begin(), arr.end(), val);
}
template <typename T>
size_t first_larger(const std::vector<T>& vec, T val) {
return first_larger(vec.data(), vec.data()+vec.size(), val);
}
template <typename T>
size_t nearest_index(const T* first, const T* last, T val) {
@ -50,6 +69,13 @@ size_t nearest_index(const std::array<T,N>& arr, T val) {
return nearest_index(arr.data(), arr.data()+arr.size(), val);
}
template <typename T>
std::vector<T> cumsum(const std::vector<T>& vec) {
std::vector<T> result(vec.size());
std::partial_sum(vec.begin(), vec.end(), result.begin());
return result;
}
} // namespace aare

View File

@ -16,21 +16,68 @@ namespace py = pybind11;
using namespace ::aare;
void define_jungfrau_data_file_io_bindings(py::module &m) {
//Make the JungfrauDataHeader usable from numpy
PYBIND11_NUMPY_DTYPE(JungfrauDataHeader, framenum, bunchid);
py::class_<JungfrauDataFile>(m, "JungfrauDataFile")
.def(py::init<const std::filesystem::path &>())
.def("guess_frame_size",
[](const std::filesystem::path &fname) {
return JungfrauDataFile::guess_frame_size(fname);
})
.def("seek", &JungfrauDataFile::seek)
.def("tell", &JungfrauDataFile::tell)
.def_property_readonly("rows", &JungfrauDataFile::rows)
.def_property_readonly("cols", &JungfrauDataFile::cols)
.def_property_readonly("base_name", &JungfrauDataFile::base_name)
.def("get_frame_path",
[](const std::filesystem::path &path, const std::string &base_name,
size_t frame_index) {
return JungfrauDataFile::get_frame_path(path, base_name,
frame_index);
});
.def_property_readonly("bytes_per_frame",
&JungfrauDataFile::bytes_per_frame)
.def_property_readonly("pixels_per_frame",
&JungfrauDataFile::pixels_per_frame)
.def_property_readonly("bytes_per_pixel",
&JungfrauDataFile::bytes_per_pixel)
.def_property_readonly("bitdepth", &JungfrauDataFile::bitdepth)
.def_property_readonly("current_file",
&JungfrauDataFile::current_file)
.def_property_readonly("total_frames",
&JungfrauDataFile::total_frames)
.def_property_readonly("n_files", &JungfrauDataFile::n_files)
.def("read_frame",
[](JungfrauDataFile &self) {
std::vector<ssize_t> shape;
shape.reserve(2);
shape.push_back(self.rows());
shape.push_back(self.cols());
// return headers from all subfiles
py::array_t<JungfrauDataHeader> header(1);
py::array_t<uint16_t> image(shape);
self.read_into(
reinterpret_cast<std::byte *>(image.mutable_data()),
header.mutable_data());
return py::make_tuple(header, image);
})
.def("read_n",
[](JungfrauDataFile &self, size_t n_frames) {
// adjust for actual frames left in the file
n_frames =
std::min(n_frames, self.total_frames() - self.tell());
if (n_frames == 0) {
throw std::runtime_error("No frames left in file");
}
std::vector<size_t> shape{n_frames, self.rows(), self.cols()};
// return headers from all subfiles
py::array_t<JungfrauDataHeader> header(n_frames);
py::array_t<uint16_t> image(shape);
self.read_into(
reinterpret_cast<std::byte *>(image.mutable_data()),
n_frames,
header.mutable_data());
return py::make_tuple(header, image);
});
// .def("read_frame",
// [](RawFile &self) {
// py::array image;

44
src/FilePtr.cpp Normal file
View File

@ -0,0 +1,44 @@
#include "aare/FilePtr.hpp"
#include <fmt/format.h>
#include <stdexcept>
#include <utility>
namespace aare {
FilePtr::FilePtr(const std::filesystem::path& fname, const std::string& mode = "rb") {
fp_ = fopen(fname.c_str(), mode.c_str());
if (!fp_)
throw std::runtime_error(fmt::format("Could not open: {}", fname.c_str()));
}
FilePtr::FilePtr(FilePtr &&other) { std::swap(fp_, other.fp_); }
FilePtr &FilePtr::operator=(FilePtr &&other) {
std::swap(fp_, other.fp_);
return *this;
}
FILE *FilePtr::get() { return fp_; }
int64_t FilePtr::tell() {
auto pos = ftell(fp_);
if (pos == -1)
throw std::runtime_error(fmt::format("Error getting file position: {}", error_msg()));
return pos;
}
FilePtr::~FilePtr() {
if (fp_)
fclose(fp_); // check?
}
std::string FilePtr::error_msg(){
if (feof(fp_)) {
return "End of file reached";
}
if (ferror(fp_)) {
return fmt::format("Error reading file: {}", std::strerror(errno));
}
return "";
}
} // namespace aare

View File

@ -1,104 +1,162 @@
#include "aare/JungfrauDataFile.hpp"
#include "aare/defs.hpp"
#include "aare/algorithm.hpp"
#include <fmt/format.h>
#include <cerrno>
namespace aare{
namespace aare {
JungfrauDataFile::JungfrauDataFile(const std::filesystem::path& fname){
JungfrauDataFile::JungfrauDataFile(const std::filesystem::path &fname) {
//setup geometry
auto frame_size = guess_frame_size(fname);
if (frame_size == module_data_size) {
m_rows = 512;
m_cols = 1024;
} else if (frame_size == half_data_size) {
m_rows = 256;
m_cols = 1024;
} else if (frame_size == chip_data_size) {
m_rows = 256;
m_cols = 256;
} else {
throw std::runtime_error(LOCATION + "Cannot guess frame size: file size is not a multiple of any known frame size");
}
m_base_path = fname.parent_path();
m_base_name = fname.stem();
//need to know the first
//remove digits
while(std::isdigit(m_base_name.back())){
m_base_name.pop_back();
if (!std::filesystem::exists(fname)) {
throw std::runtime_error(LOCATION +
"File does not exist: " + fname.string());
}
//find how many files we have
// size_t frame_index = 0;
// while (std::filesystem::exists(get_frame_path(m_base_path, m_base_name, frame_index))) {
// auto n_frames =
// m_frames_in_file.push_back(n_frames);
// ++frame_index;
// }
find_frame_size(fname);
parse_fname(fname);
scan_files();
open_file(m_current_file_index);
}
std::string JungfrauDataFile::base_name() const { return m_base_name; }
std::string JungfrauDataFile::base_name() const {
return m_base_name;
size_t JungfrauDataFile::bytes_per_frame() const {
return m_bytes_per_frame;
}
size_t JungfrauDataFile::bytes_per_frame() const{
return m_rows * m_cols * bytes_per_pixel();
}
size_t JungfrauDataFile::pixels_per_frame() const { return m_rows * m_cols; }
size_t JungfrauDataFile::pixels_per_frame()const {
return m_rows * m_cols;
}
size_t JungfrauDataFile::bytes_per_pixel() const { return sizeof(pixel_type); }
size_t JungfrauDataFile::bytes_per_pixel() const {
return 2;
}
size_t JungfrauDataFile::bitdepth() const { return bytes_per_pixel()*bits_per_byte; }
size_t JungfrauDataFile::bitdepth()const {
return 16;
}
void seek(size_t frame_index){}; //!< seek to the given frame index
void JungfrauDataFile::seek(size_t frame_index) {
if (frame_index >= m_total_frames) {
throw std::runtime_error(LOCATION +
"Frame index out of range: " + std::to_string(frame_index));
}
m_current_frame = frame_index;
auto file_index = first_larger(m_frame_index, frame_index);
size_t JungfrauDataFile::tell() const{
return 0;
} //!< get the frame index of the file pointer
size_t JungfrauDataFile::total_frames() const {
return m_total_frames;
}
size_t JungfrauDataFile::rows() const {
return m_rows;
}
size_t JungfrauDataFile::cols() const {
return m_cols;
}
if(file_index != m_current_file_index)
open_file(file_index);
size_t JungfrauDataFile::guess_frame_size(const std::filesystem::path& fname) {
auto frame_offset = (file_index) ? frame_index - m_frame_index[file_index-1] : frame_index;
auto byte_offset = frame_offset * (m_bytes_per_frame + header_size);
m_fp.seek(byte_offset);
};
size_t JungfrauDataFile::tell() const {
return m_current_frame;
}
size_t JungfrauDataFile::total_frames() const { return m_total_frames; }
size_t JungfrauDataFile::rows() const { return m_rows; }
size_t JungfrauDataFile::cols() const { return m_cols; }
void JungfrauDataFile::find_frame_size(const std::filesystem::path &fname) {
auto file_size = std::filesystem::file_size(fname);
if (file_size == 0) {
throw std::runtime_error(LOCATION + "Cannot guess frame size: file is empty");
throw std::runtime_error(LOCATION +
"Cannot guess frame size: file is empty");
}
if (file_size % module_data_size == 0) {
return module_data_size;
m_rows = 512;
m_cols = 1024;
m_bytes_per_frame = module_data_size-header_size;
} else if (file_size % half_data_size == 0) {
return half_data_size;
m_rows = 256;
m_cols = 1024;
m_bytes_per_frame = half_data_size-header_size;
} else if (file_size % chip_data_size == 0) {
return chip_data_size;
m_rows = 256;
m_cols = 256;
m_bytes_per_frame = chip_data_size-header_size;
} else {
throw std::runtime_error(LOCATION + "Cannot guess frame size: file size is not a multiple of any known frame size");
throw std::runtime_error(LOCATION +
"Cannot find frame size: file size is not a "
"multiple of any known frame size");
}
}
std::filesystem::path JungfrauDataFile::get_frame_path(const std::filesystem::path& path, const std::string& base_name, size_t frame_index) {
auto fname = fmt::format("{}{:0{}}.dat", base_name, frame_index, n_digits_in_file_index);
return path / fname;
void JungfrauDataFile::parse_fname(const std::filesystem::path &fname) {
m_path = fname.parent_path();
m_base_name = fname.stem();
// find file index, then remove if from the base name
if (auto pos = m_base_name.find_last_of('_'); pos != std::string::npos) {
m_offset =
std::stoul(m_base_name.substr(pos+1));
m_base_name.erase(pos);
}
}
void JungfrauDataFile::scan_files() {
// find how many files we have and the number of frames in each file
m_frame_index.clear();
size_t file_index = m_offset;
while (std::filesystem::exists(fpath(file_index))) {
auto n_frames =
std::filesystem::file_size(fpath(file_index)) / (m_bytes_per_frame +
header_size);
m_frame_index.push_back(n_frames);
++file_index;
}
// find where we need to open the next file and total number of frames
m_frame_index = cumsum(m_frame_index);
m_total_frames = m_frame_index.back();
}
void JungfrauDataFile::read_into(std::byte *image_buf,
JungfrauDataHeader *header) {
//read header
if(auto rc = fread(header, sizeof(JungfrauDataHeader), 1, m_fp.get()); rc!= 1){
throw std::runtime_error(LOCATION +
"Could not read header from file:" + m_fp.error_msg());
}
//read data
if(auto rc = fread(image_buf, 1, m_bytes_per_frame, m_fp.get()); rc != m_bytes_per_frame){
throw std::runtime_error(LOCATION +
"Could not read image from file" + m_fp.error_msg());
}
// prepare for next read
// if we are at the end of the file, open the next file
++ m_current_frame;
if(m_current_frame >= m_frame_index[m_current_file_index]){
++m_current_file_index;
open_file(m_current_file_index);
}
}
void JungfrauDataFile::read_into(std::byte *image_buf, size_t n_frames,
JungfrauDataHeader *header) {
for (size_t i = 0; i < n_frames; ++i) {
read_into(image_buf + i * m_bytes_per_frame, header + i);
}
}
void JungfrauDataFile::open_file(size_t file_index) {
// fmt::print(stderr, "Opening file: {}\n", fpath(file_index+m_offset).string());
m_fp = FilePtr(fpath(file_index+m_offset), "rb");
m_current_file_index = file_index;
}
std::filesystem::path
JungfrauDataFile::fpath(size_t file_index) const{
auto fname = fmt::format("{}_{:0{}}.dat", m_base_name, file_index,
n_digits_in_file_index);
return m_path / fname;
}
} // namespace aare

View File

@ -70,4 +70,30 @@ TEST_CASE("returns last bin strictly smaller", "[algorithm]"){
// arr 0, 1, 2, 3, 4
REQUIRE(aare::last_smaller(arr, 2.0) == 2);
}
TEST_CASE("cumsum works", "[algorithm]"){
std::vector<double> vec = {0, 1, 2, 3, 4};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == vec.size());
REQUIRE(result[0] == 0);
REQUIRE(result[1] == 1);
REQUIRE(result[2] == 3);
REQUIRE(result[3] == 6);
REQUIRE(result[4] == 10);
}
TEST_CASE("cumsum works with empty vector", "[algorithm]"){
std::vector<double> vec = {};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == 0);
}
TEST_CASE("cumsum works with negative numbers", "[algorithm]"){
std::vector<double> vec = {0, -1, -2, -3, -4};
auto result = aare::cumsum(vec);
REQUIRE(result.size() == vec.size());
REQUIRE(result[0] == 0);
REQUIRE(result[1] == -1);
REQUIRE(result[2] == -3);
REQUIRE(result[3] == -6);
REQUIRE(result[4] == -10);
}