read write cluster file (#60)

* Read and write cluster files (save work)

* add reading test

* use define for examples env variable and fix ci

* read and write cluster files (working)

* fix cluster CI
This commit is contained in:
Bechir Braham
2024-04-16 13:14:41 +02:00
committed by GitHub
parent 9dfd388927
commit 28d7e8c07a
28 changed files with 554 additions and 28 deletions

View File

@ -15,6 +15,7 @@ set(SourceFiles
${CMAKE_CURRENT_SOURCE_DIR}/src/SubFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFile.cpp
)
add_library(file_io STATIC ${SourceFiles})
@ -31,6 +32,7 @@ if(AARE_TESTS)
${CMAKE_CURRENT_SOURCE_DIR}/test/NumpyFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test/NumpyHelpers.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test/RawFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test/ClusterFile.test.cpp
)
target_sources(tests PRIVATE ${TestSources} )
target_link_libraries(tests PRIVATE core file_io)

View File

@ -0,0 +1,70 @@
#pragma once
#include "aare/core/defs.hpp"
#include <filesystem>
#include <string>
/**
* cluster file format:
* header: [int32 frame_number][int32 n_clusters]
* data: [clusters....]
* where each cluster is of the form:
* typedef struct {
* int16_t x;
* int16_t y;
* int32_t data[9];
*} Cluster ;
*
*/
namespace aare {
/**
 * @brief Configuration of the ClusterFile.
 *
 * Holds the frame number and the number of clusters; it can be used as the
 * header of the cluster file.
 */
struct ClusterFileConfig {
    int32_t frame_number;
    int32_t n_clusters;

    /// Builds a configuration with both fields set to zero.
    ClusterFileConfig() : ClusterFileConfig(0, 0) {}

    /// Builds a configuration from an explicit frame number and cluster count.
    ClusterFileConfig(int32_t frame_number_, int32_t n_clusters_)
        : frame_number(frame_number_), n_clusters(n_clusters_) {}

    /// Two configurations compare equal when both fields match.
    bool operator==(const ClusterFileConfig &other) const {
        if (frame_number != other.frame_number) {
            return false;
        }
        return n_clusters == other.n_clusters;
    }

    bool operator!=(const ClusterFileConfig &other) const { return !operator==(other); }

    /// Human readable representation, terminated by a newline.
    std::string to_string() const {
        std::string text = "frame_number: ";
        text += std::to_string(frame_number);
        text += " n_clusters: ";
        text += std::to_string(n_clusters);
        text += "\n";
        return text;
    }
};
/**
 * @brief Class to read and write clusters to a file
 *
 * The object holds a C FILE handle that is closed in the destructor, so it
 * cannot be copied: a copy would close the same handle a second time.
 */
class ClusterFile {
  public:
    ClusterFile(const std::filesystem::path &fname, const std::string &mode, ClusterFileConfig config = {});
    // non-copyable: the destructor closes fp, a copy would close it twice
    ClusterFile(const ClusterFile &) = delete;
    ClusterFile &operator=(const ClusterFile &) = delete;

    void write(std::vector<Cluster> &clusters);
    void write(Cluster &cluster);
    Cluster read();
    Cluster iread(size_t cluster_number);
    std::vector<Cluster> read(size_t n_clusters);
    void seek(size_t cluster_number);
    size_t tell() const;
    size_t count() noexcept;
    int32_t frame() const;
    void update_header() /* throws */;
    ~ClusterFile() noexcept;

  private:
    FILE *fp = nullptr; // handle of the open cluster file, closed by the destructor
    // size_t current_cluster{};
    std::filesystem::path fname{};
    std::string mode{}; // "r" or "w"
    int32_t frame_number{};
    int32_t n_clusters{};
    static const int HEADER_BYTES = 8; // [int32 frame_number][int32 n_clusters]
};
} // namespace aare

View File

@ -24,7 +24,7 @@ class RawFile : public FileInterface {
* @brief write function is not implemented for RawFile
* @param frame frame to write
*/
void write(Frame & /*frame*/) override { throw std::runtime_error("Not implemented"); };
void write([[maybe_unused]] Frame &frame) override { throw std::runtime_error("Not implemented"); };
Frame read() override { return get_frame(this->current_frame++); };
std::vector<Frame> read(size_t n_frames) override;
void read_into(std::byte *image_buf) override { return get_frame_into(this->current_frame++, image_buf); };

218
file_io/src/ClusterFile.cpp Normal file
View File

@ -0,0 +1,218 @@
#include "aare/file_io/ClusterFile.hpp"
#include "aare/core/defs.hpp"
#include "aare/utils/logger.hpp"
#include "fmt/core.h"
namespace aare {
/**
 * @brief Constructor for the ClusterFile class.
 *
 * Opens the file with the given mode ("r" for read, "w" for write).
 * In read mode the header is read from the file and a non-default config is
 * ignored (a warning is logged). In write mode the config is written as the
 * file header.
 *
 * @param fname_ The name of the file to open.
 * @param mode_ The mode to open the file in.
 * @param config Configuration for the file header.
 * @throws std::invalid_argument if the mode is not "r" or "w", or if, in read
 *         mode, the file does not exist or is not a regular file.
 * @throws std::runtime_error if the file cannot be opened or the header cannot
 *         be read or written.
 */
ClusterFile::ClusterFile(const std::filesystem::path &fname_, const std::string &mode_, ClusterFileConfig config)
    : fname{fname_}, mode{mode_}, frame_number{config.frame_number}, n_clusters{config.n_clusters} {
    // check if the file has the .clust extension
    if (fname.extension() != ".clust") {
        aare::logger::warn("file", fname, "does not have .clust extension");
    }
    if (mode == "r") {
        // check if the file exists and is a regular file
        if (not std::filesystem::exists(fname)) {
            throw std::invalid_argument(fmt::format("file {} does not exist", fname.c_str()));
        }
        if (not std::filesystem::is_regular_file(fname)) {
            throw std::invalid_argument(fmt::format("file {} is not a regular file", fname.c_str()));
        }
        // check if the payload (file size minus header) is a multiple of the cluster size;
        // guard against files shorter than the header so the subtraction cannot wrap around
        const auto file_bytes = std::filesystem::file_size(fname);
        const auto header_bytes = static_cast<std::uintmax_t>(HEADER_BYTES);
        if (file_bytes < header_bytes || (file_bytes - header_bytes) % sizeof(Cluster) != 0) {
            aare::logger::warn("file", fname, "size is not a multiple of cluster size");
        }
        if (config != ClusterFileConfig()) {
            aare::logger::warn("ignored ClusterFileConfig for read mode");
        }
        // open file
        fp = fopen(fname.c_str(), "rb");
        if (fp == nullptr) {
            throw std::runtime_error(fmt::format("could not open file {}", fname.c_str()));
        }
        // read header
        const size_t rc = fread(&config, sizeof(config), 1, fp);
        if (rc != 1) {
            // the destructor does not run when the constructor throws: close the handle here
            fclose(fp);
            fp = nullptr;
            throw std::runtime_error(fmt::format("could not read header from file {}", fname.c_str()));
        }
        frame_number = config.frame_number;
        n_clusters = config.n_clusters;
    } else if (mode == "w") {
        // open file
        fp = fopen(fname.c_str(), "wb");
        if (fp == nullptr) {
            throw std::runtime_error(fmt::format("could not open file {}", fname.c_str()));
        }
        // write header
        if (fwrite(&config, sizeof(config), 1, fp) != 1) {
            // the destructor does not run when the constructor throws: close the handle here
            fclose(fp);
            fp = nullptr;
            throw std::runtime_error(fmt::format("could not write header to file {}", fname.c_str()));
        }
    } else {
        throw std::invalid_argument("mode must be 'r' or 'w'");
    }
}
/**
* @brief Writes a vector of clusters to the file.
*
* Each cluster is written as a binary block of size sizeof(Cluster).
*
* @param clusters The vector of clusters to write to the file.
*/
void ClusterFile::write(std::vector<Cluster> &clusters) {
fwrite(clusters.data(), sizeof(Cluster), clusters.size(), fp);
}
/**
 * @brief Writes a single cluster to the file.
 *
 * The cluster is written as a binary block of size sizeof(Cluster) at the
 * current file position.
 *
 * @param cluster The cluster to write to the file.
 * @throws std::runtime_error if the cluster could not be written.
 */
void ClusterFile::write(Cluster &cluster) {
    if (fwrite(&cluster, sizeof(Cluster), 1, fp) != 1) {
        throw std::runtime_error("could not write cluster to file");
    }
}
/**
 * @brief Reads a single cluster from the file.
 *
 * The cluster is read as a binary block of size sizeof(Cluster) from the
 * current file position.
 *
 * @return The cluster read from the file.
 * @throws std::runtime_error if the current position is at or past the last
 *         cluster, or if the cluster could not be read.
 */
Cluster ClusterFile::read() {
    if (tell() >= count()) {
        throw std::runtime_error("cluster number out of range");
    }
    Cluster cluster{};
    // do not hand back a zero-filled cluster when the read fails
    if (fread(&cluster, sizeof(Cluster), 1, fp) != 1) {
        throw std::runtime_error("could not read cluster from file");
    }
    return cluster;
}
/**
 * @brief Reads a specific cluster from the file.
 *
 * The file pointer is moved to the specific cluster, the cluster is read as a
 * binary block of size sizeof(Cluster), and the previous file position is
 * restored afterwards (also when the read fails).
 *
 * @param cluster_number The number of the cluster to read from the file.
 * @return The cluster read from the file.
 * @throws std::runtime_error if cluster_number is out of range, or if the file
 *         position cannot be queried/restored, or if the cluster cannot be read.
 */
Cluster ClusterFile::iread(size_t cluster_number) {
    if (cluster_number >= count()) {
        throw std::runtime_error("cluster number out of range");
    }
    const long old_pos = ftell(fp);
    if (old_pos < 0) {
        throw std::runtime_error("could not get file position");
    }
    this->seek(cluster_number);
    Cluster cluster{};
    const size_t n_read = fread(&cluster, sizeof(Cluster), 1, fp);
    // restore the file position before reporting any read error
    const int restore_rc = fseek(fp, old_pos, SEEK_SET);
    if (n_read != 1) {
        throw std::runtime_error("could not read cluster from file");
    }
    if (restore_rc != 0) {
        throw std::runtime_error("could not restore file position");
    }
    return cluster;
}
/**
 * @brief Reads a specific number of clusters from the file.
 *
 * Each cluster is read as a binary block of size sizeof(Cluster), starting at
 * the current file position.
 *
 * @param n_clusters_ The number of clusters to read from the file.
 * @return A vector of clusters read from the file.
 * @throws std::runtime_error if fewer than n_clusters_ clusters remain after
 *         the current position, or if the clusters could not be read.
 */
std::vector<Cluster> ClusterFile::read(size_t n_clusters_) {
    const size_t position = tell();
    const size_t total = count();
    // written as a subtraction so that a huge n_clusters_ cannot overflow the check
    if (position > total || n_clusters_ > total - position) {
        throw std::runtime_error("cluster number out of range");
    }
    std::vector<Cluster> clusters(n_clusters_);
    if (n_clusters_ == 0) {
        return clusters;
    }
    // read exactly the requested amount (the parameter, not the member holding the
    // header count) so the buffer of n_clusters_ elements is never overrun
    const size_t n_read = fread(clusters.data(), sizeof(Cluster), n_clusters_, fp);
    if (n_read != n_clusters_) {
        throw std::runtime_error("could not read clusters from file");
    }
    return clusters;
}
/**
 * @brief Moves the file pointer to a specific cluster.
 *
 * The file pointer is moved to the start of the specific cluster, based on the
 * size of the header and the size of a cluster. Seeking to count() (one past
 * the last cluster) is allowed.
 *
 * @param cluster_number The number of the cluster to move the file pointer to.
 * @throws std::runtime_error if cluster_number is greater than count() or if
 *         the file pointer could not be moved.
 */
void ClusterFile::seek(size_t cluster_number) {
    if (cluster_number > count()) {
        throw std::runtime_error("cluster number out of range");
    }
    const auto offset = static_cast<int64_t>(sizeof(ClusterFileConfig) + cluster_number * sizeof(Cluster));
    if (fseek(fp, offset, SEEK_SET) != 0) {
        throw std::runtime_error("could not seek in file");
    }
}
/**
* @brief Gets the current position of the file pointer in terms of clusters.
*
* The position is calculated as the number of clusters from the beginning of the file to the current position of the
* file pointer.
*
* @return The current position of the file pointer in terms of clusters.
*/
size_t ClusterFile::tell() const { return ftell(fp) / sizeof(Cluster); }
/**
* @brief Counts the number of clusters in the file.
*
* The count is calculated as the size of the file divided by the size of a cluster.
*
* @return The number of clusters in the file.
*/
size_t ClusterFile::count() noexcept {
if (mode == "r") {
return n_clusters;
}
// save the current position
auto old_pos = ftell(fp);
fseek(fp, 0, SEEK_END);
const size_t n_clusters_ = ftell(fp) / sizeof(Cluster);
// restore the file position
fseek(fp, old_pos, SEEK_SET);
return n_clusters_;
}
int32_t ClusterFile::frame() const { return frame_number; }
/**
 * @brief Rewrites the file header with the current number of clusters.
 *
 * The header is written at the start of the file using the stored frame number
 * and the cluster count returned by count(); the stream is flushed and the
 * previous file position is restored, so that later writes continue where they
 * left off instead of overwriting cluster data.
 *
 * @throws std::runtime_error in read mode, if the cluster count does not fit
 *         in the int32 header field, or if seeking, writing or flushing fails.
 */
void ClusterFile::update_header() {
    if (mode == "r") {
        throw std::runtime_error("update header is not implemented for read mode");
    }
    // update the header with the correct number of clusters
    const auto tmp_n_clusters = count();
    aare::logger::debug("updating header with correct number of clusters", tmp_n_clusters);
    if (tmp_n_clusters > static_cast<size_t>(INT32_MAX)) {
        throw std::runtime_error("number of clusters does not fit in the file header");
    }
    // remember where we are so the position can be put back after the header is rewritten
    const long old_pos = ftell(fp);
    if (old_pos < 0) {
        throw std::runtime_error("could not get file position");
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        throw std::runtime_error("could not seek to start of file");
    }
    ClusterFileConfig config(frame_number, static_cast<int32_t>(tmp_n_clusters));
    const bool written = fwrite(&config, sizeof(config), 1, fp) == 1;
    const bool flushed = written && fflush(fp) == 0;
    // always try to restore the position, even if writing the header failed
    const bool restored = fseek(fp, old_pos, SEEK_SET) == 0;
    if (!written) {
        throw std::runtime_error("could not write header to file");
    }
    if (!flushed) {
        throw std::runtime_error("could not flush file");
    }
    if (!restored) {
        throw std::runtime_error("could not restore file position");
    }
}
/**
 * @brief Destructor: finalizes the header in write mode and closes the file.
 *
 * In write mode the header is updated first; any exception raised while doing
 * so is logged and swallowed, because the destructor must not throw.
 */
ClusterFile::~ClusterFile() noexcept {
    if (fp == nullptr) {
        return; // nothing was opened, so there is nothing to update or close
    }
    if (mode == "w") {
        try {
            update_header();
        } catch (const std::exception &e) {
            aare::logger::error("error updating header", e.what());
        }
    }
    fclose(fp);
    fp = nullptr;
}
} // namespace aare

View File

@ -0,0 +1,114 @@
#include "aare/file_io/ClusterFile.hpp"
#include "aare/utils/compare_files.hpp"
#include "test_config.hpp"
#include <catch2/catch_test_macros.hpp>
#include <iostream>
#include <random>
using aare::Cluster;
using aare::ClusterFile;
using aare::ClusterFileConfig;
// Reads the reference cluster file through every read entry point of ClusterFile.
TEST_CASE("Read a cluster file") {
    const auto fpath = test_data_path() / "clusters" / "single_frame_97_clustrers.clust";
    REQUIRE(std::filesystem::exists(fpath));
    ClusterFile cf(fpath, "r");

    // The k-th cluster (0-based) of the reference file is expected to have
    // x = k + 1, y = k + 200 and data values 9k, 9k + 1, ..., 9k + 8.
    const auto require_nth_cluster = [](const Cluster &c, int k) {
        REQUIRE(c.x == k + 1);
        REQUIRE(c.y == k + 200);
        for (int i = 0; i < 9; ++i) {
            REQUIRE(c.data[i] == 9 * k + i);
        }
    };

    SECTION("Read the header of the file") {
        REQUIRE(cf.count() == 97);
        REQUIRE(cf.frame() == 135);
    }
    SECTION("Read a single cluster") {
        const Cluster first = cf.read();
        require_nth_cluster(first, 0);
    }
    SECTION("Read a single cluster using iread") {
        const Cluster first = cf.iread(0);
        require_nth_cluster(first, 0);
    }
    SECTION("Read a cluster using seek") {
        cf.seek(0);
        // two consecutive reads must yield the first and then the second cluster
        for (int k = 0; k < 2; ++k) {
            const Cluster current = cf.read();
            require_nth_cluster(current, k);
        }
    }
    SECTION("check out of bound reading") {
        REQUIRE_THROWS_AS(cf.iread(97), std::runtime_error);
        // seeking to one past the last cluster is allowed, reading from there is not
        REQUIRE_NOTHROW(cf.seek(97));
        REQUIRE_THROWS_AS(cf.read(), std::runtime_error);
        REQUIRE_THROWS_AS(cf.read(1), std::runtime_error);
        REQUIRE_NOTHROW(cf.seek(0));
        REQUIRE_NOTHROW(cf.read(97));
    }
    SECTION("test read multiple clusters") {
        const std::vector<Cluster> all_clusters = cf.read(97);
        REQUIRE(all_clusters.size() == 97);
        for (size_t k = 0; k < all_clusters.size(); ++k) {
            require_nth_cluster(all_clusters[k], static_cast<int>(k));
        }
    }
}
// Writes a cluster file and compares it byte-for-byte with a reference file.
TEST_CASE("write a cluster file") {
    auto const FRAME_NUMBER = 1461041991;
    auto const TOTAL_CLUSTERS = 214748;
    // use the platform's temporary directory instead of a hard-coded /tmp path
    std::filesystem::path const fpath_out = std::filesystem::temp_directory_path() / "file.clust";
    ClusterFile cf_out(fpath_out, "w", ClusterFileConfig(FRAME_NUMBER, TOTAL_CLUSTERS));
    // nothing but the header has been written yet
    REQUIRE(cf_out.count() == 0);
    REQUIRE(cf_out.frame() == FRAME_NUMBER);
    // write file with random close to bounds values
    int32_t offset = 0;
    std::vector<Cluster> clusters(TOTAL_CLUSTERS);
    for (int32_t i = 0; i < TOTAL_CLUSTERS; i++) {
        Cluster c{};
        c.x = INT16_MAX - offset;
        c.y = INT16_MAX - (offset + 200);
        // alternate the sign of the data values: even indices negative, odd indices positive
        for (int32_t j = 0; j < 9; j++) {
            if (j % 2 == 0)
                c.data[j] = -(offset * 2);
            else
                c.data[j] = (offset * 2);
        }
        clusters[i] = c;
        offset++;
        // wrap around so that x and y stay within the int16 range
        offset %= INT16_MAX - 200;
    }
    cf_out.write(clusters);
    REQUIRE(cf_out.count() == TOTAL_CLUSTERS);
    REQUIRE(cf_out.frame() == FRAME_NUMBER);
    // update_header flushes the stream, so the file on disk is complete for the comparison below
    cf_out.update_header();
    REQUIRE(cf_out.count() == TOTAL_CLUSTERS);
    REQUIRE(cf_out.frame() == FRAME_NUMBER);
    auto data_file = test_data_path() / "clusters" / "test_writing.clust";
    REQUIRE(aare::compare_files(fpath_out, data_file));
}