commit 0d6fd2a6427847e547f9a07bd505ed8fabce8c92 Author: Andrej Babic Date: Fri Jan 5 11:58:52 2018 +0100 Chunked writer initial implementation. diff --git a/H5ChunkedWriter.cpp b/H5ChunkedWriter.cpp new file mode 100644 index 0000000..2e8d90e --- /dev/null +++ b/H5ChunkedWriter.cpp @@ -0,0 +1,239 @@ +#include "H5ChunkedWriter.hpp" + +extern "C" +{ + #include "H5DOpublic.h" +} + /* expand_dataset: reads the current extent of the dataset into a rank-3 array and sets the first dimension to frame_index plus dataset_increase_step. NOTE(review): the rest of this function, and everything up to the tail of the HDF5ChunkedWriter constructor, is missing from this copy of the patch. */ +hsize_t expand_dataset(const H5::DataSet& dataset, hsize_t frame_index, hsize_t dataset_increase_step) +{ + hsize_t dataset_rank = 3; + hsize_t dataset_dimension[dataset_rank]; + + dataset.getSpace().getSimpleExtentDims(dataset_dimension); + dataset_dimension[0] = frame_index + dataset_increase_step; + + #ifdef DEBUG + std::cout << "Expanding dataspace to size ("; + for (hsize_t i=0; ifilename = filename; + this->dataset_name = dataset_name; + this->frames_per_file = frames_per_file; + this->initial_dataset_size = initial_dataset_size; +} + /* close_file: calls compact_dataset(dataset, max_frame_index), derives the 1-based image_nr_low and image_nr_high for this file, closes the file and zeroes current_frame_chunk, current_dataset_size and max_frame_index. The two values are only printed under DEBUG; the attribute-writing code is still commented out. NOTE(review): if frames_per_file is non-zero while current_frame_chunk is 0, (current_frame_chunk - 1) wraps around, assuming hsize_t is unsigned as in HDF5 - confirm that state is unreachable, e.g. on a second close_file call. */ +void HDF5ChunkedWriter::close_file() +{ + compact_dataset(dataset, max_frame_index); + + hsize_t min_frame_in_dataset = 0; + if (frames_per_file) { + min_frame_in_dataset = (current_frame_chunk - 1) * frames_per_file; + } + + // max_frame_index is relative to the current file. + hsize_t max_frame_in_dataset = max_frame_index + min_frame_in_dataset; + + // Frame indexing starts at 1 (for some reason). 
+ auto image_nr_low = min_frame_in_dataset + 1; + auto image_nr_high = max_frame_in_dataset + 1; + + #ifdef DEBUG + std::cout << "Setting dataset attribute image_nr_low=" << image_nr_low << " and image_nr_high=" << image_nr_high << std::endl; + #endif + + // H5::IntType int_type(H5::PredType::NATIVE_UINT32); + // H5::DataSpace att_space(H5S_SCALAR); + // auto low_index_attribute = dataset.createAttribute("image_nr_low", int_type, att_space); + + // H5::IntType int_type(H5::PredType::NATIVE_UINT32); + // H5::DataSpace att_space(H5S_SCALAR); + // auto high_index_attribute = dataset.createAttribute("image_nr_high", int_type, att_space); + + // TODO: Populate additional h5 attributes. + + // Cleanup. + file.close(); + current_frame_chunk = 0; + current_dataset_size = 0; + max_frame_index = 0; +} + /* write_data: asks prepare_storage_for_frame(metadata) for the file-relative frame index, then writes metadata.size bytes from data as a single chunk at offset {index, 0, 0} through H5DOwrite_chunk with a filter mask of 0. Throws std::invalid_argument when H5DOwrite_chunk returns non-zero. */ +void HDF5ChunkedWriter::write_data(const MessageMetadata& metadata, char* data) +{ + // Define the offset of the currently received image in the file. + hsize_t relative_frame_index = prepare_storage_for_frame(metadata); + + // Define where to write values in the dataset. + const hsize_t offset[] = {relative_frame_index, 0, 0}; + uint32_t filters = 0; + + if( H5DOwrite_chunk(dataset.getId(), H5P_DEFAULT, filters, offset, metadata.size, data) ) + { + std::stringstream error_message; + error_message << "Error while writing chunk to file at offset " << relative_frame_index << "." << std::endl; + + throw std::invalid_argument( error_message.str() ); + } +} + /* create_file: closes the current file first when file.getId() is not -1. When frames_per_file is non-zero, filename is treated as a printf-style template and expanded with frame_chunk. The file is then created with H5F_ACC_TRUNC and a rank-3 dataspace {initial_dataset_size, frame_shape[0], frame_shape[1]} with an unlimited first dimension is set up; the end of the function is missing from this copy of the patch. NOTE(review): the buffer holds filename.length() plus 10 bytes and is filled by an unbounded sprintf, its length is not a compile-time constant, and frame_chunk is an hsize_t while main passes a %d template - consider snprintf with a matching conversion. */ +void HDF5ChunkedWriter::create_file(const MessageMetadata& metadata, hsize_t frame_chunk) { + + if (file.getId() != -1) { + close_file(); + } + + auto target_filename = filename; + + // In case frames_per_file is > 0, the filename variable is a template for the filename. + if (frames_per_file) { + #ifdef DEBUG + std::cout << "Frames per file is defined. Format " << filename << " with frame_chunk " << frame_chunk << std::endl; + #endif + + // Space for 10 digits should be enough. 
+ char buffer[filename.length() + 10]; + + sprintf(buffer, filename.c_str(), frame_chunk); + target_filename = std::string(buffer); + } + + #ifdef DEBUG + std::cout << "Creating filename " << target_filename << std::endl; + #endif + + // TODO: Create folder if it does not exist. + + file = H5::H5File( target_filename.c_str(), H5F_ACC_TRUNC ); + + H5::IntType data_type( config::dataset_type ); + data_type.setOrder( config::dataset_byte_order ); + + hsize_t dataset_rank = 3; + const hsize_t dataset_dimension[] = {initial_dataset_size, metadata.frame_shape[0], metadata.frame_shape[1]}; + const hsize_t max_dataset_dimension[] = {H5S_UNLIMITED, metadata.frame_shape[0], metadata.frame_shape[1]}; + H5::DataSpace dataspace(dataset_rank, dataset_dimension, max_dataset_dimension); + + #ifdef DEBUG + std::cout << "Creating dataspace of size ("; + for (hsize_t i=0; i current_dataset_size) { + current_dataset_size = expand_dataset(dataset, frame_index, dataset_increase_step); + } + + // Keep track of the max index in this file - needed for shrinking the dataset at the end. 
+ if (frame_index > max_frame_index) { + max_frame_index = frame_index; + } + + return frame_index; +} + /* main: ad-hoc smoke test living in the library source. Builds a writer with template juhu-%d.h5, dataset name data, frames_per_file 4 and initial_dataset_size 2, describes a 5 x 10 frame and loops over frame indices 0 to 9. The loop body and the start of the header that follows are missing from this copy of the patch. */ +int main (int argc, char *argv[]) +{ + + HDF5ChunkedWriter writer("juhu-%d.h5", "data", 4, 2); + + MessageMetadata metadata; + metadata.frame_index = 0; + metadata.frame_shape[0] = 5; + metadata.frame_shape[1] = 10; + metadata.size = metadata.frame_shape[0] * metadata.frame_shape[1]; + metadata.dtype = "bytes"; + + char data[metadata.size]; + + + for( int x=0; x<10; ++x ) + { + metadata.frame_index = x; + + for( uint32_t y=0; y +#include +#include +#include + /* config: default element type (NATIVE_UINT8, little-endian byte order) and default initial size and growth step (1000 each) for the dataset. NOTE(review): presumably this is H5ChunkedWriter.hpp; if so, these non-const namespace-scope definitions will clash at link time once the header is included from a second translation unit - confirm and consider const or inline variables. */ +namespace config +{ + auto dataset_type = H5::PredType::NATIVE_UINT8; + auto dataset_byte_order = H5T_ORDER_LE; + hsize_t dataset_increase_step = 1000; + hsize_t initial_dataset_size = 1000; +} + /* MessageMetadata: per-frame description handed to the writer. write_data passes size to H5DOwrite_chunk as the byte count and create_file uses frame_shape for the two trailing dataset dimensions. frame_shape has no default initializer. */ +struct MessageMetadata +{ + hsize_t buffer_index = 0; + uint32_t size = 0; + + hsize_t frame_index = 0; + hsize_t frame_shape[2]; + std::string dtype; +}; + +hsize_t expand_dataset(const H5::DataSet& dataset, hsize_t frame_index, hsize_t dataset_increase_step); + +void compact_dataset(const H5::DataSet& dataset, hsize_t max_frame_index); + /* HDF5ChunkedWriter: writes frames as raw chunks into a rank-3 dataset whose first dimension is unlimited. When frames_per_file is non-zero, filename is a printf-style template expanded per file. */ +class HDF5ChunkedWriter +{ + // Initialized in constructor. + std::string filename; + std::string dataset_name; + hsize_t frames_per_file; + hsize_t initial_dataset_size; + + // Configuration parameters. + hsize_t dataset_increase_step = config::dataset_increase_step; + + // State variables. 
+ hsize_t max_frame_index = 0; + hsize_t current_dataset_size = 0; + hsize_t current_frame_chunk = 0; + /* File and dataset handles; create_file treats file.getId() == -1 as no file being open. */ + H5::H5File file; + H5::DataSet dataset; + /* Private helpers: prepare_storage_for_frame returns the file-relative frame index that write_data uses as the chunk offset. */ + hsize_t prepare_storage_for_frame(const MessageMetadata& metadata); + void create_file(const MessageMetadata& metadata, hsize_t frame_chunk=0); + + public: + HDF5ChunkedWriter(); + HDF5ChunkedWriter(const std::string filename, const std::string dataset_name, hsize_t frames_per_file=0, hsize_t initial_dataset_size=config::initial_dataset_size); + void close_file(); + void write_data(const MessageMetadata& metadata, char* data); +}; + +#endif \ No newline at end of file