// Files
// Jungfraujoch/writer/HDF5Objects.h
// 2025-09-21 19:27:51 +02:00
//
// 460 lines
// 19 KiB
// C++

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#ifndef JUNGFRAUJOCH_HDF5OBJECTS_H
#define JUNGFRAUJOCH_HDF5OBJECTS_H
#include <hdf5.h>
#include <memory>
#include <string>
#include <vector>
#include <mutex>
#include <optional>
#include "../common/JFJochException.h"
#include "../compression/CompressionAlgorithmEnum.h"
#include "../common/CompressedImage.h"
// Mutex object defined in another translation unit.
// NOTE(review): presumably used to serialize calls into the HDF5 library; none of the
// inline code visible in this header locks it — confirm against the implementation file.
extern std::mutex hdf5_mutex;
// Forward declaration: HDF5DataSpace, HDF5DataType and HDF5Dcpl declare constructors
// taking `const HDF5DataSet&` before HDF5DataSet itself is defined.
class HDF5DataSet;
// Common base of all wrappers in this header: stores one HDF5 identifier (hid_t).
// Construction is restricted to derived classes (constructors are protected).
class HDF5Id {
protected:
// Wrapped HDF5 identifier. It has no default member initializer and the default
// constructor is defaulted, so the value is indeterminate until a derived class assigns it.
hid_t id;
HDF5Id() = default;
// Move and copy construction are declared here and defined out of line.
HDF5Id(HDF5Id &&other) noexcept;
HDF5Id(const HDF5Id& other);
public:
// Assignment would require closing ID's first and cannot be generalized easily
HDF5Id& operator=(HDF5Id &&other) noexcept = delete;
HDF5Id& operator=(const HDF5Id &other) noexcept = delete;
// Returns the wrapped identifier, e.g. for passing to HDF5 C API calls.
hid_t GetID() const;
};
// Wrapper for an HDF5 dataspace identifier (dimensions and element selection).
class HDF5DataSpace : public HDF5Id {
// Rank tracked by this wrapper; defaults to 1.
uint8_t ndims = 1;
public:
// Builds a dataspace from explicit dimensions (default: a single dimension of extent 1).
// NOTE(review): an empty max_dims presumably means "maximum extent equals dims" — confirm in the .cpp.
explicit HDF5DataSpace(const std::vector<hsize_t>& dims = {1}, const std::vector<hsize_t> &max_dims = {});
// Builds a dataspace from an existing dataset. Inline readers in HDF5DataSet rely on
// GetNumOfDimensions() == 0 for scalar datasets obtained this way.
explicit HDF5DataSpace(const HDF5DataSet& data_set);
uint8_t GetNumOfDimensions() const;
std::vector<hsize_t> GetDimensions() const;
// Restrict the selection to a hyperslab given per-dimension start and size
// (and, for the second form, stride).
void SelectHyperslab(const std::vector<hsize_t>& start, const std::vector<hsize_t>& size);
void SelectHyperslabWithStride(const std::vector<hsize_t>& start, const std::vector<hsize_t>& size,
const std::vector<hsize_t>& stride);
~HDF5DataSpace();
};
// Wrapper for an HDF5 datatype identifier.
// The single-argument constructors form an overload set keyed on the C++ type of the
// argument, which lets templates write `HDF5DataType(value)` for any supported T.
// NOTE(review): for the arithmetic overloads the argument value itself is presumably
// unused (type selection only), while the string overloads may use the string length —
// confirm in the .cpp.
class HDF5DataType : public HDF5Id {
public:
// Integer type described by its size in bytes and signedness.
HDF5DataType(uint64_t size_in_bytes, bool is_signed);
explicit HDF5DataType(double val);
explicit HDF5DataType(float val);
explicit HDF5DataType(bool val);
explicit HDF5DataType(int32_t val);
explicit HDF5DataType(uint32_t val);
explicit HDF5DataType(int64_t val);
explicit HDF5DataType(uint64_t val);
explicit HDF5DataType(int16_t val);
explicit HDF5DataType(uint16_t val);
explicit HDF5DataType(int8_t val);
explicit HDF5DataType(uint8_t val);
explicit HDF5DataType(const std::string &val);
explicit HDF5DataType(const char *val);
// Datatype taken from an existing dataset (used by HDF5DataSet::ReadVectorToU8).
explicit HDF5DataType(const HDF5DataSet& data_set);
// Datatype corresponding to a compressed-image pixel mode.
explicit HDF5DataType(CompressedImageMode mode);
// Size of one element in bytes (ReadVectorToU8 multiplies it into the output buffer size).
size_t GetElemSize() const;
bool IsSigned() const;
bool IsInteger() const;
bool IsFloat() const;
~HDF5DataType();
};
// Wrapper for a property list that is passed to the HDF5DataSet creating constructor.
// NOTE(review): "Dcpl" presumably stands for HDF5 "dataset creation property list" — confirm.
class HDF5Dcpl : public HDF5Id {
// Number of dimensions tracked by this wrapper; 0 by default.
// NOTE(review): presumably set by SetChunking — confirm in the .cpp.
uint8_t ndim = 0;
public:
HDF5Dcpl();
// Property list obtained from an existing dataset. Not marked explicit, so a
// HDF5DataSet converts implicitly to HDF5Dcpl.
HDF5Dcpl(const HDF5DataSet& data_set);
~HDF5Dcpl();
// Fill-value setters, one per supported integer width/signedness.
void SetFillValue32(int32_t val);
void SetFillValue16(int16_t val);
void SetFillValue8(int8_t val);
void SetFillValueU32(uint32_t val);
void SetFillValueU16(uint16_t val);
// Chunk layout accessors.
void SetChunking(const std::vector<hsize_t> & dims);
std::vector<hsize_t> GetChunking();
// Compression filter accessors. SaveVector in this header passes block_size = 0.
CompressionAlgorithm GetCompression();
void SetCompression(CompressionAlgorithm algorithm, size_t block_size);
// NOTE(review): presumably maps `virtual_dataspace` of a virtual dataset onto
// `src_dataspace` of `dataset` inside `filename` — confirm in the .cpp.
void SetVirtual(const std::string& filename, const std::string& dataset, const HDF5DataSpace& src_dataspace,
const HDF5DataSpace& virtual_dataspace);
uint8_t GetNumOfDimensions() const;
};
// Wrapper for a file-related HDF5 property list.
// NOTE(review): "Fapl" presumably stands for HDF5 "file access property list" — confirm.
class HDF5Fapl : public HDF5Id {
public:
HDF5Fapl();
~HDF5Fapl();
// NOTE(review): judging by the name, restricts the library-version bounds of the file
// format to HDF5 1.10 or newer — confirm in the .cpp.
void SetVersionTo1p10orNewer();
};
// Common interface of HDF5 locations (files, groups, datasets): attribute access,
// links, and convenience helpers to save/read child datasets addressed by name.
// The template members declared here are defined further down in this header.
class HDF5Object : public HDF5Id {
public:
// Attribute setters. Each returns HDF5Object& so that calls can be chained
// (NOTE(review): presumably a reference to *this — confirm).
HDF5Object& Attr(const std::string& name, double val);
HDF5Object& Attr(const std::string& name, const std::string& val);
HDF5Object& Attr(const std::string& name, int32_t val);
HDF5Object& Attr(const std::string& name, uint32_t val);
HDF5Object& Attr(const std::string& name, int64_t val);
HDF5Object& Attr(const std::string& name, uint64_t val);
HDF5Object& Attr(const std::string& name, const std::vector<double> &val);
// Attribute getters.
std::vector<double> ReadAttrVec(const std::string& name);
std::string ReadAttrStr(const std::string &name);
double ReadAttrDouble(const std::string &name);
int64_t ReadAttrInt(const std::string &name);
// NOTE(review): Units/NXClass/Transformation presumably write the NeXus-convention
// attributes suggested by their names and parameters — confirm in the .cpp.
HDF5Object& Units(const std::string& val);
HDF5Object& NXClass(const std::string& val);
HDF5Object& Transformation(const std::string& units, const std::string& depends_on,
const std::string& equipment, const std::string& equipment_component,
const std::string& transformation_type, const std::vector<double> &vec);
HDF5Object& Transformation(const std::string& units, const std::string& depends_on,
const std::string& equipment, const std::string& equipment_component,
const std::string& transformation_type, const std::vector<double>& vector,
const std::vector<double> &offset, const std::string& offset_units);
// Link creation. Note that ExternalLink takes `filename` by value.
HDF5Object& ExternalLink(std::string filename, const std::string &external_dataset, const std::string &local_name);
HDF5Object& HardLink(const std::string &source, const std::string &dest);
// Create a child dataset called `name`, write `val` into it and return the dataset handle.
std::unique_ptr<HDF5DataSet> SaveScalar(const std::string &name, const char *val);
std::unique_ptr<HDF5DataSet> SaveScalar(const std::string &name, const std::string& val);
std::unique_ptr<HDF5DataSet> SaveVector(const std::string &name, const std::vector<std::string> &val);
template <class T> std::unique_ptr<HDF5DataSet> SaveScalar(const std::string &name, T val);
// The template definition throws JFJochException for an empty `val`; an empty `dim`
// means a 1-D dataset of val.size() elements.
template <class T> std::unique_ptr<HDF5DataSet> SaveVector(const std::string &name, const std::vector<T> &val,
std::vector<hsize_t> dim = {}, CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION);
// Typed getters for child datasets.
// NOTE(review): the GetOpt* variants presumably return std::nullopt when `name`
// does not exist — confirm in the .cpp.
bool GetBool(const std::string &name) const;
int64_t GetInt(const std::string& name) const;
std::optional<int64_t> GetOptInt(const std::string& name) const;
std::optional<float> GetOptFloat(const std::string& name) const;
std::optional<bool> GetOptBool(const std::string& name) const;
float GetFloat(const std::string& name) const;
std::string GetString(const std::string& name, const std::string& def = "") const;
// Returns std::nullopt when `name` does not exist; otherwise forwards to
// HDF5DataSet::ReadElement for element `n`.
template <class T> std::optional<T> ReadElement(const std::string& name, size_t n) const;
// ReadVector opens dataset `name` and reads it; the ReadOptVector variants return an
// empty vector instead when Exists(name) is false.
template <class T> std::vector<T> ReadVector(const std::string &name);
template <class T> std::vector<T> ReadOptVector(const std::string &name);
// Hyperslab variants: `start` and `size` must be non-empty and of equal length,
// otherwise the definition throws JFJochException ("Dimension error").
template <class T> std::vector<T> ReadVector(const std::string &name,
const std::vector<hsize_t>& start,
const std::vector<hsize_t>& size);
template <class T> std::vector<T> ReadOptVector(const std::string &name,
const std::vector<hsize_t>& start,
const std::vector<hsize_t>& size);
// Existence check used by the optional readers above.
bool Exists(const std::string& name) const;
std::string GetLinkedFileName(const std::string& name) const;
std::vector<std::string> FindLeafs(const std::string &name) const;
std::vector<hsize_t> GetDimension(const std::string &name);
};
// HDF5 group located under `parent` and identified by `name`.
// NOTE(review): whether the constructors create the group, open an existing one, or
// both is decided in the .cpp — confirm before relying on either behaviour.
class HDF5Group : public HDF5Object {
public:
HDF5Group(const HDF5Object& parent, const std::string& name);
HDF5Group(const HDF5Object& parent, const char *name);
~HDF5Group();
};
// HDF5 file handle that, unlike HDF5ReadOnlyFile, also offers Delete().
// NOTE(review): presumably creates/opens the file for writing, and `v1_10` presumably
// selects the HDF5 1.10+ file format (see HDF5Fapl::SetVersionTo1p10orNewer) — confirm.
class HDF5File : public HDF5Object {
public:
explicit HDF5File(const std::string& filename, bool v1_10 = false);
~HDF5File();
// NOTE(review): presumably removes the link at `path` inside the file — confirm.
void Delete(const std::string& path);
};
// HDF5 file handle intended for reading.
// NOTE(review): judging by the name, the file is opened read-only — confirm in the .cpp.
class HDF5ReadOnlyFile : public HDF5Object {
public:
explicit HDF5ReadOnlyFile(const std::string& filename);
~HDF5ReadOnlyFile();
};
/// Handle to one HDF5 dataset together with typed, inline read/write helpers.
///
/// The inline helpers call H5Dread/H5Dwrite directly on the wrapped identifier and
/// report failures by throwing JFJochException (category HDF5). The element type T of
/// the templated helpers must be accepted by one of the HDF5DataType constructors.
/// NOTE(review): none of the inline helpers lock hdf5_mutex; serialization is
/// presumably left to the caller — confirm.
class HDF5DataSet : public HDF5Object {
    uint8_t ndim;
    const std::string dataset_name;
public:
    HDF5DataSet(const HDF5Object& parent, const std::string& name); // Open existing dataset
    /// Creating constructors: dataset `name` under `parent` with the given type, space
    /// and (first form) creation property list.
    HDF5DataSet(const HDF5Object& parent, const std::string& name, const HDF5DataType &data_type,
                const HDF5DataSpace &data_space, const HDF5Dcpl &dcpl);
    HDF5DataSet(const HDF5Object &parent, const std::string &name, const HDF5DataType &data_type,
                const HDF5DataSpace &data_space);
    ~HDF5DataSet();

    /// Writes from the raw buffer `val`, interpreted as `data_type`. No element count is
    /// passed, so the buffer must cover whatever the implementation writes.
    HDF5DataSet& Write(const HDF5DataType &data_type, const void *val);

    /// Writes `v` into the hyperslab (`start`, `size`) of the dataset.
    /// The memory side is described as a 1-D space of v.size() elements.
    /// @throws JFJochException if `v` is empty or if H5Dwrite fails.
    template <class T>
    HDF5DataSet& WriteVec(const std::vector<T> &v,
                          const std::vector<hsize_t> &start,
                          const std::vector<hsize_t> &size) {
        // v[0] is needed to select the HDF5 datatype; indexing an empty vector is undefined.
        if (v.empty())
            throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot write empty vector");

        HDF5DataSpace mem_space({v.size()});
        HDF5DataSpace file_space(*this);
        file_space.SelectHyperslab(start, size);
        if (H5Dwrite(id,
                     HDF5DataType(v[0]).GetID(),
                     mem_space.GetID(),
                     file_space.GetID(),
                     H5P_DEFAULT,
                     v.data()) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "Vector dataset write unsuccessful");
        return *this;
    }

    /// Writes the single value `val` at position `start` of the dataset.
    /// @throws JFJochException if H5Dwrite fails.
    template <class T>
    HDF5DataSet& WriteScalar(const T &val,
                             const std::vector<hsize_t> &start) {
        HDF5DataSpace mem_space({1});
        HDF5DataSpace file_space(*this);
        file_space.SelectHyperslab(start, {1});
        if (H5Dwrite(id,
                     HDF5DataType(val).GetID(),
                     mem_space.GetID(),
                     file_space.GetID(),
                     H5P_DEFAULT,
                     &val) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "Scalar dataset write unsuccessful");
        return *this;
    }

    HDF5DataSet& WriteDirectChunk(const void *val, hsize_t data_size, const std::vector<hsize_t>& offset);
    void ReadDirectChunk(std::vector<uint8_t> &val, const std::vector<hsize_t>& offset);
    HDF5DataSet& Flush();
    void SetExtent(const std::vector<hsize_t>& dims);

    /// Reads the value of a scalar (0-dimensional) dataset.
    /// @throws JFJochException if the dataset is not 0-dimensional or if H5Dread fails.
    template<class T> T ReadScalar() const {
        HDF5DataSpace mem_space({1});
        HDF5DataSpace file_space(*this);
        if (file_space.GetNumOfDimensions() != 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "Dataset tries to read scalar from vector dataset");

        // Value-initialized: `output` is passed by value to HDF5DataType to select the
        // datatype, and reading an indeterminate value would be undefined behaviour.
        T output{};
        if (H5Dread(id, HDF5DataType(output).GetID(), mem_space.GetID(), H5S_ALL, H5P_DEFAULT, &output) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, dataset_name + ": read unsuccessful");
        return output;
    }

    /// Reads the hyperslab (`slab_start`, `slab_size`) as raw bytes in the dataset's own
    /// datatype; `v` is resized to element size times the product of `slab_size`.
    /// @throws JFJochException if the resulting size is zero or if H5Dread fails.
    void ReadVectorToU8(std::vector<uint8_t> &v, const std::vector<hsize_t>& slab_start, const std::vector<hsize_t>& slab_size) const {
        HDF5DataType data_type(*this);
        HDF5DataSpace mem_space(slab_size);
        HDF5DataSpace file_space(*this);
        file_space.SelectHyperslab(slab_start, slab_size);

        size_t out_size = data_type.GetElemSize();
        // Iterate as hsize_t: `unsigned long` is narrower than hsize_t on some platforms.
        for (const hsize_t extent : slab_size)
            out_size *= extent;

        if (out_size == 0)
            throw JFJochException(JFJochExceptionCategory::HDF5,"Output size cannot be zero");

        v.resize(out_size);

        if (H5Dread(id, data_type.GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, v.data()) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "Multidimensional vector dataset read unsuccessful");
    }

    /// Reads a whole scalar or 1-D dataset into `v`, resizing `v` to fit.
    /// A 1-D dataset of extent zero yields an empty `v`.
    /// @throws JFJochException if the dataset has more than one dimension or if H5Dread fails.
    template<class T> void ReadVector(std::vector<T> &v) const {
        HDF5DataSpace file_space(*this);

        if (file_space.GetNumOfDimensions() == 0) {
            // Handle trivial case of scalar
            v.resize(1);
            if (H5Dread(id, HDF5DataType(v[0]).GetID(), H5S_ALL, H5S_ALL, H5P_DEFAULT, v.data()) < 0)
                throw JFJochException(JFJochExceptionCategory::HDF5, dataset_name + ": read unsuccessful");
            return;
        }

        if (file_space.GetNumOfDimensions() != 1)
            throw JFJochException(JFJochExceptionCategory::HDF5, "Dataset tries to read multi-dimensional value into 1D vector ");

        v.resize(file_space.GetDimensions()[0]);
        // Nothing to transfer for an empty dataset; v[0] below would be undefined.
        if (v.empty())
            return;

        if (H5Dread(id, HDF5DataType(v[0]).GetID(), H5S_ALL, H5S_ALL, H5P_DEFAULT, v.data()) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "1D vector dataset read unsuccessful");
    }

    /// Reads the hyperslab (`start`, `size`) into the pre-sized vector `v`.
    /// `v` is not resized; the memory side is a 1-D space of v.size() elements.
    /// @throws JFJochException if `v` is empty or if H5Dread fails.
    template<class T> void ReadVector(std::vector<T> &v, const std::vector<hsize_t>& start, const std::vector<hsize_t>& size) const {
        // v[0] is needed to select the HDF5 datatype; indexing an empty vector is undefined.
        if (v.empty())
            throw JFJochException(JFJochExceptionCategory::HDF5, "Output size cannot be zero");

        HDF5DataSpace mem_space({v.size()});
        HDF5DataSpace file_space(*this);
        file_space.SelectHyperslab(start, size);

        if (H5Dread(id, HDF5DataType(v[0]).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, v.data()) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "Multidimensional vector dataset read unsuccessful");
    }

    /// Same as the hyperslab overload above, with an additional per-dimension `stride`.
    /// @throws JFJochException if `v` is empty or if H5Dread fails.
    template<class T> void ReadVector(std::vector<T> &v, const std::vector<hsize_t>& start,
                                      const std::vector<hsize_t>& size, const std::vector<hsize_t>& stride) const {
        // v[0] is needed to select the HDF5 datatype; indexing an empty vector is undefined.
        if (v.empty())
            throw JFJochException(JFJochExceptionCategory::HDF5, "Output size cannot be zero");

        HDF5DataSpace mem_space({v.size()});
        HDF5DataSpace file_space(*this);
        file_space.SelectHyperslabWithStride(start, size, stride);

        if (H5Dread(id, HDF5DataType(v[0]).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, v.data()) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "Multidimensional vector dataset read unsuccessful");
    }

    /// Reads element `n` of a 1-D dataset (or the value of a scalar dataset when n == 0).
    /// @return std::nullopt when `n` is past the end of a 1-D dataset.
    /// @throws JFJochException for n != 0 on a scalar dataset, for datasets with more
    ///         than one dimension, or if H5Dread fails.
    template<class T>
    std::optional<T> ReadElement(size_t n) const {
        HDF5DataSpace file_space(*this);
        auto dims = file_space.GetDimensions();

        // Allow scalar (0-D) to be read with n==0 as a convenience.
        if (file_space.GetNumOfDimensions() == 0) {
            if (n != 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Index out of bounds for scalar dataset");
            return ReadScalar<T>();
        }

        if (file_space.GetNumOfDimensions() != 1)
            throw JFJochException(JFJochExceptionCategory::HDF5, "ReadNth requires a 1D dataset");

        if (n >= dims[0])
            return std::nullopt;

        // Select a single-element hyperslab at position n.
        file_space.SelectHyperslab({static_cast<hsize_t>(n)}, {1});
        HDF5DataSpace mem_space({1});

        T out{};
        if (H5Dread(id, HDF5DataType(out).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, &out) < 0)
            throw JFJochException(JFJochExceptionCategory::HDF5, "ReadNth unsuccessful");
        return out;
    }

    std::string ReadString() const;
};
inline std::unique_ptr<HDF5DataSet> SaveScalar(const HDF5Object& parent, const std::string &name, const char* val) {
HDF5DataType data_type(val);
HDF5DataSpace data_space({1});
auto dataset = std::make_unique<HDF5DataSet>(parent, name, data_type, data_space);
dataset->Write(data_type, val);
return dataset;
}
/// std::string convenience overload: forwards the string's character buffer to the
/// `const char *` overload of SaveScalar.
inline std::unique_ptr<HDF5DataSet> SaveScalar(const HDF5Object& parent, const std::string &name, const std::string& val) {
    const char *text = val.c_str();
    return SaveScalar(parent, name, text);
}
template <class T> std::unique_ptr<HDF5DataSet> SaveScalar(const HDF5Object& parent, const std::string &name, T val) {
HDF5DataType data_type(val);
HDF5DataSpace data_space({1});
auto dataset = std::make_unique<HDF5DataSet>(parent, name, data_type, data_space);
dataset->Write(data_type, &val);
return dataset;
}
template <class T> std::unique_ptr<HDF5DataSet> HDF5Object::SaveScalar(const std::string &name, T val) {
HDF5DataType data_type(val);
HDF5DataSpace data_space({1});
auto dataset = std::make_unique<HDF5DataSet>(*this, name, data_type, data_space);
dataset->Write(data_type, &val);
return dataset;
}
template <class T> std::vector<T> HDF5Object::ReadVector(const std::string &name) {
std::vector<T> tmp;
HDF5DataSet dataset(*this, name);
dataset.ReadVector(tmp);
return tmp;
}
template <class T> std::vector<T> HDF5Object::ReadOptVector(const std::string &name) {
std::vector<T> tmp;
if (Exists(name)) {
HDF5DataSet dataset(*this, name);
dataset.ReadVector(tmp);
}
return tmp;
}
template<class T>
std::vector<T> HDF5Object::ReadVector(const std::string &name,
const std::vector<hsize_t> &start,
const std::vector<hsize_t> &size) {
std::vector<T> tmp;
if (start.empty() || (start.size() != size.size()))
throw JFJochException(JFJochExceptionCategory::HDF5, "Dimension error");
size_t v_size = size[0];
for (int i = 1; i < size.size(); i++)
v_size *= size[i];
tmp.resize(v_size);
HDF5DataSet dataset(*this, name);
dataset.ReadVector(tmp, start, size);
return tmp;
}
/// Like ReadVector(name, start, size), but returns an empty vector when `name`
/// does not exist.
template<class T>
std::vector<T> HDF5Object::ReadOptVector(const std::string &name,
                                         const std::vector<hsize_t> &start,
                                         const std::vector<hsize_t> &size) {
    if (!Exists(name))
        return std::vector<T>();

    return ReadVector<T>(name, start, size);
}
template<class T>
std::optional<T> HDF5Object::ReadElement(const std::string &name, size_t n) const {
if (Exists(name)) {
const HDF5DataSet dataset(*this, name);
return dataset.ReadElement<T>(n);
}
return std::nullopt;
}
template <class T> std::unique_ptr<HDF5DataSet> SaveVector(const HDF5Object& parent, const std::string &name, const std::vector<T> &val,
std::vector<hsize_t> dims = {},
CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION) {
if (dims.empty()) dims = {val.size()};
if (val.empty())
throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot write empty vector");
HDF5DataType data_type(val[0]);
HDF5Dcpl dcpl;
if (algorithm != CompressionAlgorithm::NO_COMPRESSION) {
dcpl.SetCompression(algorithm, 0);
dcpl.SetChunking(dims);
}
HDF5DataSpace data_space(dims);
auto dataset = std::make_unique<HDF5DataSet>(parent, name, data_type, data_space, dcpl);
dataset->Write(data_type, val.data());
return dataset;
}
template <class T> std::unique_ptr<HDF5DataSet> HDF5Object::SaveVector(const std::string &name, const std::vector<T> &val,
std::vector<hsize_t> dims, CompressionAlgorithm algorithm) {
if (val.empty())
throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot write empty vector");
if (dims.empty()) dims = {val.size()};
HDF5DataType data_type(val[0]);
HDF5Dcpl dcpl;
if (algorithm != CompressionAlgorithm::NO_COMPRESSION) {
dcpl.SetCompression(algorithm, 0);
dcpl.SetChunking(dims);
}
HDF5DataSpace data_space(dims);
auto dataset = std::make_unique<HDF5DataSet>(*this, name, data_type, data_space, dcpl);
dataset->Write(data_type, val.data());
return dataset;
}
/// Returns the version of the HDF5 library in use, formatted as "hdf5-<major>.<minor>.<release>".
/// The numbers come from H5get_libversion(); if that call fails they remain 0.
inline std::string hdf5_version() {
    // Zero-initialized so that a failed H5get_libversion() cannot leave indeterminate
    // values to be formatted below.
    unsigned majnum = 0;
    unsigned minnum = 0;
    unsigned relnum = 0;
    H5get_libversion(&majnum, &minnum, &relnum);
    return "hdf5-" + std::to_string(majnum) + "." + std::to_string(minnum) + "." + std::to_string(relnum);
}
// Declared here, defined in another translation unit.
// NOTE(review): presumably registers the compression filter(s) used by
// HDF5Dcpl::SetCompression with the HDF5 library — confirm in the implementation.
void RegisterHDF5Filter();
// NOTE(review): presumably returns the file-name component of the path `str` —
// confirm in the implementation.
std::string ExtractFilename(const std::string& str);
#endif //JUNGFRAUJOCH_HDF5OBJECTS_H