// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only #ifndef JUNGFRAUJOCH_HDF5OBJECTS_H #define JUNGFRAUJOCH_HDF5OBJECTS_H #include #include #include #include #include #include #include "../common/JFJochException.h" #include "../compression/CompressionAlgorithmEnum.h" #include "../common/CompressedImage.h" extern std::mutex hdf5_mutex; class HDF5DataSet; class HDF5Id { protected: hid_t id; HDF5Id() = default; HDF5Id(HDF5Id &&other) noexcept; HDF5Id(const HDF5Id& other); public: // Assignment would require closing ID's first and cannot be generalized easily HDF5Id& operator=(HDF5Id &&other) noexcept = delete; HDF5Id& operator=(const HDF5Id &other) noexcept = delete; hid_t GetID() const; }; class HDF5DataSpace : public HDF5Id { uint8_t ndims = 1; public: explicit HDF5DataSpace(const std::vector& dims = {1}, const std::vector &max_dims = {}); explicit HDF5DataSpace(const HDF5DataSet& data_set); uint8_t GetNumOfDimensions() const; std::vector GetDimensions() const; void SelectHyperslab(const std::vector& start, const std::vector& size); void SelectHyperslabWithStride(const std::vector& start, const std::vector& size, const std::vector& stride); ~HDF5DataSpace(); }; class HDF5DataType : public HDF5Id { public: HDF5DataType(uint64_t size_in_bytes, bool is_signed); explicit HDF5DataType(double val); explicit HDF5DataType(float val); explicit HDF5DataType(bool val); explicit HDF5DataType(int32_t val); explicit HDF5DataType(uint32_t val); explicit HDF5DataType(int64_t val); explicit HDF5DataType(uint64_t val); explicit HDF5DataType(int16_t val); explicit HDF5DataType(uint16_t val); explicit HDF5DataType(int8_t val); explicit HDF5DataType(uint8_t val); explicit HDF5DataType(const std::string &val); explicit HDF5DataType(const char *val); explicit HDF5DataType(const HDF5DataSet& data_set); explicit HDF5DataType(CompressedImageMode mode); size_t GetElemSize() const; bool IsSigned() const; bool IsInteger() const; bool IsFloat() const; ~HDF5DataType(); }; class HDF5Dcpl : public HDF5Id { uint8_t ndim = 0; public: HDF5Dcpl(); HDF5Dcpl(const HDF5DataSet& data_set); ~HDF5Dcpl(); void SetFillValue32(int32_t val); void SetFillValue16(int16_t val); void SetFillValue8(int8_t val); void SetFillValueU32(uint32_t val); void SetFillValueU16(uint16_t val); void SetChunking(const std::vector & dims); std::vector GetChunking(); CompressionAlgorithm GetCompression(); void SetCompression(CompressionAlgorithm algorithm, size_t block_size); void SetVirtual(const std::string& filename, const std::string& dataset, const HDF5DataSpace& src_dataspace, const HDF5DataSpace& virtual_dataspace); uint8_t GetNumOfDimensions() const; }; class HDF5Fapl : public HDF5Id { public: HDF5Fapl(); ~HDF5Fapl(); void SetVersionTo1p10orNewer(); }; class HDF5Object : public HDF5Id { public: HDF5Object& Attr(const std::string& name, double val); HDF5Object& Attr(const std::string& name, const std::string& val); HDF5Object& Attr(const std::string& name, int32_t val); HDF5Object& Attr(const std::string& name, uint32_t val); HDF5Object& Attr(const std::string& name, int64_t val); HDF5Object& Attr(const std::string& name, uint64_t val); HDF5Object& Attr(const std::string& name, const std::vector &val); std::vector ReadAttrVec(const std::string& name); std::string ReadAttrStr(const std::string &name); double ReadAttrDouble(const std::string &name); int64_t ReadAttrInt(const std::string &name); HDF5Object& Units(const std::string& val); HDF5Object& NXClass(const std::string& val); HDF5Object& Transformation(const std::string& units, const std::string& depends_on, const std::string& equipment, const std::string& equipment_component, const std::string& transformation_type, const std::vector &vec); HDF5Object& Transformation(const std::string& units, const std::string& depends_on, const std::string& equipment, const std::string& equipment_component, const std::string& transformation_type, const std::vector& vector, const std::vector &offset, const std::string& offset_units); HDF5Object& ExternalLink(std::string filename, const std::string &external_dataset, const std::string &local_name); HDF5Object& HardLink(const std::string &source, const std::string &dest); std::unique_ptr SaveScalar(const std::string &name, const char *val); std::unique_ptr SaveScalar(const std::string &name, const std::string& val); std::unique_ptr SaveVector(const std::string &name, const std::vector &val); template std::unique_ptr SaveScalar(const std::string &name, T val); template std::unique_ptr SaveVector(const std::string &name, const std::vector &val, std::vector dim = {}, CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION); bool GetBool(const std::string &name) const; int64_t GetInt(const std::string& name) const; std::optional GetOptInt(const std::string& name) const; std::optional GetOptFloat(const std::string& name) const; std::optional GetOptBool(const std::string& name) const; float GetFloat(const std::string& name) const; std::string GetString(const std::string& name, const std::string& def = "") const; template std::optional ReadElement(const std::string& name, size_t n) const; template std::vector ReadVector(const std::string &name); template std::vector ReadOptVector(const std::string &name); template std::vector ReadVector(const std::string &name, const std::vector& start, const std::vector& size); template std::vector ReadOptVector(const std::string &name, const std::vector& start, const std::vector& size); bool Exists(const std::string& name) const; std::string GetLinkedFileName(const std::string& name) const; std::vector FindLeafs(const std::string &name) const; std::vector GetDimension(const std::string &name); }; class HDF5Group : public HDF5Object { public: HDF5Group(const HDF5Object& parent, const std::string& name); HDF5Group(const HDF5Object& parent, const char *name); ~HDF5Group(); }; class HDF5File : public HDF5Object { public: explicit HDF5File(const std::string& filename, bool v1_10 = false); ~HDF5File(); void Delete(const std::string& path); }; class HDF5ReadOnlyFile : public HDF5Object { public: explicit HDF5ReadOnlyFile(const std::string& filename); ~HDF5ReadOnlyFile(); }; class HDF5DataSet : public HDF5Object { uint8_t ndim; const std::string dataset_name; public: HDF5DataSet(const HDF5Object& parent, const std::string& name); // Open existing dataset HDF5DataSet(const HDF5Object& parent, const std::string& name, const HDF5DataType &data_type, const HDF5DataSpace &data_space, const HDF5Dcpl &dcpl); HDF5DataSet(const HDF5Object &parent, const std::string &name, const HDF5DataType &data_type, const HDF5DataSpace &data_space); ~HDF5DataSet(); HDF5DataSet& Write(const HDF5DataType &data_type, const void *val); template HDF5DataSet& WriteVec(const std::vector &v, const std::vector &start, const std::vector &size) { HDF5DataSpace mem_space({v.size()}); HDF5DataSpace file_space(*this); file_space.SelectHyperslab(start, size); if (H5Dwrite(id, HDF5DataType(v[0]).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, v.data()) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Vector dataset write unsuccessful"); return *this; } template HDF5DataSet& WriteScalar(const T &val, const std::vector &start) { HDF5DataSpace mem_space({1}); HDF5DataSpace file_space(*this); file_space.SelectHyperslab(start, {1}); if (H5Dwrite(id, HDF5DataType(val).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, &val) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Vector dataset write unsuccessful"); return *this; } HDF5DataSet& WriteDirectChunk(const void *val, hsize_t data_size, const std::vector& offset); void ReadDirectChunk(std::vector &val, const std::vector& offset); HDF5DataSet& Flush(); void SetExtent(const std::vector& dims); template T ReadScalar() const { HDF5DataSpace mem_space({1}); HDF5DataSpace file_space(*this); if (file_space.GetNumOfDimensions() != 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Dataset tries to read scalar from vector dataset");; T output; if (H5Dread(id, HDF5DataType(output).GetID(), mem_space.GetID(), H5S_ALL, H5P_DEFAULT, &output) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, dataset_name + ": read unsuccessful"); return output; } void ReadVectorToU8(std::vector &v, const std::vector& slab_start, const std::vector& slab_size) const { HDF5DataType data_type(*this); HDF5DataSpace mem_space(slab_size); HDF5DataSpace file_space(*this); file_space.SelectHyperslab(slab_start, slab_size); size_t out_size = 1; out_size *= data_type.GetElemSize(); for (unsigned long i : slab_size) out_size *= i; if (out_size == 0) throw JFJochException(JFJochExceptionCategory::HDF5,"Output size cannot be zero"); v.resize(out_size); if (H5Dread(id, data_type.GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, v.data()) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Multidimensional vector dataset read unsuccessful"); } template void ReadVector(std::vector &v) const { HDF5DataSpace file_space(*this); if (file_space.GetNumOfDimensions() == 0) { // Handle trivial case of scalar v.resize(1); if (H5Dread(id, HDF5DataType(v[0]).GetID(), H5S_ALL, H5S_ALL, H5P_DEFAULT, v.data()) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, dataset_name + ": read unsuccessful"); return; } if (file_space.GetNumOfDimensions() != 1) throw JFJochException(JFJochExceptionCategory::HDF5, "Dataset tries to read multi-dimensional value into 1D vector "); v.resize(file_space.GetDimensions()[0]); if (H5Dread(id, HDF5DataType(v[0]).GetID(), H5S_ALL, H5S_ALL, H5P_DEFAULT, v.data()) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "1D vector dataset read unsuccessful"); } template void ReadVector(std::vector &v, const std::vector& start, const std::vector& size) const { HDF5DataSpace mem_space({v.size()}); HDF5DataSpace file_space(*this); file_space.SelectHyperslab(start, size); if (H5Dread(id, HDF5DataType(v[0]).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, v.data()) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Multidimensional vector dataset read unsuccessful"); } template void ReadVector(std::vector &v, const std::vector& start, const std::vector& size, const std::vector& stride) const { HDF5DataSpace mem_space({v.size()}); HDF5DataSpace file_space(*this); file_space.SelectHyperslabWithStride(start, size, stride); if (H5Dread(id, HDF5DataType(v[0]).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, v.data()) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "Multidimensional vector dataset read unsuccessful"); } template std::optional ReadElement(size_t n) const { HDF5DataSpace file_space(*this); auto dims = file_space.GetDimensions(); // Allow scalar (0-D) to be read with n==0 as a convenience. if (file_space.GetNumOfDimensions() == 0) { if (n != 0) return std::nullopt; return ReadScalar(); } if (file_space.GetNumOfDimensions() != 1) throw JFJochException(JFJochExceptionCategory::HDF5, "ReadNth requires a 1D dataset"); if (n >= dims[0]) return std::nullopt; // Select a single-element hyperslab at position n. file_space.SelectHyperslab({static_cast(n)}, {1}); HDF5DataSpace mem_space({1}); T out{}; if (H5Dread(id, HDF5DataType(out).GetID(), mem_space.GetID(), file_space.GetID(), H5P_DEFAULT, &out) < 0) throw JFJochException(JFJochExceptionCategory::HDF5, "ReadNth unsuccessful"); return out; } std::string ReadString() const; }; inline std::unique_ptr SaveScalar(const HDF5Object& parent, const std::string &name, const char* val) { HDF5DataType data_type(val); HDF5DataSpace data_space({1}); auto dataset = std::make_unique(parent, name, data_type, data_space); dataset->Write(data_type, val); return dataset; } inline std::unique_ptr SaveScalar(const HDF5Object& parent, const std::string &name, const std::string& val) { return SaveScalar(parent, name, val.c_str()); } template std::unique_ptr SaveScalar(const HDF5Object& parent, const std::string &name, T val) { HDF5DataType data_type(val); HDF5DataSpace data_space({1}); auto dataset = std::make_unique(parent, name, data_type, data_space); dataset->Write(data_type, &val); return dataset; } template std::unique_ptr HDF5Object::SaveScalar(const std::string &name, T val) { HDF5DataType data_type(val); HDF5DataSpace data_space({1}); auto dataset = std::make_unique(*this, name, data_type, data_space); dataset->Write(data_type, &val); return dataset; } template std::vector HDF5Object::ReadVector(const std::string &name) { std::vector tmp; HDF5DataSet dataset(*this, name); dataset.ReadVector(tmp); return tmp; } template std::vector HDF5Object::ReadOptVector(const std::string &name) { std::vector tmp; if (Exists(name)) { HDF5DataSet dataset(*this, name); dataset.ReadVector(tmp); } return tmp; } template std::vector HDF5Object::ReadVector(const std::string &name, const std::vector &start, const std::vector &size) { std::vector tmp; if (start.empty() || (start.size() != size.size())) throw JFJochException(JFJochExceptionCategory::HDF5, "Dimension error"); size_t v_size = size[0]; for (int i = 1; i < size.size(); i++) v_size *= size[i]; tmp.resize(v_size); HDF5DataSet dataset(*this, name); dataset.ReadVector(tmp, start, size); return tmp; } template std::vector HDF5Object::ReadOptVector(const std::string &name, const std::vector &start, const std::vector &size) { std::vector tmp; if (Exists(name)) tmp = ReadVector(name, start, size); return tmp; } template std::optional HDF5Object::ReadElement(const std::string &name, size_t n) const { if (Exists(name)) { const HDF5DataSet dataset(*this, name); return dataset.ReadElement(n); } return std::nullopt; } template std::unique_ptr SaveVector(const HDF5Object& parent, const std::string &name, const std::vector &val, std::vector dims = {}, CompressionAlgorithm algorithm = CompressionAlgorithm::NO_COMPRESSION) { if (dims.empty()) dims = {val.size()}; if (val.empty()) throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot write empty vector"); HDF5DataType data_type(val[0]); HDF5Dcpl dcpl; if (algorithm != CompressionAlgorithm::NO_COMPRESSION) { dcpl.SetCompression(algorithm, 0); dcpl.SetChunking(dims); } HDF5DataSpace data_space(dims); auto dataset = std::make_unique(parent, name, data_type, data_space, dcpl); dataset->Write(data_type, val.data()); return dataset; } template std::unique_ptr HDF5Object::SaveVector(const std::string &name, const std::vector &val, std::vector dims, CompressionAlgorithm algorithm) { if (val.empty()) throw JFJochException(JFJochExceptionCategory::HDF5, "Cannot write empty vector"); if (dims.empty()) dims = {val.size()}; HDF5DataType data_type(val[0]); HDF5Dcpl dcpl; if (algorithm != CompressionAlgorithm::NO_COMPRESSION) { dcpl.SetCompression(algorithm, 0); dcpl.SetChunking(dims); } HDF5DataSpace data_space(dims); auto dataset = std::make_unique(*this, name, data_type, data_space, dcpl); dataset->Write(data_type, val.data()); return dataset; } inline std::string hdf5_version() { unsigned majnum, minnum, relnum; H5get_libversion(&majnum, &minnum, &relnum); return "hdf5-" + std::to_string(majnum) + "." + std::to_string(minnum) + "." + std::to_string(relnum); } void RegisterHDF5Filter(); std::string ExtractFilename(const std::string& str); #endif //JUNGFRAUJOCH_HDF5OBJECTS_H