Added lines to treat string attributes as fixed-length strings, which are represented as bytes that need to be decoded with UTF-8. There are a few advantages, and HDF5 readers provide more precise behavior with fixed-length strings than with variable-length strings.

This commit is contained in:
2024-03-22 17:28:47 +01:00
parent 13cb6395aa
commit 1bf1f60beb

View File

@ -317,11 +317,21 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
if not file_dict:
continue
# file_dict = file_obj
# Create group and add their attributes
h5file[group_name].create_group(name=file_dict['name'])
for key in file_dict['attributes_dict'].keys():
h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
# Represent string values as fixed-length strings in the HDF5 file; these are stored as bytes
# and need to be decoded back to strings when we read them. This provides better control than
# variable-length strings, at the expense of flexibility.
# https://docs.h5py.org/en/stable/strings.html
value = file_dict['attributes_dict'][key]
if isinstance(value,str):
utf8_type = h5py.string_dtype('utf-8', len(value))
value = np.array(value.encode('utf-8'),dtype=utf8_type)
h5file[group_name][file_dict['name']].attrs.create(name=key,
data=value)
# Add datasets to just created group
for dataset in file_dict['datasets']: