From 1bf1f60bebdd98653af35d570cdb17eb0497e756 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Fri, 22 Mar 2024 17:28:47 +0100 Subject: [PATCH] Added lines to treat string attributes as fixed-length strings, which are represented as bytes that need to be decoded with utf-8. There are a few advantages, and the HDF5 reader provides more precise behavior than with variable-length strings --- src/hdf5_lib.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/hdf5_lib.py b/src/hdf5_lib.py index cbab913..ad41383 100644 --- a/src/hdf5_lib.py +++ b/src/hdf5_lib.py @@ -317,11 +317,21 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, if not file_dict: continue - # file_dict = file_obj # Create group and add their attributes h5file[group_name].create_group(name=file_dict['name']) for key in file_dict['attributes_dict'].keys(): - h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key]) + + # Represent string values as fixed length strings in the HDF5 file, which need + # to be decoded as string when we read them. It provides better control than variable strings, + # at the expense of flexibility. + # https://docs.h5py.org/en/stable/strings.html + value = file_dict['attributes_dict'][key] + if isinstance(value,str): + utf8_type = h5py.string_dtype('utf-8', len(value)) + value = np.array(value.encode('utf-8'),dtype=utf8_type) + + h5file[group_name][file_dict['name']].attrs.create(name=key, + data=value) # Add datasets to just created group for dataset in file_dict['datasets']: