Added lines to store string attributes as fixed-length strings, which are represented as bytes that need to be decoded as UTF-8 when read. This has a few advantages; in particular, HDF5 readers provide more precise behavior with fixed-length strings than with variable-length strings.
This commit is contained in:
@ -317,11 +317,21 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
if not file_dict:
|
||||
continue
|
||||
|
||||
# file_dict = file_obj
|
||||
# Create group and add their attributes
|
||||
h5file[group_name].create_group(name=file_dict['name'])
|
||||
for key in file_dict['attributes_dict'].keys():
|
||||
h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
|
||||
|
||||
# Represent string values as fixed length strings in the HDF5 file, which need
|
||||
# to be decoded as strings when we read them. This provides better control than variable-length strings,
|
||||
# at the expense of flexibility.
|
||||
# https://docs.h5py.org/en/stable/strings.html
|
||||
value = file_dict['attributes_dict'][key]
|
||||
if isinstance(value,str):
|
||||
utf8_type = h5py.string_dtype('utf-8', len(value))
|
||||
value = np.array(value.encode('utf-8'),dtype=utf8_type)
|
||||
|
||||
h5file[group_name][file_dict['name']].attrs.create(name=key,
|
||||
data=value)
|
||||
|
||||
# Add datasets to just created group
|
||||
for dataset in file_dict['datasets']:
|
||||
|
Reference in New Issue
Block a user