Added lines to store string attributes as fixed-length strings, which are represented as bytes that need to be decoded as UTF-8 when read. There are a few advantages, and HDF5 readers provide more precise behavior with fixed-length strings than with variable-length strings.
This commit is contained in:
@ -317,11 +317,21 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
|||||||
if not file_dict:
|
if not file_dict:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# file_dict = file_obj
|
|
||||||
# Create group and add their attributes
|
# Create group and add their attributes
|
||||||
h5file[group_name].create_group(name=file_dict['name'])
|
h5file[group_name].create_group(name=file_dict['name'])
|
||||||
for key in file_dict['attributes_dict'].keys():
|
for key in file_dict['attributes_dict'].keys():
|
||||||
h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
|
|
||||||
|
# Represent string values as fixed-length strings in the HDF5 file, which need
|
||||||
|
# to be decoded to strings when we read them. It provides better control than variable-length strings,
|
||||||
|
# at the expense of flexibility.
|
||||||
|
# https://docs.h5py.org/en/stable/strings.html
|
||||||
|
value = file_dict['attributes_dict'][key]
|
||||||
|
if isinstance(value,str):
|
||||||
|
utf8_type = h5py.string_dtype('utf-8', len(value))
|
||||||
|
value = np.array(value.encode('utf-8'),dtype=utf8_type)
|
||||||
|
|
||||||
|
h5file[group_name][file_dict['name']].attrs.create(name=key,
|
||||||
|
data=value)
|
||||||
|
|
||||||
# Add datasets to just created group
|
# Add datasets to just created group
|
||||||
for dataset in file_dict['datasets']:
|
for dataset in file_dict['datasets']:
|
||||||
|
Reference in New Issue
Block a user