Added a function to convert a list of strings into a np.array of bytes. This is useful for creating list-valued attributes in HDF5.

This commit is contained in:
2024-05-26 14:56:36 +02:00
parent 88572b44b1
commit 77afbbbf8f

View File

@ -204,6 +204,18 @@ def parse_attribute(attr_value):
return new_attr_value
def convert_string_to_bytes(input_list: list):
    """Convert a list of strings into a NumPy array of UTF-8 encoded bytes.

    Each string is encoded as UTF-8 (characters that cannot be encoded are
    skipped), and the array dtype is built with ``h5py.string_dtype`` using
    the length of the longest *encoded* item.

    Parameters
    ----------
    input_list : list
        Strings to convert. May be empty.

    Returns
    -------
    numpy.ndarray
        Array holding the encoded byte strings, or an empty array when
        ``input_list`` is empty.
    """
    if not input_list:
        return np.array([], dtype=h5py.string_dtype('utf-8', 0))

    # errors='ignore' skips characters that cannot be encoded.
    encoded_items = [item.encode('utf-8', errors='ignore') for item in input_list]

    # Size the dtype from the encoded bytes rather than the character count:
    # a non-ASCII character occupies more than one byte in UTF-8, so using
    # len(str) would make the dtype too short and truncate the stored data.
    max_length = max(len(encoded) for encoded in encoded_items)

    return np.array(encoded_items, dtype=h5py.string_dtype('utf-8', max_length))
def third_update_hdf5_file_with_review(input_hdf5_file, yaml_review_file, reviewer_attrs = {}, hdf5_upload : bool = False):
"""Third"""