def convert_string_to_bytes(input_list: list) -> np.ndarray:
    """Convert a list of strings into a NumPy array of fixed-length UTF-8 bytes.

    The returned array can be used as a list-valued HDF5 attribute.

    Parameters
    ----------
    input_list : list
        Strings to convert. An empty list (or any other falsy value, such
        as None) yields an empty array.

    Returns
    -------
    np.ndarray
        One-dimensional array holding the UTF-8 encoded strings, with a
        fixed-length dtype built by ``h5py.string_dtype('utf-8', length)``.
        ``length`` is the byte length of the longest encoded item, and at
        least 1.
    """
    # Characters that cannot be encoded (e.g. lone surrogates) are skipped
    # on purpose (errors='ignore') rather than raising.
    encoded_items = [
        item.encode('utf-8', errors='ignore') for item in (input_list or [])
    ]

    # The fixed-length dtype is sized in BYTES, not characters. A non-ASCII
    # character takes several bytes in UTF-8, so the width must be measured
    # on the encoded data; measuring the str would truncate such items.
    max_length = max((len(item) for item in encoded_items), default=0)

    # Keep the width at least 1 so an empty list, or a list of empty
    # strings, never requests a zero-length string type.
    utf8_type = h5py.string_dtype('utf-8', max(max_length, 1))

    return np.array(encoded_items, dtype=utf8_type)