From 8e6ee49188a44f2bea930b59c08a694551813122 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Thu, 19 Jun 2025 20:49:14 +0200 Subject: [PATCH] Modify utils/g5505_utils.py. Implement handling unicode character errors. --- utils/g5505_utils.py | 62 ++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/utils/g5505_utils.py b/utils/g5505_utils.py index 7eaf45b..508cb48 100644 --- a/utils/g5505_utils.py +++ b/utils/g5505_utils.py @@ -217,49 +217,49 @@ def convert_string_to_bytes(input_list: list): def convert_attrdict_to_np_structured_array(attr_value: dict): """ - Converts a dictionary of attributes into a numpy structured array for HDF5 - compound type compatibility. - - Each dictionary key is mapped to a field in the structured array, with the - data type (S) determined by the longest string representation of the values. - If the dictionary is empty, the function returns 'missing'. + Converts a dictionary of attributes into a NumPy structured array with byte-encoded fields. + Handles UTF-8 encoding to avoid UnicodeEncodeError with non-ASCII characters. Parameters ---------- attr_value : dict - Dictionary containing the attributes to be converted. Example: - attr_value = { - 'name': 'Temperature', - 'unit': 'Celsius', - 'value': 23.5, - 'timestamp': '2023-09-26 10:00' - } + Dictionary with scalar values (int, float, str). Returns ------- new_attr_value : ndarray - Numpy structured array with UTF-8 encoded fields. Returns np.array(['missing'], dtype=[str]) if - the input dictionary is empty. + 1-row structured array with fixed-size byte fields (dtype='S'). """ - if not isinstance(attr_value,dict): - raise ValueError(f'Input paremeter {attr_value} must be a dictionary of scalar values.') - + if not isinstance(attr_value, dict): + raise ValueError(f"Input must be a dictionary, got {type(attr_value)}") + + if not attr_value: + return np.array(['missing'], dtype=[('value', 'S16')]) # placeholder + dtype = [] values_list = [] - max_length = max(len(str(attr_value[key])) for key in attr_value.keys()) - for key, val in attr_value.items(): - # Verify if 'rename_as' is still used in metadata revision - if key != 'rename_as' and isinstance(val, (int, float, str)): - dtype.append((key, f'S{max_length}')) - values_list.append(attr_value[key]) - else: - print(f"Skipping unsupported type for key {key}: {type(val)}") - if values_list: - new_attr_value = np.array([tuple(values_list)], dtype=dtype) - else: - new_attr_value = np.array(['missing'], dtype=[str]) - return new_attr_value + max_str_len = max(len(str(v)) for v in attr_value.values()) + byte_len = max_str_len * 4 # UTF-8 worst-case + + for key, val in attr_value.items(): + if key == 'rename_as': + continue + if isinstance(val, (int, float, str)): + dtype.append((key, f'S{byte_len}')) + try: + encoded_val = str(val).encode('utf-8') # explicit UTF-8 + values_list.append(encoded_val) + except UnicodeEncodeError as e: + logging.error(f"Failed to encode {key}={val}: {e}") + raise + else: + logging.warning(f"Skipping unsupported type for key {key}: {type(val)}") + + if values_list: + return np.array([tuple(values_list)], dtype=dtype) + else: + return np.array(['missing'], dtype=[('value', 'S16')]) def infer_units(column_name):