From 8e6ee49188a44f2bea930b59c08a694551813122 Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Thu, 19 Jun 2025 20:49:14 +0200
Subject: [PATCH] Modify utils/g5505_utils.py. Implement handling of Unicode
 character errors.

---
 utils/g5505_utils.py | 62 ++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/utils/g5505_utils.py b/utils/g5505_utils.py
index 7eaf45b..508cb48 100644
--- a/utils/g5505_utils.py
+++ b/utils/g5505_utils.py
@@ -217,49 +217,49 @@ def convert_string_to_bytes(input_list: list):
 
 def convert_attrdict_to_np_structured_array(attr_value: dict):
     """
-    Converts a dictionary of attributes into a numpy structured array for HDF5 
-    compound type compatibility.
-
-    Each dictionary key is mapped to a field in the structured array, with the 
-    data type (S) determined by the longest string representation of the values. 
-    If the dictionary is empty, the function returns 'missing'.
+    Converts a dictionary of attributes into a NumPy structured array with byte-encoded fields.
+    Handles UTF-8 encoding to avoid UnicodeEncodeError with non-ASCII characters.
 
     Parameters
     ----------
     attr_value : dict
-        Dictionary containing the attributes to be converted. Example:
-        attr_value = {
-            'name': 'Temperature',
-            'unit': 'Celsius',
-            'value': 23.5,
-            'timestamp': '2023-09-26 10:00'
-        }
+        Dictionary with scalar values (int, float, str).
 
     Returns
     -------
     new_attr_value : ndarray 
-        Numpy structured array with UTF-8 encoded fields. Returns np.array(['missing'], dtype=[str]) if 
-        the input dictionary is empty.
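+        1-row structured array with fixed-size byte fields (dtype='S'). If the input is empty or has no supported values, a placeholder array built from 'missing' with a single 'value' field (S16) is returned instead.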
     """
-    if not isinstance(attr_value,dict):
-        raise ValueError(f'Input paremeter {attr_value} must be a dictionary of scalar values.')
-    
+    if not isinstance(attr_value, dict):
+        raise ValueError(f"Input must be a dictionary, got {type(attr_value)}")
+
+    if not attr_value:
+        return np.array(['missing'], dtype=[('value', 'S16')])  # placeholder
+
     dtype = []
     values_list = []
-    max_length = max(len(str(attr_value[key])) for key in attr_value.keys())
-    for key, val in attr_value.items():
-        # Verify if 'rename_as' is still used in metadata revision
-        if key != 'rename_as' and isinstance(val, (int, float, str)):
-            dtype.append((key, f'S{max_length}'))
-            values_list.append(attr_value[key])  
-        else:
-            print(f"Skipping unsupported type for key {key}: {type(val)}")
-    if values_list:
-        new_attr_value = np.array([tuple(values_list)], dtype=dtype)
-    else:
-        new_attr_value = np.array(['missing'], dtype=[str])
 
-    return new_attr_value
+    max_str_len = max(len(str(v)) for v in attr_value.values())
+    byte_len = max_str_len * 4  # UTF-8 worst-case
+
+    for key, val in attr_value.items():
+        if key == 'rename_as':
+            continue
+        if isinstance(val, (int, float, str)):
+            dtype.append((key, f'S{byte_len}'))
+            try:
+                encoded_val = str(val).encode('utf-8')  # explicit UTF-8
+                values_list.append(encoded_val)
+            except UnicodeEncodeError as e:
+                logging.error(f"Failed to encode {key}={val}: {e}")
+                raise
+        else:
+            logging.warning(f"Skipping unsupported type for key {key}: {type(val)}")
+
+    if values_list:
+        return np.array([tuple(values_list)], dtype=dtype)
+    else:
+        return np.array(['missing'], dtype=[('value', 'S16')])
 
 
 def infer_units(column_name):