Modify utils/g5505_utils.py. Implement handling of Unicode character encoding errors.
This commit is contained in:
@@ -217,49 +217,49 @@ def convert_string_to_bytes(input_list: list):
|
|||||||
|
|
||||||
def convert_attrdict_to_np_structured_array(attr_value: dict):
|
def convert_attrdict_to_np_structured_array(attr_value: dict):
|
||||||
"""
|
"""
|
||||||
Converts a dictionary of attributes into a numpy structured array for HDF5
|
Converts a dictionary of attributes into a NumPy structured array with byte-encoded fields.
|
||||||
compound type compatibility.
|
Handles UTF-8 encoding to avoid UnicodeEncodeError with non-ASCII characters.
|
||||||
|
|
||||||
Each dictionary key is mapped to a field in the structured array, with the
|
|
||||||
data type (S) determined by the longest string representation of the values.
|
|
||||||
If the dictionary is empty, the function returns 'missing'.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
attr_value : dict
|
attr_value : dict
|
||||||
Dictionary containing the attributes to be converted. Example:
|
Dictionary with scalar values (int, float, str).
|
||||||
attr_value = {
|
|
||||||
'name': 'Temperature',
|
|
||||||
'unit': 'Celsius',
|
|
||||||
'value': 23.5,
|
|
||||||
'timestamp': '2023-09-26 10:00'
|
|
||||||
}
|
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
new_attr_value : ndarray
|
new_attr_value : ndarray
|
||||||
Numpy structured array with UTF-8 encoded fields. Returns np.array(['missing'], dtype=[str]) if
|
1-row structured array with fixed-size byte fields (dtype='S').
|
||||||
the input dictionary is empty.
|
|
||||||
"""
|
"""
|
||||||
if not isinstance(attr_value,dict):
|
if not isinstance(attr_value, dict):
|
||||||
raise ValueError(f'Input paremeter {attr_value} must be a dictionary of scalar values.')
|
raise ValueError(f"Input must be a dictionary, got {type(attr_value)}")
|
||||||
|
|
||||||
|
if not attr_value:
|
||||||
|
return np.array(['missing'], dtype=[('value', 'S16')]) # placeholder
|
||||||
|
|
||||||
dtype = []
|
dtype = []
|
||||||
values_list = []
|
values_list = []
|
||||||
max_length = max(len(str(attr_value[key])) for key in attr_value.keys())
|
|
||||||
for key, val in attr_value.items():
|
|
||||||
# Verify if 'rename_as' is still used in metadata revision
|
|
||||||
if key != 'rename_as' and isinstance(val, (int, float, str)):
|
|
||||||
dtype.append((key, f'S{max_length}'))
|
|
||||||
values_list.append(attr_value[key])
|
|
||||||
else:
|
|
||||||
print(f"Skipping unsupported type for key {key}: {type(val)}")
|
|
||||||
if values_list:
|
|
||||||
new_attr_value = np.array([tuple(values_list)], dtype=dtype)
|
|
||||||
else:
|
|
||||||
new_attr_value = np.array(['missing'], dtype=[str])
|
|
||||||
|
|
||||||
return new_attr_value
|
max_str_len = max(len(str(v)) for v in attr_value.values())
|
||||||
|
byte_len = max_str_len * 4 # UTF-8 worst-case
|
||||||
|
|
||||||
|
for key, val in attr_value.items():
|
||||||
|
if key == 'rename_as':
|
||||||
|
continue
|
||||||
|
if isinstance(val, (int, float, str)):
|
||||||
|
dtype.append((key, f'S{byte_len}'))
|
||||||
|
try:
|
||||||
|
encoded_val = str(val).encode('utf-8') # explicit UTF-8
|
||||||
|
values_list.append(encoded_val)
|
||||||
|
except UnicodeEncodeError as e:
|
||||||
|
logging.error(f"Failed to encode {key}={val}: {e}")
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
logging.warning(f"Skipping unsupported type for key {key}: {type(val)}")
|
||||||
|
|
||||||
|
if values_list:
|
||||||
|
return np.array([tuple(values_list)], dtype=dtype)
|
||||||
|
else:
|
||||||
|
return np.array(['missing'], dtype=[('value', 'S16')])
|
||||||
|
|
||||||
|
|
||||||
def infer_units(column_name):
|
def infer_units(column_name):
|
||||||
|
Reference in New Issue
Block a user