Moved is_structured_array() and to_serializable_dtype() to utils, renamed a few functions and propagated changes to dependent modules.

This commit is contained in:
2024-09-26 14:03:11 +02:00
parent a57e46d89c
commit a92660049f
5 changed files with 679 additions and 98 deletions

View File

@@ -76,7 +76,7 @@ class HDF5DataOpsManager():
# Parse value into HDF5 admissible type
for key in dataset_dict['attributes'].keys():
value = dataset_dict['attributes'][key]
dataset_dict['attributes'][key] = utils.parse_attribute(value)
dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)
#name = dataset_dict['name']
#data = dataset_dict['data']
@@ -98,7 +98,7 @@ class HDF5DataOpsManager():
for new_attr_key in annotation_dict.keys():
value = annotation_dict[new_attr_key]
if isinstance(value, dict):
annotation_dict[new_attr_key] = utils.parse_attribute(annotation_dict[new_attr_key])
annotation_dict[new_attr_key] = utils.convert_attrdict_to_np_structured_array(annotation_dict[new_attr_key])
obj.attrs.update(annotation_dict)
def get_metadata(self, obj_path):
@@ -231,60 +231,6 @@ def get_parent_child_relationships(file: h5py.File):
return nodes, parent, values
def to_serializable_dtype(value):
    """Transform a NumPy value into a YAML/JSON compatible Python value.

    Parameters
    ----------
    value : np.generic, np.ndarray or any other object
        NumPy scalar, regular array or structured array to convert. Objects
        that are neither ``np.generic`` nor ``np.ndarray`` are returned
        unchanged.

    Returns
    -------
    str, int, float, list, dict or the input object
        * NumPy scalars: bytes are decoded as UTF-8, strings become ``str``
          and every numeric scalar becomes ``float``.
        * Regular arrays: a list of ``str``/``int``/``float`` when the array
          holds more than one element, otherwise the single converted element.
        * Structured arrays: a dict mapping each field name to the converted
          field column.
        * ``np.nan`` when the dtype is not supported or the conversion fails
          (a message is printed in both cases).
    """

    def _decode_utf8(item):
        # Elements of a bytes array are np.bytes_, a subclass of bytes.
        return item.decode('utf-8')

    try:
        if isinstance(value, np.generic):
            if np.issubdtype(value.dtype, np.bytes_):
                value = value.decode('utf-8')
            # np.str_ rather than np.unicode_: the latter alias was removed
            # in NumPy 2.0, and the resulting AttributeError would be
            # swallowed below, turning every non-bytes value into NaN.
            elif np.issubdtype(value.dtype, np.str_):
                value = str(value)
            elif np.issubdtype(value.dtype, np.number):
                value = float(value)
            else:
                print('Yaml-compatible data-type was not found. Value has been set to NaN.')
                value = np.nan
        elif isinstance(value, np.ndarray):
            if value.dtype.names:
                # Structured array (with fields): convert each field column.
                value = {field: to_serializable_dtype(value[field]) for field in value.dtype.names}
            else:
                # Regular array: pick the per-element converter once.
                if np.issubdtype(value.dtype, np.bytes_):
                    convert = _decode_utf8
                elif np.issubdtype(value.dtype, np.str_):
                    convert = str
                elif np.issubdtype(value.dtype, np.integer):
                    convert = int
                elif np.issubdtype(value.dtype, np.floating):
                    convert = float
                else:
                    convert = None

                if convert is None:
                    print('Yaml-compatible data-type was not found. Value has been set to NaN.')
                    value = np.nan
                else:
                    # len() is undefined for 0-d arrays; view them as a
                    # one-element array so they convert like a single value.
                    items = value.reshape(-1) if value.ndim == 0 else value
                    if len(items) > 1:
                        value = [convert(item) for item in items]
                    else:
                        value = convert(items[0])
    except Exception as e:
        # Deliberate best-effort: a value that cannot be converted must not
        # abort the caller, so it is reported and replaced by NaN.
        print(f'Error converting value: {e}. Value has been set to NaN.')
        value = np.nan

    return value
def is_structured_array(attr_val):
    """Return True when ``attr_val`` is a NumPy structured array.

    Parameters
    ----------
    attr_val : object
        Value to inspect, e.g. an HDF5 attribute value.

    Returns
    -------
    bool
        True if ``attr_val`` is an ``np.ndarray`` whose dtype has named
        fields, False for every other input.
    """
    # dtype.names is None for regular (unstructured) arrays.
    return isinstance(attr_val, np.ndarray) and attr_val.dtype.names is not None
def construct_attributes_dict(attrs_obj):
@@ -293,13 +239,13 @@ def construct_attributes_dict(attrs_obj):
attr_dict[key] = {}
if not key in ['file_list','filtered_file_list']:
if is_structured_array(value):
if utils.is_structured_array(value):
#for subattr in value.dtype.names:
#attr_dict[key][subattr] = make_dtype_yaml_compatible(value[subattr])
attr_dict[key] = to_serializable_dtype(value)
attr_dict[key] = utils.to_serializable_dtype(value)
else:
attr_dict[key] = {"rename_as" : key,
"value" : to_serializable_dtype(value)
"value" : utils.to_serializable_dtype(value)
}
#if isinstance(value,str):