From f6a46168ec9cec7784fd7171db6f50aa29dcd28d Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe
Date: Tue, 28 May 2024 11:27:44 +0200
Subject: [PATCH] Improved parsing from HDF5 attr dict to yaml compatible dict. Now we can parse HDF5 compound attributes (structured np arrays).

---
 src/hdf5_vis.py | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/hdf5_vis.py b/src/hdf5_vis.py
index b5c8dc3..77bd9a0 100644
--- a/src/hdf5_vis.py
+++ b/src/hdf5_vis.py
@@ -59,31 +59,38 @@ def display_group_hierarchy_on_a_treemap(filename: str):
     #pio.write_image(fig,file_name + ".png",width=800,height=600,format='png')
 
 #
-
 def make_dtype_yaml_compatible(value):
     try:
         if isinstance(value, np.generic):
-            #if np.issubdtype(value.dtype, np.string_):
-                #value = value.astype(str)
             if np.issubdtype(value.dtype, np.bytes_):
                 value = value.decode('utf-8')
             elif np.issubdtype(value.dtype, np.unicode_):
-                value = str(value)
+                value = str(value)
             elif np.issubdtype(value.dtype, np.number):
                 value = float(value)
             else:
-                print('Yaml-compatible data-type was not found. Value has been set to Nan.')
+                print('Yaml-compatible data-type was not found. Value has been set to NaN.')
                 value = np.nan
-        elif isinstance(value, np.ndarray):
-            if np.issubdtype(value.dtype, np.string_) or np.issubdtype(value.dtype, np.generic):
-                value = [str(item) for item in value] if len(value)>1 else str(value[0]) # value.astype(str).tolist()
-            elif np.issubdtype(value.dtype, np.integer) :
-                value = [int(item) for item in value] if len(value)>1 else int(value[0]) # value.astype(int).tolist()
-            elif np.issubdtype(value.dtype, np.floating):
-                value = [float(item) for item in value] if len(value)>1 else float(value[0]) # value.astype(float).tolist()
+        elif isinstance(value, np.ndarray):
+            # Handling structured array types (with fields)
+            if value.dtype.names:
+                value = {field: make_dtype_yaml_compatible(value[field]) for field in value.dtype.names}
+            else:
+                # Handling regular array NumPy types
+                if np.issubdtype(value.dtype, np.bytes_):
+                    value = [item.decode('utf-8') for item in value] if len(value) > 1 else value[0].decode('utf-8')
+                elif np.issubdtype(value.dtype, np.unicode_):
+                    value = [str(item) for item in value] if len(value) > 1 else str(value[0])
+                elif np.issubdtype(value.dtype, np.integer):
+                    value = [int(item) for item in value] if len(value) > 1 else int(value[0])
+                elif np.issubdtype(value.dtype, np.floating):
+                    value = [float(item) for item in value] if len(value) > 1 else float(value[0])
+                else:
+                    print('Yaml-compatible data-type was not found. Value has been set to NaN.')
+                    value = np.nan
 
-    except:
-        print('Yaml-compatible data-type was not found. Value has been set to Nan.')
+    except Exception as e:
+        print(f'Error converting value: {e}. Value has been set to NaN.')
         value = np.nan
 
     return value
@@ -104,8 +111,9 @@ def construct_attributes_dict(attrs_obj):
 
         if not key in ['file_list','filtered_file_list']:
             if is_structured_array(value):
-                for subattr in value.dtype.names:
-                    attr_dict[key][subattr] = make_dtype_yaml_compatible(value[subattr])
+                #for subattr in value.dtype.names:
+                    #attr_dict[key][subattr] = make_dtype_yaml_compatible(value[subattr])
+                attr_dict[key] = make_dtype_yaml_compatible(value)
             else:
                 value = make_dtype_yaml_compatible(value)
                 attr_dict[key] = {"rename_as" : key,
@@ -156,7 +164,7 @@ def print_metadata(name, obj, folder_depth, yaml_dict):
         yaml_dict[obj.name] = group_dict
     elif isinstance(obj, h5py.Dataset):
         parent_name = '/'.join(name_to_list[:-1])
-        yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes":dict(obj.attrs)}
+        yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes": construct_attributes_dict(obj.attrs)}
         #print(yaml.dump(group_dict,sort_keys=False))
 
     #elif len(obj.name.split('/')) == 3: