Improved conversion of HDF5 attribute dicts into YAML-compatible dicts. HDF5 compound attributes (structured NumPy arrays) can now be parsed.
This commit is contained in:
@@ -59,31 +59,38 @@ def display_group_hierarchy_on_a_treemap(filename: str):
|
|||||||
#pio.write_image(fig,file_name + ".png",width=800,height=600,format='png')
|
#pio.write_image(fig,file_name + ".png",width=800,height=600,format='png')
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
||||||
def make_dtype_yaml_compatible(value):
|
def make_dtype_yaml_compatible(value):
|
||||||
try:
|
try:
|
||||||
if isinstance(value, np.generic):
|
if isinstance(value, np.generic):
|
||||||
#if np.issubdtype(value.dtype, np.string_):
|
|
||||||
#value = value.astype(str)
|
|
||||||
if np.issubdtype(value.dtype, np.bytes_):
|
if np.issubdtype(value.dtype, np.bytes_):
|
||||||
value = value.decode('utf-8')
|
value = value.decode('utf-8')
|
||||||
elif np.issubdtype(value.dtype, np.unicode_):
|
elif np.issubdtype(value.dtype, np.unicode_):
|
||||||
value = str(value)
|
value = str(value)
|
||||||
elif np.issubdtype(value.dtype, np.number):
|
elif np.issubdtype(value.dtype, np.number):
|
||||||
value = float(value)
|
value = float(value)
|
||||||
else:
|
else:
|
||||||
print('Yaml-compatible data-type was not found. Value has been set to Nan.')
|
print('Yaml-compatible data-type was not found. Value has been set to NaN.')
|
||||||
value = np.nan
|
value = np.nan
|
||||||
elif isinstance(value, np.ndarray):
|
elif isinstance(value, np.ndarray):
|
||||||
if np.issubdtype(value.dtype, np.string_) or np.issubdtype(value.dtype, np.generic):
|
# Handling structured array types (with fields)
|
||||||
value = [str(item) for item in value] if len(value)>1 else str(value[0]) # value.astype(str).tolist()
|
if value.dtype.names:
|
||||||
elif np.issubdtype(value.dtype, np.integer) :
|
value = {field: make_dtype_yaml_compatible(value[field]) for field in value.dtype.names}
|
||||||
value = [int(item) for item in value] if len(value)>1 else int(value[0]) # value.astype(int).tolist()
|
else:
|
||||||
elif np.issubdtype(value.dtype, np.floating):
|
# Handling regular array NumPy types
|
||||||
value = [float(item) for item in value] if len(value)>1 else float(value[0]) # value.astype(float).tolist()
|
if np.issubdtype(value.dtype, np.bytes_):
|
||||||
|
value = [item.decode('utf-8') for item in value] if len(value) > 1 else value[0].decode('utf-8')
|
||||||
|
elif np.issubdtype(value.dtype, np.unicode_):
|
||||||
|
value = [str(item) for item in value] if len(value) > 1 else str(value[0])
|
||||||
|
elif np.issubdtype(value.dtype, np.integer):
|
||||||
|
value = [int(item) for item in value] if len(value) > 1 else int(value[0])
|
||||||
|
elif np.issubdtype(value.dtype, np.floating):
|
||||||
|
value = [float(item) for item in value] if len(value) > 1 else float(value[0])
|
||||||
|
else:
|
||||||
|
print('Yaml-compatible data-type was not found. Value has been set to NaN.')
|
||||||
|
value = np.nan
|
||||||
|
|
||||||
except:
|
except Exception as e:
|
||||||
print('Yaml-compatible data-type was not found. Value has been set to Nan.')
|
print(f'Error converting value: {e}. Value has been set to NaN.')
|
||||||
value = np.nan
|
value = np.nan
|
||||||
|
|
||||||
return value
|
return value
|
||||||
@@ -104,8 +111,9 @@ def construct_attributes_dict(attrs_obj):
|
|||||||
if not key in ['file_list','filtered_file_list']:
|
if not key in ['file_list','filtered_file_list']:
|
||||||
|
|
||||||
if is_structured_array(value):
|
if is_structured_array(value):
|
||||||
for subattr in value.dtype.names:
|
#for subattr in value.dtype.names:
|
||||||
attr_dict[key][subattr] = make_dtype_yaml_compatible(value[subattr])
|
#attr_dict[key][subattr] = make_dtype_yaml_compatible(value[subattr])
|
||||||
|
attr_dict[key] = make_dtype_yaml_compatible(value)
|
||||||
else:
|
else:
|
||||||
value = make_dtype_yaml_compatible(value)
|
value = make_dtype_yaml_compatible(value)
|
||||||
attr_dict[key] = {"rename_as" : key,
|
attr_dict[key] = {"rename_as" : key,
|
||||||
@@ -156,7 +164,7 @@ def print_metadata(name, obj, folder_depth, yaml_dict):
|
|||||||
yaml_dict[obj.name] = group_dict
|
yaml_dict[obj.name] = group_dict
|
||||||
elif isinstance(obj, h5py.Dataset):
|
elif isinstance(obj, h5py.Dataset):
|
||||||
parent_name = '/'.join(name_to_list[:-1])
|
parent_name = '/'.join(name_to_list[:-1])
|
||||||
yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes":dict(obj.attrs)}
|
yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes": construct_attributes_dict(obj.attrs)}
|
||||||
#print(yaml.dump(group_dict,sort_keys=False))
|
#print(yaml.dump(group_dict,sort_keys=False))
|
||||||
|
|
||||||
#elif len(obj.name.split('/')) == 3:
|
#elif len(obj.name.split('/')) == 3:
|
||||||
|
Reference in New Issue
Block a user