From a727e38db47105528ffb944261fb31e1849e8fbc Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Tue, 26 Mar 2024 16:14:40 +0100 Subject: [PATCH] Implemented hdf5_vis.py, which is a hdf5 visualization library to obtain treemap and yaml representations of hdf5 files. --- src/hdf5_vis.py | 93 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 31 deletions(-) diff --git a/src/hdf5_vis.py b/src/hdf5_vis.py index 636f452..ada825a 100644 --- a/src/hdf5_vis.py +++ b/src/hdf5_vis.py @@ -1,22 +1,19 @@ +import sys +import os +root_dir = os.path.abspath(os.curdir) +sys.path.append(root_dir) + import h5py import yaml -import os + +import src.hdf5_lib as hdf5_lib + import numpy as np -import numpy as pd +import pandas as pd -import config_file -import hdf5_lib -import g5505_utils as utils - -import matplotlib.pyplot as plt -import plotly.express as px -import plotly.graph_objects as go from plotly.subplots import make_subplots - -import subprocess - -#output = subprocess.run("git status",capture_output=True) -#output.stdout() +import plotly.graph_objects as go +import plotly.express as px def display_group_hierarchy_on_a_treemap(filename: str): @@ -63,9 +60,12 @@ def display_group_hierarchy_on_a_treemap(filename: str): def make_dtype_yaml_compatible(value): try: if isinstance(value, np.generic): - if np.issubdtype(value.dtype, np.string_) or np.issubdtype(value.dtype, np.unicode_): + #if np.issubdtype(value.dtype, np.string_): #value = value.astype(str) - value = str(value) + if np.issubdtype(value.dtype, np.bytes_): + value = value.decode('utf-8') + elif np.issubdtype(value.dtype, np.unicode_): + value = str(value) elif np.issubdtype(value.dtype, np.number): value = float(value) else: @@ -91,30 +91,49 @@ def make_dtype_yaml_compatible(value): return value -def print_metadata(name, obj, yaml_dict): +def construct_attributes_dict(attrs_obj): + + attr_dict = {} + for key, value in attrs_obj.items(): + if not key in ['file_list','filtered_file_list']: + + value = make_dtype_yaml_compatible(value) + + #if isinstance(value,str): + # value.replace('\\','\\\\') + + attr_dict[key] = {"rename_as" : key, + "value" : value + } + return attr_dict + +def print_metadata(name, obj, folder_depth, yaml_dict): # TODO: should we enable deeper folders ? - if len(obj.name.split('/')) <= 4: + if len(obj.name.split('/')) <= folder_depth: name_to_list = obj.name.split('/') name_head = name_to_list[-1] if isinstance(obj,h5py.Group): #print('name:', obj.name) #print('attributes:', dict(obj.attrs)) - attr_dict = {} + #attr_dict = {} group_dict = {} - for key, value in obj.attrs.items(): + + attr_dict = construct_attributes_dict(obj.attrs) + + #for key, value in obj.attrs.items(): #print (key, value.dtype) - if key == 'Layout': - print(value) + # if key == 'Layout': + # print(value) - if not key in ['file_list','filtered_file_list']: + # if not key in ['file_list','filtered_file_list']: - value = make_dtype_yaml_compatible(value) + # value = make_dtype_yaml_compatible(value) - attr_dict[key] = {'rename_as' : key, - 'value' : value - } + # attr_dict[key] = {'rename_as' : key, + # 'value' : value + # } #group_dict[obj.name] = {'name': obj.name, 'attributes': attr_dict} group_dict = {"name": name_head, "attributes": attr_dict, "datasets":{}} @@ -126,7 +145,7 @@ def print_metadata(name, obj, yaml_dict): yaml_dict[obj.name] = group_dict elif isinstance(obj, h5py.Dataset): parent_name = '/'.join(name_to_list[:-1]) - yaml_dict[parent_name]["datasets"][name_head] = {'rename_as': name_head ,'attributes':dict(obj.attrs)} + yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes":dict(obj.attrs)} #print(yaml.dump(group_dict,sort_keys=False)) #elif len(obj.name.split('/')) == 3: @@ -135,18 +154,30 @@ def print_metadata(name, obj, yaml_dict): -def take_yml_snapshot_of_hdf5_file(input_filename_path): +def take_yml_snapshot_of_hdf5_file(input_filename_path,folder_depth: int = 4): yaml_dict = {} output_filename_tail, ext = os.path.splitext(input_filename_path) with h5py.File(input_filename_path,'r') as f: - f.visititems(lambda name, obj: print_metadata(name,obj,yaml_dict)) + + attrs_dict = construct_attributes_dict(f.attrs) + yaml_dict[f.name] = {"name": f.name, "attributes": attrs_dict, "datasets":{}} + f.visititems(lambda name, obj: print_metadata(name,obj,folder_depth,yaml_dict)) + + #with open(output_filename_tail+".json","w") as yaml_file: + # json_obj = json.dumps(yaml_dict,indent=4,sort_keys=False,) + # yaml_file.write(json_obj) with open(output_filename_tail+".yaml","w") as yaml_file: - yaml.dump(yaml_dict,yaml_file,sort_keys=False) + yaml_output = yaml.dump(yaml_dict,sort_keys=False) + #for key in yaml_dict: + # yaml_output = yaml.dump(yaml_dict[key],sort_keys=False) + yaml_file.write(yaml_output ) return output_filename_tail+".yaml" + +