def get_parent_child_relationships(file: h5py.File):
    """Collect parallel node/parent/value lists for the top levels of an HDF5 file.

    The three returned lists are index-aligned: entry ``i`` of each list
    describes the same node. The root group ``'/'`` is always entry 0, with
    an empty string as its parent.

    Parameters:
        file (h5py.File): open HDF5 file whose hierarchy is traversed with
            ``file.visititems``.

    Returns:
        tuple: ``(nodes, parents, values)`` where
            nodes   -- absolute path (``obj.name``) of every recorded node,
            parents -- absolute path of each node's parent (``obj.parent.name``),
            values  -- 1 for a dataset; otherwise the number of members
                       reported by ``len(obj.keys())``, or 0 when the node
                       does not expose a usable ``keys()``.

    Side effects:
        Prints the path of every recorded non-dataset node to stdout.
    """
    nodes = ['/']
    parents = ['']
    # The root's value is the number of members directly under '/'.
    values = [len(file.keys())]

    def node_visitor(name, obj):
        # `name` is the path handed over by visititems; nodes whose path
        # holds more than two '/' separators are left out of the result.
        if name.count('/') > 2:
            return None

        nodes.append(obj.name)
        parents.append(obj.parent.name)

        if isinstance(obj, h5py.Dataset):
            values.append(1)
            return None

        print(obj.name)
        try:
            values.append(len(obj.keys()))
        except (AttributeError, TypeError):
            # Node is neither a dataset nor a container with members;
            # record it with a zero count. Only these errors are caught so
            # that KeyboardInterrupt/SystemExit still propagate.
            values.append(0)
        # Returning None tells visititems to keep walking.
        return None

    file.visititems(node_visitor)

    return nodes, parents, values