From 4974246522906c5763a08d96ac8992251b7c3e9d Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Fri, 13 Sep 2024 14:59:11 +0200 Subject: [PATCH] move def get_parent_child_relationships(file: h5py.File) from ..._vis.py to ..._ops.py --- src/hdf5_ops.py | 32 ++++++++++++++++++++++++++++++++ src/hdf5_vis.py | 33 ++------------------------------- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/src/hdf5_ops.py b/src/hdf5_ops.py index 0906c87..133f0f3 100644 --- a/src/hdf5_ops.py +++ b/src/hdf5_ops.py @@ -196,3 +196,35 @@ def list_datasets_in_hdf5file(hdf5_file_path): dataset_df['parent_file'] = dataset_df['dataset_name'].apply(lambda x: x.split('/')[-2]) return dataset_df + +def get_parent_child_relationships(file: h5py.File): + + nodes = ['/'] + parent = [''] + #values = [file.attrs['count']] + # TODO: maybe we should make this more general and not dependent on file_list attribute? + #if 'file_list' in file.attrs.keys(): + # values = [len(file.attrs['file_list'])] + #else: + # values = [1] + values = [len(file.keys())] + + def node_visitor(name,obj): + if name.count('/') <=2: + nodes.append(obj.name) + parent.append(obj.parent.name) + #nodes.append(os.path.split(obj.name)[1]) + #parent.append(os.path.split(obj.parent.name)[1]) + + if isinstance(obj,h5py.Dataset):# or not 'file_list' in obj.attrs.keys(): + values.append(1) + else: + print(obj.name) + try: + values.append(len(obj.keys())) + except: + values.append(0) + + file.visititems(node_visitor) + + return nodes, parent, values \ No newline at end of file diff --git a/src/hdf5_vis.py b/src/hdf5_vis.py index f09cede..cebab9e 100644 --- a/src/hdf5_vis.py +++ b/src/hdf5_vis.py @@ -13,38 +13,9 @@ from plotly.subplots import make_subplots import plotly.graph_objects as go import plotly.express as px #import plotly.io as pio +from hdf5_ops import get_parent_child_relationships -def get_parent_child_relationships(file: h5py.File): - - nodes = ['/'] - parent = [''] - #values = [file.attrs['count']] - # TODO: maybe we should make this more general and not dependent on file_list attribute? - #if 'file_list' in file.attrs.keys(): - # values = [len(file.attrs['file_list'])] - #else: - # values = [1] - values = [len(file.keys())] - - def node_visitor(name,obj): - if name.count('/') <=2: - nodes.append(obj.name) - parent.append(obj.parent.name) - #nodes.append(os.path.split(obj.name)[1]) - #parent.append(os.path.split(obj.parent.name)[1]) - - if isinstance(obj,h5py.Dataset):# or not 'file_list' in obj.attrs.keys(): - values.append(1) - else: - print(obj.name) - try: - values.append(len(obj.keys())) - except: - values.append(0) - - file.visititems(node_visitor) - - return nodes, parent, values + def display_group_hierarchy_on_a_treemap(filename: str):