diff --git a/src/hdf5_vis.py b/src/hdf5_vis.py
new file mode 100644
index 0000000..636f452
--- /dev/null
+++ b/src/hdf5_vis.py
@@ -0,0 +1,152 @@
+import h5py
+import yaml
+import os
+import numpy as np
+import numpy as pd
+
+import config_file
+import hdf5_lib
+import g5505_utils as utils
+
+import matplotlib.pyplot as plt
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+
+import subprocess
+
+#output = subprocess.run("git status",capture_output=True)
+#output.stdout()
+
+def display_group_hierarchy_on_a_treemap(filename: str):
+    """Show the group hierarchy of an HDF5 file as an interactive treemap.
+
+    The node/parent/value lists come from
+    ``hdf5_lib.get_parent_child_relationships``. Root-level attributes whose
+    name contains ``'metadata'`` are collected into the hover text of the
+    first node. The figure is displayed and also written next to the input
+    file as ``<filename without extension>.html``.
+
+    Parameters
+    ----------
+    filename (str): hdf5 file's filename
+    """
+    # Plotly hover text is HTML-like; this tag is its line separator.
+    line_break = '<br>'
+
+    with h5py.File(filename, 'r') as file:
+        nodes, parents, values = hdf5_lib.get_parent_child_relationships(file)
+
+        metadata_list = []
+        for key, value in file.attrs.items():
+            if 'metadata' in key:
+                # Keep only what follows the first underscore of the attribute
+                # name. The f-string formats any attribute type, whereas
+                # str + value raised TypeError for non-str values.
+                metadata_list.append(f"{key[key.find('_') + 1:]}:{value}")
+
+    metadata = line_break.join([line_break] + metadata_list)
+
+    # A plain list is used on purpose: this module binds `pd` to numpy,
+    # which has no Series. Entry 0 carries the file-level metadata.
+    customdata = list(nodes)
+    if customdata:
+        customdata[0] = metadata
+
+    fig = make_subplots(1, 1, specs=[[{"type": "domain"}]])
+    fig.add_trace(go.Treemap(
+        labels=nodes,
+        parents=parents,
+        values=values,
+        branchvalues='remainder',
+        customdata=customdata,
+        hovertemplate='%{label}<br>Count: %{value}<br>Path: %{customdata}',
+        name='',
+        root_color="lightgrey"
+    ))
+    fig.update_layout(width=800, height=600, margin=dict(t=50, l=25, r=25, b=25))
+    fig.show()
+
+    file_name, _ = os.path.splitext(filename)
+    fig.write_html(file_name + ".html")
+
+#
+
+def make_dtype_yaml_compatible(value):
+    """Convert a NumPy scalar or array into plain Python data for YAML.
+
+    Parameters
+    ----------
+    value : any
+        Typically an HDF5 attribute value. Values that are neither a NumPy
+        scalar nor a NumPy array are returned unchanged.
+
+    Returns
+    -------
+    NumPy scalars become ``str``, ``bool``, ``int`` or ``float``; NumPy
+    arrays become (nested) lists of those types, with any other dtype
+    rendered as strings. If no conversion applies, or the conversion
+    fails, a message is printed and ``np.nan`` is returned.
+    """
+    try:
+        if isinstance(value, np.generic):
+            dtype = value.dtype
+            # np.bytes_ / np.str_ are the names that exist in both NumPy 1.x
+            # and 2.x (np.string_ / np.unicode_ do not exist in 2.x).
+            if np.issubdtype(dtype, np.bytes_):
+                # Decode instead of str(), which would yield "b'...'".
+                value = value.decode('utf-8', errors='replace')
+            elif np.issubdtype(dtype, np.str_):
+                value = str(value)
+            elif np.issubdtype(dtype, np.bool_):
+                value = bool(value)
+            elif np.issubdtype(dtype, np.integer):
+                # Keep integers integral, matching the array branch below.
+                value = int(value)
+            elif np.issubdtype(dtype, np.number):
+                value = float(value)
+            else:
+                print('Yaml-compatible data-type was not found. Value has been set to Nan.')
+                value = np.nan
+        elif isinstance(value, np.ndarray):
+            dtype = value.dtype
+            if np.issubdtype(dtype, np.bytes_):
+                value = value.astype(str).tolist()
+            elif np.issubdtype(dtype, np.bool_) or np.issubdtype(dtype, np.integer):
+                value = value.tolist()
+            elif np.issubdtype(dtype, np.floating):
+                value = value.astype(float).tolist()
+            else:
+                # Fallback for every remaining dtype: store a string rendering.
+                value = value.astype(str).tolist()
+    except Exception:
+        # Best effort: a value that cannot be converted must not abort the
+        # caller, but KeyboardInterrupt/SystemExit are no longer swallowed.
+        print('Yaml-compatible data-type was not found. Value has been set to Nan.')
+        value = np.nan
+
+    return value
+
+def print_metadata(name, obj, yaml_dict):
+    """Record the metadata of one HDF5 object in ``yaml_dict``.
+
+    The signature matches the callback expected by ``visititems``; the
+    function always returns ``None`` so the traversal is never stopped early.
+
+    Parameters
+    ----------
+    name : str
+        Name passed by the visitor; only printed for groups.
+    obj : h5py.Group or h5py.Dataset
+        Visited object. Objects whose ``obj.name`` splits into more than
+        four '/'-separated parts are ignored.
+    yaml_dict : dict
+        Updated in place. A group adds an entry keyed by ``obj.name`` with
+        the keys ``name``, ``attributes`` and ``datasets``; a dataset is
+        added under the ``datasets`` of its parent's entry.
+    """
+    name_to_list = obj.name.split('/')
+
+    # TODO: should we enable deeper folders ?
+    if len(name_to_list) > 4:
+        return
+
+    name_head = name_to_list[-1]
+
+    if isinstance(obj, h5py.Group):
+        attr_dict = {}
+        for key, value in obj.attrs.items():
+            if key not in ('file_list', 'filtered_file_list'):
+                attr_dict[key] = {'rename_as': key,
+                                  'value': make_dtype_yaml_compatible(value)}
+
+        print(name)
+        yaml_dict[obj.name] = {"name": name_head, "attributes": attr_dict, "datasets": {}}
+
+    elif isinstance(obj, h5py.Dataset):
+        parent_name = '/'.join(name_to_list[:-1])
+        # The parent may have no entry yet (e.g. a dataset directly under the
+        # root, whose parent key is ''); create one instead of raising KeyError.
+        parent_entry = yaml_dict.setdefault(
+            parent_name,
+            {"name": name_to_list[-2] if len(name_to_list) > 1 else '',
+             "attributes": {},
+             "datasets": {}})
+        # Convert dataset attributes the same way group attributes are
+        # converted, so the YAML dump contains no raw NumPy objects.
+        dataset_attrs = {key: make_dtype_yaml_compatible(value)
+                         for key, value in obj.attrs.items()}
+        parent_entry["datasets"][name_head] = {'rename_as': name_head,
+                                               'attributes': dataset_attrs}
+
+
+
+
+def take_yml_snapshot_of_hdf5_file(input_filename_path):
+    """Write a YAML snapshot of an HDF5 file's metadata and return its path.
+
+    Every object in the file is passed to ``print_metadata``, which fills a
+    dictionary; that dictionary is then dumped, in insertion order, to a
+    file with the same path as the input but with a ``.yaml`` extension.
+
+    Parameters
+    ----------
+    input_filename_path : str
+        Path of the HDF5 file to read.
+
+    Returns
+    -------
+    str
+        Path of the YAML file that was written.
+    """
+    path_without_ext = os.path.splitext(input_filename_path)[0]
+    snapshot_path = path_without_ext+".yaml"
+    snapshot = {}
+
+    def collect(name, obj):
+        # Hand the callback result back to visititems unchanged.
+        return print_metadata(name, obj, snapshot)
+
+    with h5py.File(input_filename_path,'r') as hdf5_file:
+        hdf5_file.visititems(collect)
+
+    with open(snapshot_path,"w") as snapshot_file:
+        yaml.dump(snapshot, snapshot_file, sort_keys=False)
+
+    return snapshot_path
+
+