Implemented hdf5_vis.py to gather functions that display or represent properties of an HDF5 file in a human-readable format, such as YAML files or HTML files that enable interactive visualizations in the browser.
This commit is contained in:
152
src/hdf5_vis.py
Normal file
152
src/hdf5_vis.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
import h5py
|
||||||
|
import yaml
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import numpy as pd
|
||||||
|
|
||||||
|
import config_file
|
||||||
|
import hdf5_lib
|
||||||
|
import g5505_utils as utils
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import plotly.express as px
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
from plotly.subplots import make_subplots
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
#output = subprocess.run("git status",capture_output=True)
|
||||||
|
#output.stdout()
|
||||||
|
|
||||||
|
def display_group_hierarchy_on_a_treemap(filename: str):
    """Show the group hierarchy of an hdf5 file as an interactive treemap.

    The treemap nodes, their parents and their values are obtained from
    hdf5_lib.get_parent_child_relationships. Attributes of the file whose
    name contains 'metadata' are gathered into an HTML snippet that replaces
    the hover data of the first node. The figure is shown with fig.show()
    and also written to an html file that has the same path as the input
    file but the '.html' extension.

    filename (str): hdf5 file's filename
    """

    with h5py.File(filename, 'r') as file:
        nodes, parents, values = hdf5_lib.get_parent_child_relationships(file)

        metadata_list = []
        for key, attr_value in file.attrs.items():
            if 'metadata' in key:
                # Attribute values are not guaranteed to be str (they can be
                # bytes or numeric), so convert explicitly before joining;
                # plain concatenation would raise TypeError.
                if isinstance(attr_value, bytes):
                    attr_value = attr_value.decode('utf-8', errors='replace')
                # Drop everything up to and including the first underscore;
                # a key without underscore is kept whole (find() returns -1).
                metadata_list.append(key[key.find('_') + 1:] + ':' + str(attr_value))
        metadata = '<br>'.join(['<br>'] + metadata_list)

        # Per-node hover data: a copy of the node names whose first entry is
        # replaced by the file-level metadata. A plain list is enough for
        # plotly and avoids depending on the module-level `pd` alias.
        customdata = list(nodes)
        if customdata:
            customdata[0] = metadata

        fig = make_subplots(1, 1, specs=[[{"type": "domain"}]])
        fig.add_trace(go.Treemap(
            labels=nodes,
            parents=parents,
            values=values,
            branchvalues='remainder',
            customdata=customdata,
            hovertemplate='<b>%{label} </b> <br> Count: %{value} <br> Path: %{customdata}',
            name='',
            root_color="lightgrey",
        ))
        fig.update_layout(width=800, height=600, margin=dict(t=50, l=25, r=25, b=25))
        fig.show()

        # Save the figure next to the input file, swapping the extension.
        file_name, _ = os.path.splitext(filename)
        fig.write_html(file_name + ".html")
|
||||||
|
|
||||||
|
#
|
||||||
|
|
||||||
|
def make_dtype_yaml_compatible(value):
    """Convert a NumPy scalar or array into a plain Python object for YAML.

    NumPy scalars:
        bytes   -> str (decoded as UTF-8, undecodable bytes replaced)
        str     -> str
        bool    -> bool
        integer -> int
        other numbers -> float
        anything else -> NaN (a message is printed)
    NumPy arrays:
        bytes   -> list of str
        integer -> list of int
        float   -> list of float
        other   -> list of str
    Any other input is returned unchanged.

    If a conversion raises, a message is printed and NaN is returned.

    value: object to convert, typically an hdf5 attribute value.

    Returns the converted value.
    """
    try:
        if isinstance(value, np.generic):
            # np.bytes_ / np.str_ are the names that exist in both NumPy 1.x
            # and 2.x (np.string_ / np.unicode_ were removed in 2.0).
            if isinstance(value, np.bytes_):
                # str() on bytes would give "b'...'", so decode instead.
                value = value.decode('utf-8', errors='replace')
            elif isinstance(value, np.str_):
                value = str(value)
            elif isinstance(value, np.bool_):
                value = bool(value)
            elif isinstance(value, np.integer):
                # Keep integers as int, consistent with the array branch.
                value = int(value)
            elif isinstance(value, np.number):
                value = float(value)
            else:
                print('Yaml-compatible data-type was not found. Value has been set to Nan.')
                value = np.nan
        elif isinstance(value, np.ndarray):
            if np.issubdtype(value.dtype, np.bytes_):
                value = value.astype(str).tolist()
            elif np.issubdtype(value.dtype, np.integer):
                value = value.astype(int).tolist()
            elif np.issubdtype(value.dtype, np.floating):
                value = value.astype(float).tolist()
            elif np.issubdtype(value.dtype, np.generic):
                # Fallback for remaining dtypes: store their string form.
                value = value.astype(str).tolist()
    except Exception:
        # Best effort: a value that cannot be converted must not abort the
        # caller, but KeyboardInterrupt/SystemExit are no longer swallowed.
        print('Yaml-compatible data-type was not found. Value has been set to Nan.')
        value = np.nan

    return value
|
||||||
|
|
||||||
|
def print_metadata(name, obj, yaml_dict):
    """Record the metadata of one hdf5 object in yaml_dict.

    Used as the callback of h5py's visititems (through a lambda) in
    take_yml_snapshot_of_hdf5_file. Objects whose path has more than four
    '/'-separated components (counting the empty one before the leading
    slash) are ignored.

    Groups are stored as
        yaml_dict[obj.name] = {"name": ..., "attributes": {...}, "datasets": {}}
    where every attribute except 'file_list' and 'filtered_file_list' is
    stored as {'rename_as': key, 'value': yaml-compatible value}.
    Datasets are stored under the "datasets" entry of their parent group as
        {'rename_as': dataset name, 'attributes': {key: yaml-compatible value}}

    name: path of the object as given by visititems (only printed).
    obj: h5py.Group or h5py.Dataset being visited.
    yaml_dict (dict): dictionary that is updated in place.

    Returns None, so visititems keeps iterating.
    """
    name_to_list = obj.name.split('/')

    # TODO: should we enable deeper folders ?
    if len(name_to_list) > 4:
        return

    name_head = name_to_list[-1]

    if isinstance(obj, h5py.Group):
        attr_dict = {}
        for key, value in obj.attrs.items():
            # NOTE(review): looks like a debugging aid - confirm it is still needed.
            if key == 'Layout':
                print(value)

            if key not in ['file_list', 'filtered_file_list']:
                attr_dict[key] = {'rename_as': key,
                                  'value': make_dtype_yaml_compatible(value)}

        print(name)

        yaml_dict[obj.name] = {"name": name_head, "attributes": attr_dict, "datasets": {}}

    elif isinstance(obj, h5py.Dataset):
        # A dataset directly under the file root has an empty parent path.
        # No entry exists for it yet, so create one under '/' on demand
        # instead of failing with KeyError.
        parent_name = '/'.join(name_to_list[:-1]) or '/'
        parent_entry = yaml_dict.setdefault(
            parent_name,
            {"name": name_to_list[-2] or '/', "attributes": {}, "datasets": {}},
        )
        # Convert attribute values like it is done for groups, so that
        # NumPy objects do not reach yaml.dump.
        parent_entry["datasets"][name_head] = {
            'rename_as': name_head,
            'attributes': {key: make_dtype_yaml_compatible(value)
                           for key, value in obj.attrs.items()},
        }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def take_yml_snapshot_of_hdf5_file(input_filename_path):
    """Write a yaml summary of an hdf5 file next to it and return its path.

    The file is opened read-only and every object reported by visititems
    is handed to print_metadata, which fills a dictionary. That dictionary
    is dumped (keys in insertion order) to a file with the same path as the
    input but the '.yaml' extension.

    input_filename_path: path to the hdf5 file.

    Returns the path of the yaml file that was written.
    """
    base_path, _ = os.path.splitext(input_filename_path)
    snapshot_path = base_path+".yaml"

    snapshot = {}

    def _collect(name, obj):
        # Forward the callback result unchanged: visititems stops as soon
        # as the callback returns something other than None.
        return print_metadata(name, obj, snapshot)

    with h5py.File(input_filename_path,'r') as hdf5_file:
        hdf5_file.visititems(_collect)

    with open(snapshot_path,"w") as out_stream:
        yaml.dump(snapshot, out_stream, sort_keys=False)

    return snapshot_path
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user