"""Visualisation and YAML-snapshot helpers for HDF5 files (src/hdf5_vis.py).

Two entry points are provided:

* ``display_group_hierarchy_on_a_treemap`` draws a file's group hierarchy as
  a Plotly treemap and saves it as HTML.
* ``take_yml_snapshot_of_hdf5_file`` writes the attributes of the groups and
  datasets near the top of the hierarchy to a YAML file.
"""

import os
import subprocess

import h5py
import matplotlib.pyplot as plt
import numpy as np
# The original aliased numpy as ``pd``; ``pd.Series`` below needs pandas.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import yaml
from plotly.subplots import make_subplots

import config_file
import g5505_utils as utils
import hdf5_lib

# Group attributes that are deliberately left out of the YAML snapshot.
_SKIPPED_GROUP_ATTRS = ('file_list', 'filtered_file_list')


def display_group_hierarchy_on_a_treemap(filename: str):
    """Show the group hierarchy of an HDF5 file as a treemap and save it.

    The figure is displayed with ``fig.show()`` and also written next to the
    input file, using the same base name with an ``.html`` extension.

    Args:
        filename (str): hdf5 file's filename.
    """
    with h5py.File(filename, 'r') as file:
        # The three sequences are passed straight to go.Treemap as labels,
        # parents and values; their exact types are defined by hdf5_lib.
        nodes, parents, values = hdf5_lib.get_parent_child_relationships(file)

        # Collect root attributes whose name contains 'metadata', keeping
        # only the part of the name after the first underscore (or the whole
        # name when there is no underscore).
        metadata_list = []
        for key in file.attrs.keys():
            if 'metadata' in key:
                short_key = key.split('_', 1)[-1]
                # f-string instead of '+' so non-str attribute values do not
                # raise TypeError.
                metadata_list.append(f"{short_key}:{file.attrs[key]}")

    # NOTE(review): the separator inside these literals was lost in the
    # extracted source; '<br>' is assumed because Plotly hover text uses HTML
    # line breaks -- confirm against the original file.
    metadata = '<br>'.join(['<br>'] + metadata_list)

    # Hover data: every node shows its own label, except the first node,
    # which shows the file-level metadata instead.
    customdata_series = pd.Series(nodes)
    customdata_series[0] = metadata

    fig = make_subplots(1, 1, specs=[[{"type": "domain"}]],)
    fig.add_trace(go.Treemap(
        labels=nodes,
        parents=parents,
        values=values,
        branchvalues='remainder',
        customdata=customdata_series,
        hovertemplate='%{label}<br>Count: %{value}<br>Path: %{customdata}',
        name='',
        root_color="lightgrey"
    ))
    fig.update_layout(width=800, height=600, margin=dict(t=50, l=25, r=25, b=25))
    fig.show()

    file_name, _ = os.path.splitext(filename)
    fig.write_html(file_name + ".html")


def make_dtype_yaml_compatible(value):
    """Convert a numpy scalar or array into plain Python data for YAML.

    numpy scalars become ``str``, ``bool``, ``int`` or ``float``; numpy
    arrays become (nested) lists of those. Values that are neither numpy
    scalars nor numpy arrays are returned unchanged. When no conversion is
    possible a message is printed and NaN is returned.

    Args:
        value: The attribute value to convert.

    Returns:
        A YAML-serialisable equivalent of ``value``, or ``np.nan``.
    """
    try:
        if isinstance(value, np.generic):
            # np.bytes_ / np.str_ replace np.string_ / np.unicode_, which
            # were removed in NumPy 2.0.
            if np.issubdtype(value.dtype, np.bytes_):
                # str() on a bytes scalar would yield "b'...'", so decode.
                value = value.decode('utf-8', errors='replace')
            elif np.issubdtype(value.dtype, np.str_):
                value = str(value)
            elif np.issubdtype(value.dtype, np.bool_):
                value = bool(value)
            elif np.issubdtype(value.dtype, np.integer):
                # Stay integral, as the array branch does; float() would lose
                # precision for large integers.
                value = int(value)
            elif np.issubdtype(value.dtype, np.number):
                value = float(value)
            else:
                print('Yaml-compatible data-type was not found. Value has been set to Nan.')
                value = np.nan
        elif isinstance(value, np.ndarray):
            if np.issubdtype(value.dtype, np.bytes_):
                value = value.astype(str).tolist()
            elif np.issubdtype(value.dtype, np.bool_):
                value = value.tolist()
            elif np.issubdtype(value.dtype, np.integer):
                value = value.astype(int).tolist()
            elif np.issubdtype(value.dtype, np.floating):
                value = value.astype(float).tolist()
            else:
                # Any other dtype falls back to its string representation.
                value = value.astype(str).tolist()
    except Exception:
        # Best effort: a value that cannot be converted must not abort the
        # whole snapshot.
        print('Yaml-compatible data-type was not found. Value has been set to Nan.')
        value = np.nan

    return value


def _describe_group(group):
    """Build the snapshot entry (name, attributes, empty datasets) of a group."""
    attr_dict = {}
    for key, value in group.attrs.items():
        if key not in _SKIPPED_GROUP_ATTRS:
            attr_dict[key] = {'rename_as': key,
                              'value': make_dtype_yaml_compatible(value)}
    return {"name": group.name.split('/')[-1],
            "attributes": attr_dict,
            "datasets": {}}


def print_metadata(name, obj, yaml_dict, max_depth: int = 3):
    """Record one HDF5 object in ``yaml_dict``.

    Written as a visitor for ``h5py.Group.visititems``. It always returns
    ``None``, because any other return value would stop the traversal.

    Groups are stored under their full path. Datasets are stored inside the
    ``"datasets"`` mapping of their parent group's entry.

    Args:
        name: Path of the object relative to the visited group (unused; the
            absolute ``obj.name`` is used instead).
        obj: The visited ``h5py.Group`` or ``h5py.Dataset``.
        yaml_dict (dict): Snapshot being built; modified in place.
        max_depth (int): Deepest level below the root that is recorded. The
            default of 3 matches the limit that used to be hard-coded.
    """
    path_parts = obj.name.split('/')
    # '/a/b/c' splits into ['', 'a', 'b', 'c'], i.e. depth 3.
    if len(path_parts) - 1 > max_depth:
        return

    if isinstance(obj, h5py.Group):
        yaml_dict[obj.name] = _describe_group(obj)
    elif isinstance(obj, h5py.Dataset):
        name_head = path_parts[-1]
        parent_name = '/'.join(path_parts[:-1]) or '/'
        # visititems never visits the group it is called on, so the parent of
        # a top-level dataset has no entry yet; create it on demand.
        if parent_name not in yaml_dict:
            yaml_dict[parent_name] = _describe_group(obj.parent)
        yaml_dict[parent_name]["datasets"][name_head] = {
            'rename_as': name_head,
            # Convert numpy values so yaml.dump emits plain scalars and lists.
            'attributes': {key: make_dtype_yaml_compatible(value)
                           for key, value in obj.attrs.items()},
        }


def take_yml_snapshot_of_hdf5_file(input_filename_path, max_depth: int = 3):
    """Write a YAML snapshot of an HDF5 file's groups and datasets.

    The snapshot is written next to the input file, using the same base name
    with a ``.yaml`` extension.

    Args:
        input_filename_path: Path of the HDF5 file to read.
        max_depth (int): Deepest level below the root that is recorded.

    Returns:
        str: Path of the YAML file that was written.
    """
    yaml_dict = {}

    output_filename_tail, _ = os.path.splitext(input_filename_path)
    output_path = output_filename_tail + ".yaml"

    with h5py.File(input_filename_path, 'r') as f:
        f.visititems(lambda name, obj: print_metadata(name, obj, yaml_dict, max_depth))

    with open(output_path, "w") as yaml_file:
        yaml.dump(yaml_dict, yaml_file, sort_keys=False)

    return output_path