Implemented hdf5_vis.py, an HDF5 visualization library that produces treemap and YAML representations of HDF5 files.
This commit is contained in:
@@ -1,22 +1,19 @@
|
||||
import sys
|
||||
import os
|
||||
root_dir = os.path.abspath(os.curdir)
|
||||
sys.path.append(root_dir)
|
||||
|
||||
import h5py
|
||||
import yaml
|
||||
import os
|
||||
|
||||
import src.hdf5_lib as hdf5_lib
|
||||
|
||||
import numpy as np
|
||||
import numpy as pd
|
||||
import pandas as pd
|
||||
|
||||
import config_file
|
||||
import hdf5_lib
|
||||
import g5505_utils as utils
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
from plotly.subplots import make_subplots
|
||||
|
||||
import subprocess
|
||||
|
||||
#output = subprocess.run("git status",capture_output=True)
|
||||
#output.stdout()
|
||||
import plotly.graph_objects as go
|
||||
import plotly.express as px
|
||||
|
||||
def display_group_hierarchy_on_a_treemap(filename: str):
|
||||
|
||||
@@ -63,9 +60,12 @@ def display_group_hierarchy_on_a_treemap(filename: str):
|
||||
def make_dtype_yaml_compatible(value):
|
||||
try:
|
||||
if isinstance(value, np.generic):
|
||||
if np.issubdtype(value.dtype, np.string_) or np.issubdtype(value.dtype, np.unicode_):
|
||||
#if np.issubdtype(value.dtype, np.string_):
|
||||
#value = value.astype(str)
|
||||
value = str(value)
|
||||
if np.issubdtype(value.dtype, np.bytes_):
|
||||
value = value.decode('utf-8')
|
||||
elif np.issubdtype(value.dtype, np.unicode_):
|
||||
value = str(value)
|
||||
elif np.issubdtype(value.dtype, np.number):
|
||||
value = float(value)
|
||||
else:
|
||||
@@ -91,30 +91,49 @@ def make_dtype_yaml_compatible(value):
|
||||
|
||||
return value
|
||||
|
||||
def print_metadata(name, obj, yaml_dict):
|
||||
def construct_attributes_dict(attrs_obj):
    """Build a YAML-friendly dictionary from a mapping of attributes.

    Parameters
    ----------
    attrs_obj : mapping
        Any object exposing ``items()`` that yields ``(key, value)`` pairs;
        the callers in this module pass the ``attrs`` of an h5py file or
        group.

    Returns
    -------
    dict
        One entry per retained attribute, shaped as
        ``{key: {"rename_as": key, "value": converted_value}}``, where
        ``converted_value`` is the result of ``make_dtype_yaml_compatible``.
        The keys 'file_list' and 'filtered_file_list' are left out.
    """
    # Attributes that are never copied into the snapshot.
    excluded_keys = ('file_list', 'filtered_file_list')

    return {
        key: {"rename_as": key, "value": make_dtype_yaml_compatible(value)}
        for key, value in attrs_obj.items()
        if key not in excluded_keys
    }
|
||||
|
||||
def print_metadata(name, obj, folder_depth, yaml_dict):
|
||||
|
||||
# TODO: should we enable deeper folders ?
|
||||
if len(obj.name.split('/')) <= 4:
|
||||
if len(obj.name.split('/')) <= folder_depth:
|
||||
name_to_list = obj.name.split('/')
|
||||
name_head = name_to_list[-1]
|
||||
|
||||
if isinstance(obj,h5py.Group):
|
||||
#print('name:', obj.name)
|
||||
#print('attributes:', dict(obj.attrs))
|
||||
attr_dict = {}
|
||||
#attr_dict = {}
|
||||
group_dict = {}
|
||||
for key, value in obj.attrs.items():
|
||||
|
||||
attr_dict = construct_attributes_dict(obj.attrs)
|
||||
|
||||
#for key, value in obj.attrs.items():
|
||||
#print (key, value.dtype)
|
||||
if key == 'Layout':
|
||||
print(value)
|
||||
# if key == 'Layout':
|
||||
# print(value)
|
||||
|
||||
if not key in ['file_list','filtered_file_list']:
|
||||
# if not key in ['file_list','filtered_file_list']:
|
||||
|
||||
value = make_dtype_yaml_compatible(value)
|
||||
# value = make_dtype_yaml_compatible(value)
|
||||
|
||||
attr_dict[key] = {'rename_as' : key,
|
||||
'value' : value
|
||||
}
|
||||
# attr_dict[key] = {'rename_as' : key,
|
||||
# 'value' : value
|
||||
# }
|
||||
|
||||
#group_dict[obj.name] = {'name': obj.name, 'attributes': attr_dict}
|
||||
group_dict = {"name": name_head, "attributes": attr_dict, "datasets":{}}
|
||||
@@ -126,7 +145,7 @@ def print_metadata(name, obj, yaml_dict):
|
||||
yaml_dict[obj.name] = group_dict
|
||||
elif isinstance(obj, h5py.Dataset):
|
||||
parent_name = '/'.join(name_to_list[:-1])
|
||||
yaml_dict[parent_name]["datasets"][name_head] = {'rename_as': name_head ,'attributes':dict(obj.attrs)}
|
||||
yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes":dict(obj.attrs)}
|
||||
#print(yaml.dump(group_dict,sort_keys=False))
|
||||
|
||||
#elif len(obj.name.split('/')) == 3:
|
||||
@@ -135,18 +154,30 @@ def print_metadata(name, obj, yaml_dict):
|
||||
|
||||
|
||||
|
||||
def take_yml_snapshot_of_hdf5_file(input_filename_path):
|
||||
def take_yml_snapshot_of_hdf5_file(input_filename_path,folder_depth: int = 4):
|
||||
|
||||
yaml_dict = {}
|
||||
|
||||
output_filename_tail, ext = os.path.splitext(input_filename_path)
|
||||
|
||||
with h5py.File(input_filename_path,'r') as f:
|
||||
f.visititems(lambda name, obj: print_metadata(name,obj,yaml_dict))
|
||||
|
||||
attrs_dict = construct_attributes_dict(f.attrs)
|
||||
yaml_dict[f.name] = {"name": f.name, "attributes": attrs_dict, "datasets":{}}
|
||||
f.visititems(lambda name, obj: print_metadata(name,obj,folder_depth,yaml_dict))
|
||||
|
||||
#with open(output_filename_tail+".json","w") as yaml_file:
|
||||
# json_obj = json.dumps(yaml_dict,indent=4,sort_keys=False,)
|
||||
# yaml_file.write(json_obj)
|
||||
|
||||
with open(output_filename_tail+".yaml","w") as yaml_file:
|
||||
yaml.dump(yaml_dict,yaml_file,sort_keys=False)
|
||||
yaml_output = yaml.dump(yaml_dict,sort_keys=False)
|
||||
#for key in yaml_dict:
|
||||
# yaml_output = yaml.dump(yaml_dict[key],sort_keys=False)
|
||||
yaml_file.write(yaml_output )
|
||||
|
||||
return output_filename_tail+".yaml"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user