Commented out the metadata summary of group members for a given group. This simplifies the YAML or JSON representation of the metadata.
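As a quick illustration of the intended effect, here is a minimal sketch of the group entry __print_metadata__ now emits, next to the member summary this commit disables. The group path '/instrument' and the attribute values are made-up examples; dumping with PyYAML follows the yaml.dump call already referenced in the code.

import yaml

# Group entry as now emitted: only name and attributes.
group_dict = {
    "name": "instrument",  # hypothetical group name
    "attributes": {"vendor": "ACME", "model": "X-100"},  # hypothetical attributes
}

# The commented-out branch would additionally have attached a summary like
#   group_dict["content_summary"] = {"group_count": 2, "dataset_count": 5, ...}
# which is exactly the verbosity this commit removes from the YAML/JSON view.

print(yaml.dump({"/instrument": group_dict}, sort_keys=False))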
@@ -459,50 +459,51 @@ def get_parent_child_relationships(file: h5py.File):
 def __print_metadata__(name, obj, folder_depth, yaml_dict):
 
-    # TODO: should we enable deeper folders ?
-    if len(obj.name.split('/')) <= folder_depth:
+    """
+    Extracts metadata from HDF5 groups and datasets and organizes them into a dictionary with compact representation.
+
+    Parameters:
+    -----------
+    name (str): Name of the HDF5 object being inspected.
+    obj (h5py.Group or h5py.Dataset): The HDF5 object (Group or Dataset).
+    folder_depth (int): Maximum depth of folders to explore.
+    yaml_dict (dict): Dictionary to populate with metadata.
+    """
+    # Process only objects within the specified folder depth
+    if len(obj.name.split('/')) <= folder_depth: # and ".h5" not in obj.name:
         name_to_list = obj.name.split('/')
-        name_head = name_to_list[-1]
+        name_head = name_to_list[-1] if not name_to_list[-1]=='' else obj.name
 
-        if isinstance(obj,h5py.Group):
-            #print('name:', obj.name)
-            #print('attributes:', dict(obj.attrs))
-            #attr_dict = {}
-            group_dict = {}
-
-            # Convert attribute dict to a YAML/JSON serializable dict
-            attr_dict = {key: utils.to_serializable_dtype(val) for key, val in obj.attrs.items()}
-
-            #for key, value in obj.attrs.items():
-                #print (key, value.dtype)
-                # if key == 'Layout':
-                #    print(value)
-
-                # if not key in ['file_list','filtered_file_list']:
-
-                #     value = make_dtype_yaml_compatible(value)
-
-                #     attr_dict[key] = {'rename_as' : key,
-                #                       'value' : value
-                #                       }
-
-            #group_dict[obj.name] = {'name': obj.name, 'attributes': attr_dict}
-            group_dict = {"name": name_head, "attributes": attr_dict, "datasets":{}}
-            #group_dict[obj.name]["name"] = obj.name
-            #group_dict[obj.name]["attributes"] = attr_dict
-            #group_dict[obj.name]["datasets"] = {}
-            #print(name)
+        if isinstance(obj, h5py.Group): # Handle groups
+            # Convert attributes to a YAML/JSON serializable format
+            attr_dict = {key: utils.to_serializable_dtype(val) for key, val in obj.attrs.items()}
+
+            # Initialize the group dictionary
+            group_dict = {"name": name_head, "attributes": attr_dict}
+
+            # Handle group members compactly
+            #subgroups = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Group)]
+            #datasets = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Dataset)]
+
+            # Summarize groups and datasets
+            #group_dict["content_summary"] = {
+            #    "group_count": len(subgroups),
+            #    "group_preview": subgroups[:3] + (["..."] if len(subgroups) > 3 else []),
+            #    "dataset_count": len(datasets),
+            #    "dataset_preview": datasets[:3] + (["..."] if len(datasets) > 3 else [])
+            #}
 
             yaml_dict[obj.name] = group_dict
-        elif isinstance(obj, h5py.Dataset):
-            # Convert attribute dict to a YAML/JSON serializable dict
-            attr_dict = {key: utils.to_serializable_dtype(val) for key, val in obj.attrs.items()}
-            parent_name = '/'.join(name_to_list[:-1])
-            yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes": attr_dict}
-            #print(yaml.dump(group_dict,sort_keys=False))
 
-        #elif len(obj.name.split('/')) == 3:
-        #    print(yaml.dump())
+        elif isinstance(obj, h5py.Dataset): # Handle datasets
+            # Convert attributes to a YAML/JSON serializable format
+            attr_dict = {key: utils.to_serializable_dtype(val) for key, val in obj.attrs.items()}
+
+            dataset_dict = {"name": name_head, "attributes": attr_dict}
+
+            yaml_dict[obj.name] = dataset_dict
 
 
 def serialize_metadata(input_filename_path, folder_depth: int = 4, output_format: str = 'yaml') -> str:
     """
@@ -537,12 +538,13 @@ def serialize_metadata(input_filename_path, folder_depth: int = 4, output_format
     # Open the HDF5 file and extract metadata
     with h5py.File(input_filename_path, 'r') as f:
         # Convert attribute dict to a YAML/JSON serializable dict
-        attrs_dict = {key: utils.to_serializable_dtype(val) for key, val in f.attrs.items()}
-        yaml_dict[f.name] = {
-            "name": f.name,
-            "attributes": attrs_dict,
-            "datasets": {}
-        }
+        #attrs_dict = {key: utils.to_serializable_dtype(val) for key, val in f.attrs.items()}
+        #yaml_dict[f.name] = {
+        #    "name": f.name,
+        #    "attributes": attrs_dict,
+        #    "datasets": {}
+        #}
+        __print_metadata__(f.name, f, folder_depth, yaml_dict)
         # Traverse HDF5 file hierarchy and add datasets
         f.visititems(lambda name, obj: __print_metadata__(name, obj, folder_depth, yaml_dict))
 
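For reference, a minimal end-to-end sketch of the new traversal path, assuming __print_metadata__ is in scope (i.e. run from the same module, so its internal utils.to_serializable_dtype call resolves); the file name 'example.h5' and its contents are made up:

import h5py
import numpy as np

# Create a small HDF5 file to walk (illustrative structure).
with h5py.File('example.h5', 'w') as f:
    f.attrs['project'] = 'demo'
    grp = f.create_group('instrument')
    grp.attrs['vendor'] = 'ACME'
    grp.create_dataset('readings', data=np.arange(5))

yaml_dict = {}
with h5py.File('example.h5', 'r') as f:
    # Root group first (the call this commit adds), then the rest of the tree.
    __print_metadata__(f.name, f, 4, yaml_dict)
    f.visititems(lambda name, obj: __print_metadata__(name, obj, 4, yaml_dict))

# Expect one entry per path: '/', '/instrument', '/instrument/readings'.
print(sorted(yaml_dict))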