Renamed to_yaml() to serialize_metadata() and introduced the input parameter output_format, which accepts 'yaml' or 'json'.
@@ -178,7 +178,7 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
     #if numerical_variables:
     dataset = {}
     dataset['name'] = 'data_table'#_numerical_variables'
-    dataset['data'] = utils.dataframe_to_np_structured_array(df) #df_numerical_attrs.to_numpy()
+    dataset['data'] = utils.convert_dataframe_to_np_structured_array(df) #df_numerical_attrs.to_numpy()
     dataset['shape'] = dataset['data'].shape
     dataset['dtype'] = type(dataset['data'])
     #dataset['data_units'] = file_obj['wave']['data_units']
@@ -191,7 +191,7 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
     for column_name in df.columns:
         column_attr_dict = description_dict['table_header'].get(column_name,
                                                                 {'note':'there was no description available. Review instrument files.'})
-        dataset['attributes'].update({column_name: utils.parse_attribute(column_attr_dict)})
+        dataset['attributes'].update({column_name: utils.convert_attrdict_to_np_structured_array(column_attr_dict)})
 
     #try:
     #     dataset['attributes'] = description_dict['table_header'].copy()
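The renamed utils helpers themselves are not part of this diff. For context, a name like convert_dataframe_to_np_structured_array suggests turning a pandas DataFrame into a NumPy structured array before it is stored as an HDF5 dataset; below is a minimal sketch of that idea, with an illustrative helper name and none of the repository's actual dtype handling:

import numpy as np
import pandas as pd

def dataframe_to_structured_array_sketch(df: pd.DataFrame) -> np.ndarray:
    # Illustrative only: to_records() builds a record array keyed by the
    # column names; index=False keeps just the data columns.
    return df.to_records(index=False)

df = pd.DataFrame({'t_base': [0.0, 1.0], 'value': [1.5, 2.5]})
arr = dataframe_to_structured_array_sketch(df)
print(arr.dtype.names)  # ('t_base', 'value')

Structured arrays carry per-column names and dtypes, which is what lets the per-column attributes in the hunk above line up with the stored data.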
@@ -10,7 +10,12 @@ import numpy as np
 import utils.g5505_utils as utils
 import logging
 import datetime
 
 import os
 import h5py
 
 import yaml
+import json
 
 class HDF5DataOpsManager():
     def __init__(self, file_path, mode = 'r+') -> None:
@@ -254,7 +259,7 @@ def construct_attributes_dict(attrs_obj):
 
     return attr_dict
 
-def print_metadata(name, obj, folder_depth, yaml_dict):
+def __print_metadata__(name, obj, folder_depth, yaml_dict):
 
     # TODO: should we enable deeper folders ?
     if len(obj.name.split('/')) <= folder_depth:
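For context on this helper's signature: h5py's visititems calls the supplied function as func(name, obj) for every group and dataset below the calling group, so extra state such as folder_depth and yaml_dict has to be bound with a lambda, as the hunk below shows. A minimal, self-contained sketch (the file name is hypothetical):

import h5py

with h5py.File('visit_demo.h5', 'w') as f:  # hypothetical scratch file
    f.create_group('instruments/run1')
    visited = {}
    # Each callback invocation receives the path relative to f and the object.
    f.visititems(lambda name, obj: visited.update({name: type(obj).__name__}))
    print(visited)  # {'instruments': 'Group', 'instruments/run1': 'Group'}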
@@ -298,29 +303,60 @@ def print_metadata(name, obj, folder_depth, yaml_dict):
     #elif len(obj.name.split('/')) == 3:
     #    print(yaml.dump())
 
-def to_yaml(input_filename_path,folder_depth: int = 4):
+def serialize_metadata(input_filename_path, folder_depth: int = 4, output_format: str = 'yaml') -> str:
+    """
+    Serialize metadata from an HDF5 file into YAML or JSON format.
+
+    Parameters
+    ----------
+    input_filename_path : str
+        The path to the input HDF5 file.
+    folder_depth : int, optional
+        The folder depth to control how much of the HDF5 file hierarchy is traversed (default is 4).
+    output_format : str, optional
+        The format to serialize the output, either 'yaml' or 'json' (default is 'yaml').
+
+    Returns
+    -------
+    str
+        The output file path where the serialized metadata is stored (either .yaml or .json).
+
+    """
 
+    # Choose the appropriate output format (YAML or JSON)
+    if output_format not in ['yaml', 'json']:
+        raise ValueError("Unsupported format. Please choose either 'yaml' or 'json'.")
+
+    # Initialize dictionary to store YAML/JSON data
     yaml_dict = {}
 
+    # Split input file path to get the output file's base name
     output_filename_tail, ext = os.path.splitext(input_filename_path)
 
-    with h5py.File(input_filename_path,'r') as f:
+    # Open the HDF5 file and extract metadata
+    with h5py.File(input_filename_path, 'r') as f:
+        # Construct attributes dictionary and top-level structure
         attrs_dict = construct_attributes_dict(f.attrs)
-        yaml_dict[f.name] = {"name": f.name, "attributes": attrs_dict, "datasets":{}}
-        f.visititems(lambda name, obj: print_metadata(name,obj,folder_depth,yaml_dict))
-
-    #with open(output_filename_tail+".json","w") as yaml_file:
-    #    json_obj = json.dumps(yaml_dict,indent=4,sort_keys=False,)
-    #    yaml_file.write(json_obj)
-
-    with open(output_filename_tail+".yaml","w") as yaml_file:
-        yaml_output = yaml.dump(yaml_dict,sort_keys=False)
-        #for key in yaml_dict:
-        #    yaml_output = yaml.dump(yaml_dict[key],sort_keys=False)
-        yaml_file.write(yaml_output )
-
-    return output_filename_tail+".yaml"
+        yaml_dict[f.name] = {
+            "name": f.name,
+            "attributes": attrs_dict,
+            "datasets": {}
+        }
+        # Traverse HDF5 file hierarchy and add datasets
+        f.visititems(lambda name, obj: __print_metadata__(name, obj, folder_depth, yaml_dict))
+
+    # Serialize and write the data
+    output_file_path = output_filename_tail + '.' + output_format
+    with open(output_file_path, 'w') as output_file:
+        if output_format == 'json':
+            json_output = json.dumps(yaml_dict, indent=4, sort_keys=False)
+            output_file.write(json_output)
+        elif output_format == 'yaml':
+            yaml_output = yaml.dump(yaml_dict, sort_keys=False)
+            output_file.write(yaml_output)
+
+    return output_file_path
 
 
 def get_groups_at_a_level(file: h5py.File, level: str):
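A minimal usage sketch of the renamed API; the import path is hypothetical, since the module's file name is not shown in this commit:

from hdf5_ops import serialize_metadata  # hypothetical module name

# Writes experiment.yaml next to the input file and returns its path.
yaml_path = serialize_metadata('experiment.h5')

# Same metadata serialized as JSON instead; writes experiment.json.
json_path = serialize_metadata('experiment.h5', output_format='json')

# Any other output_format raises ValueError("Unsupported format. ...").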