Renamed to_yaml() to serialize_metadata() and introduced the input parameter output_format, which accepts either 'yaml' or 'json'.
This commit is contained in:
@ -178,7 +178,7 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
|
|||||||
#if numerical_variables:
|
#if numerical_variables:
|
||||||
dataset = {}
|
dataset = {}
|
||||||
dataset['name'] = 'data_table'#_numerical_variables'
|
dataset['name'] = 'data_table'#_numerical_variables'
|
||||||
dataset['data'] = utils.dataframe_to_np_structured_array(df) #df_numerical_attrs.to_numpy()
|
dataset['data'] = utils.convert_dataframe_to_np_structured_array(df) #df_numerical_attrs.to_numpy()
|
||||||
dataset['shape'] = dataset['data'].shape
|
dataset['shape'] = dataset['data'].shape
|
||||||
dataset['dtype'] = type(dataset['data'])
|
dataset['dtype'] = type(dataset['data'])
|
||||||
#dataset['data_units'] = file_obj['wave']['data_units']
|
#dataset['data_units'] = file_obj['wave']['data_units']
|
||||||
@ -191,7 +191,7 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
|
|||||||
for column_name in df.columns:
|
for column_name in df.columns:
|
||||||
column_attr_dict = description_dict['table_header'].get(column_name,
|
column_attr_dict = description_dict['table_header'].get(column_name,
|
||||||
{'note':'there was no description available. Review instrument files.'})
|
{'note':'there was no description available. Review instrument files.'})
|
||||||
dataset['attributes'].update({column_name: utils.parse_attribute(column_attr_dict)})
|
dataset['attributes'].update({column_name: utils.convert_attrdict_to_np_structured_array(column_attr_dict)})
|
||||||
|
|
||||||
#try:
|
#try:
|
||||||
# dataset['attributes'] = description_dict['table_header'].copy()
|
# dataset['attributes'] = description_dict['table_header'].copy()
|
||||||
|
@ -10,7 +10,12 @@ import numpy as np
|
|||||||
import utils.g5505_utils as utils
|
import utils.g5505_utils as utils
|
||||||
import logging
|
import logging
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
import os
|
||||||
|
import h5py
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
import json
|
||||||
|
|
||||||
class HDF5DataOpsManager():
|
class HDF5DataOpsManager():
|
||||||
def __init__(self, file_path, mode = 'r+') -> None:
|
def __init__(self, file_path, mode = 'r+') -> None:
|
||||||
@ -254,7 +259,7 @@ def construct_attributes_dict(attrs_obj):
|
|||||||
|
|
||||||
return attr_dict
|
return attr_dict
|
||||||
|
|
||||||
def print_metadata(name, obj, folder_depth, yaml_dict):
|
def __print_metadata__(name, obj, folder_depth, yaml_dict):
|
||||||
|
|
||||||
# TODO: should we enable deeper folders ?
|
# TODO: should we enable deeper folders ?
|
||||||
if len(obj.name.split('/')) <= folder_depth:
|
if len(obj.name.split('/')) <= folder_depth:
|
||||||
@ -298,29 +303,60 @@ def print_metadata(name, obj, folder_depth, yaml_dict):
|
|||||||
#elif len(obj.name.split('/')) == 3:
|
#elif len(obj.name.split('/')) == 3:
|
||||||
# print(yaml.dump())
|
# print(yaml.dump())
|
||||||
|
|
||||||
def to_yaml(input_filename_path,folder_depth: int = 4):
|
def serialize_metadata(input_filename_path, folder_depth: int = 4, output_format: str = 'yaml') -> str:
|
||||||
|
"""
|
||||||
|
Serialize metadata from an HDF5 file into YAML or JSON format.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
input_filename_path : str
|
||||||
|
The path to the input HDF5 file.
|
||||||
|
folder_depth : int, optional
|
||||||
|
The folder depth to control how much of the HDF5 file hierarchy is traversed (default is 4).
|
||||||
|
output_format : str, optional
|
||||||
|
The format to serialize the output, either 'yaml' or 'json' (default is 'yaml').
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str
|
||||||
|
The output file path where the serialized metadata is stored (either .yaml or .json).
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Choose the appropriate output format (YAML or JSON)
|
||||||
|
if output_format not in ['yaml', 'json']:
|
||||||
|
raise ValueError("Unsupported format. Please choose either 'yaml' or 'json'.")
|
||||||
|
|
||||||
|
# Initialize dictionary to store YAML/JSON data
|
||||||
yaml_dict = {}
|
yaml_dict = {}
|
||||||
|
|
||||||
|
# Split input file path to get the output file's base name
|
||||||
output_filename_tail, ext = os.path.splitext(input_filename_path)
|
output_filename_tail, ext = os.path.splitext(input_filename_path)
|
||||||
|
|
||||||
with h5py.File(input_filename_path,'r') as f:
|
# Open the HDF5 file and extract metadata
|
||||||
|
with h5py.File(input_filename_path, 'r') as f:
|
||||||
|
# Construct attributes dictionary and top-level structure
|
||||||
attrs_dict = construct_attributes_dict(f.attrs)
|
attrs_dict = construct_attributes_dict(f.attrs)
|
||||||
yaml_dict[f.name] = {"name": f.name, "attributes": attrs_dict, "datasets":{}}
|
yaml_dict[f.name] = {
|
||||||
f.visititems(lambda name, obj: print_metadata(name,obj,folder_depth,yaml_dict))
|
"name": f.name,
|
||||||
|
"attributes": attrs_dict,
|
||||||
#with open(output_filename_tail+".json","w") as yaml_file:
|
"datasets": {}
|
||||||
# json_obj = json.dumps(yaml_dict,indent=4,sort_keys=False,)
|
}
|
||||||
# yaml_file.write(json_obj)
|
# Traverse HDF5 file hierarchy and add datasets
|
||||||
|
f.visititems(lambda name, obj: __print_metadata__(name, obj, folder_depth, yaml_dict))
|
||||||
with open(output_filename_tail+".yaml","w") as yaml_file:
|
|
||||||
yaml_output = yaml.dump(yaml_dict,sort_keys=False)
|
|
||||||
#for key in yaml_dict:
|
# Serialize and write the data
|
||||||
# yaml_output = yaml.dump(yaml_dict[key],sort_keys=False)
|
output_file_path = output_filename_tail + '.' + output_format
|
||||||
yaml_file.write(yaml_output )
|
with open(output_file_path, 'w') as output_file:
|
||||||
|
if output_format == 'json':
|
||||||
return output_filename_tail+".yaml"
|
json_output = json.dumps(yaml_dict, indent=4, sort_keys=False)
|
||||||
|
output_file.write(json_output)
|
||||||
|
elif output_format == 'yaml':
|
||||||
|
yaml_output = yaml.dump(yaml_dict, sort_keys=False)
|
||||||
|
output_file.write(yaml_output)
|
||||||
|
|
||||||
|
return output_file_path
|
||||||
|
|
||||||
|
|
||||||
def get_groups_at_a_level(file: h5py.File, level: str):
|
def get_groups_at_a_level(file: h5py.File, level: str):
|
||||||
|
Reference in New Issue
Block a user