Added CLI commands `update` and `serialize` to simplify running the metadata revision pipeline.

This commit is contained in:
2024-10-29 07:56:43 +01:00
parent 3f7a089a28
commit e2fec03d4a
2 changed files with 30 additions and 7 deletions

View File

@ -1,5 +1,9 @@
import sys
import os
# Add the parent directory to the system path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import h5py
import yaml
import src.hdf5_ops as hdf5_ops
@ -153,12 +157,14 @@ def count(hdf5_obj,yml_dict):
print('additions',count_additions, 'deletions', count_delections)
if __name__ == "__main__":
if len(sys.argv) != 4:
print("Usage: python metadata_revision.py update <path/to/review_file.hdf5> <path/to/review_file.yaml>")
sys.exit(1)
if sys.argv[2] == 'update':
input_hdf5_file = sys.argv[3]
yaml_review_file = sys.argv[4]
if len(sys.argv) < 4:
print("Usage: python metadata_revision.py update <path/to/target_file.hdf5> <path/to/metadata_review_file.yaml>")
sys.exit(1)
if sys.argv[1] == 'update':
input_hdf5_file = sys.argv[2]
yaml_review_file = sys.argv[3]
update_hdf5_file_with_review(input_hdf5_file, yaml_review_file)
#run(sys.argv[2])

View File

@ -388,7 +388,7 @@ class HDF5DataOpsManager():
# Parse value into HDF5 admissible type
for key in dataset_dict['attributes'].keys():
value = dataset_dict['attributes'][key]
if isinstance(key, dict):
if isinstance(value, dict):
dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)
if not group_name in self.file_obj:
@ -573,5 +573,22 @@ def get_groups_at_a_level(file: h5py.File, level: str):
return groups
if __name__ == "__main__":
    # Command-line entry point.
    #
    #   python hdf5_ops.py serialize <path/to/target_file.hdf5> <format=json|yaml>
    #
    # Exits with status 1 when too few arguments are given, when the command
    # is not recognised, or when serialization raises an exception.
    usage = "Usage: python hdf5_ops.py serialize <path/to/target_file.hdf5> <format=json|yaml>"

    if len(sys.argv) < 4:
        print(usage, file=sys.stderr)
        sys.exit(1)

    command, input_hdf5_file, file_format = sys.argv[1:4]

    # Reject unknown commands explicitly instead of silently doing nothing
    # and exiting with a success status.
    if command != 'serialize':
        print(f"Unknown command: {command}", file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(1)

    try:
        # serialize_metadata's return value is reported below as the output path.
        path_to_file = serialize_metadata(input_hdf5_file, output_format=file_format)
    except Exception as e:
        # Top-level boundary: report any failure and signal it via the exit code.
        print(f"An error occurred during serialization: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        print(f"Metadata serialized to {path_to_file}")