Added a function to validate the review YAML file, and updated the update_hdf5_file_with_review function

This commit is contained in:
2024-10-28 16:20:28 +01:00
parent 69b73c26b0
commit 15b0ff3cc4

View File

@ -189,7 +189,65 @@ def load_yaml(yaml_review_file):
print(exc)
return None
#def update_hdf5_attributes(input_hdf5_file, yaml_dict):
def validate_yaml_dict(input_hdf5_file, yaml_dict):
    """Check a review dictionary against the contents of an HDF5 file.

    The review dictionary maps HDF5 object names to metadata dictionaries.
    A metadata dictionary may carry an 'attributes' mapping from attribute
    names to values; a value that is itself a dictionary containing a
    'delete' key is treated as a deletion request.

    Every message is printed as soon as it is produced and is also collected
    in one of the returned lists.

    Parameters
    ----------
    input_hdf5_file
        Path of the HDF5 file to validate against; it is opened read-only.
    yaml_dict
        Parsed review content. Anything that is not a dictionary (for
        example the None that load_yaml returns on a parse error) is
        reported as a validation error without opening the HDF5 file.

    Returns
    -------
    tuple
        (is_valid, errors, notes) where is_valid is True only when no error
        was recorded, errors is the list of error messages and notes is the
        list of warnings and informational notes.
    """
    errors = []
    notes = []

    def _record(bucket, message):
        # Echo the message right away and keep it for the caller.
        print(message)
        bucket.append(message)

    # A missing or malformed review cannot be iterated; report it as an
    # ordinary validation error so the caller gets a clean failure.
    if not isinstance(yaml_dict, dict):
        _record(
            errors,
            f"Error: Review content should be a dictionary, got {type(yaml_dict).__name__}.",
        )
        return False, errors, notes

    with h5py.File(input_hdf5_file, 'r') as hdf5_file:
        # 1. Check for valid object names
        for key in yaml_dict:
            if key not in hdf5_file:
                _record(errors, f"Error: {key} is not a valid object's name in the HDF5 file.")

        # 2. Confirm metadata dict for each object is a dictionary
        for key, meta_dict in yaml_dict.items():
            if not isinstance(meta_dict, dict):
                _record(errors, f"Error: Metadata for {key} should be a dictionary.")
            elif 'attributes' not in meta_dict:
                _record(notes, f"Warning: No 'attributes' in metadata dict for {key}.")

        # 3. Verify update, append, and delete operations are well specified
        for key, meta_dict in yaml_dict.items():
            # Entries that failed step 1 or step 2 were already reported;
            # inspecting them further would only crash or repeat the error.
            if not isinstance(meta_dict, dict) or key not in hdf5_file:
                continue

            attributes = meta_dict.get("attributes", {})
            if not isinstance(attributes, dict):
                _record(errors, f"Error: 'attributes' for {key} should be a dictionary.")
                continue

            # Attributes of this object only; looked up once per object.
            hdf5_obj_attrs = hdf5_file[key].attrs
            for attr_name, attr_value in attributes.items():
                is_delete_request = isinstance(attr_value, dict) and "delete" in attr_value
                if attr_name in hdf5_obj_attrs:
                    # Attribute exists: it can be updated or deleted
                    if is_delete_request:
                        _record(
                            notes,
                            f"Note: '{attr_name}' in {key} may be deleted if 'delete' is set as true.",
                        )
                    else:
                        _record(notes, f"Note: '{attr_name}' in {key} will be updated.")
                elif is_delete_request:
                    # Attribute does not exist: deleting it is invalid
                    _record(
                        errors,
                        f"Error: Cannot delete non-existent attribute '{attr_name}' in {key}.",
                    )
                else:
                    # Attribute does not exist: it will be created
                    _record(notes, f"Note: '{attr_name}' in {key} will be appended.")

    return not errors, errors, notes
def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
@ -215,6 +273,10 @@ def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
"""
yaml_dict = load_yaml(yaml_review_file)
success, errors, notes = validate_yaml_dict(input_hdf5_file,yaml_dict)
if not success:
raise ValueError(f"Review yaml file {yaml_review_file} is invalid. Validation errors: {errors}")
# Initialize HDF5 operations manager
DataOpsAPI = hdf5_ops.HDF5DataOpsManager(input_hdf5_file)
DataOpsAPI.load_file_obj()
@ -413,6 +475,9 @@ def main():
# third_update_hdf5_file_with_review(output_filename_path, os.path.join(os.path.join(os.path.abspath(os.curdir),"review"),filename))
#fourth_complete_metadata_review()
#if __name__ == '__main__':
if __name__ == "__main__":
    # Exactly two command-line arguments are required after the script name.
    if len(sys.argv) != 3:
        # A usage error is a diagnostic, so it is written to stderr, not stdout.
        print("Usage: python pipeline.py run <path/to/descriptor.json>", file=sys.stderr)
        sys.exit(1)
    # NOTE(review): both entry-point calls below are commented out, so in the
    # visible code a well-formed invocation does nothing further; confirm
    # which one is meant to run.
    # main()
    #run(sys.argv[2])