diff --git a/pipelines/metadata_revision.py b/pipelines/metadata_revision.py
index 65e9ec6..d5e6f40 100644
--- a/pipelines/metadata_revision.py
+++ b/pipelines/metadata_revision.py
@@ -189,7 +189,65 @@ def load_yaml(yaml_review_file):
             print(exc)
             return None
 
-#def update_hdf5_attributes(input_hdf5_file, yaml_dict):
+def validate_yaml_dict(input_hdf5_file, yaml_dict):
+    errors = []
+    notes = []
+
+    with h5py.File(input_hdf5_file, 'r') as hdf5_file:
+        # 1. Check for valid object names
+        for key in yaml_dict:
+            if key not in hdf5_file:
+                error_msg = f"Error: {key} is not a valid object's name in the HDF5 file."
+                print(error_msg)
+                errors.append(error_msg)
+
+        # 2. Confirm metadata dict for each object is a dictionary
+        for key, meta_dict in yaml_dict.items():
+            if not isinstance(meta_dict, dict):
+                error_msg = f"Error: Metadata for {key} should be a dictionary."
+                print(error_msg)
+                errors.append(error_msg)
+            else:
+                if 'attributes' not in meta_dict:
+                    warning_msg = f"Warning: No 'attributes' in metadata dict for {key}."
+                    print(warning_msg)
+                    notes.append(warning_msg)
+
+        # 3. Verify update, append, and delete operations are well specified
+        for key, meta_dict in yaml_dict.items():
+            attributes = meta_dict.get("attributes", {})
+
+            for attr_name, attr_value in attributes.items():
+                # Ensure the object exists before accessing attributes
+                if key in hdf5_file:
+                    hdf5_obj_attrs = hdf5_file[key].attrs  # Access object-specific attributes
+
+                    if attr_name in hdf5_obj_attrs:
+                        # Attribute exists: it can be updated or deleted
+                        if isinstance(attr_value, dict) and "delete" in attr_value:
+                            note_msg = f"Note: '{attr_name}' in {key} may be deleted if 'delete' is set as true."
+                            print(note_msg)
+                            notes.append(note_msg)
+                        else:
+                            note_msg = f"Note: '{attr_name}' in {key} will be updated."
+                            print(note_msg)
+                            notes.append(note_msg)
+                    else:
+                        # Attribute does not exist: it can be appended or flagged as an invalid delete
+                        if isinstance(attr_value, dict) and "delete" in attr_value:
+                            error_msg = f"Error: Cannot delete non-existent attribute '{attr_name}' in {key}."
+                            print(error_msg)
+                            errors.append(error_msg)
+                        else:
+                            note_msg = f"Note: '{attr_name}' in {key} will be appended."
+                            print(note_msg)
+                            notes.append(note_msg)
+                else:
+                    error_msg = f"Error: '{key}' is not a valid object in the HDF5 file."
+                    print(error_msg)
+                    errors.append(error_msg)
+
+    return len(errors) == 0, errors, notes
 
 
 def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
@@ -215,6 +273,10 @@ def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
     """
 
     yaml_dict = load_yaml(yaml_review_file)
+    success, errors, notes = validate_yaml_dict(input_hdf5_file, yaml_dict)
+    if not success:
+        raise ValueError(f"Review yaml file {yaml_review_file} is invalid. Validation errors: {errors}")
+
     # Initialize HDF5 operations manager
     DataOpsAPI = hdf5_ops.HDF5DataOpsManager(input_hdf5_file)
     DataOpsAPI.load_file_obj()
@@ -413,6 +475,9 @@ def main():
     # third_update_hdf5_file_with_review(output_filename_path, os.path.join(os.path.join(os.path.abspath(os.curdir),"review"),filename))
     #fourth_complete_metadata_review()
 
-#if __name__ == '__main__':
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: python pipeline.py run ")
+        sys.exit(1)
 
-#    main()
+    #run(sys.argv[2])
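
A minimal usage sketch of the new validate_yaml_dict, not part of the patch itself: the HDF5 file path, object path, and attribute names below are made-up examples, and the import assumes the module is importable as pipelines.metadata_revision. It shows the review-dict shape the validator expects (top-level keys are HDF5 object paths, each with an "attributes" dict whose values are either plain replacements or a {"delete": true} marker).

    # Hypothetical example: a review dict shaped the way load_yaml() is expected
    # to return it, validated against an existing HDF5 file before any write.
    from pipelines.metadata_revision import validate_yaml_dict

    example_review = {
        "/experiment_1": {                         # must be an existing object path in the HDF5 file
            "attributes": {
                "operator": "J. Doe",              # appended if missing, updated if already present
                "obsolete_flag": {"delete": True}, # only valid when the attribute already exists
            }
        }
    }

    is_valid, errors, notes = validate_yaml_dict("data/example.h5", example_review)
    if not is_valid:
        print(errors)  # the same error list that update_hdf5_file_with_review now raises on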