import sys
import os

try:
    thisFilePath = os.path.abspath(__file__)
except NameError:
    print("Error: __file__ is not available. Ensure the script is being run from a file.")
    print("[Notice] Path to DIMA package may not be resolved properly.")
    thisFilePath = os.getcwd()  # Use current directory or specify a default

dimaPath = os.path.normpath(os.path.join(thisFilePath, "..", ".."))  # Move up to project root

if dimaPath not in sys.path:  # Avoid duplicate entries
    sys.path.append(dimaPath)

import h5py
import yaml

try:
    from dima.src import hdf5_ops
except ModuleNotFoundError:
    import src.hdf5_ops as hdf5_ops


def load_yaml(review_yaml_file):
    with open(review_yaml_file, 'r') as stream:
        try:
            return yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
            return None


def validate_yaml_dict(input_hdf5_file, yaml_dict):
    errors = []
    notes = []

    with h5py.File(input_hdf5_file, 'r') as hdf5_file:

        # 1. Check for valid object names
        for key in yaml_dict:
            if key not in hdf5_file:
                error_msg = f"Error: {key} is not a valid object's name in the HDF5 file."
                print(error_msg)
                errors.append(error_msg)

        # 2. Confirm that the metadata entry for each object is a dictionary
        for key, meta_dict in yaml_dict.items():
            if not isinstance(meta_dict, dict):
                error_msg = f"Error: Metadata for {key} should be a dictionary."
                print(error_msg)
                errors.append(error_msg)
            elif 'attributes' not in meta_dict:
                warning_msg = f"Warning: No 'attributes' in metadata dict for {key}."
                print(warning_msg)
                notes.append(warning_msg)

        # 3. Verify that update, append, and delete operations are well specified
        for key, meta_dict in yaml_dict.items():
            attributes = meta_dict.get("attributes", {})
            for attr_name, attr_value in attributes.items():
                # Ensure the object exists before accessing its attributes
                if key in hdf5_file:
                    hdf5_obj_attrs = hdf5_file[key].attrs  # Object-specific attributes

                    if attr_name in hdf5_obj_attrs:
                        # Attribute exists: it can be updated or deleted
                        if isinstance(attr_value, dict) and "delete" in attr_value:
                            note_msg = f"Note: '{attr_name}' in {key} will be deleted if 'delete' is set to true."
                            print(note_msg)
                            notes.append(note_msg)
                        else:
                            note_msg = f"Note: '{attr_name}' in {key} will be updated."
                            print(note_msg)
                            notes.append(note_msg)
                    else:
                        # Attribute does not exist: it can be appended, but not deleted
                        if isinstance(attr_value, dict) and "delete" in attr_value:
                            error_msg = f"Error: Cannot delete non-existent attribute '{attr_name}' in {key}."
                            print(error_msg)
                            errors.append(error_msg)
                        else:
                            note_msg = f"Note: '{attr_name}' in {key} will be appended."
                            print(note_msg)
                            notes.append(note_msg)
                else:
                    error_msg = f"Error: '{key}' is not a valid object in the HDF5 file."
                    print(error_msg)
                    errors.append(error_msg)

    return len(errors) == 0, errors, notes


def update_hdf5_file_with_review(input_hdf5_file, review_yaml_file):
    """
    Updates, appends, or deletes metadata attributes in an HDF5 file based on a
    review YAML file.

    Parameters
    ----------
    input_hdf5_file : str
        Path to the HDF5 file.
    review_yaml_file : str
        Path to a YAML file specifying objects and their attributes with
        operations, in the format:

        {
            "object_name": {
                "attributes": {
                    "attr_name": {"value": attr_value, "delete": true | false}
                }
            }
        }
    """
    yaml_dict = load_yaml(review_yaml_file)
    if yaml_dict is None:
        raise ValueError(f"Review yaml file {review_yaml_file} could not be parsed.")

    success, errors, notes = validate_yaml_dict(input_hdf5_file, yaml_dict)
    if not success:
        raise ValueError(f"Review yaml file {review_yaml_file} is invalid. Validation errors: {errors}")

    # Initialize HDF5 operations manager
    DataOpsAPI = hdf5_ops.HDF5DataOpsManager(input_hdf5_file)
    DataOpsAPI.load_file_obj()

    # Iterate over each object in the YAML dictionary
    for obj_name, attr_dict in yaml_dict.items():

        if obj_name not in DataOpsAPI.file_obj:
            continue  # Skip objects that do not exist in the file

        # Collect attributes into append, update, and delete buckets
        append_dict = {}
        update_dict = {}
        delete_dict = {}

        for attr_name, attr_props in attr_dict['attributes'].items():
            if isinstance(attr_props, dict):
                if attr_props.get('delete', False):
                    # Attribute explicitly marked for deletion
                    delete_dict[attr_name] = attr_props
                    continue
                # Dict without a truthy 'delete': use its 'value' entry, if
                # present, as the new attribute value
                attr_props = attr_props.get('value', attr_props)
            # Update the attribute if it already exists, otherwise append it
            if attr_name in DataOpsAPI.file_obj[obj_name].attrs:
                update_dict[attr_name] = attr_props
            else:
                append_dict[attr_name] = attr_props

        # Perform a single pass for all three operations
        if append_dict:
            DataOpsAPI.append_metadata(obj_name, append_dict)
        if update_dict:
            DataOpsAPI.update_metadata(obj_name, update_dict)
        if delete_dict:
            DataOpsAPI.delete_metadata(obj_name, delete_dict)

    # Close the HDF5 file
    DataOpsAPI.unload_file_obj()

    # Regenerate the YAML snapshot of the updated HDF5 file
    output_yml_filename_path = hdf5_ops.serialize_metadata(input_hdf5_file)
    print(f'{output_yml_filename_path} was successfully regenerated from the updated version of {input_hdf5_file}')


def count(hdf5_obj, yml_dict):
    """Print, for one HDF5 group, how many attributes the review dict would add
    and how many existing attributes it omits (candidate deletions)."""
    print(hdf5_obj.name)
    # Only consider groups at most three levels below the root
    if isinstance(hdf5_obj, h5py.Group) and len(hdf5_obj.name.split('/')) <= 4:
        obj_review = yml_dict[hdf5_obj.name]
        additions = [item not in hdf5_obj.attrs.keys() for item in obj_review['attributes'].keys()]
        count_additions = sum(additions)
        deletions = [item not in obj_review['attributes'].keys() for item in hdf5_obj.attrs.keys()]
        count_deletions = sum(deletions)
        print('additions', count_additions, 'deletions', count_deletions)

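
# --- Editor's sketch (not part of the original module) ---
# count() takes an (object, review-dict) pair, so one way to apply it across a
# whole file is through h5py's visititems with a small adapter; the guard skips
# objects the review dict does not mention, which count() would otherwise hit
# with a KeyError. The helper name and this wiring are assumptions, not an
# existing DIMA API.
def _report_counts(input_hdf5_file, review_yaml_file):
    """Run count() over every object mentioned in the review YAML file."""
    yml_dict = load_yaml(review_yaml_file)
    with h5py.File(input_hdf5_file, 'r') as f:
        # visititems passes (name, obj); returning None keeps the visit going
        f.visititems(lambda name, obj: count(obj, yml_dict) if obj.name in yml_dict else None)
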
if __name__ == "__main__":
    if len(sys.argv) < 4:
        print("Usage: python metadata_revision.py update <input_hdf5_file> <review_yaml_file>")
        sys.exit(1)

    if sys.argv[1] == 'update':
        input_hdf5_file = sys.argv[2]
        review_yaml_file = sys.argv[3]
        update_hdf5_file_with_review(input_hdf5_file, review_yaml_file)
        #run(sys.argv[2])
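
# Example invocation (editor's note; both paths below are placeholders):
#
#   python metadata_revision.py update data/experiment.h5 data/experiment_review.yaml
#
# On success the script applies the reviewed metadata to the HDF5 file and
# prints the path of the regenerated YAML snapshot.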