# Listing info (code-viewer residue): 185 lines, 7.6 KiB, Python
import sys
|
|
import os
|
|
|
|
# Resolve the location of this script. Interactive sessions (e.g. a REPL)
# do not define __file__, in which case the working directory is used.
try:
    thisFilePath = os.path.abspath(__file__)
except NameError:
    print(
        "Error: __file__ is not available. Ensure the script is being run from a file.",
        "[Notice] Path to DIMA package may not be resolved properly.",
        sep="\n",
    )
    thisFilePath = os.getcwd()

# Two levels above the resolved path is taken as the project root.
dimaPath = os.path.normpath(os.path.join(thisFilePath, os.pardir, os.pardir))

# Register the project root for imports, without adding it twice.
if sys.path.count(dimaPath) == 0:
    sys.path.append(dimaPath)
|
|
|
|
import h5py
|
|
import yaml
|
|
|
|
try:
|
|
from dima.src import hdf5_ops as hdf5_ops
|
|
except ModuleNotFoundError:
|
|
import src.hdf5_ops as hdf5_ops
|
|
|
|
|
|
|
|
def load_yaml(review_yaml_file):
    """
    Parse a YAML file and return its content.

    Parameters:
    -----------
    review_yaml_file : str
        Path to the YAML file to read.

    Returns:
    --------
    object or None
        The parsed YAML document, or None when the parser raises a
        yaml.YAMLError (the error is printed to stdout in that case).
    """
    # Binary mode lets PyYAML detect the stream encoding itself (UTF-8 unless
    # a BOM says otherwise) instead of decoding with the platform's locale
    # default, which can garble non-ASCII metadata.
    with open(review_yaml_file, 'rb') as stream:
        try:
            # NOTE(review): FullLoader resolves more tags than the safe
            # loader. If review files can come from an untrusted source,
            # consider yaml.safe_load instead.
            return yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
            return None
|
|
|
|
def validate_yaml_dict(input_hdf5_file, yaml_dict):
    """
    Check a metadata review dictionary against the contents of an HDF5 file.

    Three checks are performed, in this order:
      1. every top-level key names an object present in the HDF5 file;
      2. every value is a dictionary (a missing 'attributes' entry is only
         reported as a warning);
      3. every listed attribute is classified as an update, an append or a
         deletion; deleting an attribute that does not exist is an error.

    Each finding is printed and also collected in the returned lists.

    Parameters:
    -----------
    input_hdf5_file : str
        Path to the HDF5 file, opened read-only.
    yaml_dict : dict
        Mapping of HDF5 object names to metadata dictionaries.

    Returns:
    --------
    tuple
        (is_valid, errors, notes) where is_valid is True when no error was
        recorded, errors is the list of error messages and notes is the list
        of warnings and informational messages.
    """
    errors = []
    notes = []

    def _report(message, bucket):
        # Echo the finding and keep it for the caller.
        print(message)
        bucket.append(message)

    # A failed or empty YAML load yields a non-dict (e.g. None); report it
    # as a validation error instead of failing while iterating over it.
    if not isinstance(yaml_dict, dict):
        _report(
            "Error: Review content should be a dictionary mapping HDF5 object names to metadata.",
            errors,
        )
        return False, errors, notes

    with h5py.File(input_hdf5_file, 'r') as hdf5_file:
        # 1. Check for valid object names
        for key in yaml_dict:
            if key not in hdf5_file:
                _report(f"Error: {key} is not a valid object's name in the HDF5 file.", errors)

        # 2. Confirm metadata dict for each object is a dictionary
        for key, meta_dict in yaml_dict.items():
            if not isinstance(meta_dict, dict):
                _report(f"Error: Metadata for {key} should be a dictionary.", errors)
            elif 'attributes' not in meta_dict:
                _report(f"Warning: No 'attributes' in metadata dict for {key}.", notes)

        # 3. Verify update, append, and delete operations are well specified
        for key, meta_dict in yaml_dict.items():
            if not isinstance(meta_dict, dict):
                # Already reported in step 2; it has no attributes to inspect.
                continue

            attributes = meta_dict.get("attributes", {})
            if not isinstance(attributes, dict):
                _report(f"Error: 'attributes' for {key} should be a dictionary.", errors)
                continue

            # The object lookup does not depend on the attribute, so do it once.
            object_exists = key in hdf5_file
            object_attrs = hdf5_file[key].attrs if object_exists else None

            for attr_name, attr_value in attributes.items():
                if not object_exists:
                    _report(f"Error: '{key}' is not a valid object in the HDF5 file.", errors)
                    continue

                # A deletion request is a dict value carrying a 'delete' key.
                wants_delete = isinstance(attr_value, dict) and "delete" in attr_value

                if attr_name in object_attrs:
                    # Attribute exists: it can be updated or deleted
                    if wants_delete:
                        _report(
                            f"Note: '{attr_name}' in {key} may be deleted if 'delete' is set as true.",
                            notes,
                        )
                    else:
                        _report(f"Note: '{attr_name}' in {key} will be updated.", notes)
                elif wants_delete:
                    # Attribute does not exist: deleting it is invalid
                    _report(
                        f"Error: Cannot delete non-existent attribute '{attr_name}' in {key}.",
                        errors,
                    )
                else:
                    _report(f"Note: '{attr_name}' in {key} will be appended.", notes)

    return len(errors) == 0, errors, notes
|
|
|
|
|
|
def update_hdf5_file_with_review(input_hdf5_file, review_yaml_file):
    """
    Updates, appends, or deletes metadata attributes in an HDF5 file based on a YAML review file.

    The review file is loaded and validated first. For every reviewed object
    present in the file, attributes are then appended, updated and deleted
    (in that order), and finally the YAML snapshot of the HDF5 file is
    regenerated.

    Parameters:
    -----------
    input_hdf5_file : str
        Path to the HDF5 file.

    review_yaml_file : str
        Path to the YAML review file specifying objects and their attributes. Example format:
            object_name:
              attributes:
                attr_to_set: new_value
                attr_to_remove:
                  delete: true

    Raises:
    -------
    ValueError
        If the review file does not contain a dictionary or fails validation.
    """
    yaml_dict = load_yaml(review_yaml_file)

    # load_yaml returns None when parsing fails; an empty file also yields a
    # non-dict. Reject both here with the same exception type as a failed
    # validation.
    if not isinstance(yaml_dict, dict):
        errors = ["Error: Review content could not be loaded as a dictionary."]
        raise ValueError(f"Review yaml file {review_yaml_file} is invalid. Validation errors: {errors}")

    success, errors, notes = validate_yaml_dict(input_hdf5_file, yaml_dict)
    if not success:
        raise ValueError(f"Review yaml file {review_yaml_file} is invalid. Validation errors: {errors}")

    # Initialize HDF5 operations manager
    DataOpsAPI = hdf5_ops.HDF5DataOpsManager(input_hdf5_file)
    DataOpsAPI.load_file_obj()

    try:
        # Iterate over each object in the YAML dictionary
        for obj_name, attr_dict in yaml_dict.items():
            if obj_name not in DataOpsAPI.file_obj:
                continue  # Skip if the object does not exist

            # Prepare dictionaries for append, update, and delete actions
            append_dict = {}
            update_dict = {}
            delete_dict = {}

            # Validation only warns about a missing 'attributes' entry, so
            # treat an absent or empty entry as "nothing to do".
            reviewed_attrs = attr_dict.get('attributes') or {}
            existing_attrs = DataOpsAPI.file_obj[obj_name].attrs

            for attr_name, attr_props in reviewed_attrs.items():
                if not isinstance(attr_props, dict):
                    # Plain value: update when the attribute exists, append otherwise
                    if attr_name in existing_attrs:
                        update_dict[attr_name] = attr_props
                    else:
                        append_dict[attr_name] = attr_props
                elif attr_props.get('delete', False):
                    # Dict value marked for deletion
                    delete_dict[attr_name] = attr_props
                # NOTE(review): a dict value whose 'delete' flag is missing or
                # false is skipped without any change - confirm this is intended.

            # Perform a single pass for all three operations
            if append_dict:
                DataOpsAPI.append_metadata(obj_name, append_dict)
            if update_dict:
                DataOpsAPI.update_metadata(obj_name, update_dict)
            if delete_dict:
                DataOpsAPI.delete_metadata(obj_name, delete_dict)
    finally:
        # Close hdf5 file, even when one of the operations above fails
        DataOpsAPI.unload_file_obj()

    # Regenerate yaml snapshot of updated HDF5 file
    output_yml_filename_path = hdf5_ops.serialize_metadata(input_hdf5_file)
    print(f'{output_yml_filename_path} was successfully regenerated from the updated version of {input_hdf5_file}')
|
|
|
|
def count(hdf5_obj,yml_dict):
    """
    Print an HDF5 object's name and, for shallow groups, how its attributes
    differ from the reviewed ones.

    Parameters:
    -----------
    hdf5_obj : h5py object
        Object whose name is printed. Only h5py.Group instances whose name
        splits into at most four '/'-separated parts are compared.
    yml_dict : dict
        Review dictionary, indexed by object name; each entry is read through
        its 'attributes' key.
    """
    print(hdf5_obj.name)

    # Anything that is not a shallow group is only listed by name.
    if not isinstance(hdf5_obj, h5py.Group) or len(hdf5_obj.name.split('/')) > 4:
        return

    reviewed_attrs = yml_dict[hdf5_obj.name]['attributes']

    # Reviewed attributes that the object does not have yet.
    n_additions = sum(name not in hdf5_obj.attrs.keys() for name in reviewed_attrs.keys())
    # Attributes on the object that the review does not list.
    n_deletions = sum(name not in reviewed_attrs.keys() for name in hdf5_obj.attrs.keys())

    print('additions', n_additions, 'deletions', n_deletions)
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: `update <hdf5 file> <review yaml file>`.
    usage = "Usage: python metadata_revision.py update <path/to/target_file.hdf5> <path/to/metadata_review_file.yaml>"

    if len(sys.argv) < 4:
        print(usage)
        sys.exit(1)

    if sys.argv[1] == 'update':
        input_hdf5_file = sys.argv[2]
        review_yaml_file = sys.argv[3]
        update_hdf5_file_with_review(input_hdf5_file, review_yaml_file)
    else:
        # An unrecognised command used to exit silently with status 0.
        print(f"Unknown command: {sys.argv[1]}")
        print(usage)
        sys.exit(1)
|