Sync with remote repo
@@ -1,179 +1,179 @@
import sys
import os

try:
    thisFilePath = os.path.abspath(__file__)
except NameError:
    print("Error: __file__ is not available. Ensure the script is being run from a file.")
    print("[Notice] Path to DIMA package may not be resolved properly.")
    thisFilePath = os.getcwd()  # Use current directory or specify a default

dimaPath = os.path.normpath(os.path.join(thisFilePath, "..", ".."))  # Move up to project root

if dimaPath not in sys.path:  # Avoid duplicate entries
    sys.path.append(dimaPath)

import h5py
import yaml
import src.hdf5_ops as hdf5_ops
def load_yaml(review_yaml_file):
    with open(review_yaml_file, 'r') as stream:
        try:
            return yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
            return None

def validate_yaml_dict(input_hdf5_file, yaml_dict):
    errors = []
    notes = []

    with h5py.File(input_hdf5_file, 'r') as hdf5_file:
        # 1. Check for valid object names
        for key in yaml_dict:
            if key not in hdf5_file:
                error_msg = f"Error: {key} is not a valid object name in the HDF5 file."
                print(error_msg)
                errors.append(error_msg)

        # 2. Confirm metadata dict for each object is a dictionary
        for key, meta_dict in yaml_dict.items():
            if not isinstance(meta_dict, dict):
                error_msg = f"Error: Metadata for {key} should be a dictionary."
                print(error_msg)
                errors.append(error_msg)
            else:
                if 'attributes' not in meta_dict:
                    warning_msg = f"Warning: No 'attributes' in metadata dict for {key}."
                    print(warning_msg)
                    notes.append(warning_msg)

        # 3. Verify update, append, and delete operations are well specified
        for key, meta_dict in yaml_dict.items():
            attributes = meta_dict.get("attributes", {})

            for attr_name, attr_value in attributes.items():
                # Ensure the object exists before accessing attributes
                if key in hdf5_file:
                    hdf5_obj_attrs = hdf5_file[key].attrs  # Access object-specific attributes

                    if attr_name in hdf5_obj_attrs:
                        # Attribute exists: it can be updated or deleted
                        if isinstance(attr_value, dict) and "delete" in attr_value:
                            note_msg = f"Note: '{attr_name}' in {key} may be deleted if 'delete' is set to true."
                            print(note_msg)
                            notes.append(note_msg)
                        else:
                            note_msg = f"Note: '{attr_name}' in {key} will be updated."
                            print(note_msg)
                            notes.append(note_msg)
                    else:
                        # Attribute does not exist: it can be appended or flagged as an invalid delete
                        if isinstance(attr_value, dict) and "delete" in attr_value:
                            error_msg = f"Error: Cannot delete non-existent attribute '{attr_name}' in {key}."
                            print(error_msg)
                            errors.append(error_msg)
                        else:
                            note_msg = f"Note: '{attr_name}' in {key} will be appended."
                            print(note_msg)
                            notes.append(note_msg)
                else:
                    error_msg = f"Error: '{key}' is not a valid object in the HDF5 file."
                    print(error_msg)
                    errors.append(error_msg)

    return len(errors) == 0, errors, notes

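# Usage sketch: validate_yaml_dict can be called on its own as a dry check before applying
# any changes, mirroring how update_hdf5_file_with_review uses it. The file names below are
# hypothetical placeholders, not files shipped with the repository.
#
#   yaml_dict = load_yaml('collection_review.yaml')
#   success, errors, notes = validate_yaml_dict('collection.h5', yaml_dict)
#   if not success:
#       print('Review file needs fixing:', errors)
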
def update_hdf5_file_with_review(input_hdf5_file, review_yaml_file):
    """
    Updates, appends, or deletes metadata attributes in an HDF5 file based on a provided YAML review file.

    Parameters:
    -----------
    input_hdf5_file : str
        Path to the HDF5 file.

    review_yaml_file : str
        Path to a YAML file specifying objects and their attributes with operations. Example format:

        {
            "object_name": {
                "attributes": {
                    "attr_name": {
                        "value": attr_value,
                        "delete": true | false
                    }
                }
            }
        }
    """
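    # Illustrative review file (a sketch only; the object and attribute names are hypothetical).
    # As the loop below is written, plain scalar values are updated if the attribute already
    # exists and appended otherwise, while a mapping containing 'delete: true' removes the
    # attribute.
    #
    #   /group1/dataset1:
    #     attributes:
    #       units: "m/s"              # existing attribute -> updated
    #       operator_name: "Jane Doe" # new attribute -> appended
    #       obsolete_flag:
    #         delete: true            # existing attribute -> deleted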
    yaml_dict = load_yaml(review_yaml_file)

    success, errors, notes = validate_yaml_dict(input_hdf5_file, yaml_dict)
    if not success:
        raise ValueError(f"Review yaml file {review_yaml_file} is invalid. Validation errors: {errors}")

    # Initialize HDF5 operations manager
    DataOpsAPI = hdf5_ops.HDF5DataOpsManager(input_hdf5_file)
    DataOpsAPI.load_file_obj()

    # Iterate over each object in the YAML dictionary
    for obj_name, attr_dict in yaml_dict.items():
        # Prepare dictionaries for append, update, and delete actions
        append_dict = {}
        update_dict = {}
        delete_dict = {}

        if obj_name not in DataOpsAPI.file_obj:
            continue  # Skip if the object does not exist

        # Iterate over each attribute in the current object
        for attr_name, attr_props in attr_dict['attributes'].items():
            if not isinstance(attr_props, dict):
                #attr_props = {'value': attr_props}
                # Check if the attribute exists (for updating)
                if attr_name in DataOpsAPI.file_obj[obj_name].attrs:
                    update_dict[attr_name] = attr_props
                # Otherwise, it's a new attribute to append
                else:
                    append_dict[attr_name] = attr_props
            else:
                # Check if the attribute is marked for deletion
                if attr_props.get('delete', False):
                    delete_dict[attr_name] = attr_props

        # Perform a single pass for all three operations
        if append_dict:
            DataOpsAPI.append_metadata(obj_name, append_dict)
        if update_dict:
            DataOpsAPI.update_metadata(obj_name, update_dict)
        if delete_dict:
            DataOpsAPI.delete_metadata(obj_name, delete_dict)

    # Close hdf5 file
    DataOpsAPI.unload_file_obj()
    # Regenerate yaml snapshot of updated HDF5 file
    output_yml_filename_path = hdf5_ops.serialize_metadata(input_hdf5_file)
    print(f'{output_yml_filename_path} was successfully regenerated from the updated version of {input_hdf5_file}')

def count(hdf5_obj, yml_dict):
    print(hdf5_obj.name)
    if isinstance(hdf5_obj, h5py.Group) and len(hdf5_obj.name.split('/')) <= 4:
        obj_review = yml_dict[hdf5_obj.name]
        additions = [not (item in hdf5_obj.attrs.keys()) for item in obj_review['attributes'].keys()]
        count_additions = sum(additions)
        deletions = [not (item in obj_review['attributes'].keys()) for item in hdf5_obj.attrs.keys()]
        count_deletions = sum(deletions)
        print('additions', count_additions, 'deletions', count_deletions)

if __name__ == "__main__":

    if len(sys.argv) < 4:
        print("Usage: python metadata_revision.py update <path/to/target_file.hdf5> <path/to/metadata_review_file.yaml>")
        sys.exit(1)

    if sys.argv[1] == 'update':
        input_hdf5_file = sys.argv[2]
        review_yaml_file = sys.argv[3]
        update_hdf5_file_with_review(input_hdf5_file, review_yaml_file)
        #run(sys.argv[2])
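
# Example invocation (the file paths are illustrative placeholders, not files in the repo):
#   python metadata_revision.py update data/collection.h5 data/collection_review.yaml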