Implemented metadata append, rename, delete, and update operations on the HDF5 manager object, and refactored the YAML-based metadata update script to use these operations.

This commit is contained in:
2024-09-30 16:32:39 +02:00
parent afe31288a0
commit 6f5d4adcee
2 changed files with 259 additions and 43 deletions

View File

@ -190,32 +190,71 @@ def load_yaml(yaml_review_file):
print(exc)
return None
def update_hdf5_attributes(input_hdf5_file, yaml_dict):
    # NOTE(review): superseded by update_hdf5_file_with_review, which routes the
    # same work through hdf5_ops.HDF5DataOpsManager — this commit comments it out.
    #def update_hdf5_attributes(input_hdf5_file, yaml_dict):
    def update_attributes(hdf5_obj, yaml_obj):
        """Apply the attribute operations in yaml_obj['attributes'] to hdf5_obj.attrs."""
        for attr_name, attr_value in yaml_obj['attributes'].items():
            # Normalize scalar YAML values into the dict form the branches below expect.
            if not isinstance(attr_value, dict):
                attr_value = {'rename_as': attr_name, 'value': attr_value}
            if (attr_name in hdf5_obj.attrs.keys()): # delete or update
                if attr_value.get('delete'): # delete when True
                    hdf5_obj.attrs.__delitem__(attr_name)
                elif not (attr_value.get('rename_as') == attr_name): # update when true
                    # Rename: copy the stored value under the new key, drop the old key.
                    # NOTE(review): when rename_as == attr_name an existing attribute's
                    # value is never updated here — only rename/delete are handled.
                    hdf5_obj.attrs[attr_value.get('rename_as')] = hdf5_obj.attrs[attr_name] # convert_attrdict_to_np_structured_array(attr_value)
                    hdf5_obj.attrs.__delitem__(attr_name)
            else: # add a new attribute
                hdf5_obj.attrs.update({attr_name : utils.convert_attrdict_to_np_structured_array(attr_value)})
    # Open read-write and apply the attribute operations object by object.
    # NOTE(review): keys absent from the file raise KeyError here (no existence check).
    with h5py.File(input_hdf5_file, 'r+') as f:
        for key in yaml_dict.keys():
            hdf5_obj = f[key]
            yaml_obj = yaml_dict[key]
            update_attributes(hdf5_obj, yaml_obj)
def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
    """
    Update, append, or delete metadata attributes in an HDF5 file based on a
    reviewed YAML snapshot, then regenerate the YAML snapshot of the file.

    Parameters
    ----------
    input_hdf5_file : str
        Path to the HDF5 file whose metadata is to be modified.
    yaml_review_file : str
        Path to the reviewed YAML file. Expected format:
        {
            "object_name": {
                "attributes": {
                    "attr_name": {"value": attr_value, "delete": True/False}
                }
            }
        }
        Scalar (non-dict) attribute entries are treated as {'value': scalar}.

    Returns
    -------
    None
        Side effects only: mutates metadata via HDF5DataOpsManager and
        regenerates the YAML snapshot of the updated file.
    """
    yaml_dict = load_yaml(yaml_review_file)
    if yaml_dict is None:
        # load_yaml returns None on a parse error; nothing to apply.
        return

    # Initialize HDF5 operations manager
    DataOpsAPI = hdf5_ops.HDF5DataOpsManager(input_hdf5_file)
    DataOpsAPI.open_file()
    try:
        # Iterate over each object in the YAML dictionary
        for obj_name, attr_dict in yaml_dict.items():
            if obj_name not in DataOpsAPI.file_obj:
                continue  # Skip objects that do not exist in the file

            # Partition requested changes into append / update / delete batches.
            append_dict = {}
            update_dict = {}
            delete_dict = {}
            for attr_name, attr_props in attr_dict['attributes'].items():
                # Normalize scalar YAML values so every entry is a dict
                # (fixes the previously commented-out normalization, which
                # silently dropped dict-valued attributes lacking 'delete').
                if not isinstance(attr_props, dict):
                    attr_props = {'value': attr_props}

                if attr_props.get('delete', False):
                    # Explicitly marked for deletion
                    delete_dict[attr_name] = attr_props
                elif attr_name in DataOpsAPI.file_obj[obj_name].attrs:
                    # Attribute exists: update in place
                    update_dict[attr_name] = attr_props
                else:
                    # New attribute: append
                    append_dict[attr_name] = attr_props

            # Perform a single pass for all three operations
            if append_dict:
                DataOpsAPI.append_metadata(obj_name, append_dict)
            if update_dict:
                DataOpsAPI.update_metadata(obj_name, update_dict)
            if delete_dict:
                DataOpsAPI.delete_metadata(obj_name, delete_dict)
    finally:
        # Release the HDF5 file handle even if a metadata operation fails.
        DataOpsAPI.close_file()

    # Regenerate yaml snapshot of updated HDF5 file
    output_yml_filename_path = hdf5_ops.serialize_metadata(input_hdf5_file)
    print(f'{output_yml_filename_path} was successfully regenerated from the updated version of {input_hdf5_file}')