Robustified metadata revision methods with error-detection conditions and try-except statements. Metadata revision methods no longer open the file themselves; the file must be loaded before they are called.

2024-10-10 10:39:10 +02:00
parent 31c9db98ca
commit 568f747a69
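
Illustrative usage sketch of the new contract (not shown in this diff; 'demo.h5', '/experiment_1', and the attribute values are placeholder names, and load_file()/close_file() are assumed from the RuntimeError message and except blocks below):

# Hypothetical example: file name, group path, and attributes are made up.
dataOpsObj = HDF5DataOpsManager('demo.h5')

try:
    # Before the file is loaded, revision methods now raise
    # instead of opening the file behind the caller's back.
    dataOpsObj.append_metadata('/experiment_1', {'operator': 'jdoe'})
except RuntimeError as err:
    print(err)

# Intended pattern: load first, then revise, then close.
dataOpsObj.load_file()
dataOpsObj.append_metadata('/experiment_1', {'operator': 'jdoe'})
dataOpsObj.close_file()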


@@ -92,21 +92,7 @@ class HDF5DataOpsManager():
            logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {exp}. Instead, dataset will be returned as Numpy array.")
            return data  # 'data' is a NumPy array here

    # Define metadata revision methods: append(), update(), delete(), and rename().
    def append_metadata(self, obj_name, annotation_dict):
        """
@@ -140,26 +126,30 @@ class HDF5DataOpsManager():
"""
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
# Create a copy of annotation_dict to avoid modifying the original
annotation_dict_copy = copy.deepcopy(annotation_dict)
#with h5py.File(self.file_path, mode='r+') as file_obj:
obj = self.file_obj[obj_name]
try:
obj = self.file_obj[obj_name]

            # Check if any attribute already exists
            if any(key in obj.attrs for key in annotation_dict_copy.keys()):
                raise ValueError("Make sure the provided (key, value) pairs are not already existing metadata elements or attributes. To modify or delete existing attributes use .update_metadata() or .delete_metadata().")

            # Process the dictionary values and convert them to structured arrays if needed
            for key, value in annotation_dict_copy.items():
                if isinstance(value, dict):
                    # Convert dictionaries to NumPy structured arrays for complex attributes
                    annotation_dict_copy[key] = utils.convert_attrdict_to_np_structured_array(value)

            # Update the object's attributes with the new metadata
            obj.attrs.update(annotation_dict_copy)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def update_metadata(self, obj_name, annotation_dict):
@@ -194,24 +184,28 @@ class HDF5DataOpsManager():
"""
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
update_dict = {}
#with h5py.File(self.file_path, mode='r+') as file_obj:
obj = self.file_obj[obj_name]
try:
            obj = self.file_obj[obj_name]
            for key, value in annotation_dict.items():
                if key in obj.attrs:
                    if isinstance(value, dict):
                        update_dict[key] = utils.convert_attrdict_to_np_structured_array(value)
                    else:
                        update_dict[key] = value
                else:
                    # Optionally, log or warn about non-existing keys being ignored.
                    print(f"Warning: Key '{key}' does not exist and will be ignored.")

            obj.attrs.update(update_dict)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def delete_metadata(self, obj_name, annotation_dict):
        """
@@ -237,21 +231,24 @@ class HDF5DataOpsManager():
"""
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
#with h5py.File(self.file_path, mode='r+') as file_obj:
obj = self.file_obj[obj_name]
for attr_key, value in annotation_dict.items():
if attr_key in obj.attrs:
if isinstance(value, dict) and value.get('delete', False):
obj.attrs.__delitem__(attr_key)
        try:
            obj = self.file_obj[obj_name]
            for attr_key, value in annotation_dict.items():
                if attr_key in obj.attrs:
                    if isinstance(value, dict) and value.get('delete', False):
                        del obj.attrs[attr_key]
                    else:
                        msg = f"Warning: Value for key '{attr_key}' is not marked for deletion or is invalid."
                        print(msg)
                else:
                    msg = f"Warning: Key '{attr_key}' does not exist in metadata."
                    print(msg)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def rename_metadata(self, obj_name, renaming_map):
@@ -275,27 +272,32 @@ class HDF5DataOpsManager():
"""
#with h5py.File(self.file_path, mode='r+') as file_obj:
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
obj = self.file_obj[obj_name]
try:
obj = self.file_obj[obj_name]

            # Iterate over the renaming_map to process renaming
            for old_attr, new_attr in renaming_map.items():
                if old_attr in obj.attrs:
                    # Get the old attribute's value
                    attr_value = obj.attrs[old_attr]

                    # Create a new attribute with the new name
                    obj.attrs.create(new_attr, data=attr_value)

                    # Delete the old attribute
                    del obj.attrs[old_attr]
                else:
                    # Skip if the old attribute doesn't exist
                    msg = f"Skipping: Attribute '{old_attr}' does not exist."
                    print(msg)  # Optionally, replace with warnings.warn(msg)

        except Exception as e:
            self.close_file()
            print(
                f"An unexpected error occurred: {e}. The file object has been properly closed. "
                "Please ensure that 'obj_name' exists in the file, and that the keys in 'renaming_map' are valid attributes of the object."
            )
@@ -313,6 +315,7 @@ class HDF5DataOpsManager():
        return metadata_dict

    def reformat_datetime_column(self, dataset_name, column_name, src_format, desired_format='%Y-%m-%d %H:%M:%S.%f'):
        # Access the dataset
        dataset = self.file_obj[dataset_name]
@@ -358,6 +361,24 @@ class HDF5DataOpsManager():
        return dt_column_data.to_numpy()

    # Define data append operations: append_dataset() and update_file()
    def append_dataset(self, dataset_dict, group_name):
        # Parse attribute values into HDF5-admissible types
        for key in dataset_dict['attributes'].keys():
            value = dataset_dict['attributes'][key]
            if isinstance(value, dict):
                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)

        if group_name not in self.file_obj:
            self.file_obj.create_group(group_name, track_order=True)
            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")

        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")

    def update_file(self, path_to_append_dir):
        # Split the reference file path and the append directory path into directories and filenames
        ref_tail, ref_head = os.path.split(self.file_path)