Robustified the metadata revision methods with error-detection conditions and try/except statements. The metadata revision methods no longer open the file themselves; the HDF5 file object must already be loaded before they are called.
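
For reference, a minimal sketch of the new guard-plus-cleanup pattern (simplified from the diff below; the h5py-based load_file helper and the constructor shown here are assumptions for illustration, not the repository's exact code):

    import h5py

    class HDF5DataOpsManager():
        def __init__(self, file_path):
            self.file_path = file_path
            self.file_obj = None  # set by load_file(); never opened by the revision methods

        def load_file(self):
            # Hypothetical loader; the revision methods assume it has already been called.
            self.file_obj = h5py.File(self.file_path, mode='r+')

        def close_file(self):
            if self.file_obj is not None:
                self.file_obj.close()
                self.file_obj = None

        def append_metadata(self, obj_name, annotation_dict):
            # Error-detection condition: fail fast instead of opening the file here.
            if self.file_obj is None:
                raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
            try:
                obj = self.file_obj[obj_name]
                obj.attrs.update(annotation_dict)
            except Exception as e:
                # On any failure, release the handle so the file is not left open or locked.
                self.close_file()
                print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

The same three-step shape (guard, try, close-on-failure) is applied to append_metadata(), update_metadata(), delete_metadata(), and rename_metadata() in the diff below.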

This commit is contained in:
2024-10-10 10:39:10 +02:00
parent 31c9db98ca
commit 568f747a69


@@ -92,21 +92,7 @@ class HDF5DataOpsManager():
            logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {exp}. Instead, dataset will be returned as Numpy array.")
            return data  # 'data' is a NumPy array here

    def append_dataset(self, dataset_dict, group_name):
        # Parse value into HDF5 admissible type
        for key in dataset_dict['attributes'].keys():
            value = dataset_dict['attributes'][key]
            if isinstance(value, dict):
                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)

        if group_name not in self.file_obj:
            self.file_obj.create_group(group_name, track_order=True)
            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")

        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")

    # Define metadata revision methods: append(), update(), delete(), and rename().
    def append_metadata(self, obj_name, annotation_dict):
        """
@@ -140,12 +126,12 @@ class HDF5DataOpsManager():
        """
        if self.file_obj is None:
            self.open_file()
            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")

        # Create a copy of annotation_dict to avoid modifying the original
        annotation_dict_copy = copy.deepcopy(annotation_dict)

        #with h5py.File(self.file_path, mode='r+') as file_obj:
        try:
            obj = self.file_obj[obj_name]
            # Check if any attribute already exists
@@ -161,6 +147,10 @@ class HDF5DataOpsManager():
            # Update the object's attributes with the new metadata
            obj.attrs.update(annotation_dict_copy)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def update_metadata(self, obj_name, annotation_dict):
        """
@@ -194,13 +184,13 @@ class HDF5DataOpsManager():
        """
        if self.file_obj is None:
            self.open_file()
            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")

        update_dict = {}

        #with h5py.File(self.file_path, mode='r+') as file_obj:
        obj = self.file_obj[obj_name]
        try:
            obj = self.file_obj[obj_name]
            for key, value in annotation_dict.items():
                if key in obj.attrs:
                    if isinstance(value, dict):
@@ -213,6 +203,10 @@ class HDF5DataOpsManager():
            obj.attrs.update(update_dict)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def delete_metadata(self, obj_name, annotation_dict):
        """
        Deletes metadata attributes of the specified object (obj_name) based on the provided annotation_dict.
@@ -237,11 +231,10 @@ class HDF5DataOpsManager():
        """
        if self.file_obj is None:
            self.open_file()
            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")

        #with h5py.File(self.file_path, mode='r+') as file_obj:
        try:
            obj = self.file_obj[obj_name]
            for attr_key, value in annotation_dict.items():
                if attr_key in obj.attrs:
                    if isinstance(value, dict) and value.get('delete', False):
@@ -253,6 +246,10 @@ class HDF5DataOpsManager():
                    msg = f"Warning: Key '{attr_key}' does not exist in metadata."
                    print(msg)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def rename_metadata(self, obj_name, renaming_map):
        """
@@ -275,12 +272,11 @@ class HDF5DataOpsManager():
        """
        #with h5py.File(self.file_path, mode='r+') as file_obj:
        if self.file_obj is None:
            self.open_file()
            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")

        try:
            obj = self.file_obj[obj_name]
            # Iterate over the renaming_map to process renaming
            for old_attr, new_attr in renaming_map.items():
                if old_attr in obj.attrs:
@@ -296,6 +292,12 @@ class HDF5DataOpsManager():
                    # Skip if the old attribute doesn't exist
                    msg = f"Skipping: Attribute '{old_attr}' does not exist."
                    print(msg)  # Optionally, replace with warnings.warn(msg)

        except Exception as e:
            self.close_file()
            print(
                f"An unexpected error occurred: {e}. The file object has been properly closed. "
                "Please ensure that 'obj_name' exists in the file, and that the keys in 'renaming_map' are valid attributes of the object."
            )

        self.close_file()
@@ -313,6 +315,7 @@ class HDF5DataOpsManager():
        return metadata_dict

    def reformat_datetime_column(self, dataset_name, column_name, src_format, desired_format='%Y-%m-%d %H:%M:%S.%f'):
        # Access the dataset
        dataset = self.file_obj[dataset_name]
@@ -358,6 +361,24 @@ class HDF5DataOpsManager():
        #return np.array(timestamps)
        return dt_column_data.to_numpy()
    # Define data append operations: append_dataset(), and update_file()
    def append_dataset(self, dataset_dict, group_name):
        # Parse value into HDF5 admissible type
        for key in dataset_dict['attributes'].keys():
            value = dataset_dict['attributes'][key]
            if isinstance(value, dict):
                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)

        if group_name not in self.file_obj:
            self.file_obj.create_group(group_name, track_order=True)
            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")

        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")
    def update_file(self, path_to_append_dir):
        # Split the reference file path and the append directory path into directories and filenames
        ref_tail, ref_head = os.path.split(self.file_path)