From 568f747a69571f77681ca9e19815bb450227b74e Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe
Date: Thu, 10 Oct 2024 10:39:10 +0200
Subject: [PATCH] Robustified metadata revision methods with error detection
 conditions and try-except statements. Metadata revision methods no longer
 open the file themselves.

---
 src/hdf5_ops.py | 161 +++++++++++++++++++++++++++---------------------
 1 file changed, 91 insertions(+), 70 deletions(-)

diff --git a/src/hdf5_ops.py b/src/hdf5_ops.py
index b40c2d6..7172fd4 100644
--- a/src/hdf5_ops.py
+++ b/src/hdf5_ops.py
@@ -92,21 +92,7 @@ class HDF5DataOpsManager():
             logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {exp}. Instead, dataset will be returned as Numpy array.")
             return data  # 'data' is a NumPy array here
 
-    def append_dataset(self,dataset_dict, group_name):
-
-        # Parse value into HDF5 admissible type
-        for key in dataset_dict['attributes'].keys():
-            value = dataset_dict['attributes'][key]
-            if isinstance(key, dict):
-                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)
-
-        if not group_name in self.file_obj:
-            self.file_obj.create_group(group_name, track_order=True)
-            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")
-
-        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
-        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
-        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")
+    # Define metadata revision methods: append(), update(), delete(), and rename().
 
     def append_metadata(self, obj_name, annotation_dict):
         """
@@ -140,26 +126,30 @@ class HDF5DataOpsManager():
         """
 
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
         # Create a copy of annotation_dict to avoid modifying the original
         annotation_dict_copy = copy.deepcopy(annotation_dict)
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
-        obj = self.file_obj[obj_name]
+        try:
+            obj = self.file_obj[obj_name]
 
-        # Check if any attribute already exists
-        if any(key in obj.attrs for key in annotation_dict_copy.keys()):
-            raise ValueError("Make sure the provided (key, value) pairs are not existing metadata elements or attributes. To modify or delete existing attributes use .modify_annotation() or .delete_annotation()")
-
-        # Process the dictionary values and convert them to structured arrays if needed
-        for key, value in annotation_dict_copy.items():
-            if isinstance(value, dict):
-                # Convert dictionaries to NumPy structured arrays for complex attributes
-                annotation_dict_copy[key] = utils.convert_attrdict_to_np_structured_array(value)
+            # Check if any attribute already exists
+            if any(key in obj.attrs for key in annotation_dict_copy.keys()):
+                raise ValueError("Make sure the provided (key, value) pairs are not existing metadata elements or attributes. To modify or delete existing attributes use .modify_annotation() or .delete_annotation()")
+
+            # Process the dictionary values and convert them to structured arrays if needed
+            for key, value in annotation_dict_copy.items():
+                if isinstance(value, dict):
+                    # Convert dictionaries to NumPy structured arrays for complex attributes
+                    annotation_dict_copy[key] = utils.convert_attrdict_to_np_structured_array(value)
 
-        # Update the object's attributes with the new metadata
-        obj.attrs.update(annotation_dict_copy)
+            # Update the object's attributes with the new metadata
+            obj.attrs.update(annotation_dict_copy)
+
+        except Exception as e:
+            self.close_file()
+            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")
 
     def update_metadata(self, obj_name, annotation_dict):
         """
@@ -194,24 +184,28 @@ class HDF5DataOpsManager():
         """
 
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
         update_dict = {}
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
-        obj = self.file_obj[obj_name]
+        try:
 
-        for key, value in annotation_dict.items():
-            if key in obj.attrs:
-                if isinstance(value, dict):
-                    update_dict[key] = utils.convert_attrdict_to_np_structured_array(value)
+            obj = self.file_obj[obj_name]
+            for key, value in annotation_dict.items():
+                if key in obj.attrs:
+                    if isinstance(value, dict):
+                        update_dict[key] = utils.convert_attrdict_to_np_structured_array(value)
+                    else:
+                        update_dict[key] = value
                 else:
-                    update_dict[key] = value
-            else:
-                # Optionally, log or warn about non-existing keys being ignored.
-                print(f"Warning: Key '{key}' does not exist and will be ignored.")
+                    # Optionally, log or warn about non-existing keys being ignored.
+                    print(f"Warning: Key '{key}' does not exist and will be ignored.")
 
-        obj.attrs.update(update_dict)
+            obj.attrs.update(update_dict)
+
+        except Exception as e:
+            self.close_file()
+            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")
 
     def delete_metadata(self, obj_name, annotation_dict):
         """
@@ -237,21 +231,24 @@ class HDF5DataOpsManager():
         """
 
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
-        obj = self.file_obj[obj_name]
-
-        for attr_key, value in annotation_dict.items():
-            if attr_key in obj.attrs:
-                if isinstance(value, dict) and value.get('delete', False):
-                    obj.attrs.__delitem__(attr_key)
+        try:
+            obj = self.file_obj[obj_name]
+            for attr_key, value in annotation_dict.items():
+                if attr_key in obj.attrs:
+                    if isinstance(value, dict) and value.get('delete', False):
+                        obj.attrs.__delitem__(attr_key)
+                    else:
+                        msg = f"Warning: Value for key '{attr_key}' is not marked for deletion or is invalid."
+                        print(msg)
                 else:
-                    msg = f"Warning: Value for key '{attr_key}' is not marked for deletion or is invalid."
+                    msg = f"Warning: Key '{attr_key}' does not exist in metadata."
                     print(msg)
-            else:
-                msg = f"Warning: Key '{attr_key}' does not exist in metadata."
-                print(msg)
+
+        except Exception as e:
+            self.close_file()
+            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")
 
     def rename_metadata(self, obj_name, renaming_map):
         """
@@ -275,27 +272,32 @@ class HDF5DataOpsManager():
         """
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
-        obj = self.file_obj[obj_name]
+        try:
+            obj = self.file_obj[obj_name]
+            # Iterate over the renaming_map to process renaming
+            for old_attr, new_attr in renaming_map.items():
+                if old_attr in obj.attrs:
+                    # Get the old attribute's value
+                    attr_value = obj.attrs[old_attr]
 
-        # Iterate over the renaming_map to process renaming
-        for old_attr, new_attr in renaming_map.items():
-            if old_attr in obj.attrs:
-                # Get the old attribute's value
-                attr_value = obj.attrs[old_attr]
+                    # Create a new attribute with the new name
+                    obj.attrs.create(new_attr, data=attr_value)
 
-                # Create a new attribute with the new name
-                obj.attrs.create(new_attr, data=attr_value)
-
-                # Delete the old attribute
-                obj.attrs.__delitem__(old_attr)
-            else:
-                # Skip if the old attribute doesn't exist
-                msg = f"Skipping: Attribute '{old_attr}' does not exist."
-                print(msg)  # Optionally, replace with warnings.warn(msg)
+                    # Delete the old attribute
+                    obj.attrs.__delitem__(old_attr)
+                else:
+                    # Skip if the old attribute doesn't exist
+                    msg = f"Skipping: Attribute '{old_attr}' does not exist."
+                    print(msg)  # Optionally, replace with warnings.warn(msg)
+        except Exception as e:
+            self.close_file()
+            print(
+                f"An unexpected error occurred: {e}. The file object has been properly closed. "
+                "Please ensure that 'obj_name' exists in the file, and that the keys in 'renaming_map' are valid attributes of the object."
+            )
 
         self.close_file()
@@ -313,6 +315,7 @@ class HDF5DataOpsManager():
 
         return metadata_dict
 
+
     def reformat_datetime_column(self, dataset_name, column_name, src_format, desired_format='%Y-%m-%d %H:%M:%S.%f'):
         # Access the dataset
         dataset = self.file_obj[dataset_name]
@@ -358,6 +361,24 @@ class HDF5DataOpsManager():
         #return np.array(timestamps)
         return dt_column_data.to_numpy()
 
+    # Define data append operations: append_dataset() and update_file()
+
+    def append_dataset(self, dataset_dict, group_name):
+
+        # Parse value into HDF5 admissible type
+        for key in dataset_dict['attributes'].keys():
+            value = dataset_dict['attributes'][key]
+            if isinstance(value, dict):
+                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)
+
+        if group_name not in self.file_obj:
+            self.file_obj.create_group(group_name, track_order=True)
+            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")
+
+        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
+        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
+        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")
+
     def update_file(self, path_to_append_dir):
         # Split the reference file path and the append directory path into directories and filenames
         ref_tail, ref_head = os.path.split(self.file_path)
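
For reference, a minimal usage sketch of the revised workflow, assuming HDF5DataOpsManager takes the file path in its constructor and exposes load_file()/close_file(), as the new RuntimeError message suggests. The constructor signature, file path, object name, and attribute keys below are illustrative assumptions, not taken from this diff.

    from src.hdf5_ops import HDF5DataOpsManager

    dm = HDF5DataOpsManager('experiment.h5')  # hypothetical path; constructor signature assumed
    dm.load_file()  # the revision methods below no longer open the file implicitly
    dm.append_metadata('/group1/dataset1', {'operator': 'jdoe'})            # add new attributes only
    dm.update_metadata('/group1/dataset1', {'operator': 'jsmith'})          # overwrite existing attributes
    dm.delete_metadata('/group1/dataset1', {'operator': {'delete': True}})  # mark attributes for deletion
    dm.rename_metadata('/group1/dataset1', {'creation_date': 'created_on'}) # hypothetical keys
    dm.close_file()

Calling rename_metadata() last matters here: unlike the other three methods, it ends with an unconditional self.close_file() (see the context line in the hunk above), so any revision method called after it would hit the new RuntimeError.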