Robustified metadata revision methods with error-detection conditions and try-except statements. Metadata revision methods no longer open the file themselves; the file must be loaded before they are called.

2024-10-10 10:39:10 +02:00
parent 31c9db98ca
commit 568f747a69
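
Illustrative usage sketch of the new contract (not shown in this diff; 'demo.h5', '/experiment_1', and the attribute values are placeholder names, and load_file()/close_file() are assumed from the RuntimeError message and except blocks below):

# Hypothetical example: file name, group path, and attributes are made up.
dataOpsObj = HDF5DataOpsManager('demo.h5')

try:
    # Before the file is loaded, revision methods now raise
    # instead of opening the file behind the caller's back.
    dataOpsObj.append_metadata('/experiment_1', {'operator': 'jdoe'})
except RuntimeError as err:
    print(err)

# Intended pattern: load first, then revise, then close.
dataOpsObj.load_file()
dataOpsObj.append_metadata('/experiment_1', {'operator': 'jdoe'})
dataOpsObj.close_file()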


@@ -92,21 +92,7 @@ class HDF5DataOpsManager():
            logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {exp}. Instead, dataset will be returned as Numpy array.")
            return data  # 'data' is a NumPy array here

    # Define metadata revision methods: append(), update(), delete(), and rename().
    def append_metadata(self, obj_name, annotation_dict):
        """
@@ -140,26 +126,30 @@ class HDF5DataOpsManager():
"""
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
# Create a copy of annotation_dict to avoid modifying the original
annotation_dict_copy = copy.deepcopy(annotation_dict)
#with h5py.File(self.file_path, mode='r+') as file_obj:
obj = self.file_obj[obj_name]
try:
obj = self.file_obj[obj_name]

            # Check if any attribute already exists
            if any(key in obj.attrs for key in annotation_dict_copy.keys()):
                raise ValueError("Make sure the provided (key, value) pairs are not already existing metadata elements or attributes. To modify or delete existing attributes use .update_metadata() or .delete_metadata().")

            # Process the dictionary values and convert them to structured arrays if needed
            for key, value in annotation_dict_copy.items():
                if isinstance(value, dict):
                    # Convert dictionaries to NumPy structured arrays for complex attributes
                    annotation_dict_copy[key] = utils.convert_attrdict_to_np_structured_array(value)

            # Update the object's attributes with the new metadata
            obj.attrs.update(annotation_dict_copy)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def update_metadata(self, obj_name, annotation_dict):
@@ -194,24 +184,28 @@ class HDF5DataOpsManager():
"""
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
update_dict = {}
#with h5py.File(self.file_path, mode='r+') as file_obj:
obj = self.file_obj[obj_name]
try:
            obj = self.file_obj[obj_name]
            for key, value in annotation_dict.items():
                if key in obj.attrs:
                    if isinstance(value, dict):
                        update_dict[key] = utils.convert_attrdict_to_np_structured_array(value)
                    else:
                        update_dict[key] = value
                else:
                    # Optionally, log or warn about non-existing keys being ignored.
                    print(f"Warning: Key '{key}' does not exist and will be ignored.")

            obj.attrs.update(update_dict)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def delete_metadata(self, obj_name, annotation_dict):
        """
@@ -237,21 +231,24 @@ class HDF5DataOpsManager():
"""
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
#with h5py.File(self.file_path, mode='r+') as file_obj:
obj = self.file_obj[obj_name]
for attr_key, value in annotation_dict.items():
if attr_key in obj.attrs:
if isinstance(value, dict) and value.get('delete', False):
obj.attrs.__delitem__(attr_key)
        try:
            obj = self.file_obj[obj_name]
            for attr_key, value in annotation_dict.items():
                if attr_key in obj.attrs:
                    if isinstance(value, dict) and value.get('delete', False):
                        del obj.attrs[attr_key]
                    else:
                        msg = f"Warning: Value for key '{attr_key}' is not marked for deletion or is invalid."
                        print(msg)
                else:
                    msg = f"Warning: Key '{attr_key}' does not exist in metadata."
                    print(msg)

        except Exception as e:
            self.close_file()
            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")

    def rename_metadata(self, obj_name, renaming_map):
@@ -275,27 +272,32 @@ class HDF5DataOpsManager():
"""
#with h5py.File(self.file_path, mode='r+') as file_obj:
if self.file_obj is None:
self.open_file()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
obj = self.file_obj[obj_name]
try:
obj = self.file_obj[obj_name]

            # Iterate over the renaming_map to process renaming
            for old_attr, new_attr in renaming_map.items():
                if old_attr in obj.attrs:
                    # Get the old attribute's value
                    attr_value = obj.attrs[old_attr]

                    # Create a new attribute with the new name
                    obj.attrs.create(new_attr, data=attr_value)

                    # Delete the old attribute
                    del obj.attrs[old_attr]
                else:
                    # Skip if the old attribute doesn't exist
                    msg = f"Skipping: Attribute '{old_attr}' does not exist."
                    print(msg)  # Optionally, replace with warnings.warn(msg)

        except Exception as e:
            self.close_file()
            print(
                f"An unexpected error occurred: {e}. The file object has been properly closed. "
                "Please ensure that 'obj_name' exists in the file, and that the keys in 'renaming_map' are valid attributes of the object."
            )
@@ -313,6 +315,7 @@ class HDF5DataOpsManager():
        return metadata_dict

    def reformat_datetime_column(self, dataset_name, column_name, src_format, desired_format='%Y-%m-%d %H:%M:%S.%f'):
        # Access the dataset
        dataset = self.file_obj[dataset_name]
@@ -358,6 +361,24 @@ class HDF5DataOpsManager():
        return dt_column_data.to_numpy()

    # Define data append operations: append_dataset() and update_file()
    def append_dataset(self, dataset_dict, group_name):
        # Parse attribute values into HDF5-admissible types
        for key in dataset_dict['attributes'].keys():
            value = dataset_dict['attributes'][key]
            if isinstance(value, dict):
                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)

        if group_name not in self.file_obj:
            self.file_obj.create_group(group_name, track_order=True)
            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")

        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")

    def update_file(self, path_to_append_dir):
        # Split the reference file path and the append directory path into directories and filenames
        ref_tail, ref_head = os.path.split(self.file_path)