From 2a9d69c757a3b506dc8ef6693e5d25db7bb71dc8 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Thu, 10 Oct 2024 11:28:23 +0200 Subject: [PATCH] Robustified metadata and dataset extraction methods by requiring explicit load of file obj before their use. Renamed a few functions and fixed typos in print statements. --- src/hdf5_ops.py | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/src/hdf5_ops.py b/src/hdf5_ops.py index 50c0fb3..61b43e0 100644 --- a/src/hdf5_ops.py +++ b/src/hdf5_ops.py @@ -54,29 +54,44 @@ class HDF5DataOpsManager(): self.file_obj.close() self.file_obj = None - def load_dataset_metadata(self): + def extract_and_load_dataset_metadata(self): def __get_datasets(name, obj, list_of_datasets): if isinstance(obj,h5py.Dataset): list_of_datasets.append(name) #print(f'Adding dataset: {name}') #tail: {head} head: {tail}') list_of_datasets = [] - with h5py.File(self.file_path,'r') as file: + + if self.file_obj is None: + raise RuntimeError("File object is not loaded. 
Please load the HDF5 file using the 'load_file_obj' method before attempting to extract datasets.") + + try: + list_of_datasets = [] - file.visititems(lambda name, obj: __get_datasets(name, obj, list_of_datasets)) - dataset_metadata_df = pd.DataFrame({'dataset_name': list_of_datasets}) - dataset_metadata_df['parent_instrument'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-3]) - dataset_metadata_df['parent_file'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-2]) + self.file_obj.visititems(lambda name, obj: __get_datasets(name, obj, list_of_datasets)) + + dataset_metadata_df = pd.DataFrame({'dataset_name': list_of_datasets}) + dataset_metadata_df['parent_instrument'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-3]) + dataset_metadata_df['parent_file'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-2]) + + self.dataset_metadata_df = dataset_metadata_df + + except Exception as e: + + self.unload_file_obj() + print(f"An unexpected error occurred: {e}. File object will be unloaded.") + + + - self.dataset_metadata_df = dataset_metadata_df - def read_dataset_as_dataframe(self,dataset_name): + def extract_dataset_as_dataframe(self,dataset_name): """ returns a copy of the dataset content in the form of dataframe when possible or numpy array """ if self.file_obj is None: - self.load_file_obj() + raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to extract datasets.") dataset_obj = self.file_obj[dataset_name] # Read dataset content from dataset obj @@ -88,9 +103,13 @@ class HDF5DataOpsManager(): try: return pd.DataFrame(data) - except ValueError as exp: - logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {exp}. Instead, dataset will be returned as Numpy array.") + except ValueError as e: + logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {e}. 
Instead, dataset will be returned as Numpy array.") return data # 'data' is a NumPy array here + except Exception as e: + self.unload_file_obj() + print(f"An unexpected error occurred: {e}. Returning None and unloading file object") + return None # Define metadata revision methods: append(), update(), delete(), and rename(). @@ -126,7 +145,7 @@ class HDF5DataOpsManager(): """ if self.file_obj is None: - raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.") + raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.") # Create a copy of annotation_dict to avoid modifying the original annotation_dict_copy = copy.deepcopy(annotation_dict) @@ -184,7 +203,7 @@ class HDF5DataOpsManager(): """ if self.file_obj is None: - raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.") + raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.") update_dict = {} @@ -231,7 +250,7 @@ class HDF5DataOpsManager(): """ if self.file_obj is None: - raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.") + raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.") try: obj = self.file_obj[obj_name] @@ -273,7 +292,7 @@ class HDF5DataOpsManager(): """ if self.file_obj is None: - raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.") + raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.") try: obj = self.file_obj[obj_name]