Robustified metadata and dataset extraction methods by requiring an explicit load of the file object before use. Renamed a few functions and fixed types in print statements.
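The change enforces a load-then-use contract: callers must open the HDF5 file with load_file_obj() before calling the extraction methods, which now raise RuntimeError instead of opening the file implicitly, and which release the handle via unload_file_obj() on unexpected errors. Below is a minimal usage sketch of that contract; the method and attribute names are taken from the diff, while the import, the placeholder path 'data.h5', and the assumption that the constructor takes the HDF5 file path are not confirmed by this commit.

# Hypothetical usage of HDF5DataOpsManager after this commit; 'data.h5' is a placeholder path
# and the constructor signature is an assumption.
manager = HDF5DataOpsManager('data.h5')

manager.load_file_obj()   # explicit load is now required before any extraction
try:
    # builds manager.dataset_metadata_df from the names of all h5py.Dataset objects
    manager.extract_and_load_dataset_metadata()
    first_name = manager.dataset_metadata_df['dataset_name'].iloc[0]

    # returns a pandas DataFrame when conversion succeeds, otherwise a NumPy array
    data = manager.extract_dataset_as_dataframe(first_name)
    print(type(data))
finally:
    manager.unload_file_obj()  # closes the file and resets file_obj to None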
@@ -54,29 +54,44 @@ class HDF5DataOpsManager():
self.file_obj.close()
self.file_obj = None

def load_dataset_metadata(self):
def extract_and_load_dataset_metadata(self):

def __get_datasets(name, obj, list_of_datasets):
if isinstance(obj,h5py.Dataset):
list_of_datasets.append(name)
#print(f'Adding dataset: {name}') #tail: {head} head: {tail}')
list_of_datasets = []
with h5py.File(self.file_path,'r') as file:

if self.file_obj is None:
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to extract datasets.")

try:

list_of_datasets = []
file.visititems(lambda name, obj: __get_datasets(name, obj, list_of_datasets))

dataset_metadata_df = pd.DataFrame({'dataset_name': list_of_datasets})
dataset_metadata_df['parent_instrument'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-3])
dataset_metadata_df['parent_file'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-2])
self.file_obj.visititems(lambda name, obj: __get_datasets(name, obj, list_of_datasets))

dataset_metadata_df = pd.DataFrame({'dataset_name': list_of_datasets})
dataset_metadata_df['parent_instrument'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-3])
dataset_metadata_df['parent_file'] = dataset_metadata_df['dataset_name'].apply(lambda x: x.split('/')[-2])

self.dataset_metadata_df = dataset_metadata_df

except Exception as e:

self.unload_file_obj()
print(f"An unexpected error occurred: {e}. File object will be unloaded.")




self.dataset_metadata_df = dataset_metadata_df

def read_dataset_as_dataframe(self,dataset_name):
def extract_dataset_as_dataframe(self,dataset_name):
"""
returns a copy of the dataset content in the form of dataframe when possible or numpy array
"""
if self.file_obj is None:
self.load_file_obj()
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to extract datasets.")

dataset_obj = self.file_obj[dataset_name]
# Read dataset content from dataset obj
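For reference, the parent_instrument and parent_file columns built in the hunk above come from fixed positions in the dataset's HDF5 path. A worked example of that split, under the assumption that dataset names have the form group/instrument/file/dataset (the path below is hypothetical):

name = 'beamline/instrumentA/run_001.txt/data_table'  # hypothetical dataset name
print(name.split('/')[-3])  # 'instrumentA'  -> parent_instrument
print(name.split('/')[-2])  # 'run_001.txt'  -> parent_file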
@@ -88,9 +103,13 @@ class HDF5DataOpsManager():

try:
return pd.DataFrame(data)
except ValueError as exp:
logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {exp}. Instead, dataset will be returned as Numpy array.")
except ValueError as e:
logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {e}. Instead, dataset will be returned as Numpy array.")
return data # 'data' is a NumPy array here
except Exception as e:
self.unload_file_obj()
print(f"An unexpected error occurred: {e}. Returning None and unloading file object")
return None

# Define metadata revision methods: append(), update(), delete(), and rename().
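The hunk above keeps the DataFrame-or-NumPy fallback while renaming the exception variable. The same pattern, shown in isolation as a standalone sketch (the helper name to_dataframe_or_array is hypothetical; data stands for the array already read from the h5py dataset):

import logging

import numpy as np
import pandas as pd

def to_dataframe_or_array(data, dataset_name):
    # Mirror of the fallback in extract_dataset_as_dataframe: try a tabular view,
    # fall back to the raw array when pandas cannot build a DataFrame (e.g. 3-D data).
    try:
        return pd.DataFrame(data)
    except ValueError as e:
        logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {e}. "
                      "Instead, dataset will be returned as Numpy array.")
        return data

# Example: a 3-D array cannot be converted, so the array itself comes back.
print(type(to_dataframe_or_array(np.zeros((2, 2, 2)), 'demo/dset')))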
@@ -126,7 +145,7 @@ class HDF5DataOpsManager():
"""

if self.file_obj is None:
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.")

# Create a copy of annotation_dict to avoid modifying the original
annotation_dict_copy = copy.deepcopy(annotation_dict)
@@ -184,7 +203,7 @@ class HDF5DataOpsManager():
"""

if self.file_obj is None:
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.")

update_dict = {}
@@ -231,7 +250,7 @@ class HDF5DataOpsManager():
"""

if self.file_obj is None:
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.")

try:
obj = self.file_obj[obj_name]
@@ -273,7 +292,7 @@ class HDF5DataOpsManager():
"""

if self.file_obj is None:
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.")

try:
obj = self.file_obj[obj_name]