Robustified metadata revision methods with error-detection conditions and try-except blocks. Metadata revision methods no longer open the file themselves; the caller must load the HDF5 file first, otherwise a RuntimeError is raised.
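
In practice the calling pattern changes: the caller opens the file before any revision and closes it afterwards. A minimal usage sketch, assuming the manager is constructed from a file path and exposes the open_file()/close_file() methods referenced in the diff below (the path and object names are placeholders):

    from src.hdf5_ops import HDF5DataOpsManager

    # Hypothetical file path; open_file()/close_file() are taken from the
    # surrounding code, the constructor signature is assumed.
    manager = HDF5DataOpsManager('data/experiment.h5')
    manager.open_file()
    try:
        manager.append_metadata('/group1/dataset1', {'operator': 'jdoe'})
    finally:
        manager.close_file()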
src/hdf5_ops.py (161 changed lines)
@@ -92,21 +92,7 @@ class HDF5DataOpsManager():
             logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {exp}. Instead, dataset will be returned as Numpy array.")
             return data  # 'data' is a NumPy array here
 
-    def append_dataset(self,dataset_dict, group_name):
-
-        # Parse value into HDF5 admissible type
-        for key in dataset_dict['attributes'].keys():
-            value = dataset_dict['attributes'][key]
-            if isinstance(key, dict):
-                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)
-
-        if not group_name in self.file_obj:
-            self.file_obj.create_group(group_name, track_order=True)
-            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")
-
-        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
-        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
-        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")
+    # Define metadata revision methods: append(), update(), delete(), and rename().
 
     def append_metadata(self, obj_name, annotation_dict):
         """
@@ -140,26 +126,30 @@ class HDF5DataOpsManager():
         """
 
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
         # Create a copy of annotation_dict to avoid modifying the original
         annotation_dict_copy = copy.deepcopy(annotation_dict)
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
-        obj = self.file_obj[obj_name]
-
-        # Check if any attribute already exists
-        if any(key in obj.attrs for key in annotation_dict_copy.keys()):
-            raise ValueError("Make sure the provided (key, value) pairs are not existing metadata elements or attributes. To modify or delete existing attributes use .modify_annotation() or .delete_annotation()")
-
-        # Process the dictionary values and convert them to structured arrays if needed
-        for key, value in annotation_dict_copy.items():
-            if isinstance(value, dict):
-                # Convert dictionaries to NumPy structured arrays for complex attributes
-                annotation_dict_copy[key] = utils.convert_attrdict_to_np_structured_array(value)
-
-        # Update the object's attributes with the new metadata
-        obj.attrs.update(annotation_dict_copy)
+        try:
+            obj = self.file_obj[obj_name]
+
+            # Check if any attribute already exists
+            if any(key in obj.attrs for key in annotation_dict_copy.keys()):
+                raise ValueError("Make sure the provided (key, value) pairs are not existing metadata elements or attributes. To modify or delete existing attributes use .modify_annotation() or .delete_annotation()")
+
+            # Process the dictionary values and convert them to structured arrays if needed
+            for key, value in annotation_dict_copy.items():
+                if isinstance(value, dict):
+                    # Convert dictionaries to NumPy structured arrays for complex attributes
+                    annotation_dict_copy[key] = utils.convert_attrdict_to_np_structured_array(value)
+
+            # Update the object's attributes with the new metadata
+            obj.attrs.update(annotation_dict_copy)
+
+        except Exception as e:
+            self.close_file()
+            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")
 
 
     def update_metadata(self, obj_name, annotation_dict):
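
As a usage sketch for the hunk above (the object path and attribute names are invented, and the inner layout expected by utils.convert_attrdict_to_np_structured_array() is an assumption): append_metadata() only accepts keys that do not exist yet, and dict values are converted to NumPy structured arrays.

    # Hypothetical call: both attributes are new, so no ValueError is raised;
    # the dict-valued 'calibration' entry is converted to a structured array.
    manager.append_metadata('/instrument/scan_001', {
        'operator': 'jdoe',
        'calibration': {'value': 0.98, 'units': 'dimensionless'},  # assumed layout
    })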
@@ -194,24 +184,28 @@ class HDF5DataOpsManager():
         """
 
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
         update_dict = {}
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
-        obj = self.file_obj[obj_name]
-
-        for key, value in annotation_dict.items():
-            if key in obj.attrs:
-                if isinstance(value, dict):
-                    update_dict[key] = utils.convert_attrdict_to_np_structured_array(value)
-                else:
-                    update_dict[key] = value
-            else:
-                # Optionally, log or warn about non-existing keys being ignored.
-                print(f"Warning: Key '{key}' does not exist and will be ignored.")
-
-        obj.attrs.update(update_dict)
+        try:
+            obj = self.file_obj[obj_name]
+
+            for key, value in annotation_dict.items():
+                if key in obj.attrs:
+                    if isinstance(value, dict):
+                        update_dict[key] = utils.convert_attrdict_to_np_structured_array(value)
+                    else:
+                        update_dict[key] = value
+                else:
+                    # Optionally, log or warn about non-existing keys being ignored.
+                    print(f"Warning: Key '{key}' does not exist and will be ignored.")
+
+            obj.attrs.update(update_dict)
+
+        except Exception as e:
+            self.close_file()
+            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")
 
     def delete_metadata(self, obj_name, annotation_dict):
         """
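
A corresponding sketch for update_metadata() (placeholder names): only keys that already exist on the object are collected into update_dict and written back; unknown keys just print a warning.

    # Hypothetical: 'operator' exists and is overwritten; 'unknown_key' does
    # not exist, so it is ignored with a printed warning.
    manager.update_metadata('/instrument/scan_001', {
        'operator': 'asmith',
        'unknown_key': 42,
    })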
@@ -237,21 +231,24 @@ class HDF5DataOpsManager():
         """
 
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
-        obj = self.file_obj[obj_name]
-
-        for attr_key, value in annotation_dict.items():
-            if attr_key in obj.attrs:
-                if isinstance(value, dict) and value.get('delete', False):
-                    obj.attrs.__delitem__(attr_key)
-                else:
-                    msg = f"Warning: Value for key '{attr_key}' is not marked for deletion or is invalid."
-                    print(msg)
-            else:
-                msg = f"Warning: Key '{attr_key}' does not exist in metadata."
-                print(msg)
+        try:
+            obj = self.file_obj[obj_name]
+
+            for attr_key, value in annotation_dict.items():
+                if attr_key in obj.attrs:
+                    if isinstance(value, dict) and value.get('delete', False):
+                        obj.attrs.__delitem__(attr_key)
+                    else:
+                        msg = f"Warning: Value for key '{attr_key}' is not marked for deletion or is invalid."
+                        print(msg)
+                else:
+                    msg = f"Warning: Key '{attr_key}' does not exist in metadata."
+                    print(msg)
+
+        except Exception as e:
+            self.close_file()
+            print(f"An unexpected error occurred: {e}. The file object has been properly closed.")
 
     def rename_metadata(self, obj_name, renaming_map):
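
delete_metadata() expects a per-key marker rather than a bare list of names, as the hunk above shows. A sketch with placeholder names:

    # Hypothetical: an attribute is removed only when its value is a dict with
    # a truthy 'delete' flag; any other value just produces a warning.
    manager.delete_metadata('/instrument/scan_001', {
        'operator': {'delete': True},    # deleted
        'calibration': 'please remove',  # invalid marker, warning printed
    })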
@@ -275,27 +272,32 @@ class HDF5DataOpsManager():
 
         """
 
-        #with h5py.File(self.file_path, mode='r+') as file_obj:
         if self.file_obj is None:
-            self.open_file()
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file' method before attempting to modify it.")
 
-        obj = self.file_obj[obj_name]
-
-        # Iterate over the renaming_map to process renaming
-        for old_attr, new_attr in renaming_map.items():
-            if old_attr in obj.attrs:
-                # Get the old attribute's value
-                attr_value = obj.attrs[old_attr]
-
-                # Create a new attribute with the new name
-                obj.attrs.create(new_attr, data=attr_value)
-
-                # Delete the old attribute
-                obj.attrs.__delitem__(old_attr)
-            else:
-                # Skip if the old attribute doesn't exist
-                msg = f"Skipping: Attribute '{old_attr}' does not exist."
-                print(msg)  # Optionally, replace with warnings.warn(msg)
+        try:
+            obj = self.file_obj[obj_name]
+
+            # Iterate over the renaming_map to process renaming
+            for old_attr, new_attr in renaming_map.items():
+                if old_attr in obj.attrs:
+                    # Get the old attribute's value
+                    attr_value = obj.attrs[old_attr]
+
+                    # Create a new attribute with the new name
+                    obj.attrs.create(new_attr, data=attr_value)
+
+                    # Delete the old attribute
+                    obj.attrs.__delitem__(old_attr)
+                else:
+                    # Skip if the old attribute doesn't exist
+                    msg = f"Skipping: Attribute '{old_attr}' does not exist."
+                    print(msg)  # Optionally, replace with warnings.warn(msg)
+        except Exception as e:
+            self.close_file()
+            print(
+                f"An unexpected error occurred: {e}. The file object has been properly closed. "
+                "Please ensure that 'obj_name' exists in the file, and that the keys in 'renaming_map' are valid attributes of the object."
+            )
 
         self.close_file()
 
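
rename_metadata() takes a mapping from old attribute names to new ones; each hit is copied with attrs.create() and the old name is deleted, while misses are skipped with a message. A sketch with placeholder names:

    # Hypothetical renaming map: {old_name: new_name}.
    manager.rename_metadata('/instrument/scan_001', {'operator': 'operator_name'})

Note that, per the unchanged trailing line, rename_metadata() still calls self.close_file() on success, so the file must be reopened before further revisions.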
@@ -313,6 +315,7 @@ class HDF5DataOpsManager():
 
         return metadata_dict
 
+
     def reformat_datetime_column(self, dataset_name, column_name, src_format, desired_format='%Y-%m-%d %H:%M:%S.%f'):
         # Access the dataset
         dataset = self.file_obj[dataset_name]
@@ -358,6 +361,24 @@ class HDF5DataOpsManager():
         #return np.array(timestamps)
         return dt_column_data.to_numpy()
 
+    # Define data append operations: append_dataset() and update_file()
+
+    def append_dataset(self, dataset_dict, group_name):
+
+        # Parse value into HDF5 admissible type
+        for key in dataset_dict['attributes'].keys():
+            value = dataset_dict['attributes'][key]
+            if isinstance(value, dict):
+                dataset_dict['attributes'][key] = utils.convert_attrdict_to_np_structured_array(value)
+
+        if not group_name in self.file_obj:
+            self.file_obj.create_group(group_name, track_order=True)
+            self.file_obj[group_name].attrs['creation_date'] = utils.created_at().encode("utf-8")
+
+        self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
+        self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
+        self.file_obj[group_name].attrs['last_update_date'] = utils.created_at().encode("utf-8")
+
     def update_file(self, path_to_append_dir):
         # Split the reference file path and the append directory path into directories and filenames
         ref_tail, ref_head = os.path.split(self.file_path)
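
The relocated append_dataset() consumes a dict with 'name', 'data', and 'attributes' keys, per the method body above. A sketch with invented values (the inner layout of dict-valued attributes is again an assumption):

    import numpy as np

    # Hypothetical payload; the required keys come from the method body above.
    dataset_dict = {
        'name': 'temperature',
        'data': np.arange(10, dtype=np.float32),
        'attributes': {
            'units': 'K',
            'sensor': {'value': 7, 'units': 'id'},  # assumed structured-array layout
        },
    }
    manager.append_dataset(dataset_dict, group_name='measurements')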