def reformat_datetime_column(self, dataset_name, column_name, src_format, desired_format='%Y-%m-%d %H:%M:%S.%f'):
    """Return a dataset column's timestamps re-rendered in ``desired_format``.

    The column is read from ``self.file_obj[dataset_name][column_name]``,
    each entry is parsed with ``src_format`` and formatted again with
    ``desired_format``. The stored dataset is NOT modified; only the
    converted values are returned.

    Parameters
    ----------
    dataset_name : str
        Key of the dataset inside ``self.file_obj``.
    column_name : str
        Name of the field/column holding the timestamps.
    src_format : str
        ``strptime`` pattern describing how the stored timestamps are written,
        e.g. ``'%d/%m/%y %H:%M:%S.%f'``.
    desired_format : str, optional
        ``strftime`` pattern for the output strings.

    Returns
    -------
    numpy.ndarray
        1-D array of unicode strings, one per input entry (empty, but still
        string-typed, when the column has no entries).

    Raises
    ------
    ValueError
        If an entry does not match ``src_format`` (raised by ``strptime``).
    """
    dataset = self.file_obj[dataset_name]
    dt_column_data = dataset[column_name][:]

    def _as_text(value):
        # Entries are expected to be byte strings, but tolerate values that
        # are already text so the method does not fail on str-typed columns.
        return value.decode() if isinstance(value, bytes) else str(value)

    timestamps = [
        datetime.datetime.strptime(_as_text(entry), src_format).strftime(desired_format)
        for entry in dt_column_data
    ]

    # TODO: optionally write the reformatted values back into the dataset
    # (in-place update) once this can be done safely.

    # An explicit string dtype keeps the result type stable for empty columns
    # (np.array([]) would otherwise default to float64).
    return np.array(timestamps, dtype=str)