Added method to reformat columns containing datetime byte strings into a desired datetime-formatted object

This commit is contained in:
2024-08-14 16:22:28 +02:00
parent 5124df14d8
commit 062a688f47

View File

@ -5,6 +5,7 @@ import os
import src.hdf5_vis as hdf5_vis
import src.g5505_utils as utils
import logging
import datetime
class HDF5DataOpsManager():
def __init__(self, file_path, mode = 'r+') -> None:
@ -14,6 +15,7 @@ class HDF5DataOpsManager():
self.file_path = file_path
self.file_obj = None
self._open_file()
self.list_of_datasets = []
# Define private methods
@ -78,6 +80,38 @@ class HDF5DataOpsManager():
self.file_obj[group_name].create_dataset(dataset_dict['name'], data=dataset_dict['data'])
self.file_obj[group_name][dataset_dict['name']].attrs.update(dataset_dict['attributes'])
def reformat_datetime_column(self, dataset_name, column_name, src_format, desired_format = '%Y-%m-%d %H:%M:%S.%f'):
    """Return a datetime column re-rendered in a standardized text format.

    Reads ``self.file_obj[dataset_name][column_name][:]``, parses every entry
    with ``src_format`` and formats it again with ``desired_format``. The
    stored data is NOT modified; only the converted copy is returned.

    Parameters
    ----------
    dataset_name : str
        Key of the dataset inside ``self.file_obj``.
    column_name : str
        Field of the dataset holding the timestamps
        (presumably a compound-dataset field -- confirm against callers).
    src_format : str
        ``strptime`` format describing how the timestamps are stored,
        e.g. ``'%d/%m/%y %H:%M:%S.%f'``.
    desired_format : str, optional
        ``strftime`` format of the returned timestamps.

    Returns
    -------
    numpy.ndarray
        Array of ``str`` with one reformatted timestamp per input entry.
        An empty column yields an empty string array.

    Raises
    ------
    ValueError
        If an entry does not match ``src_format``.
    """
    dataset = self.file_obj[dataset_name]
    dt_column_data = dataset[column_name][:]

    # Entries normally arrive as byte strings, but may already be text
    # depending on how the column was stored/read; accept both.
    decoded = (
        entry.decode() if isinstance(entry, bytes) else str(entry)
        for entry in dt_column_data
    )
    timestamps = [
        datetime.datetime.strptime(text, src_format).strftime(desired_format)
        for text in decoded
    ]

    # TODO: make the in-place update of dataset[column_name] a secure
    # operation; until then the reformatted values are only returned.

    # Explicit dtype keeps the result a string array even when it is empty
    # (np.array([]) would otherwise default to float64).
    return np.array(timestamps, dtype=str)
def read_dataset_from_hdf5file(hdf5_file_path, dataset_path):