From 5d0ab4603f1b1758d48a00b8be55bb2cc950d222 Mon Sep 17 00:00:00 2001 From: Juan Felipe Florez Ospina Date: Tue, 4 Feb 2025 17:23:32 +0100 Subject: [PATCH] Add property to extracted dataset as dataframe. Now time column is of datetime type to facilitate downstream processing. --- src/hdf5_ops.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/hdf5_ops.py b/src/hdf5_ops.py index 5928681..ba71f73 100644 --- a/src/hdf5_ops.py +++ b/src/hdf5_ops.py @@ -116,7 +116,7 @@ class HDF5DataOpsManager(): - return None + return None, None @@ -128,6 +128,8 @@ class HDF5DataOpsManager(): raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to extract datasets.") dataset_obj = self.file_obj[dataset_name] + + datetime_var, datetime_format = self.infer_datetime_variable(dataset_name) # Read dataset content from dataset obj data = dataset_obj[...] # The above statement can be understood as follows: @@ -136,6 +138,12 @@ class HDF5DataOpsManager(): # dataset_obj.read_direct(data) try: + data = pd.DataFrame(data) + + if datetime_var in data.columns: + data[datetime_var] = data[datetime_var].apply(lambda x : x.decode()) + data[datetime_var] = pd.to_datetime(data[datetime_var],format=datetime_format,errors='coerce') + return pd.DataFrame(data) except ValueError as e: logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {e}. Instead, dataset will be returned as Numpy array.")