Add property to extracted dataset as DataFrame. The time column is now of datetime type to facilitate downstream processing.
This commit is contained in:
@ -116,7 +116,7 @@ class HDF5DataOpsManager():
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -128,6 +128,8 @@ class HDF5DataOpsManager():
|
|||||||
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to extract datasets.")
|
raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to extract datasets.")
|
||||||
|
|
||||||
dataset_obj = self.file_obj[dataset_name]
|
dataset_obj = self.file_obj[dataset_name]
|
||||||
|
|
||||||
|
datetime_var, datetime_format = self.infer_datetime_variable(dataset_name)
|
||||||
# Read dataset content from dataset obj
|
# Read dataset content from dataset obj
|
||||||
data = dataset_obj[...]
|
data = dataset_obj[...]
|
||||||
# The above statement can be understood as follows:
|
# The above statement can be understood as follows:
|
||||||
@ -136,6 +138,12 @@ class HDF5DataOpsManager():
|
|||||||
# dataset_obj.read_direct(data)
|
# dataset_obj.read_direct(data)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
data = pd.DataFrame(data)
|
||||||
|
|
||||||
|
if datetime_var in data.columns:
|
||||||
|
data[datetime_var] = data[datetime_var].apply(lambda x : x.decode())
|
||||||
|
data[datetime_var] = pd.to_datetime(data[datetime_var],format=datetime_format,errors='coerce')
|
||||||
|
|
||||||
return pd.DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {e}. Instead, dataset will be returned as Numpy array.")
|
logging.error(f"Failed to convert dataset '{dataset_name}' to DataFrame: {e}. Instead, dataset will be returned as Numpy array.")
|
||||||
|
Reference in New Issue
Block a user