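Updated the text-file reader to standardize timestamps to a desired format when possible: if a data source defines a datetime_format, its timestamps are parsed with that format, rows whose timestamps cannot be parsed are dropped, and the remaining timestamps are rewritten in the desired format. The desired format is set in text_data_sources.yaml (desired_format under the default entry).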

This commit is contained in:
2024-06-02 15:59:01 +02:00
parent 69f3857936
commit d335836a7d

View File

@ -135,6 +135,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
separator = config_dict[key].get('separator',separator).replace('\\t','\t')
table_header = config_dict[key].get('table_header',table_header)
timestamp_variables = config_dict[key].get('timestamp',[])
datetime_format = config_dict[key].get('datetime_format',[])
description_dict = {}
link_to_description = config_dict[key].get('link_to_description',[]).replace('/',os.sep)
@ -210,7 +211,28 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
df_categorical_attrs['timestamps'] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)
valid_indices = []
if datetime_format:
df_categorical_attrs['timestamps'] = pd.to_datetime(df_categorical_attrs['timestamps'],format=datetime_format,errors='coerce')
valid_indices = df_categorical_attrs.dropna(subset=['timestamps']).index
df_categorical_attrs = df_categorical_attrs.loc[valid_indices,:]
df_numerical_attrs = df_numerical_attrs.loc[valid_indices,:]
df_categorical_attrs['timestamps'] = df_categorical_attrs['timestamps'].dt.strftime(config_dict['default']['desired_format'])
startdate = df_categorical_attrs['timestamps'].min()
enddate = df_categorical_attrs['timestamps'].max()
df_categorical_attrs['timestamps'] = df_categorical_attrs['timestamps'].astype(str)
#header_dict.update({'startdate':startdate,'enddate':enddate})
header_dict['startdate']= str(startdate)
header_dict['enddate']=str(enddate)
df_categorical_attrs = df_categorical_attrs.drop(columns = timestamp_variables)
#df_categorical_attrs.reindex(drop=True)
#df_numerical_attrs.reindex(drop=True)