Incorporated a feature to merge date and time data, which may originally be in separate columns in text source files, into a single timestamp column. The columns to merge are specified in the text source specification YAML file.

This commit is contained in:
2024-04-30 14:50:33 +02:00
parent f3c2777bb0
commit 553c3fe946

View File

@ -72,6 +72,18 @@ def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_n
if 'tmp_files' in tmp_file_path:
os.remove(tmp_file_path)
import re
def infer_units(column_name):
    """Search a column name for a unit annotation in brackets or parentheses.

    A square-bracketed span (e.g. ``[mbar]``) is looked for first; only if
    none is found is a parenthesised span (e.g. ``(s)``) tried.

    Parameters
    ----------
    column_name : str
        Column header text to inspect.

    Returns
    -------
    re.Match or None
        Match object covering the delimiters and their content, or ``None``
        when the name holds neither a bracketed nor a parenthesised span.
        Use ``.group()`` on the result to obtain the matched text.
    """
    # Raw strings keep the backslashes as regex escapes instead of (invalid)
    # Python string escapes. The quantifier is greedy, so with several
    # bracketed spans the match runs from the first opener to the last closer.
    match = re.search(r'\[.+\]', column_name)
    if match:
        return match
    # Fall back to parentheses; re.search yields None when nothing matches.
    return re.search(r'\(.+\)', column_name)
def read_txt_files_as_dict(filename : str ):
@ -91,6 +103,7 @@ def read_txt_files_as_dict(filename : str ):
file_encoding = config_dict[key].get('file_encoding',file_encoding)
separator = config_dict[key].get('separator',separator).replace('\\t','\t')
table_header = config_dict[key].get('table_header',table_header)
timestamp_variables = config_dict[key].get('timestamp',[])
break
#if 'None' in table_header:
# return {}
@ -140,10 +153,11 @@ def read_txt_files_as_dict(filename : str ):
df_categorical_attrs = df.select_dtypes(exclude='number')
numerical_variables = [item for item in df_numerical_attrs.columns]
# TODO:
if 'Pressure' in tmp_filename:
df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]
df_categorical_attrs = df_categorical_attrs.drop(columns=['0_Date','1_Time'])
# Consolidate into single timestamp column the separate columns 'date' 'time' specified in text_data_source.yaml
if timestamp_variables:
df_categorical_attrs['timestamps'] = [' '.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index]
#df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]
df_categorical_attrs = df_categorical_attrs.drop(columns = timestamp_variables)
categorical_variables = [item for item in df_categorical_attrs.columns]
####