Incorporated a feature to merge date and time data, which may originally be in separate columns in text source files. The columns to merge are specified in the text source specification YAML file.
This commit is contained in:
@ -72,6 +72,18 @@ def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_n
|
||||
if 'tmp_files' in tmp_file_path:
|
||||
os.remove(tmp_file_path)
|
||||
|
||||
import re
|
||||
|
||||
def infer_units(column_name):
    """Look for a unit annotation embedded in a column name.

    The name is searched first for a square-bracketed span (e.g. ``[mbar]``)
    and, if none is found, for a parenthesised span (e.g. ``(s)``).

    Parameters
    ----------
    column_name : str
        Column header text to inspect.

    Returns
    -------
    re.Match or None
        Match object for the first bracketed (or, failing that,
        parenthesised) span, delimiters included; ``None`` when the name
        contains neither.
    """
    # re.search needs the string to scan as its second argument; raw strings
    # keep the escaped delimiters from being read as string escapes.
    match = re.search(r'\[.+\]', column_name)
    if match:
        return match
    # No square brackets present: fall back to parentheses.
    return re.search(r'\(.+\)', column_name)
|
||||
|
||||
def read_txt_files_as_dict(filename : str ):
|
||||
|
||||
@ -91,6 +103,7 @@ def read_txt_files_as_dict(filename : str ):
|
||||
file_encoding = config_dict[key].get('file_encoding',file_encoding)
|
||||
separator = config_dict[key].get('separator',separator).replace('\\t','\t')
|
||||
table_header = config_dict[key].get('table_header',table_header)
|
||||
timestamp_variables = config_dict[key].get('timestamp',[])
|
||||
break
|
||||
#if 'None' in table_header:
|
||||
# return {}
|
||||
@ -140,10 +153,11 @@ def read_txt_files_as_dict(filename : str ):
|
||||
df_categorical_attrs = df.select_dtypes(exclude='number')
|
||||
numerical_variables = [item for item in df_numerical_attrs.columns]
|
||||
|
||||
# TODO:
|
||||
if 'Pressure' in tmp_filename:
|
||||
df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]
|
||||
df_categorical_attrs = df_categorical_attrs.drop(columns=['0_Date','1_Time'])
|
||||
# Consolidate into single timestamp column the separate columns 'date' 'time' specified in text_data_source.yaml
|
||||
if timestamp_variables:
|
||||
df_categorical_attrs['timestamps'] = [' '.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index]
|
||||
#df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]
|
||||
df_categorical_attrs = df_categorical_attrs.drop(columns = timestamp_variables)
|
||||
|
||||
categorical_variables = [item for item in df_categorical_attrs.columns]
|
||||
####
|
||||
|
Reference in New Issue
Block a user