From 553c3fe946694852392b1fa5e261ee04e9fbfe7a Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Tue, 30 Apr 2024 14:50:33 +0200 Subject: [PATCH] Incorporated feature to merge date and time data which may originally be in separate columns in text source files. This is specified in the text source specification yaml file --- src/g5505_file_reader.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index c4769b5..d0f7c5b 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -72,6 +72,18 @@ def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_n if 'tmp_files' in tmp_file_path: os.remove(tmp_file_path) +import re + +def infer_units(column_name): + + match = re.search('\[.+\]') + + if match: + return match + else: + match = re.search('\(.+\)') + + return match def read_txt_files_as_dict(filename : str ): @@ -91,6 +103,7 @@ def read_txt_files_as_dict(filename : str ): file_encoding = config_dict[key].get('file_encoding',file_encoding) separator = config_dict[key].get('separator',separator).replace('\\t','\t') table_header = config_dict[key].get('table_header',table_header) + timestamp_variables = config_dict[key].get('timestamp',[]) break #if 'None' in table_header: # return {} @@ -140,10 +153,11 @@ def read_txt_files_as_dict(filename : str ): df_categorical_attrs = df.select_dtypes(exclude='number') numerical_variables = [item for item in df_numerical_attrs.columns] - # TODO: - if 'Pressure' in tmp_filename: - df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index] - df_categorical_attrs = df_categorical_attrs.drop(columns=['0_Date','1_Time']) + # Consolidate into single timestamp column the separate columns 'date' 'time' specified in text_data_source.yaml + if timestamp_variables: + df_categorical_attrs['timestamps'] = [' 
'.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index] + #df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index] + df_categorical_attrs = df_categorical_attrs.drop(columns = timestamp_variables) categorical_variables = [item for item in df_categorical_attrs.columns] ####