From 553c3fe946694852392b1fa5e261ee04e9fbfe7a Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Tue, 30 Apr 2024 14:50:33 +0200
Subject: [PATCH] Incorporated feature to merge date and time data which may
 originally be in separate columns in text source files. This is specified in
 the text source specification yaml file

---
 src/g5505_file_reader.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py
index c4769b5..d0f7c5b 100644
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@@ -72,6 +72,18 @@ def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_n
     if 'tmp_files' in tmp_file_path:
         os.remove(tmp_file_path)
 
+import re
+
+def infer_units(column_name):
+    """Search column_name for a unit tag, e.g. 'Pressure [mbar]' or 'Flow (l/min)'.
+
+    Returns the re.Match for the first '[...]' span, else for the first '(...)'
+    span, else None. The pattern is greedy, so it runs up to the LAST closing
+    bracket in the name. Callers wanting the text should use match.group(0).
+    """
+    # Raw strings avoid invalid-escape warnings; the subject string was missing.
+    match = re.search(r'\[.+\]', column_name)
+    return match if match else re.search(r'\(.+\)', column_name)
 
 def read_txt_files_as_dict(filename : str ):
 
@@ -91,6 +103,7 @@ def read_txt_files_as_dict(filename : str ):
             file_encoding = config_dict[key].get('file_encoding',file_encoding)
             separator = config_dict[key].get('separator',separator).replace('\\t','\t')
             table_header = config_dict[key].get('table_header',table_header)
+            timestamp_variables = config_dict[key].get('timestamp',[])
             break
     #if 'None' in table_header:
     #    return {}
@@ -140,10 +153,11 @@ def read_txt_files_as_dict(filename : str ):
         df_categorical_attrs = df.select_dtypes(exclude='number')
         numerical_variables = [item for item in df_numerical_attrs.columns]       
 
-        # TODO: 
-        if 'Pressure' in tmp_filename:
-            df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]
-            df_categorical_attrs = df_categorical_attrs.drop(columns=['0_Date','1_Time'])
+        # Consolidate into single timestamp column the separate columns 'date' 'time' specified in text_data_source.yaml
+        if timestamp_variables:
+            df_categorical_attrs['timestamps'] = [' '.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index]
+            #df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]
+            df_categorical_attrs = df_categorical_attrs.drop(columns = timestamp_variables)
 
         categorical_variables = [item for item in df_categorical_attrs.columns]
         ####