From 8004a891aa88ab465fd188ce280abc8e3ca19f67 Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Tue, 19 Mar 2024 14:55:49 +0100
Subject: [PATCH] Included lines to work on copies of files, and removed
 .strip() when building the table preamble because it destroyed the txt structure.

---
 src/g5505_file_reader.py        |  3 ++-
 src/smog_chamber_file_reader.py | 18 +++++++++++-------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py
index bd6574e..58b98d2 100644
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@@ -115,7 +115,8 @@ def read_txt_files_as_dict(filename : str ):
         for line_number, line in enumerate(f):        
             list_of_substrings = line.split(separator)
             if not (line == '\n'):
-                table_preamble += line.strip() #+ "\n"
+                #table_preamble += line.strip() #+ "\n"
+                table_preamble += line
             if table_header in line:                
                 data_start = True  
                 column_names = []
diff --git a/src/smog_chamber_file_reader.py b/src/smog_chamber_file_reader.py
index 9b63996..ef31c92 100644
--- a/src/smog_chamber_file_reader.py
+++ b/src/smog_chamber_file_reader.py
@@ -3,6 +3,7 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import os
+import g5505_utils as utils
 
 #def read_txt_files_as_dict(filename : str ,instrument_folder : str):
 def read_txt_files_as_dict(filename : str ):
@@ -18,17 +19,20 @@ def read_txt_files_as_dict(filename : str ):
         separator = '\t'
     else:
         raise ValueError('intrument_folder must be set as a either "smps" or "gas"')
+    
+    tmp_file_path = utils.make_file_copy(filename)
 
     # Read header as a dictionary and detect where data table starts
     header_dict = {}
     data_start = False    
-    with open(filename,'r') as f:
+    with open(tmp_file_path,'r') as f:
         file_encoding = f.encoding
         table_preamble = ""
         for line_number, line in enumerate(f):        
             list_of_substrings = line.split(separator)
             if not (line == '\n'):
-                table_preamble += line.strip() #+ "\n"
+                #table_preamble += line.strip() #+ "\n"
+                table_preamble += line                
             if table_of_header in line:                
                 data_start = True  
                 column_names = []
@@ -37,13 +41,13 @@ def read_txt_files_as_dict(filename : str ):
 
                 print(line_number, len(column_names ))
                 break
-            
+
         header_dict["table_preamble"] = table_preamble
 
     if not data_start:
         raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.')
     
-    df = pd.read_csv(filename, 
+    df = pd.read_csv(tmp_file_path, 
                      delimiter = separator, 
                      header=line_number, 
                      #encoding='latin-1',
@@ -54,10 +58,10 @@ def read_txt_files_as_dict(filename : str ):
     df_numerical_attrs = df.select_dtypes(include ='number')
     df_categorical_attrs = df.select_dtypes(exclude='number')
 
-    if 'smps' in filename:
+    if 'smps' in tmp_file_path:
         df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'1_Date']+' '+df_categorical_attrs.loc[i,'2_Start Time'] for i in df.index]
         df_categorical_attrs = df_categorical_attrs.drop(columns=['1_Date','2_Start Time'])
-    elif 'gas' in filename:
+    elif 'gas' in tmp_file_path:
         df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Date_Time' : 'timestamps'})
 
     #data_column_names = [item.encode("utf-8") for item in df_numerical_attrs.columns]
@@ -66,7 +70,7 @@ def read_txt_files_as_dict(filename : str ):
 
     ###
     file_dict = {}
-    path_tail, path_head = os.path.split(filename)
+    path_tail, path_head = os.path.split(tmp_file_path)
 
     file_dict['name'] = path_head
     # TODO: review this header dictionary, it may not be the best way to represent header data