From 8004a891aa88ab465fd188ce280abc8e3ca19f67 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Tue, 19 Mar 2024 14:55:49 +0100 Subject: [PATCH] Included lines to work on copies of files, and removed .strip() to create the table preamble because it destroyed txt structure. --- src/g5505_file_reader.py | 3 ++- src/smog_chamber_file_reader.py | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index bd6574e..58b98d2 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -115,7 +115,8 @@ def read_txt_files_as_dict(filename : str ): for line_number, line in enumerate(f): list_of_substrings = line.split(separator) if not (line == '\n'): - table_preamble += line.strip() #+ "\n" + #table_preamble += line.strip() #+ "\n" + table_preamble += line if table_header in line: data_start = True column_names = [] diff --git a/src/smog_chamber_file_reader.py b/src/smog_chamber_file_reader.py index 9b63996..ef31c92 100644 --- a/src/smog_chamber_file_reader.py +++ b/src/smog_chamber_file_reader.py @@ -3,6 +3,7 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt import os +import g5505_utils as utils #def read_txt_files_as_dict(filename : str ,instrument_folder : str): def read_txt_files_as_dict(filename : str ): @@ -18,17 +19,20 @@ def read_txt_files_as_dict(filename : str ): separator = '\t' else: raise ValueError('intrument_folder must be set as a either "smps" or "gas"') + + tmp_file_path = utils.make_file_copy(filename) # Read header as a dictionary and detect where data table starts header_dict = {} data_start = False - with open(filename,'r') as f: + with open(tmp_file_path,'r') as f: file_encoding = f.encoding table_preamble = "" for line_number, line in enumerate(f): list_of_substrings = line.split(separator) if not (line == '\n'): - table_preamble += line.strip() #+ "\n" + #table_preamble += line.strip() #+ "\n" + table_preamble += line if table_of_header in line: data_start = True column_names = [] @@ -37,13 +41,13 @@ def read_txt_files_as_dict(filename : str ): print(line_number, len(column_names )) break - + header_dict["table_preamble"] = table_preamble if not data_start: raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.') - df = pd.read_csv(filename, + df = pd.read_csv(tmp_file_path, delimiter = separator, header=line_number, #encoding='latin-1', @@ -54,10 +58,10 @@ def read_txt_files_as_dict(filename : str ): df_numerical_attrs = df.select_dtypes(include ='number') df_categorical_attrs = df.select_dtypes(exclude='number') - if 'smps' in filename: + if 'smps' in tmp_file_path: df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'1_Date']+' '+df_categorical_attrs.loc[i,'2_Start Time'] for i in df.index] df_categorical_attrs = df_categorical_attrs.drop(columns=['1_Date','2_Start Time']) - elif 'gas' in filename: + elif 'gas' in tmp_file_path: df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Date_Time' : 'timestamps'}) #data_column_names = [item.encode("utf-8") for item in df_numerical_attrs.columns] @@ -66,7 +70,7 @@ def read_txt_files_as_dict(filename : str ): ### file_dict = {} - path_tail, path_head = os.path.split(filename) + path_tail, path_head = os.path.split(tmp_file_path) file_dict['name'] = path_head # TODO: review this header dictionary, it may not be the best way to represent header data