From bbff41931325491cab1ff1046313f2ba0afc0568 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Thu, 28 Mar 2024 17:43:26 +0100 Subject: [PATCH] Removed strange bug when reading .TXT smps files. Specified latin-1 encoding and relaxed error detection to ignore. --- src/smog_chamber_file_reader.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/smog_chamber_file_reader.py b/src/smog_chamber_file_reader.py index 5bdb8ec..0f40788 100644 --- a/src/smog_chamber_file_reader.py +++ b/src/smog_chamber_file_reader.py @@ -18,9 +18,11 @@ def read_txt_files_as_dict(filename : str ): if 'smps' in filename: table_of_header = 'Sample # Date Start Time Sample Temp (C) Sample Pressure (kPa)' separator = '\t' + file_encoding = 'latin-1' elif 'gas' in filename: table_of_header = 'Date_Time HoribaNO HoribaNOy Thermo42C_NO Thermo42C_NOx APHA370 CH4' separator = '\t' + file_encoding = 'utf-8' else: raise ValueError('intrument_folder must be set as a either "smps" or "gas"') @@ -29,12 +31,13 @@ def read_txt_files_as_dict(filename : str ): # Read header as a dictionary and detect where data table starts header_dict = {} data_start = False - with open(tmp_file_path,'r') as f: - file_encoding = f.encoding + with open(tmp_file_path,'r', encoding=file_encoding, errors='ignore') as f: + #file_encoding = f.encoding #table_preamble = "" table_preamble = [] - for line_number, line in enumerate(f): - + line_number = 0 + for line_number, line in enumerate(f): + #print(line_number,line) if table_of_header in line: list_of_substrings = line.split(separator) data_start = True @@ -42,7 +45,7 @@ def read_txt_files_as_dict(filename : str ): for i, name in enumerate(list_of_substrings): column_names.append(str(i)+'_'+name) - print(line_number, len(column_names )) + #print(line_number, len(column_names )) break # Subdivide line into words, and join them by single space. 
# I assume this produces a cleaner line that contains no stray separator characters (\t, \r), extra spaces, and so on.