Fixed a strange bug when reading .TXT smps files: the file is now opened with an explicit latin-1 encoding, and decoding errors are ignored (errors='ignore') instead of raising.
This commit is contained in:
@ -18,9 +18,11 @@ def read_txt_files_as_dict(filename : str ):
|
|||||||
if 'smps' in filename:
|
if 'smps' in filename:
|
||||||
table_of_header = 'Sample # Date Start Time Sample Temp (C) Sample Pressure (kPa)'
|
table_of_header = 'Sample # Date Start Time Sample Temp (C) Sample Pressure (kPa)'
|
||||||
separator = '\t'
|
separator = '\t'
|
||||||
|
file_encoding = 'latin-1'
|
||||||
elif 'gas' in filename:
|
elif 'gas' in filename:
|
||||||
table_of_header = 'Date_Time HoribaNO HoribaNOy Thermo42C_NO Thermo42C_NOx APHA370 CH4'
|
table_of_header = 'Date_Time HoribaNO HoribaNOy Thermo42C_NO Thermo42C_NOx APHA370 CH4'
|
||||||
separator = '\t'
|
separator = '\t'
|
||||||
|
file_encoding = 'utf-8'
|
||||||
else:
|
else:
|
||||||
raise ValueError('intrument_folder must be set as a either "smps" or "gas"')
|
raise ValueError('intrument_folder must be set as a either "smps" or "gas"')
|
||||||
|
|
||||||
@ -29,12 +31,13 @@ def read_txt_files_as_dict(filename : str ):
|
|||||||
# Read header as a dictionary and detect where data table starts
|
# Read header as a dictionary and detect where data table starts
|
||||||
header_dict = {}
|
header_dict = {}
|
||||||
data_start = False
|
data_start = False
|
||||||
with open(tmp_file_path,'r') as f:
|
with open(tmp_file_path,'r', encoding=file_encoding, errors='ignore') as f:
|
||||||
file_encoding = f.encoding
|
#file_encoding = f.encoding
|
||||||
#table_preamble = ""
|
#table_preamble = ""
|
||||||
table_preamble = []
|
table_preamble = []
|
||||||
|
line_number = 0
|
||||||
for line_number, line in enumerate(f):
|
for line_number, line in enumerate(f):
|
||||||
|
#print(line_number,line)
|
||||||
if table_of_header in line:
|
if table_of_header in line:
|
||||||
list_of_substrings = line.split(separator)
|
list_of_substrings = line.split(separator)
|
||||||
data_start = True
|
data_start = True
|
||||||
@ -42,7 +45,7 @@ def read_txt_files_as_dict(filename : str ):
|
|||||||
for i, name in enumerate(list_of_substrings):
|
for i, name in enumerate(list_of_substrings):
|
||||||
column_names.append(str(i)+'_'+name)
|
column_names.append(str(i)+'_'+name)
|
||||||
|
|
||||||
print(line_number, len(column_names ))
|
#print(line_number, len(column_names ))
|
||||||
break
|
break
|
||||||
# Subdivide line into words, and join them by single space.
|
# Subdivide line into words, and join them by single space.
|
||||||
# I assume this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on.
|
# I assume this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on.
|
||||||
|
Reference in New Issue
Block a user