diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index 97abb1a..f069e63 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -93,10 +93,12 @@ def read_txt_files_as_dict(filename : str ): file_encoding = 'latin-1' elif 'ICAD' in filename and 'HONO' in filename: table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource' - separator = '\t' + separator = '\t' + file_encoding = 'latin-1' elif 'ICAD' in filename and 'NO2' in filename: table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource' separator = '\t' + file_encoding = 'latin-1' else: return {} #raise ValueError('intrument_folder must be set as a either "RGA" or "Pressure"') @@ -107,33 +109,32 @@ def read_txt_files_as_dict(filename : str ): # Work with copy of the file for safety tmp_filename = utils.make_file_copy(source_file_path=filename) - with open(tmp_filename,'r',encoding=file_encoding,errors='ignore') as f: - #file_encoding = f.encoding - #table_preamble = "" + #with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f: + with open(tmp_filename,'rb') as f: table_preamble = [] for line_number, line in enumerate(f): - if table_header in line: - list_of_substrings = line.split(separator) + if table_header in line.decode(file_encoding): + list_of_substrings = line.decode(file_encoding).split(separator) data_start = True column_names = [] for i, name in enumerate(list_of_substrings): column_names.append(str(i)+'_'+name) - print(line_number, len(column_names )) + #print(line_number, len(column_names ),'\n') break # Subdivide line into words, and join them by single space. # I asumme this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on. - list_of_substrings = line.split() + list_of_substrings = line.decode(file_encoding).split() # TODO: ideally we should use a multilinear string but the yalm parser is not recognizing \n as special character #line = ' '.join(list_of_substrings+['\n']) - line = ' '.join(list_of_substrings) - table_preamble.append(line)# += new_line + #line = ' '.join(list_of_substrings) + table_preamble.append(' '.join(list_of_substrings))# += new_line header_dict["table_preamble"] = table_preamble - # TODO: it does not work with separater as none :(. fix for RGA + # TODO: it does not work with separator as none :(. fix for RGA try: df = pd.read_csv(tmp_filename, delimiter = separator,