diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index 136d6a9..2589448 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -111,13 +111,13 @@ def read_txt_files_as_dict(filename : str ): with open(tmp_filename,'r') as f: file_encoding = f.encoding - table_preamble = "" + #table_preamble = "" + table_preamble = [] for line_number, line in enumerate(f): - list_of_substrings = line.split(separator) - if not (line == '\n'): - #table_preamble += line.strip() #+ "\n" - table_preamble += line - if table_header in line: + + table_preamble += line + if table_header in line: + list_of_substrings = line.split(separator) data_start = True column_names = [] for i, name in enumerate(list_of_substrings): @@ -125,6 +125,14 @@ def read_txt_files_as_dict(filename : str ): print(line_number, len(column_names )) break + + # Subdivide line into words, and join them by a single space. + # I assume this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on. 
+ list_of_substrings = line.split() + # TODO: ideally we should use a multiline string but the YAML parser does not recognize \n as a special character + #line = ' '.join(list_of_substrings+['\n']) + line = ' '.join(list_of_substrings) + table_preamble.append(line)# += new_line header_dict["table_preamble"] = table_preamble diff --git a/src/smog_chamber_file_reader.py b/src/smog_chamber_file_reader.py index 083721c..61f145c 100644 --- a/src/smog_chamber_file_reader.py +++ b/src/smog_chamber_file_reader.py @@ -27,13 +27,12 @@ def read_txt_files_as_dict(filename : str ): data_start = False with open(tmp_file_path,'r') as f: file_encoding = f.encoding - table_preamble = "" - for line_number, line in enumerate(f): - list_of_substrings = line.split(separator) - if not (line == '\n'): - #table_preamble += line.strip() #+ "\n" - table_preamble += line - if table_of_header in line: + #table_preamble = "" + table_preamble = [] + for line_number, line in enumerate(f): + + if table_of_header in line: + list_of_substrings = line.split(separator) data_start = True column_names = [] for i, name in enumerate(list_of_substrings): @@ -41,8 +40,15 @@ def read_txt_files_as_dict(filename : str ): print(line_number, len(column_names )) break + # Subdivide line into words, and join them by a single space. + # I assume this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on. + list_of_substrings = line.split() + # TODO: ideally we should use a multiline string but the YAML parser does not recognize \n as a special character + #line = ' '.join(list_of_substrings+['\n']) + line = ' '.join(list_of_substrings) + table_preamble.append(line)# += new_line - header_dict["table_preamble"] = table_preamble + header_dict["table_preamble"] = table_preamble #.replace('\n','\\n').replace('\t','\\t') if not data_start: raise ValueError('Invalid table header. 
The table header was not found and therefore table data cannot be extracted from txt or dat file.')