Restructured the way the table_preamble attribute is represented. It is now a list of strings rather than a single multiline string containing special characters such as \n. This avoids parsing problems in the YAML files.

This commit is contained in:
2024-03-22 17:26:30 +01:00
parent fff935f551
commit 13cb6395aa
2 changed files with 28 additions and 14 deletions

View File

@ -111,13 +111,13 @@ def read_txt_files_as_dict(filename : str ):
with open(tmp_filename,'r') as f: with open(tmp_filename,'r') as f:
file_encoding = f.encoding file_encoding = f.encoding
table_preamble = "" #table_preamble = ""
table_preamble = []
for line_number, line in enumerate(f): for line_number, line in enumerate(f):
list_of_substrings = line.split(separator)
if not (line == '\n'): table_preamble += line
#table_preamble += line.strip() #+ "\n" if table_header in line:
table_preamble += line list_of_substrings = line.split(separator)
if table_header in line:
data_start = True data_start = True
column_names = [] column_names = []
for i, name in enumerate(list_of_substrings): for i, name in enumerate(list_of_substrings):
@ -125,6 +125,14 @@ def read_txt_files_as_dict(filename : str ):
print(line_number, len(column_names )) print(line_number, len(column_names ))
break break
# Subdivide line into words, and join them by single space.
# I asumme this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on.
list_of_substrings = line.split()
# TODO: ideally we should use a multilinear string but the yalm parser is not recognizing \n as special character
#line = ' '.join(list_of_substrings+['\n'])
line = ' '.join(list_of_substrings)
table_preamble.append(line)# += new_line
header_dict["table_preamble"] = table_preamble header_dict["table_preamble"] = table_preamble

View File

@ -27,13 +27,12 @@ def read_txt_files_as_dict(filename : str ):
data_start = False data_start = False
with open(tmp_file_path,'r') as f: with open(tmp_file_path,'r') as f:
file_encoding = f.encoding file_encoding = f.encoding
table_preamble = "" #table_preamble = ""
for line_number, line in enumerate(f): table_preamble = []
list_of_substrings = line.split(separator) for line_number, line in enumerate(f):
if not (line == '\n'):
#table_preamble += line.strip() #+ "\n" if table_of_header in line:
table_preamble += line list_of_substrings = line.split(separator)
if table_of_header in line:
data_start = True data_start = True
column_names = [] column_names = []
for i, name in enumerate(list_of_substrings): for i, name in enumerate(list_of_substrings):
@ -41,8 +40,15 @@ def read_txt_files_as_dict(filename : str ):
print(line_number, len(column_names )) print(line_number, len(column_names ))
break break
# Subdivide line into words, and join them by single space.
# I asumme this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on.
list_of_substrings = line.split()
# TODO: ideally we should use a multilinear string but the yalm parser is not recognizing \n as special character
#line = ' '.join(list_of_substrings+['\n'])
line = ' '.join(list_of_substrings)
table_preamble.append(line)# += new_line
header_dict["table_preamble"] = table_preamble header_dict["table_preamble"] = table_preamble #.replace('\n','\\n').replace('\t','\\t')
if not data_start: if not data_start:
raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.') raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.')