Restructured how the table_preamble attribute is represented. It is now a list of strings rather than a single multiline string containing special characters such as \n. This avoids parsing problems in the YAML files.

2024-03-22 17:26:30 +01:00
parent fff935f551
commit 13cb6395aa
2 changed files with 28 additions and 14 deletions
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@ -111,13 +111,13 @@ def read_txt_files_as_dict(filename : str ):

    with open(tmp_filename,'r') as f:
        file_encoding = f.encoding
-        table_preamble = ""
+        #table_preamble = ""
+        table_preamble = []
        for line_number, line in enumerate(f):        
-            list_of_substrings = line.split(separator)
-            if not (line == '\n'):
-                #table_preamble += line.strip() #+ "\n"
-                table_preamble += line
-            if table_header in line:                
+            
+            table_preamble += line
+            if table_header in line:   
+                list_of_substrings = line.split(separator)             
                data_start = True  
                column_names = []
                for i, name in enumerate(list_of_substrings):
@ -125,6 +125,14 @@ def read_txt_files_as_dict(filename : str ):

                print(line_number, len(column_names ))
                break
+            
+            # Subdivide line into words, and join them by single space. 
+            # I assume this produces a cleaner line that contains no stray separator characters (\t, \r) or extra spaces.
+            list_of_substrings = line.split()
+            # TODO: ideally we should use a multiline string, but the YAML parser does not recognize \n as a special character
+            #line = ' '.join(list_of_substrings+['\n'])
+            line = ' '.join(list_of_substrings)     
+            table_preamble.append(line)# += new_line  

        header_dict["table_preamble"] = table_preamble

--- a/src/smog_chamber_file_reader.py
+++ b/src/smog_chamber_file_reader.py
@ -27,13 +27,12 @@ def read_txt_files_as_dict(filename : str ):
    data_start = False    
    with open(tmp_file_path,'r') as f:
        file_encoding = f.encoding
-        table_preamble = ""
-        for line_number, line in enumerate(f):        
-            list_of_substrings = line.split(separator)
-            if not (line == '\n'):
-                #table_preamble += line.strip() #+ "\n"
-                table_preamble += line                
-            if table_of_header in line:                
+        #table_preamble = ""
+        table_preamble = []
+        for line_number, line in enumerate(f):   
+            
+            if table_of_header in line:  
+                list_of_substrings = line.split(separator)              
                data_start = True  
                column_names = []
                for i, name in enumerate(list_of_substrings):
@ -41,8 +40,15 @@ def read_txt_files_as_dict(filename : str ):

                print(line_number, len(column_names ))
                break
+            # Subdivide line into words, and join them by single space. 
+            # I assume this produces a cleaner line that contains no stray separator characters (\t, \r) or extra spaces.
+            list_of_substrings = line.split()
+            # TODO: ideally we should use a multiline string, but the YAML parser does not recognize \n as a special character
+            #line = ' '.join(list_of_substrings+['\n'])
+            line = ' '.join(list_of_substrings)     
+            table_preamble.append(line)# += new_line     

-        header_dict["table_preamble"] = table_preamble
+        header_dict["table_preamble"] = table_preamble #.replace('\n','\\n').replace('\t','\\t')

    if not data_start:
        raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.')