Added lines so that the reader works on copies of the files, and removed the .strip() call used when building the table preamble because it destroyed the structure of the txt file.

This commit is contained in:
2024-03-19 14:55:49 +01:00
parent 63e7fb28d0
commit 8004a891aa
2 changed files with 13 additions and 8 deletions

View File

@ -115,7 +115,8 @@ def read_txt_files_as_dict(filename : str ):
for line_number, line in enumerate(f): for line_number, line in enumerate(f):
list_of_substrings = line.split(separator) list_of_substrings = line.split(separator)
if not (line == '\n'): if not (line == '\n'):
table_preamble += line.strip() #+ "\n" #table_preamble += line.strip() #+ "\n"
table_preamble += line
if table_header in line: if table_header in line:
data_start = True data_start = True
column_names = [] column_names = []

View File

@ -3,6 +3,7 @@ import pandas as pd
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import os import os
import g5505_utils as utils
#def read_txt_files_as_dict(filename : str ,instrument_folder : str): #def read_txt_files_as_dict(filename : str ,instrument_folder : str):
def read_txt_files_as_dict(filename : str ): def read_txt_files_as_dict(filename : str ):
@ -19,16 +20,19 @@ def read_txt_files_as_dict(filename : str ):
else: else:
raise ValueError('intrument_folder must be set as a either "smps" or "gas"') raise ValueError('intrument_folder must be set as a either "smps" or "gas"')
tmp_file_path = utils.make_file_copy(filename)
# Read header as a dictionary and detect where data table starts # Read header as a dictionary and detect where data table starts
header_dict = {} header_dict = {}
data_start = False data_start = False
with open(filename,'r') as f: with open(tmp_file_path,'r') as f:
file_encoding = f.encoding file_encoding = f.encoding
table_preamble = "" table_preamble = ""
for line_number, line in enumerate(f): for line_number, line in enumerate(f):
list_of_substrings = line.split(separator) list_of_substrings = line.split(separator)
if not (line == '\n'): if not (line == '\n'):
table_preamble += line.strip() #+ "\n" #table_preamble += line.strip() #+ "\n"
table_preamble += line
if table_of_header in line: if table_of_header in line:
data_start = True data_start = True
column_names = [] column_names = []
@ -43,7 +47,7 @@ def read_txt_files_as_dict(filename : str ):
if not data_start: if not data_start:
raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.') raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.')
df = pd.read_csv(filename, df = pd.read_csv(tmp_file_path,
delimiter = separator, delimiter = separator,
header=line_number, header=line_number,
#encoding='latin-1', #encoding='latin-1',
@ -54,10 +58,10 @@ def read_txt_files_as_dict(filename : str ):
df_numerical_attrs = df.select_dtypes(include ='number') df_numerical_attrs = df.select_dtypes(include ='number')
df_categorical_attrs = df.select_dtypes(exclude='number') df_categorical_attrs = df.select_dtypes(exclude='number')
if 'smps' in filename: if 'smps' in tmp_file_path:
df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'1_Date']+' '+df_categorical_attrs.loc[i,'2_Start Time'] for i in df.index] df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'1_Date']+' '+df_categorical_attrs.loc[i,'2_Start Time'] for i in df.index]
df_categorical_attrs = df_categorical_attrs.drop(columns=['1_Date','2_Start Time']) df_categorical_attrs = df_categorical_attrs.drop(columns=['1_Date','2_Start Time'])
elif 'gas' in filename: elif 'gas' in tmp_file_path:
df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Date_Time' : 'timestamps'}) df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Date_Time' : 'timestamps'})
#data_column_names = [item.encode("utf-8") for item in df_numerical_attrs.columns] #data_column_names = [item.encode("utf-8") for item in df_numerical_attrs.columns]
@ -66,7 +70,7 @@ def read_txt_files_as_dict(filename : str ):
### ###
file_dict = {} file_dict = {}
path_tail, path_head = os.path.split(filename) path_tail, path_head = os.path.split(tmp_file_path)
file_dict['name'] = path_head file_dict['name'] = path_head
# TODO: review this header dictionary, it may not be the best way to represent header data # TODO: review this header dictionary, it may not be the best way to represent header data