Added lines so that the reader works on a copy of each input file, and removed .strip() when building the table preamble because stripping destroyed the structure of the txt file.
This commit is contained in:
@ -115,7 +115,8 @@ def read_txt_files_as_dict(filename : str ):
|
||||
for line_number, line in enumerate(f):
|
||||
list_of_substrings = line.split(separator)
|
||||
if not (line == '\n'):
|
||||
table_preamble += line.strip() #+ "\n"
|
||||
#table_preamble += line.strip() #+ "\n"
|
||||
table_preamble += line
|
||||
if table_header in line:
|
||||
data_start = True
|
||||
column_names = []
|
||||
|
@ -3,6 +3,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
import g5505_utils as utils
|
||||
|
||||
#def read_txt_files_as_dict(filename : str ,instrument_folder : str):
|
||||
def read_txt_files_as_dict(filename : str ):
|
||||
@ -18,17 +19,20 @@ def read_txt_files_as_dict(filename : str ):
|
||||
separator = '\t'
|
||||
else:
|
||||
raise ValueError('intrument_folder must be set as a either "smps" or "gas"')
|
||||
|
||||
tmp_file_path = utils.make_file_copy(filename)
|
||||
|
||||
# Read header as a dictionary and detect where data table starts
|
||||
header_dict = {}
|
||||
data_start = False
|
||||
with open(filename,'r') as f:
|
||||
with open(tmp_file_path,'r') as f:
|
||||
file_encoding = f.encoding
|
||||
table_preamble = ""
|
||||
for line_number, line in enumerate(f):
|
||||
list_of_substrings = line.split(separator)
|
||||
if not (line == '\n'):
|
||||
table_preamble += line.strip() #+ "\n"
|
||||
#table_preamble += line.strip() #+ "\n"
|
||||
table_preamble += line
|
||||
if table_of_header in line:
|
||||
data_start = True
|
||||
column_names = []
|
||||
@ -37,13 +41,13 @@ def read_txt_files_as_dict(filename : str ):
|
||||
|
||||
print(line_number, len(column_names ))
|
||||
break
|
||||
|
||||
|
||||
header_dict["table_preamble"] = table_preamble
|
||||
|
||||
if not data_start:
|
||||
raise ValueError('Invalid table header. The table header was not found and therefore table data cannot be extracted from txt or dat file.')
|
||||
|
||||
df = pd.read_csv(filename,
|
||||
df = pd.read_csv(tmp_file_path,
|
||||
delimiter = separator,
|
||||
header=line_number,
|
||||
#encoding='latin-1',
|
||||
@ -54,10 +58,10 @@ def read_txt_files_as_dict(filename : str ):
|
||||
df_numerical_attrs = df.select_dtypes(include ='number')
|
||||
df_categorical_attrs = df.select_dtypes(exclude='number')
|
||||
|
||||
if 'smps' in filename:
|
||||
if 'smps' in tmp_file_path:
|
||||
df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'1_Date']+' '+df_categorical_attrs.loc[i,'2_Start Time'] for i in df.index]
|
||||
df_categorical_attrs = df_categorical_attrs.drop(columns=['1_Date','2_Start Time'])
|
||||
elif 'gas' in filename:
|
||||
elif 'gas' in tmp_file_path:
|
||||
df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Date_Time' : 'timestamps'})
|
||||
|
||||
#data_column_names = [item.encode("utf-8") for item in df_numerical_attrs.columns]
|
||||
@ -66,7 +70,7 @@ def read_txt_files_as_dict(filename : str ):
|
||||
|
||||
###
|
||||
file_dict = {}
|
||||
path_tail, path_head = os.path.split(filename)
|
||||
path_tail, path_head = os.path.split(tmp_file_path)
|
||||
|
||||
file_dict['name'] = path_head
|
||||
# TODO: review this header dictionary, it may not be the best way to represent header data
|
||||
|
Reference in New Issue
Block a user