Fix reader txt/csv default behavior.

This commit is contained in:
2025-02-07 16:25:45 +01:00
parent 0d26777732
commit f986edd4a5
3 changed files with 64 additions and 49 deletions

View File

@ -1,7 +1,7 @@
default:
file_encoding : 'utf-8'
separator : 'None'
table_header : 'None'
separator : ','
table_header : 'infer'
desired_format: '%Y-%m-%d %H:%M:%S.%f'
RGA:

View File

@ -17,8 +17,7 @@ file_readers = {
'txt': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
'TXT': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
'dat': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
#'ACSM_TOFWARE_txt': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
#'ACSM_TOFWARE_csv': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False)
'csv': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False)
}
# Add new instrument reader (Data flagging app data)

View File

@ -37,17 +37,22 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
file_encoding = config_dict['default']['file_encoding'] #'utf-8'
separator = config_dict['default']['separator']
table_header = config_dict['default']['table_header']
for key in config_dict.keys():
if key.replace('/',os.sep) in filename:
file_encoding = config_dict[key].get('file_encoding',file_encoding)
separator = config_dict[key].get('separator',separator)
table_header = config_dict[key].get('table_header',table_header)
timestamp_variables = config_dict[key].get('timestamp',[])
datetime_format = config_dict[key].get('datetime_format',[])
timestamp_variables = []
datetime_format = []
tb_idx = 0
column_names = ''
description_dict = {}
link_to_description = config_dict[key].get('link_to_description', '').replace('/', os.sep)
for instFolder in config_dict.keys():
if instFolder in filename.split(os.sep):
file_encoding = config_dict[instFolder].get('file_encoding',file_encoding)
separator = config_dict[instFolder].get('separator',separator)
table_header = config_dict[instFolder].get('table_header',table_header)
timestamp_variables = config_dict[instFolder].get('timestamp',[])
datetime_format = config_dict[instFolder].get('datetime_format',[])
link_to_description = config_dict[instFolder].get('link_to_description', '').replace('/', os.sep)
if link_to_description:
path = os.path.join(instruments_dir, link_to_description)
@ -75,8 +80,12 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
file_encoding = [file_encoding]
separator = [separator]
with open(tmp_filename,'rb') as f:
table_preamble = []
line_number = 0
if 'infer' not in table_header:
with open(tmp_filename,'rb') as f:
for line_number, line in enumerate(f):
@ -111,6 +120,7 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
# TODO: reading does not work when the separator is None; fix this for RGA.
try:
if not 'infer' in table_header:
df = pd.read_csv(tmp_filename,
delimiter = separator[tb_idx].replace('\\t','\t'),
header=line_number,
@ -118,6 +128,12 @@ def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with
encoding = file_encoding[tb_idx],
names=column_names,
skip_blank_lines=True)
else:
df = pd.read_csv(tmp_filename,
delimiter = separator[tb_idx].replace('\\t','\t'),
header=line_number,
encoding = file_encoding[tb_idx],
skip_blank_lines=True)
df_numerical_attrs = df.select_dtypes(include ='number')
df_categorical_attrs = df.select_dtypes(exclude='number')