diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index d0f7c5b..cc0cdb4 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -5,6 +5,7 @@ import pandas as pd from igor2.binarywave import load as loadibw import src.g5505_utils as utils +from src.metadata_review_lib import parse_attribute import yaml import h5py @@ -104,6 +105,14 @@ def read_txt_files_as_dict(filename : str ): separator = config_dict[key].get('separator',separator).replace('\\t','\t') table_header = config_dict[key].get('table_header',table_header) timestamp_variables = config_dict[key].get('timestamp',[]) + + description_dict = {} + link_to_description = config_dict[key].get('link_to_description',[]).replace('/',os.sep) + with open(link_to_description,'r') as stream: + try: + description_dict = yaml.load(stream, Loader=yaml.FullLoader) + except yaml.YAMLError as exc: + print(exc) break #if 'None' in table_header: # return {} @@ -184,6 +193,16 @@ def read_txt_files_as_dict(filename : str ): file_dict['datasets'].append(dataset) rows,cols = dataset['shape'] + try: + dataset['attributes'] = description_dict['table_header'].copy() + for key in description_dict['table_header'].keys(): + if not key in numerical_variables: + dataset['attributes'].pop(key) # delete key + else: + dataset['attributes'][key] = parse_attribute(dataset['attributes'][key]) + except ValueError as err: + print(err) + dataset = {} numerical_variables= [item.encode("utf-8") for item in numerical_variables] dataset['name'] = 'numerical_variable_names' @@ -192,15 +211,6 @@ def read_txt_files_as_dict(filename : str ): dataset['dtype'] = type(dataset['data']) file_dict['datasets'].append(dataset) - #if 'timestamps' in categorical_variables: - # dataset = {} - # dataset['name'] = 'timestamps' - # dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1)) - # dataset['shape'] = dataset['data'].shape - # dataset['dtype'] = type(dataset['data']) - # file_dict['datasets'].append(dataset) - # categorical_variables.remove('timestamps') - if categorical_variables: dataset = {} dataset['name'] = 'categorical_variables'