Added a feature that interprets links to description files in the YAML instrument configuration file and adds their contents at the dataset level as attributes.
@@ -5,6 +5,7 @@ import pandas as pd
 from igor2.binarywave import load as loadibw
 
 import src.g5505_utils as utils
+from src.metadata_review_lib import parse_attribute
 
 import yaml
 import h5py
@@ -104,6 +105,14 @@ def read_txt_files_as_dict(filename : str ):
             separator = config_dict[key].get('separator',separator).replace('\\t','\t')
             table_header = config_dict[key].get('table_header',table_header)
             timestamp_variables = config_dict[key].get('timestamp',[])
+
+            description_dict = {}
+            link_to_description = config_dict[key].get('link_to_description',[]).replace('/',os.sep)
+            with open(link_to_description,'r') as stream:
+                try:
+                    description_dict = yaml.load(stream, Loader=yaml.FullLoader)
+                except yaml.YAMLError as exc:
+                    print(exc)
             break
     #if 'None' in table_header:
     # return {}
@@ -184,6 +193,16 @@ def read_txt_files_as_dict(filename : str ):
         file_dict['datasets'].append(dataset)
         rows,cols = dataset['shape']
+
+        try:
+            dataset['attributes'] = description_dict['table_header'].copy()
+            for key in description_dict['table_header'].keys():
+                if not key in numerical_variables:
+                    dataset['attributes'].pop(key) # delete key
+                else:
+                    dataset['attributes'][key] = parse_attribute(dataset['attributes'][key])
+        except ValueError as err:
+            print(err)
 
         dataset = {}
         numerical_variables= [item.encode("utf-8") for item in numerical_variables]
         dataset['name'] = 'numerical_variable_names'
@@ -192,15 +211,6 @@ def read_txt_files_as_dict(filename : str ):
         dataset['dtype'] = type(dataset['data'])
         file_dict['datasets'].append(dataset)
 
-        #if 'timestamps' in categorical_variables:
-        # dataset = {}
-        # dataset['name'] = 'timestamps'
-        # dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
-        # dataset['shape'] = dataset['data'].shape
-        # dataset['dtype'] = type(dataset['data'])
-        # file_dict['datasets'].append(dataset)
-        # categorical_variables.remove('timestamps')
-
         if categorical_variables:
             dataset = {}
             dataset['name'] = 'categorical_variables'
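
For context, a minimal sketch of the configuration pattern the added code expects. Only the 'link_to_description' and 'table_header' keys (plus the existing 'separator' key) are taken from the diff above; the instrument name, file paths, variable names, and the per-variable value layout are illustrative assumptions and must match whatever parse_attribute accepts.

# Hypothetical entry in the YAML instrument configuration file; forward
# slashes in the linked path are converted to os.sep before the file is opened.
some_instrument:
  separator: ','
  link_to_description: 'instruments/descriptions/some_instrument.yaml'

# Hypothetical content of the linked description file; the diff only requires
# a 'table_header' mapping whose keys are variable names. Entries whose keys
# are not in numerical_variables are dropped, the rest are passed through
# parse_attribute and attached to the dataset as attributes.
table_header:
  temperature:
    description: 'sample temperature'
    units: 'K'
  pressure:
    description: 'chamber pressure'
    units: 'mbar'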