Added a feature to interpret links to descriptions in the YAML instrument configuration file and add them at the dataset level as attributes.

2024-05-09 19:17:08 +02:00
parent 1429c56916
commit 1729cd40fa
1 changed files with 19 additions and 9 deletions
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@ -5,6 +5,7 @@ import pandas as pd
 from igor2.binarywave import load as loadibw

 import src.g5505_utils as utils
+from src.metadata_review_lib import parse_attribute

 import yaml
 import h5py
@ -104,6 +105,14 @@ def read_txt_files_as_dict(filename : str ):
            separator = config_dict[key].get('separator',separator).replace('\\t','\t')
            table_header = config_dict[key].get('table_header',table_header)
            timestamp_variables = config_dict[key].get('timestamp',[])
+
+            description_dict = {}
+            link_to_description = config_dict[key].get('link_to_description',[]).replace('/',os.sep)
+            with open(link_to_description,'r') as stream:
+                try:
+                    description_dict = yaml.load(stream, Loader=yaml.FullLoader)
+                except yaml.YAMLError as exc:
+                    print(exc)
            break
    #if 'None' in table_header:
    #    return {}
@ -184,6 +193,16 @@ def read_txt_files_as_dict(filename : str ):
            file_dict['datasets'].append(dataset)
            rows,cols = dataset['shape']

+            try:
+                dataset['attributes'] = description_dict['table_header'].copy()
+                for key in description_dict['table_header'].keys():
+                    if not key in numerical_variables:
+                        dataset['attributes'].pop(key) # delete key
+                    else:
+                        dataset['attributes'][key] = parse_attribute(dataset['attributes'][key])
+            except ValueError as err:
+                print(err)
+
            dataset = {}
            numerical_variables= [item.encode("utf-8") for item in numerical_variables]
            dataset['name'] = 'numerical_variable_names'
@ -192,15 +211,6 @@ def read_txt_files_as_dict(filename : str ):
            dataset['dtype'] = type(dataset['data'])
            file_dict['datasets'].append(dataset)            
        
-        #if 'timestamps' in categorical_variables:
-        #    dataset = {}
-        #    dataset['name'] = 'timestamps'
-        #    dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
-        #    dataset['shape'] = dataset['data'].shape
-        #    dataset['dtype'] = type(dataset['data'])
-        #    file_dict['datasets'].append(dataset)            
-        #    categorical_variables.remove('timestamps')
-
        if categorical_variables:
            dataset = {}
            dataset['name'] = 'categorical_variables'