Added a feature to interpret links to descriptions in the YAML instrument configuration file and add them at the dataset level as attributes.

This commit is contained in:
2024-05-09 19:17:08 +02:00
parent 1429c56916
commit 1729cd40fa

View File

@ -5,6 +5,7 @@ import pandas as pd
from igor2.binarywave import load as loadibw
import src.g5505_utils as utils
from src.metadata_review_lib import parse_attribute
import yaml
import h5py
@ -104,6 +105,14 @@ def read_txt_files_as_dict(filename : str ):
separator = config_dict[key].get('separator',separator).replace('\\t','\t')
table_header = config_dict[key].get('table_header',table_header)
timestamp_variables = config_dict[key].get('timestamp',[])
description_dict = {}
link_to_description = config_dict[key].get('link_to_description',[]).replace('/',os.sep)
with open(link_to_description,'r') as stream:
try:
description_dict = yaml.load(stream, Loader=yaml.FullLoader)
except yaml.YAMLError as exc:
print(exc)
break
#if 'None' in table_header:
# return {}
@ -184,6 +193,16 @@ def read_txt_files_as_dict(filename : str ):
file_dict['datasets'].append(dataset)
rows,cols = dataset['shape']
try:
dataset['attributes'] = description_dict['table_header'].copy()
for key in description_dict['table_header'].keys():
if not key in numerical_variables:
dataset['attributes'].pop(key) # delete key
else:
dataset['attributes'][key] = parse_attribute(dataset['attributes'][key])
except ValueError as err:
print(err)
dataset = {}
numerical_variables= [item.encode("utf-8") for item in numerical_variables]
dataset['name'] = 'numerical_variable_names'
@ -192,15 +211,6 @@ def read_txt_files_as_dict(filename : str ):
dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset)
#if 'timestamps' in categorical_variables:
# dataset = {}
# dataset['name'] = 'timestamps'
# dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
# dataset['shape'] = dataset['data'].shape
# dataset['dtype'] = type(dataset['data'])
# file_dict['datasets'].append(dataset)
# categorical_variables.remove('timestamps')
if categorical_variables:
dataset = {}
dataset['name'] = 'categorical_variables'