Added a feature that interprets links to descriptions in the YAML instrument configuration file and adds them as attributes at the dataset level.
This commit is contained in:
@ -5,6 +5,7 @@ import pandas as pd
|
||||
from igor2.binarywave import load as loadibw
|
||||
|
||||
import src.g5505_utils as utils
|
||||
from src.metadata_review_lib import parse_attribute
|
||||
|
||||
import yaml
|
||||
import h5py
|
||||
@ -104,6 +105,14 @@ def read_txt_files_as_dict(filename : str ):
|
||||
separator = config_dict[key].get('separator',separator).replace('\\t','\t')
|
||||
table_header = config_dict[key].get('table_header',table_header)
|
||||
timestamp_variables = config_dict[key].get('timestamp',[])
|
||||
|
||||
description_dict = {}
|
||||
link_to_description = config_dict[key].get('link_to_description',[]).replace('/',os.sep)
|
||||
with open(link_to_description,'r') as stream:
|
||||
try:
|
||||
description_dict = yaml.load(stream, Loader=yaml.FullLoader)
|
||||
except yaml.YAMLError as exc:
|
||||
print(exc)
|
||||
break
|
||||
#if 'None' in table_header:
|
||||
# return {}
|
||||
@ -184,6 +193,16 @@ def read_txt_files_as_dict(filename : str ):
|
||||
file_dict['datasets'].append(dataset)
|
||||
rows,cols = dataset['shape']
|
||||
|
||||
try:
|
||||
dataset['attributes'] = description_dict['table_header'].copy()
|
||||
for key in description_dict['table_header'].keys():
|
||||
if not key in numerical_variables:
|
||||
dataset['attributes'].pop(key) # delete key
|
||||
else:
|
||||
dataset['attributes'][key] = parse_attribute(dataset['attributes'][key])
|
||||
except ValueError as err:
|
||||
print(err)
|
||||
|
||||
dataset = {}
|
||||
numerical_variables= [item.encode("utf-8") for item in numerical_variables]
|
||||
dataset['name'] = 'numerical_variable_names'
|
||||
@ -192,15 +211,6 @@ def read_txt_files_as_dict(filename : str ):
|
||||
dataset['dtype'] = type(dataset['data'])
|
||||
file_dict['datasets'].append(dataset)
|
||||
|
||||
#if 'timestamps' in categorical_variables:
|
||||
# dataset = {}
|
||||
# dataset['name'] = 'timestamps'
|
||||
# dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
|
||||
# dataset['shape'] = dataset['data'].shape
|
||||
# dataset['dtype'] = type(dataset['data'])
|
||||
# file_dict['datasets'].append(dataset)
|
||||
# categorical_variables.remove('timestamps')
|
||||
|
||||
if categorical_variables:
|
||||
dataset = {}
|
||||
dataset['name'] = 'categorical_variables'
|
||||
|
Reference in New Issue
Block a user