Added a feature that interprets links to description files in the YAML instrument configuration file and adds their contents at the dataset level as attributes.
@@ -5,6 +5,7 @@ import pandas as pd
 from igor2.binarywave import load as loadibw
 
 import src.g5505_utils as utils
+from src.metadata_review_lib import parse_attribute
 
 import yaml
 import h5py
@@ -104,6 +105,14 @@ def read_txt_files_as_dict(filename : str ):
             separator = config_dict[key].get('separator',separator).replace('\\t','\t')
             table_header = config_dict[key].get('table_header',table_header)
             timestamp_variables = config_dict[key].get('timestamp',[])
+
+            description_dict = {}
+            link_to_description = config_dict[key].get('link_to_description',[]).replace('/',os.sep)
+            with open(link_to_description,'r') as stream:
+                try:
+                    description_dict = yaml.load(stream, Loader=yaml.FullLoader)
+                except yaml.YAMLError as exc:
+                    print(exc)
             break
     #if 'None' in table_header:
     # return {}
@@ -184,6 +193,16 @@ def read_txt_files_as_dict(filename : str ):
         file_dict['datasets'].append(dataset)
         rows,cols = dataset['shape']
+
+        try:
+            dataset['attributes'] = description_dict['table_header'].copy()
+            for key in description_dict['table_header'].keys():
+                if not key in numerical_variables:
+                    dataset['attributes'].pop(key) # delete key
+                else:
+                    dataset['attributes'][key] = parse_attribute(dataset['attributes'][key])
+        except ValueError as err:
+            print(err)
 
         dataset = {}
         numerical_variables= [item.encode("utf-8") for item in numerical_variables]
         dataset['name'] = 'numerical_variable_names'
@@ -192,15 +211,6 @@ def read_txt_files_as_dict(filename : str ):
         dataset['dtype'] = type(dataset['data'])
         file_dict['datasets'].append(dataset)
 
-        #if 'timestamps' in categorical_variables:
-        # dataset = {}
-        # dataset['name'] = 'timestamps'
-        # dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
-        # dataset['shape'] = dataset['data'].shape
-        # dataset['dtype'] = type(dataset['data'])
-        # file_dict['datasets'].append(dataset)
-        # categorical_variables.remove('timestamps')
-
         if categorical_variables:
             dataset = {}
             dataset['name'] = 'categorical_variables'
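
For context, a minimal sketch of the configuration pattern the added code expects. Only the 'link_to_description' and 'table_header' keys (plus the existing 'separator' key) are taken from the diff above; the instrument name, file paths, variable names, and the per-variable value layout are illustrative assumptions and must match whatever parse_attribute accepts.

# Hypothetical entry in the YAML instrument configuration file; forward
# slashes in the linked path are converted to os.sep before the file is opened.
some_instrument:
  separator: ','
  link_to_description: 'instruments/descriptions/some_instrument.yaml'

# Hypothetical content of the linked description file; the diff only requires
# a 'table_header' mapping whose keys are variable names. Entries whose keys
# are not in numerical_variables are dropped, the rest are passed through
# parse_attribute and attached to the dataset as attributes.
table_header:
  temperature:
    description: 'sample temperature'
    units: 'K'
  pressure:
    description: 'chamber pressure'
    units: 'mbar'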