Moved parse_attribute() from ..review_lib.py into ...utils.py and propagated the (refactored) changes to the respective modules.
@@ -259,25 +259,34 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
     file_dict['datasets'] = []
     ####
 
-    if numerical_variables:
+    df = pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)
 
+    #if numerical_variables:
     dataset = {}
     dataset['name'] = 'data_table'#_numerical_variables'
-    dataset['data'] = utils.dataframe_to_np_structured_array(pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)) #df_numerical_attrs.to_numpy()
+    dataset['data'] = utils.dataframe_to_np_structured_array(df) #df_numerical_attrs.to_numpy()
     dataset['shape'] = dataset['data'].shape
     dataset['dtype'] = type(dataset['data'])
     #dataset['data_units'] = file_obj['wave']['data_units']
-    try:
-        dataset['attributes'] = description_dict['table_header'].copy()
-        for key in description_dict['table_header'].keys():
-            if not key in numerical_variables:
-                dataset['attributes'].pop(key) # delete key
-            else:
-                dataset['attributes'][key] = utils.parse_attribute(dataset['attributes'][key])
-        if timestamps_name in categorical_variables:
-            dataset['attributes'][timestamps_name] = utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})
-    except ValueError as err:
-        print(err)
+    #
+    # Create attribute descriptions based on description_dict
+    dataset['attributes'] = {}
+
+    for column_name in df.columns:
+        column_attr_dict = description_dict['table_header'].get(column_name,{'note':'there was no description available. Review instrument files.'})
+        dataset['attributes'].update({column_name: utils.parse_attribute(column_attr_dict)})
+
+    #try:
+    #    dataset['attributes'] = description_dict['table_header'].copy()
+    #    for key in description_dict['table_header'].keys():
+    #        if not key in numerical_variables:
+    #            dataset['attributes'].pop(key) # delete key
+    #        else:
+    #            dataset['attributes'][key] = utils.parse_attribute(dataset['attributes'][key])
+    #    if timestamps_name in categorical_variables:
+    #        dataset['attributes'][timestamps_name] = utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})
+    #except ValueError as err:
+    #    print(err)
 
     file_dict['datasets'].append(dataset)
 
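The new loop above attaches one description entry per DataFrame column, with a fallback note when the instrument description has no entry for that column. A minimal sketch of that lookup pattern, using hypothetical column names and description entries (in the commit, each value is additionally passed through utils.parse_attribute, added in the next hunk):

```python
import pandas as pd

# Hypothetical inputs, for illustration only.
description_dict = {'table_header': {'temperature': {'unit': 'K'}}}
df = pd.DataFrame({'temperature': [293.2], 'pressure': [101.3]})

attributes = {}
for column_name in df.columns:
    # Columns absent from the description get a placeholder note.
    column_attr_dict = description_dict['table_header'].get(
        column_name,
        {'note': 'there was no description available. Review instrument files.'})
    attributes[column_name] = column_attr_dict

print(attributes['pressure'])
# -> {'note': 'there was no description available. Review instrument files.'}
```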
@@ -165,6 +165,22 @@ def infer_units(column_name):
 
     return match
 
+def parse_attribute(attr_value : dict):
+    "Parse a dictionary attribute into an equivalent numpy structured array, which is compatible with a compound HDF5 type"
+    dtype = []
+    values_list = []
+    max_length = max(len(str(attr_value[key])) for key in attr_value.keys())
+    for key in attr_value.keys():
+        if (not key=='rename_as'):
+            dtype.append((key,f'S{max_length}'))
+            values_list.append(attr_value[key])
+
+    if values_list:
+        new_attr_value = np.array([tuple(values_list)],dtype=dtype)
+    else:
+        new_attr_value = 'missing'
+
+    return new_attr_value
 
 def progressBar(count_value, total, suffix=''):
     bar_length = 100
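A quick usage sketch of the relocated helper, inlined here so it runs standalone (the real import path is elided in the commit message). Note that max_length is computed over all values, including 'rename_as' when present, so the byte fields can be wider than strictly necessary:

```python
import numpy as np

def parse_attribute(attr_value: dict):
    # Same logic as the definition added above: one fixed-width bytes field
    # per key, skipping the bookkeeping key 'rename_as'.
    dtype = []
    values_list = []
    max_length = max(len(str(attr_value[key])) for key in attr_value.keys())
    for key in attr_value.keys():
        if not key == 'rename_as':
            dtype.append((key, f'S{max_length}'))
            values_list.append(attr_value[key])
    if values_list:
        new_attr_value = np.array([tuple(values_list)], dtype=dtype)
    else:
        new_attr_value = 'missing'
    return new_attr_value

parsed = parse_attribute({'unit': 'YYYY-MM-DD HH:MM:SS.ffffff'})
print(parsed.dtype)  # [('unit', 'S26')]
print(parsed)        # [(b'YYYY-MM-DD HH:MM:SS.ffffff',)]
```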
@@ -427,7 +427,7 @@ def save_processed_dataframe_to_hdf5(df, annotator, output_filename): # src_hdf5
 
     for key, value in data_level_attributes.items():
         if isinstance(value,dict):
-            data_level_attributes[key] = metadata_lib.parse_attribute(value)
+            data_level_attributes[key] = utils.parse_attribute(value)
 
 
     # Prepare file dictionary
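For context, h5py cannot store a plain Python dict as an attribute, which is why dict values are routed through parse_attribute before writing; the structured scalar maps onto an HDF5 compound attribute. A minimal, self-contained sketch (hypothetical filename and attribute values, dtype widths illustrative):

```python
import h5py
import numpy as np

# A structured scalar like the one parse_attribute returns.
temperature_attr = np.array([(b'K', b'sample temperature')],
                            dtype=[('unit', 'S18'), ('note', 'S18')])

with h5py.File('example.h5', 'w') as f:  # hypothetical output file
    f.attrs['temperature'] = temperature_attr
    print(f.attrs['temperature'])  # [(b'K', b'sample temperature')]
```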
@@ -32,35 +32,6 @@ def get_review_status(filename_path):
             workflow_steps.append(line)
     return workflow_steps[-1]
 
-def parse_attribute(attr_value : dict):
-    "Parse a dictionary attribute into an equivalent numpy structured array, which compatible with compound HDF5 type"
-    dtype = []
-    values_list = []
-    max_length = max(len(str(attr_value[key])) for key in attr_value.keys())
-    for key in attr_value.keys():
-        if (not key=='rename_as'):
-            dtype.append((key,f'S{max_length}'))
-            values_list.append(attr_value[key])
-
-    if values_list:
-        new_attr_value = np.array([tuple(values_list)],dtype=dtype)
-    else:
-        new_attr_value = 'missing'
-
-    return new_attr_value
-
-def convert_string_to_bytes(input_list: list):
-    utf8_type = lambda max_length: h5py.string_dtype('utf-8', max_length)
-    if input_list:
-        max_length = max(len(item) for item in input_list)
-        # Convert the strings to bytes with utf-8 encoding, specifying errors='ignore' to skip characters that cannot be encoded
-        input_list_bytes = [item.encode('utf-8', errors='ignore') for item in input_list]
-        input_array_bytes = np.array(input_list_bytes,dtype=utf8_type(max_length))
-    else:
-        input_array_bytes = np.array([],dtype=utf8_type(0))
-
-    return input_array_bytes
-
 def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs, restart = False):
 
     """
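Note that this hunk also deletes convert_string_to_bytes from the module; the commit message does not say where, or whether, it is re-homed, so treat the following only as a record of its behavior: it packed a list of strings into a fixed-length UTF-8 array suitable for HDF5 string storage.

```python
import h5py
import numpy as np

utf8_type = lambda max_length: h5py.string_dtype('utf-8', max_length)

input_list = ['alpha', 'beta']  # hypothetical strings
max_length = max(len(item) for item in input_list)
# errors='ignore' drops characters that cannot be encoded.
input_list_bytes = [item.encode('utf-8', errors='ignore') for item in input_list]
input_array_bytes = np.array(input_list_bytes, dtype=utf8_type(max_length))
print(input_array_bytes)  # [b'alpha' b'beta'], fixed-length UTF-8 dtype
```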
@@ -234,7 +205,7 @@ def update_hdf5_attributes(input_hdf5_file, yaml_dict):
                 hdf5_obj.attrs[attr_value.get('rename_as')] = hdf5_obj.attrs[attr_name] # parse_attribute(attr_value)
                 hdf5_obj.attrs.__delitem__(attr_name)
             else: # add a new attribute
-                hdf5_obj.attrs.update({attr_name : parse_attribute(attr_value)})
+                hdf5_obj.attrs.update({attr_name : utils.parse_attribute(attr_value)})
 
     with h5py.File(input_hdf5_file, 'r+') as f:
         for key in yaml_dict.keys():
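From the visible branches, yaml_dict appears to carry per-object attribute specs: a dict with a 'rename_as' key renames an existing attribute, and any other dict value becomes a new compound attribute via utils.parse_attribute. A hypothetical shape, inferred from this hunk rather than from the actual YAML schema:

```python
# Hypothetical review dict, illustrating both branches above.
yaml_dict = {
    '/instrument/table': {
        'temp': {'rename_as': 'temperature'},            # rename an existing attribute
        'pressure': {'unit': 'hPa', 'note': 'ambient'},  # new attr via utils.parse_attribute
    },
}
```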