Added .strip() to column names to remove leading/trailing whitespace characters (\r, \t, \n) and added a unit (format) description attribute to the timestamps dataset.
This commit is contained in:
@ -165,7 +165,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
|||||||
if table_header in line.decode(file_encoding):
|
if table_header in line.decode(file_encoding):
|
||||||
list_of_substrings = line.decode(file_encoding).split(separator)
|
list_of_substrings = line.decode(file_encoding).split(separator)
|
||||||
data_start = True
|
data_start = True
|
||||||
column_names = [str(i)+'_'+name for i, name in enumerate(list_of_substrings)]
|
column_names = [str(i)+'_'+name.strip() for i, name in enumerate(list_of_substrings)]
|
||||||
#column_names = []
|
#column_names = []
|
||||||
#for i, name in enumerate(list_of_substrings):
|
#for i, name in enumerate(list_of_substrings):
|
||||||
# column_names.append(str(i)+'_'+name)
|
# column_names.append(str(i)+'_'+name)
|
||||||
@ -270,13 +270,6 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
|||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
print(err)
|
print(err)
|
||||||
|
|
||||||
#dataset = {}
|
|
||||||
#numerical_variables= [item.encode("utf-8") for item in numerical_variables]
|
|
||||||
#dataset['name'] = 'numerical_variable_names'
|
|
||||||
#dataset['data'] = np.array(numerical_variables).reshape((1,len(numerical_variables)))
|
|
||||||
#dataset['shape'] = dataset['data'].shape
|
|
||||||
#dataset['dtype'] = type(dataset['data'])
|
|
||||||
#file_dict['datasets'].append(dataset)
|
|
||||||
|
|
||||||
if categorical_variables:
|
if categorical_variables:
|
||||||
dataset = {}
|
dataset = {}
|
||||||
@ -284,15 +277,12 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
|||||||
dataset['data'] = dataframe_to_np_structured_array(df_categorical_attrs) #df_categorical_attrs.loc[:,categorical_variables].to_numpy()
|
dataset['data'] = dataframe_to_np_structured_array(df_categorical_attrs) #df_categorical_attrs.loc[:,categorical_variables].to_numpy()
|
||||||
dataset['shape'] = dataset['data'].shape
|
dataset['shape'] = dataset['data'].shape
|
||||||
dataset['dtype'] = type(dataset['data'])
|
dataset['dtype'] = type(dataset['data'])
|
||||||
|
if 'timestamps' in categorical_variables:
|
||||||
|
dataset['attributes'] = {'timestamps': metadata.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})}
|
||||||
file_dict['datasets'].append(dataset)
|
file_dict['datasets'].append(dataset)
|
||||||
|
|
||||||
# dataset = {}
|
|
||||||
# categorical_variables = [item.encode("utf-8") for item in categorical_variables]
|
|
||||||
# dataset['name'] = 'categorial_variable_names'
|
|
||||||
# dataset['data'] = np.array(categorical_variables).reshape((1,len(categorical_variables)))
|
|
||||||
# dataset['shape'] = dataset['data'].shape
|
|
||||||
# dataset['dtype'] = type(dataset['data'])
|
|
||||||
# file_dict['datasets'].append(dataset)
|
|
||||||
|
|
||||||
except:
|
except:
|
||||||
return {}
|
return {}
|
||||||
|
Reference in New Issue
Block a user