Added .strip() to column names to remove leading/trailing whitespace characters (\r, \t, \n), and added a unit (format) description attribute to the timestamps column.

2024-06-04 09:57:37 +02:00
parent fa2990527e
commit a6ddb24eeb
1 changed file with 7 additions and 17 deletions
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@@ -165,7 +165,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
            if table_header in line.decode(file_encoding):   
                list_of_substrings = line.decode(file_encoding).split(separator)             
                data_start = True  
-                column_names = [str(i)+'_'+name for i, name in enumerate(list_of_substrings)]
+                column_names = [str(i)+'_'+name.strip() for i, name in enumerate(list_of_substrings)]
                #column_names = []
                #for i, name in enumerate(list_of_substrings):
                #    column_names.append(str(i)+'_'+name) 
@@ -270,13 +270,6 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
            except ValueError as err:
                print(err)
            #dataset = {}
            #numerical_variables= [item.encode("utf-8") for item in numerical_variables]
            #dataset['name'] = 'numerical_variable_names'
            #dataset['data'] = np.array(numerical_variables).reshape((1,len(numerical_variables)))
            #dataset['shape'] = dataset['data'].shape
            #dataset['dtype'] = type(dataset['data'])
            #file_dict['datasets'].append(dataset)            
        if categorical_variables:
            dataset = {}
@@ -284,15 +277,12 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
            dataset['data'] = dataframe_to_np_structured_array(df_categorical_attrs) #df_categorical_attrs.loc[:,categorical_variables].to_numpy()
            dataset['shape'] = dataset['data'].shape
            dataset['dtype'] = type(dataset['data'])
            if 'timestamps' in categorical_variables:
                dataset['attributes'] = {'timestamps': metadata.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})}
            file_dict['datasets'].append(dataset) 
-        #    dataset = {}
+
-        #    categorical_variables = [item.encode("utf-8") for item in categorical_variables]
+
        #    dataset['name'] = 'categorial_variable_names'
        #    dataset['data'] = np.array(categorical_variables).reshape((1,len(categorical_variables)))
        #    dataset['shape'] = dataset['data'].shape
        #    dataset['dtype'] = type(dataset['data'])
        #    file_dict['datasets'].append(dataset)    
    except:
        return {}