Optimized and included df to np structured array conversion. \n-Replaced loop plus append with list comprehension. \n-Replaced pd df column concatenation based on row-wise concatenation with df.agg() method that uses column-wise concatenation.

This commit is contained in:
2024-05-23 22:20:19 +02:00
parent bd458c6cd0
commit 63b683e4aa

View File

@ -106,15 +106,6 @@ def dataframe_to_np_structured_array(df: pd.DataFrame):
# Convert the DataFrame to a structured array
structured_array = np.array(list(df.itertuples(index=False, name=None)), dtype=dtype)
#return structured_array
#table_header = df.columns
#table = df.to_numpy()
#rows,cols = table.shape
#tmp = [tuple(table[i,:]) for i in range(rows)]
#dtype_tmp = [(table_header[i],'f4') for i in range(cols)]
#data = np.array(tmp, dtype=dtype_tmp)
return structured_array
def read_txt_files_as_dict(filename : str ):
@ -210,7 +201,7 @@ def read_txt_files_as_dict(filename : str ):
df_categorical_attrs['timestamps'] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)
df_categorical_attrs = df_categorical_attrs.drop(columns = timestamp_variables)
#print(df_categorical_attrs)
categorical_variables = [item for item in df_categorical_attrs.columns]