From 2d4ecec806086f4d4009206eca2d27a360e2c825 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Sun, 16 Jun 2024 18:25:08 +0200 Subject: [PATCH] Moved dataframe_to_np_structured_array(df: pd.DataFrame) to src/g5505_utils.py. This is a more generic function that can be used more broadly across modules. --- src/g5505_file_reader.py | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index aa42909..fed0601 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -90,28 +90,6 @@ def infer_units(column_name): return match -def dataframe_to_np_structured_array(df: pd.DataFrame): - - # Define the dtype for the structured array, ensuring compatibility with h5py - dtype = [] - for col in df.columns: - col_dtype = df[col].dtype - if pd.api.types.is_string_dtype(col_dtype): - # Convert string dtype to fixed-length strings - max_len = df[col].str.len().max() - dtype.append((col, f'S{max_len}')) - elif pd.api.types.is_integer_dtype(col_dtype): - dtype.append((col, 'i4')) # Assuming 32-bit integer - elif pd.api.types.is_float_dtype(col_dtype): - dtype.append((col, 'f4')) # Assuming 32-bit float - else: - raise ValueError(f"Unsupported dtype: {col_dtype}") - - # Convert the DataFrame to a structured array - structured_array = np.array(list(df.itertuples(index=False, name=None)), dtype=dtype) - - return structured_array - from collections import Counter def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ): @@ -271,7 +249,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ): if numerical_variables: dataset = {} dataset['name'] = 'data_table'#_numerical_variables' - dataset['data'] = dataframe_to_np_structured_array(pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)) #df_numerical_attrs.to_numpy() + dataset['data'] = 
utils.dataframe_to_np_structured_array(pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)) #df_numerical_attrs.to_numpy() dataset['shape'] = dataset['data'].shape dataset['dtype'] = type(dataset['data']) #dataset['data_units'] = file_obj['wave']['data_units']