From 2d4ecec806086f4d4009206eca2d27a360e2c825 Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Sun, 16 Jun 2024 18:25:08 +0200
Subject: [PATCH] Moved dataframe_to_np_structured_array(df: pd.DataFrame) to
 src/g5505_utils.py. This is a more generic function that can be used more
 broadly across modules.

---
 src/g5505_file_reader.py | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py
index aa42909..fed0601 100644
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@@ -90,28 +90,6 @@ def infer_units(column_name):
         
     return match
 
-def dataframe_to_np_structured_array(df: pd.DataFrame):
-
-     # Define the dtype for the structured array, ensuring compatibility with h5py
-    dtype = []
-    for col in df.columns:
-        col_dtype = df[col].dtype
-        if pd.api.types.is_string_dtype(col_dtype):
-            # Convert string dtype to fixed-length strings
-            max_len = df[col].str.len().max()
-            dtype.append((col, f'S{max_len}'))
-        elif pd.api.types.is_integer_dtype(col_dtype):
-            dtype.append((col, 'i4'))  # Assuming 32-bit integer
-        elif pd.api.types.is_float_dtype(col_dtype):
-            dtype.append((col, 'f4'))  # Assuming 32-bit float
-        else:
-            raise ValueError(f"Unsupported dtype: {col_dtype}")
-
-    # Convert the DataFrame to a structured array
-    structured_array = np.array(list(df.itertuples(index=False, name=None)), dtype=dtype)
-
-    return structured_array
-
 from collections import Counter
 
 def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
@@ -271,7 +249,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
         if numerical_variables:
             dataset = {}
             dataset['name'] = 'data_table'#_numerical_variables'
-            dataset['data'] = dataframe_to_np_structured_array(pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)) #df_numerical_attrs.to_numpy()
+            dataset['data'] = utils.dataframe_to_np_structured_array(pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)) #df_numerical_attrs.to_numpy()
             dataset['shape'] = dataset['data'].shape
             dataset['dtype'] = type(dataset['data'])       
             #dataset['data_units'] = file_obj['wave']['data_units']