def _encode_utf8(value):
    """Return *value* as UTF-8 bytes; missing values become ``b''``."""
    if isinstance(value, bytes):
        return value
    if isinstance(value, str):
        return value.encode('utf-8')
    # None, pd.NA and NaN (the only float that differs from itself) mark
    # missing cells; a fixed-length byte field cannot hold them, so they are
    # stored as empty strings.
    if value is None or value is pd.NA or (isinstance(value, float) and value != value):
        return b''
    return str(value).encode('utf-8')


def _integer_field_code(series):
    """Return the narrowest of 'i4', 'i8', 'u8' that can hold *series*."""
    present = series.dropna()
    if present.empty:
        return 'i4'
    lowest, highest = int(present.min()), int(present.max())
    int32 = np.iinfo(np.int32)
    if int32.min <= lowest and highest <= int32.max:
        return 'i4'
    return 'u8' if highest > np.iinfo(np.int64).max else 'i8'


def dataframe_to_np_structured_array(df: pd.DataFrame) -> np.ndarray:
    """Convert a DataFrame into a NumPy structured array (one record per row).

    The field types are chosen so the result is compatible with h5py:

    * string columns -> fixed-length byte strings ``S<n>``; the text is
      encoded as UTF-8 and ``n`` is the longest *encoded* length (minimum 1),
      so non-ASCII text is never truncated. Missing values become ``b''``.
    * integer columns -> ``i4``; widened to ``i8``/``u8`` only when a value
      does not fit in 32 bits, so large values are not wrapped or rejected.
    * float columns -> ``f4`` (values are rounded to 32-bit precision).

    Parameters
    ----------
    df : pd.DataFrame
        Table to convert. Column labels become the field names.

    Returns
    -------
    np.ndarray
        One-dimensional structured array with ``len(df)`` records.

    Raises
    ------
    ValueError
        If a column is neither string, integer nor float typed.
    """
    dtype = []
    columns = []
    for col, series in df.items():
        col_dtype = series.dtype
        if pd.api.types.is_string_dtype(col_dtype):
            # Encode first and measure bytes, not characters: a multi-byte
            # UTF-8 character would overflow a width based on str.len().
            encoded = [_encode_utf8(value) for value in series.tolist()]
            max_len = max((len(item) for item in encoded), default=0)
            dtype.append((col, f'S{max(max_len, 1)}'))
            columns.append(encoded)
        elif pd.api.types.is_integer_dtype(col_dtype):
            dtype.append((col, _integer_field_code(series)))
            columns.append(series.tolist())
        elif pd.api.types.is_float_dtype(col_dtype):
            dtype.append((col, 'f4'))  # Assuming 32-bit float
            columns.append(series.tolist())
        else:
            raise ValueError(f"Unsupported dtype: {col_dtype}")

    # Re-assemble the per-column values into row tuples, the layout NumPy
    # expects when filling a structured array.
    rows = list(zip(*columns)) if columns else [()] * len(df)
    structured_array = np.array(rows, dtype=dtype)

    return structured_array