Moved dataframe_to_np_structured_array(df: pd.DataFrame) to src/g5505_utils.py. It is a generic function that can be reused more broadly across modules.
This commit is contained in:
@ -90,28 +90,6 @@ def infer_units(column_name):
|
|||||||
|
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def dataframe_to_np_structured_array(df: pd.DataFrame):
    """Convert a DataFrame into a NumPy structured array with one field per column.

    Field types are chosen per column so the result is made of fixed-size
    fields (the original intent being compatibility with h5py):

    * string/object columns -> fixed-length byte strings (``S<n>``), where
      ``n`` is the longest UTF-8 encoded value in the column (minimum 1);
    * integer columns -> ``i4`` (32-bit integer);
    * float columns -> ``f4`` (32-bit float).

    Parameters
    ----------
    df : pd.DataFrame
        Table to convert. Column names become the field names.

    Returns
    -------
    np.ndarray
        One-dimensional structured array with ``len(df)`` records.

    Raises
    ------
    ValueError
        If a column is neither string, integer nor float typed.
    """

    def _to_bytes(value):
        # Encode explicitly: NumPy's implicit str -> 'S' conversion is
        # ASCII-only and fails on any non-ASCII character.
        if isinstance(value, bytes):
            return value
        if isinstance(value, str):
            return value.encode('utf-8')
        # Missing entries (None / NaN / pd.NA) are stored as an empty string.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return b''
        return str(value).encode('utf-8')

    # Define the dtype for the structured array, collecting the per-column
    # values alongside so that string columns can be stored pre-encoded.
    dtype = []
    column_values = []
    for col in df.columns:
        col_dtype = df[col].dtype
        if pd.api.types.is_string_dtype(col_dtype):
            encoded = [_to_bytes(value) for value in df[col]]
            # Width is measured in bytes (not characters) and is always an
            # int, even for empty columns or columns with missing values.
            max_len = max(map(len, encoded), default=0)
            # Keep at least one byte so the field never has zero width.
            dtype.append((col, f'S{max(max_len, 1)}'))
            column_values.append(encoded)
        elif pd.api.types.is_integer_dtype(col_dtype):
            # NOTE: 32-bit is kept for backward compatibility with existing
            # callers; values outside the int32 range are not representable.
            dtype.append((col, 'i4'))
            column_values.append(df[col].tolist())
        elif pd.api.types.is_float_dtype(col_dtype):
            # NOTE: 32-bit is kept for backward compatibility; float64 input
            # loses precision here.
            dtype.append((col, 'f4'))
            column_values.append(df[col].tolist())
        else:
            raise ValueError(f"Unsupported dtype: {col_dtype}")

    # Structured arrays are built from a list of row tuples.
    rows = list(zip(*column_values))
    structured_array = np.array(rows, dtype=dtype)

    return structured_array
|
|
||||||
|
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
||||||
@ -271,7 +249,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
|||||||
if numerical_variables:
|
if numerical_variables:
|
||||||
dataset = {}
|
dataset = {}
|
||||||
dataset['name'] = 'data_table'#_numerical_variables'
|
dataset['name'] = 'data_table'#_numerical_variables'
|
||||||
dataset['data'] = dataframe_to_np_structured_array(pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)) #df_numerical_attrs.to_numpy()
|
dataset['data'] = utils.dataframe_to_np_structured_array(pd.concat((df_categorical_attrs,df_numerical_attrs),axis=1)) #df_numerical_attrs.to_numpy()
|
||||||
dataset['shape'] = dataset['data'].shape
|
dataset['shape'] = dataset['data'].shape
|
||||||
dataset['dtype'] = type(dataset['data'])
|
dataset['dtype'] = type(dataset['data'])
|
||||||
#dataset['data_units'] = file_obj['wave']['data_units']
|
#dataset['data_units'] = file_obj['wave']['data_units']
|
||||||
|
Reference in New Issue
Block a user