Created function to save dataframes with annotations in hdf5 format
This commit is contained in:
@ -477,6 +477,62 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
|
||||
return output_filename #, output_yml_filename_path
|
||||
|
||||
import os
|
||||
#import src.hdf5_lib as h5lib
|
||||
import src.g5505_utils as utils
|
||||
import h5py
|
||||
|
||||
def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path, script_date, script_name):
    """
    Save processed dataframe columns with annotations to an HDF5 file.

    The dataframe is converted to a numpy structured array and written as a
    single dataset named "data_table". The file is created at
    'data_products/processed/fig_<script_date>_<basename of src_hdf5_path>'
    (relative to the current working directory); the directory is created
    if it does not exist. An existing file at that path is overwritten.

    Parameters:
        df (pd.DataFrame): DataFrame containing processed time series.
            It is not modified; datetime columns are stringified on a copy.
        annotator (): Annotator object with get_metadata method. The returned
            dict must provide 'parent_files' and a 'metadata' dict with the
            keys 'sample', 'environment', 'instruments' and 'datasets'.
        src_hdf5_path (str): Path to the source HDF5 file. Only its file name
            is used, to build the output file name.
        script_date (str): Date of the data generation script.
        script_name (str): Name of the data generation script.

    Returns:
        str: Path of the HDF5 file that was written.
    """
    # Convert datetime columns to string. Work on a copy so the caller's
    # dataframe keeps its datetime dtype, and map column by column instead of
    # using DataFrame.applymap, which is deprecated since pandas 2.1.
    datetime_cols = df.select_dtypes(include=['datetime64']).columns
    if len(datetime_cols) > 0:
        df = df.copy()
        df[datetime_cols] = df[datetime_cols].apply(lambda column: column.map(str))

    # Convert dataframe to structured array
    icad_data_table = utils.dataframe_to_np_structured_array(df)

    # Get metadata
    metadata_dict = annotator.get_metadata()
    metadata = metadata_dict['metadata']

    # Prepare high-level attributes. On duplicate keys later sections win
    # (sample < environment < instruments), as with any dict unpacking.
    high_level_attributes = {
        'parent_files': metadata_dict['parent_files'],
        **metadata['sample'],
        **metadata['environment'],
        **metadata['instruments'],
    }

    # Prepare data level attributes
    data_level_attributes = metadata['datasets']

    # Generate output filename
    parent_file_name = os.path.basename(src_hdf5_path)
    output_filename = f'data_products/processed/fig_{script_date}_{parent_file_name}'

    # h5py does not create missing parent directories, so make sure the
    # output directory exists before opening the file for writing.
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)

    # Prepare file dictionary
    file_dict = {
        'name': script_name,
        'attributes_dict': high_level_attributes,
        'datasets': [{
            'name': "data_table",
            'data': icad_data_table,
            'shape': icad_data_table.shape,
            'attributes': data_level_attributes,
        }],
    }

    # Write to HDF5
    with h5py.File(output_filename, 'w') as h5file:
        transfer_file_dict_to_hdf5(h5file, '/', file_dict)

    return output_filename
|
||||
|
||||
|
||||
def create_hdf5_file_from_dataframe(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
|
||||
|
Reference in New Issue
Block a user