import os
import src.g5505_utils as utils
import h5py


def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path: str, script_date: str, script_name: str) -> str:
    """
    Save processed dataframe columns with annotations to an HDF5 file.

    The output file is written to
    ``data_products/processed/fig_<script_date>_<basename of src_hdf5_path>``
    (relative to the current working directory). The directory is created
    if it does not exist, and an existing file at that path is overwritten.

    Parameters:
        df (pd.DataFrame): DataFrame containing processed time series.
            It is not modified; datetime columns are stringified on a copy.
        annotator: Object exposing ``get_metadata()``, which must return a
            dict with the keys ``'parent_files'`` and ``'metadata'``; the
            latter must contain ``'sample'``, ``'environment'``,
            ``'instruments'`` (mappings merged into the file-level
            attributes) and ``'datasets'`` (attached to the data table).
        src_hdf5_path (str): Path to the source HDF5 file. Only its base
            name is used, to build the output file name.
        script_date (str): Date of the data generation script.
        script_name (str): Name of the data generation script.

    Returns:
        str: Path of the HDF5 file that was written.

    Raises:
        KeyError: If the annotator metadata lacks one of the keys above.
    """
    # Convert datetime columns to string. Work on a copy so the caller's
    # DataFrame keeps its datetime dtype, and use Series.map(str) (same
    # per-element result as the deprecated DataFrame.applymap(str)).
    datetime_cols = df.select_dtypes(include=['datetime64']).columns
    if len(datetime_cols) > 0:
        df = df.copy()
        for col in datetime_cols:
            df[col] = df[col].map(str)

    # Convert dataframe to structured array
    icad_data_table = utils.dataframe_to_np_structured_array(df)

    # Get metadata
    metadata_dict = annotator.get_metadata()
    metadata = metadata_dict['metadata']

    # Prepare high-level attributes. Later mappings win on duplicate keys:
    # instruments over environment over sample over 'parent_files'.
    high_level_attributes = {
        'parent_files': metadata_dict['parent_files'],
        **metadata['sample'],
        **metadata['environment'],
        **metadata['instruments'],
    }

    # Prepare data level attributes
    data_level_attributes = metadata['datasets']

    # Generate output filename
    parent_file_name = os.path.basename(src_hdf5_path)
    output_filename = f'data_products/processed/fig_{script_date}_{parent_file_name}'

    # h5py does not create missing parent directories; do it here so the
    # write below does not fail on a fresh working directory.
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)

    # Prepare file dictionary
    file_dict = {
        'name': script_name,
        'attributes_dict': high_level_attributes,
        'datasets': [{
            'name': "data_table",
            'data': icad_data_table,
            'shape': icad_data_table.shape,
            'attributes': data_level_attributes,
        }],
    }

    # Write to HDF5 ('w' truncates any existing file at that path).
    # NOTE(review): transfer_file_dict_to_hdf5 is expected to be defined
    # elsewhere in this module - confirm how it interprets 'name'.
    with h5py.File(output_filename, 'w') as h5file:
        transfer_file_dict_to_hdf5(h5file, '/', file_dict)

    return output_filename