Created a function to save dataframes with annotations in HDF5 format
This commit is contained in:
@ -477,6 +477,62 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
|
|||||||
|
|
||||||
return output_filename #, output_yml_filename_path
|
return output_filename #, output_yml_filename_path
|
||||||
|
|
||||||
|
import os
|
||||||
|
#import src.hdf5_lib as h5lib
|
||||||
|
import src.g5505_utils as utils
|
||||||
|
import h5py
|
||||||
|
|
||||||
|
def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path, script_date, script_name):
    """
    Save processed dataframe columns with annotations to an HDF5 file.

    The dataframe is converted to a NumPy structured array and written as a
    single dataset named "data_table" inside a group named after
    `script_name`. The output file is
    'data_products/processed/fig_<script_date>_<basename of src_hdf5_path>'.

    Parameters:
        df (pd.DataFrame): DataFrame containing processed time series.
            It is not modified; datetime columns are stringified on a copy.
        annotator (): Annotator object with get_metadata method. The returned
            dict must provide 'parent_files' and a 'metadata' dict with the
            keys 'sample', 'environment', 'instruments' and 'datasets'.
        src_hdf5_path (str): Path to the source HDF5 file. Only its file name
            is used, to build the output file name.
        script_date (str): Date of the data generation script.
        script_name (str): Name of the data generation script.

    Returns:
        str: Path of the HDF5 file that was written.
    """
    # Convert datetime columns to string. Work on a copy so the caller's
    # dataframe keeps its datetime dtype, and use Series.map instead of
    # DataFrame.applymap, which is deprecated in recent pandas releases.
    datetime_cols = df.select_dtypes(include=['datetime64']).columns
    if len(datetime_cols) > 0:
        df = df.copy()
        for col in datetime_cols:
            df[col] = df[col].map(str)

    # Convert dataframe to structured array
    icad_data_table = utils.dataframe_to_np_structured_array(df)

    # Get metadata
    metadata_dict = annotator.get_metadata()
    metadata = metadata_dict['metadata']

    # Prepare high-level attributes. Later entries override earlier ones on
    # key collisions, so the merge order below is significant.
    high_level_attributes = {
        'parent_files': metadata_dict['parent_files'],
        **metadata['sample'],
        **metadata['environment'],
        **metadata['instruments'],
    }

    # Prepare data level attributes
    data_level_attributes = metadata['datasets']

    # Generate output filename
    parent_file_name = os.path.split(src_hdf5_path)[1]
    output_filename = f'data_products/processed/fig_{script_date}_{parent_file_name}'

    # h5py does not create missing parent directories, so make sure the
    # output directory exists before opening the file for writing.
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)

    # Prepare file dictionary
    file_dict = {
        'name': script_name,
        'attributes_dict': high_level_attributes,
        'datasets': [{
            'name': "data_table",
            'data': icad_data_table,
            'shape': icad_data_table.shape,
            'attributes': data_level_attributes,
        }],
    }

    # Write to HDF5
    with h5py.File(output_filename, 'w') as h5file:
        transfer_file_dict_to_hdf5(h5file, '/', file_dict)

    return output_filename
|
||||||
|
|
||||||
|
|
||||||
def create_hdf5_file_from_dataframe(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
|
def create_hdf5_file_from_dataframe(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
|
||||||
|
Reference in New Issue
Block a user