From 37071945f5ab1c65ea8ad846f23564a8661edfcf Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Sun, 26 May 2024 12:24:15 +0200 Subject: [PATCH] Removed hdf5 file creation redundancy by creating a helper function create_hdf5_file(date_str, select_file_keywords), which handles variations in date_str and keywords. --- src/data_integration_lib.py | 64 ++++++++++--------------------------- 1 file changed, 17 insertions(+), 47 deletions(-) diff --git a/src/data_integration_lib.py b/src/data_integration_lib.py index d4f974e..8bd25b8 100644 --- a/src/data_integration_lib.py +++ b/src/data_integration_lib.py @@ -45,7 +45,16 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): 'user_name' : config_dict['user_name'], } - if config_dict['datetime_steps']: + def create_hdf5_file(date_str, select_file_keywords): + filename = output_filename(exp_campaign_name, date_str, initials) + output_path = os.path.join(output_dir, filename) + logging.info("Creating HDF5 file at: %s", output_path) + + return hdf5_lib.create_hdf5_file_from_filesystem_path( + output_path, input_file_dir, select_dir_keywords, select_file_keywords, root_metadata_dict=root_metadata + ) + + if config_dict.get('datetime_steps'): datetime_augment_dict = {} for datetime_step in config_dict['datetime_steps']: @@ -55,15 +64,10 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): if 'experimental_step' in config_dict['integration_mode']: for datetime_step in datetime_augment_dict.keys(): + date_str = datetime_step.strftime('%Y-%m-%d') select_file_keywords = datetime_augment_dict[datetime_step] - output_filename_step = output_filename(exp_campaign_name,datetime_step.strftime('%Y-%m-%d'),initials) - output_filename_step = os.path.join(output_dir,output_filename_step) - print(output_filename_step) - output_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step, - input_file_dir, - select_dir_keywords, - select_file_keywords, - root_metadata_dict 
= root_metadata) + output_filename_path = create_hdf5_file(date_str, select_file_keywords) + elif 'collection' in config_dict['integration_mode']: select_file_keywords = [] for datetime_step in datetime_augment_dict.keys(): @@ -71,44 +75,10 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): min_datetime = min(datetime_augment_dict.keys()) max_datetime = max(datetime_augment_dict.keys()) - output_filename_step = output_filename(exp_campaign_name,min_datetime.strftime('%Y-%m-%d')+'_'+max_datetime.strftime('%Y-%m-%d'),initials) - output_filename_step = os.path.join(output_dir,output_filename_step) - output_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step, - input_file_dir, - select_dir_keywords, - select_file_keywords, - root_metadata_dict = root_metadata) - + date_str = min_datetime.strftime('%Y-%m-%d')+'_'+max_datetime.strftime('%Y-%m-%d') + output_filename_path = create_hdf5_file(date_str, select_file_keywords) else: - output_filename_step = output_filename(exp_campaign_name,config_dict['experiment_date'],initials) - output_filename_step = os.path.join(output_dir,output_filename_step) - output_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step, - input_file_dir, - select_dir_keywords, - select_file_keywords=[], - root_metadata_dict = root_metadata) - - - - """for datetime_step in config_dict['datetime_steps']: - - tmp = datetime.strptime(datetime_step,'%Y-%m-%d %H-%M-%S') #convert(datetime_step) - - root_metadata['creation_date'] = datetime_step - - print(tmp) - select_file_keywords = [tmp.strftime('%Y-%m-%d'),tmp.strftime('%Y_%m_%d'),tmp.strftime('%Y.%m.%d'),tmp.strftime('%Y%m%d')] - print(select_file_keywords) - - output_filename_step = output_filename(exp_campaign_name,tmp.strftime('%Y-%m-%d_%H-%M-%S'),initials) - output_filename_step = os.path.join(output_dir,output_filename_step) - print(output_filename_step) - - output_filename_path, output_yml_filename_path = 
hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step, - input_file_dir, - select_dir_keywords, - select_file_keywords, - root_metadata_dict = root_metadata)""" - + output_filename_path = create_hdf5_file(date_str = config_dict['experiment_date'], select_file_keywords = []) + return output_filename_path \ No newline at end of file