Removed HDF5 file creation redundancy by adding a helper function create_hdf5_file(date_str, select_file_keywords), which handles the variations in date_str and file keywords across the integration modes (a consolidated sketch follows the diff below).

2024-05-26 12:24:15 +02:00
parent 4dc09339b5
commit 37071945f5


@@ -45,7 +45,16 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'):
'user_name' : config_dict['user_name'],
}
if config_dict['datetime_steps']:
def create_hdf5_file(date_str, select_file_keywords):
filename = output_filename(exp_campaign_name, date_str, initials)
output_path = os.path.join(output_dir, filename)
logging.info("Creating HDF5 file at: %s", output_path)
return hdf5_lib.create_hdf5_file_from_filesystem_path(
output_path, input_file_dir, select_dir_keywords, select_file_keywords, root_metadata_dict=root_metadata
)
if config_dict.get('datetime_steps'):
datetime_augment_dict = {}
for datetime_step in config_dict['datetime_steps']:
@@ -55,15 +64,10 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'):
if 'experimental_step' in config_dict['integration_mode']:
for datetime_step in datetime_augment_dict.keys():
date_str = datetime_step.strftime('%Y-%m-%d')
select_file_keywords = datetime_augment_dict[datetime_step]
output_filename_step = output_filename(exp_campaign_name,datetime_step.strftime('%Y-%m-%d'),initials)
output_filename_step = os.path.join(output_dir,output_filename_step)
print(output_filename_step)
output_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step,
input_file_dir,
select_dir_keywords,
select_file_keywords,
root_metadata_dict = root_metadata)
output_filename_path = create_hdf5_file(date_str, select_file_keywords)
elif 'collection' in config_dict['integration_mode']:
select_file_keywords = []
for datetime_step in datetime_augment_dict.keys():
@@ -71,44 +75,10 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'):
min_datetime = min(datetime_augment_dict.keys())
max_datetime = max(datetime_augment_dict.keys())
output_filename_step = output_filename(exp_campaign_name,min_datetime.strftime('%Y-%m-%d')+'_'+max_datetime.strftime('%Y-%m-%d'),initials)
output_filename_step = os.path.join(output_dir,output_filename_step)
output_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step,
input_file_dir,
select_dir_keywords,
select_file_keywords,
root_metadata_dict = root_metadata)
date_str = min_datetime.strftime('%Y-%m-%d')+'_'+max_datetime.strftime('%Y-%m-%d')
output_filename_path = create_hdf5_file(date_str, select_file_keywords)
else:
output_filename_step = output_filename(exp_campaign_name,config_dict['experiment_date'],initials)
output_filename_step = os.path.join(output_dir,output_filename_step)
output_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step,
input_file_dir,
select_dir_keywords,
select_file_keywords=[],
root_metadata_dict = root_metadata)
"""for datetime_step in config_dict['datetime_steps']:
tmp = datetime.strptime(datetime_step,'%Y-%m-%d %H-%M-%S') #convert(datetime_step)
root_metadata['creation_date'] = datetime_step
print(tmp)
select_file_keywords = [tmp.strftime('%Y-%m-%d'),tmp.strftime('%Y_%m_%d'),tmp.strftime('%Y.%m.%d'),tmp.strftime('%Y%m%d')]
print(select_file_keywords)
output_filename_step = output_filename(exp_campaign_name,tmp.strftime('%Y-%m-%d_%H-%M-%S'),initials)
output_filename_step = os.path.join(output_dir,output_filename_step)
print(output_filename_step)
output_filename_path, output_yml_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(output_filename_step,
input_file_dir,
select_dir_keywords,
select_file_keywords,
root_metadata_dict = root_metadata)"""
output_filename_path = create_hdf5_file(date_str = config_dict['experiment_date'], select_file_keywords = [])
return output_filename_path
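
A minimal, self-contained sketch of the consolidated flow after this commit, assuming hypothetical stand-ins: the dependencies (output_filename, hdf5_lib, config values, datetime_augment_dict) are passed in as parameters purely so the sketch runs on its own, and the keyword aggregation for 'collection' mode is an assumption since that loop is elided in the hunk above. This is not the module's actual code, only an illustration of how the three call sites collapse into one helper.

    import os
    import logging

    def integrate_sketch(config_dict, exp_campaign_name, initials, output_dir,
                         input_file_dir, select_dir_keywords, root_metadata,
                         output_filename, hdf5_lib, datetime_augment_dict=None):
        # Illustrative stand-in: dependencies are injected so the sketch is self-contained.
        output_filename_path = None

        def create_hdf5_file(date_str, select_file_keywords):
            # Build the output path once and delegate to the library call,
            # so every branch below shares the same code path.
            filename = output_filename(exp_campaign_name, date_str, initials)
            output_path = os.path.join(output_dir, filename)
            logging.info("Creating HDF5 file at: %s", output_path)
            return hdf5_lib.create_hdf5_file_from_filesystem_path(
                output_path, input_file_dir, select_dir_keywords,
                select_file_keywords, root_metadata_dict=root_metadata)

        if config_dict.get('datetime_steps'):
            if 'experimental_step' in config_dict['integration_mode']:
                # One HDF5 file per datetime step, keyed by its date string.
                for datetime_step, keywords in (datetime_augment_dict or {}).items():
                    output_filename_path = create_hdf5_file(
                        datetime_step.strftime('%Y-%m-%d'), keywords)
            elif 'collection' in config_dict['integration_mode']:
                # One HDF5 file spanning the earliest and latest datetime steps.
                min_dt, max_dt = min(datetime_augment_dict), max(datetime_augment_dict)
                date_str = min_dt.strftime('%Y-%m-%d') + '_' + max_dt.strftime('%Y-%m-%d')
                # Assumed aggregation of per-step keywords (elided in the diff above).
                keywords = [kw for kws in datetime_augment_dict.values() for kw in kws]
                output_filename_path = create_hdf5_file(date_str, keywords)
        else:
            # No datetime steps: a single file named after the experiment date.
            output_filename_path = create_hdf5_file(config_dict['experiment_date'], [])

        return output_filename_path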