Implemented function that takes yaml config files specifying data integration output
This commit is contained in:
100
src/data_integration_lib.py
Normal file
100
src/data_integration_lib.py
Normal file
@ -0,0 +1,100 @@
|
||||
import os
|
||||
|
||||
import src.hdf5_lib as hdf5_lib
|
||||
import yaml
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
|
||||
def integrate_data_sources(yaml_config_file_path):
    """Create HDF5 file(s) from the data sources described by a YAML config file.

    The configuration file must define the keys 'project_name',
    'experiment_name', 'user_name', 'input_file_directory',
    'output_file_directory' and 'instrument_datafolder'.

    If 'datetime_steps' is present and non-empty, each entry is parsed with the
    format '%Y-%m-%d %H-%M-%S' and 'integration_mode' selects the behavior:

    * 'experimental_step': one HDF5 file is created per datetime step, using
      that step's date (in several spellings) as file-selection keywords.
    * 'collection': a single HDF5 file is created, using the date keywords of
      all steps; its name carries the earliest and latest step dates.

    If 'datetime_steps' is missing or empty, a single file is created with the
    placeholder date 'yyyy-mm-dd' in its name and no file-selection keywords.

    Parameters
    ----------
    yaml_config_file_path : str
        Path to the YAML configuration file.

    Returns
    -------
    tuple
        ``(output_filename_path, output_yml_filename_path)`` as returned by
        ``hdf5_lib.create_hdf5_file_from_filesystem_path``. In
        'experimental_step' mode only the values from the last step processed
        are returned.

    Raises
    ------
    yaml.YAMLError
        If the configuration file cannot be parsed.
    ValueError
        If the configuration is not a mapping, or 'integration_mode' is not
        one of the supported modes while 'datetime_steps' is given.
    KeyError
        If a required configuration key is missing.
    """
    # TODO: add and commit changes to yaml_config_file_path
    with open(yaml_config_file_path, 'r') as stream:
        try:
            # NOTE(review): consider yaml.safe_load if configuration files can
            # come from untrusted sources.
            config_dict = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            # Report and propagate: continuing without a configuration would
            # only fail later with an unrelated NameError.
            print(exc)
            raise

    if not isinstance(config_dict, dict):
        raise ValueError(
            "Configuration file '{}' must contain a YAML mapping.".format(yaml_config_file_path)
        )

    exp_campaign_name = config_dict['experiment_name']
    initials = config_dict['user_name']
    input_file_dir = config_dict['input_file_directory']
    output_dir = config_dict['output_file_directory']
    select_dir_keywords = config_dict['instrument_datafolder']

    root_metadata = {
        'project_name': config_dict['project_name'],
        'experiment_name': exp_campaign_name,
        'user_name': initials,
    }

    def create_file(date_label, select_file_keywords):
        # Single place where the output path is built and the HDF5 file is created.
        output_path = _build_output_path(output_dir, exp_campaign_name, date_label, initials)
        return hdf5_lib.create_hdf5_file_from_filesystem_path(
            output_path,
            input_file_dir,
            select_dir_keywords,
            select_file_keywords=select_file_keywords,
            root_metadata_dict=root_metadata,
        )

    datetime_steps = config_dict.get('datetime_steps')
    if not datetime_steps:
        # No datetime steps: integrate everything under a placeholder date.
        return create_file('yyyy-mm-dd', [])

    # Map each parsed step to the date spellings used to select matching files.
    # Keying by the parsed datetime collapses duplicated steps.
    keywords_by_step = {}
    for datetime_step in datetime_steps:
        step = datetime.strptime(datetime_step, '%Y-%m-%d %H-%M-%S')
        keywords_by_step[step] = _date_keywords(step)
        print(step)

    integration_mode = config_dict['integration_mode']

    if 'experimental_step' in integration_mode:
        # One file per step; the result of the last step is what gets returned.
        for step, step_keywords in keywords_by_step.items():
            date_label = step.strftime('%Y-%m-%d')
            print(_build_output_path(output_dir, exp_campaign_name, date_label, initials))
            output_filename_path, output_yml_filename_path = create_file(date_label, step_keywords)
        return output_filename_path, output_yml_filename_path

    if 'collection' in integration_mode:
        # One file for all steps, labelled with the covered date range.
        all_keywords = [
            keyword
            for step_keywords in keywords_by_step.values()
            for keyword in step_keywords
        ]
        date_label = '_'.join([
            min(keywords_by_step).strftime('%Y-%m-%d'),
            max(keywords_by_step).strftime('%Y-%m-%d'),
        ])
        return create_file(date_label, all_keywords)

    raise ValueError(
        "Unsupported integration_mode {!r}: expected 'experimental_step' or 'collection'.".format(
            integration_mode
        )
    )


def _build_output_path(output_dir, name, date_label, initials):
    """Return '<output_dir>/<name>_<date_label>_<initials>.h5'."""
    return os.path.join(output_dir, '_'.join([name, date_label, initials]) + '.h5')


def _date_keywords(step):
    """Return the date of ``step`` spelled as Y-m-d, Y_m_d, Y.m.d and Ymd."""
    return [step.strftime(fmt) for fmt in ('%Y-%m-%d', '%Y_%m_%d', '%Y.%m.%d', '%Y%m%d')]
|
Reference in New Issue
Block a user