Implemented a function that takes a YAML config file specifying the data integration output
This commit is contained in:
100
src/data_integration_lib.py
Normal file
100
src/data_integration_lib.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import src.hdf5_lib as hdf5_lib
|
||||||
|
import yaml
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def integrate_data_sources(yaml_config_file_path):
    """Create HDF5 file(s) from the data sources described in a YAML config file.

    The configuration is read as a mapping. The keys ``project_name``,
    ``experiment_name``, ``user_name``, ``input_file_directory``,
    ``output_file_directory`` and ``instrument_datafolder`` are required.
    Output files are written to ``output_file_directory`` and are named
    ``<experiment_name>_<date label>_<user_name>.h5``.

    The optional keys ``datetime_steps`` and ``integration_mode`` select what
    is produced:

    * ``datetime_steps`` missing or empty: a single file with the literal date
      label ``'yyyy-mm-dd'`` and an empty file-keyword filter.
    * ``integration_mode`` contains ``'experimental_step'``: one file per
      datetime step, each filtered by that step's date keywords.
    * ``integration_mode`` contains ``'collection'``: a single file whose date
      label is ``<earliest date>_<latest date>`` and whose file-keyword filter
      is the concatenation of the keywords of every step.

    Each entry of ``datetime_steps`` must be a string formatted as
    ``'%Y-%m-%d %H-%M-%S'``.

    Parameters
    ----------
    yaml_config_file_path : str
        Path to the YAML configuration file.

    Returns
    -------
    tuple
        ``(output_filename_path, output_yml_filename_path)`` as returned by
        ``hdf5_lib.create_hdf5_file_from_filesystem_path``. In
        ``'experimental_step'`` mode several files are created but only the
        pair belonging to the last step is returned.

    Raises
    ------
    yaml.YAMLError
        If the configuration file is not valid YAML. (It used to be printed
        and swallowed, which then failed later with an unrelated error.)
    KeyError
        If a required configuration key is missing.
    ValueError
        If the file does not hold a mapping, a datetime step does not match
        the expected format, or ``integration_mode`` is not supported while
        ``datetime_steps`` is given.
    """
    # TODO: add and commit changes to yaml_config_file_path
    with open(yaml_config_file_path, 'r') as stream:
        # NOTE(review): FullLoader is kept from the original code; prefer
        # yaml.safe_load if config files may come from untrusted sources.
        config_dict = yaml.load(stream, Loader=yaml.FullLoader)

    # An empty file makes yaml.load return None; fail with a clear message.
    if not isinstance(config_dict, dict):
        raise ValueError(
            'Configuration file {!r} must contain a YAML mapping.'.format(yaml_config_file_path)
        )

    exp_campaign_name = config_dict['experiment_name']
    initials = config_dict['user_name']
    input_file_dir = config_dict['input_file_directory']
    output_dir = config_dict['output_file_directory']
    select_dir_keywords = config_dict['instrument_datafolder']

    root_metadata = {
        'project_name': config_dict['project_name'],
        'experiment_name': config_dict['experiment_name'],
        'user_name': config_dict['user_name'],
    }

    def build_output_path(date_label):
        # <output_dir>/<experiment_name>_<date label>_<user_name>.h5
        return os.path.join(output_dir, '_'.join([exp_campaign_name, date_label, initials]) + '.h5')

    def create_file(output_path, select_file_keywords):
        # Single place for the call that was repeated in every branch.
        output_filename_path, output_yml_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(
            output_path,
            input_file_dir,
            select_dir_keywords,
            select_file_keywords=select_file_keywords,
            root_metadata_dict=root_metadata,
        )
        return output_filename_path, output_yml_filename_path

    datetime_steps = config_dict.get('datetime_steps')
    if not datetime_steps:
        # No datetime steps: integrate everything under a placeholder date.
        return create_file(build_output_path('yyyy-mm-dd'), [])

    # Map every step to the spellings of its date that may occur in file names.
    date_formats = ('%Y-%m-%d', '%Y_%m_%d', '%Y.%m.%d', '%Y%m%d')
    keywords_by_step = {}
    for datetime_step in datetime_steps:
        step = datetime.strptime(datetime_step, '%Y-%m-%d %H-%M-%S')
        keywords_by_step[step] = [step.strftime(fmt) for fmt in date_formats]
        print(step)

    # `or ''` keeps the membership tests below valid when the key is absent or null.
    integration_mode = config_dict.get('integration_mode') or ''

    if 'experimental_step' in integration_mode:
        # NOTE(review): the file name only carries the date, so two steps on
        # the same calendar day map to the same output file name.
        for step, select_file_keywords in keywords_by_step.items():
            output_path = build_output_path(step.strftime('%Y-%m-%d'))
            print(output_path)
            output_paths = create_file(output_path, select_file_keywords)
        return output_paths

    if 'collection' in integration_mode:
        select_file_keywords = []
        for step_keywords in keywords_by_step.values():
            select_file_keywords.extend(step_keywords)
        date_label = '_'.join([
            min(keywords_by_step).strftime('%Y-%m-%d'),
            max(keywords_by_step).strftime('%Y-%m-%d'),
        ])
        return create_file(build_output_path(date_label), select_file_keywords)

    # Previously this case fell through to the return statement and raised
    # UnboundLocalError; report the actual configuration problem instead.
    raise ValueError(
        "Unsupported integration_mode {!r}: expected 'experimental_step' or 'collection' "
        "when datetime_steps is given.".format(config_dict.get('integration_mode'))
    )
|
Reference in New Issue
Block a user