import os

import src.hdf5_lib as hdf5_lib
import yaml
from datetime import datetime


# Date renderings used as file-name keywords for one datetime step.
_DATE_KEYWORD_FORMATS = ('%Y-%m-%d', '%Y_%m_%d', '%Y.%m.%d', '%Y%m%d')


def _date_keywords(step):
    """Return the date of `step` rendered in each of _DATE_KEYWORD_FORMATS."""
    return [step.strftime(fmt) for fmt in _DATE_KEYWORD_FORMATS]


def _output_path(output_dir, name, date_label, initials):
    """Return '<output_dir>/<name>_<date_label>_<initials>.h5'."""
    return os.path.join(output_dir, '_'.join([name, date_label, initials]) + '.h5')


def integrate_data_sources(yaml_config_file_path):
    """Create HDF5 file(s) for the experimental campaign described by a YAML config.

    The configuration must provide the keys 'project_name', 'experiment_name',
    'user_name', 'input_file_directory', 'output_file_directory' and
    'instrument_datafolder'. 'datetime_steps' is optional; when it is present
    and non-empty, 'integration_mode' is required as well.

    Behaviour by configuration:

    * 'datetime_steps' given and 'experimental_step' in 'integration_mode':
      one file per distinct step, named '<experiment>_<YYYY-MM-DD>_<user>.h5',
      each restricted to files matching that step's date keywords.
    * 'datetime_steps' given and 'collection' in 'integration_mode':
      a single file named '<experiment>_<min date>_<max date>_<user>.h5',
      using the date keywords of all steps.
    * 'datetime_steps' missing or empty: a single file named
      '<experiment>_yyyy-mm-dd_<user>.h5' with no file keywords.

    Parameters
    ----------
    yaml_config_file_path : str
        Path to the YAML configuration file.

    Returns
    -------
    tuple
        The pair returned by hdf5_lib.create_hdf5_file_from_filesystem_path
        (referred to in this module as the output file path and the output
        yml file path). In 'experimental_step' mode several files may be
        created, but only the pair from the last call is returned.

    Raises
    ------
    yaml.YAMLError
        If the configuration file cannot be parsed.
    KeyError
        If a required configuration key is missing.
    ValueError
        If a datetime step does not match '%Y-%m-%d %H-%M-%S', or if
        'integration_mode' names neither 'experimental_step' nor 'collection'.
    """
    # TODO: add and commit changes to yaml_config_file_path
    with open(yaml_config_file_path, 'r') as stream:
        # A parse error is allowed to propagate: continuing without a
        # configuration would only fail later with an unrelated NameError.
        # NOTE(review): FullLoader can construct more than plain data types;
        # consider yaml.safe_load if configuration files may be untrusted.
        config_dict = yaml.load(stream, Loader=yaml.FullLoader)

    exp_campaign_name = config_dict['experiment_name']
    initials = config_dict['user_name']
    input_file_dir = config_dict['input_file_directory']
    output_dir = config_dict['output_file_directory']
    select_dir_keywords = config_dict['instrument_datafolder']

    root_metadata = {
        'project_name': config_dict['project_name'],
        'experiment_name': exp_campaign_name,
        'user_name': initials,
    }

    def create_file(date_label, select_file_keywords):
        # Single place where the HDF5 writer is invoked, so every mode passes
        # the same directories and root metadata.
        output_filename_step = _output_path(output_dir, exp_campaign_name, date_label, initials)
        print(output_filename_step)
        return hdf5_lib.create_hdf5_file_from_filesystem_path(
            output_filename_step,
            input_file_dir,
            select_dir_keywords,
            select_file_keywords,
            root_metadata_dict=root_metadata,
        )

    datetime_steps = config_dict.get('datetime_steps')
    if not datetime_steps:
        # No steps configured: integrate everything under a placeholder date.
        return create_file('yyyy-mm-dd', [])

    # Keyed by the parsed datetime so duplicated steps collapse into one entry;
    # insertion order is preserved.
    datetime_augment_dict = {}
    for datetime_step in datetime_steps:
        parsed_step = datetime.strptime(datetime_step, '%Y-%m-%d %H-%M-%S')
        datetime_augment_dict[parsed_step] = _date_keywords(parsed_step)
        print(parsed_step)

    integration_mode = config_dict['integration_mode']

    if 'experimental_step' in integration_mode:
        result = None
        for parsed_step, select_file_keywords in datetime_augment_dict.items():
            result = create_file(parsed_step.strftime('%Y-%m-%d'), select_file_keywords)
        # Only the last created file is reported, as before.
        return result

    if 'collection' in integration_mode:
        select_file_keywords = []
        for step_keywords in datetime_augment_dict.values():
            select_file_keywords.extend(step_keywords)
        date_label = '_'.join([
            min(datetime_augment_dict).strftime('%Y-%m-%d'),
            max(datetime_augment_dict).strftime('%Y-%m-%d'),
        ])
        return create_file(date_label, select_file_keywords)

    raise ValueError(
        "Unsupported integration_mode {!r}: expected it to contain "
        "'experimental_step' or 'collection'.".format(integration_mode)
    )