From 197ad0288aedea2824dc872135fe3c7c92987158 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Tue, 4 Jun 2024 13:37:20 +0200 Subject: [PATCH] Updated file reader and data integration with datastart and dataend properties. --- src/data_integration_lib.py | 19 +++++++++++++------ src/g5505_file_reader.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/data_integration_lib.py b/src/data_integration_lib.py index 336e81f..950f67c 100644 --- a/src/data_integration_lib.py +++ b/src/data_integration_lib.py @@ -39,13 +39,13 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): input_file_dir = config_dict['input_file_directory'] output_dir = config_dict['output_file_directory'] select_dir_keywords = config_dict['instrument_datafolder'] - root_metadata = { + root_metadata_dict = { 'project_name' : config_dict['project_name'], 'experiment_name' : config_dict['experiment_name'], 'user_name' : config_dict['user_name'], } - def create_hdf5_file(date_str, select_file_keywords): + def create_hdf5_file(date_str, select_file_keywords,root_metadata): filename = output_filename(exp_campaign_name, date_str, initials) output_path = os.path.join(output_dir, filename) logging.info("Creating HDF5 file at: %s", output_path) @@ -62,12 +62,15 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): datetime_augment_dict[tmp] = [tmp.strftime('%Y-%m-%d'),tmp.strftime('%Y_%m_%d'),tmp.strftime('%Y.%m.%d'),tmp.strftime('%Y%m%d')] print(tmp) - if 'experimental_step' in config_dict['integration_mode']: + if 'single_experiment' in config_dict['integration_mode']: output_filename_path = [] for datetime_step in datetime_augment_dict.keys(): date_str = datetime_step.strftime('%Y-%m-%d') select_file_keywords = datetime_augment_dict[datetime_step] - dt_step_output_filename_path= create_hdf5_file(date_str, select_file_keywords) + + root_metadata_dict.update({'experiment_startdate': date_str, + 'experiment_enddate': date_str}) + 
+dt_step_output_filename_path= create_hdf5_file(date_str, select_file_keywords, root_metadata_dict) output_filename_path.append(dt_step_output_filename_path) elif 'collection' in config_dict['integration_mode']: @@ -79,13 +82,17 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): config_dict['experiment_enddate'] = max(datetime_augment_dict.keys()) startdate = config_dict['experiment_startdate'] enddate = config_dict['experiment_enddate'] + root_metadata_dict.update({'experiment_startdate': startdate, + 'experiment_enddate': enddate}) date_str = f'{startdate}_{enddate}' - output_filename_path = create_hdf5_file(date_str, select_file_keywords) + output_filename_path = create_hdf5_file(date_str, select_file_keywords, root_metadata_dict) else: startdate = config_dict['experiment_startdate'] enddate = config_dict['experiment_enddate'] + root_metadata_dict.update({'experiment_startdate': startdate, + 'experiment_enddate': enddate}) date_str = f'{startdate}_{enddate}' - output_filename_path = create_hdf5_file(date_str, select_file_keywords = []) + output_filename_path = create_hdf5_file(date_str, select_file_keywords = [], root_metadata = root_metadata_dict) return output_filename_path \ No newline at end of file diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index e237eb2..a253c60 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -114,7 +114,7 @@ def dataframe_to_np_structured_array(df: pd.DataFrame): def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ): - with open('src/text_data_sources.yaml','r') as stream: + with open('src/instruments/text_data_sources.yaml','r') as stream: try: config_dict = yaml.load(stream, Loader=yaml.FullLoader) except yaml.YAMLError as exc: