Updated file reader and data integration with datastart and dataend properties.
This commit is contained in:
@ -39,13 +39,13 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'):
|
|||||||
input_file_dir = config_dict['input_file_directory']
|
input_file_dir = config_dict['input_file_directory']
|
||||||
output_dir = config_dict['output_file_directory']
|
output_dir = config_dict['output_file_directory']
|
||||||
select_dir_keywords = config_dict['instrument_datafolder']
|
select_dir_keywords = config_dict['instrument_datafolder']
|
||||||
root_metadata = {
|
root_metadata_dict = {
|
||||||
'project_name' : config_dict['project_name'],
|
'project_name' : config_dict['project_name'],
|
||||||
'experiment_name' : config_dict['experiment_name'],
|
'experiment_name' : config_dict['experiment_name'],
|
||||||
'user_name' : config_dict['user_name'],
|
'user_name' : config_dict['user_name'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def create_hdf5_file(date_str, select_file_keywords):
|
def create_hdf5_file(date_str, select_file_keywords,root_metadata):
|
||||||
filename = output_filename(exp_campaign_name, date_str, initials)
|
filename = output_filename(exp_campaign_name, date_str, initials)
|
||||||
output_path = os.path.join(output_dir, filename)
|
output_path = os.path.join(output_dir, filename)
|
||||||
logging.info("Creating HDF5 file at: %s", output_path)
|
logging.info("Creating HDF5 file at: %s", output_path)
|
||||||
@ -62,12 +62,15 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'):
|
|||||||
datetime_augment_dict[tmp] = [tmp.strftime('%Y-%m-%d'),tmp.strftime('%Y_%m_%d'),tmp.strftime('%Y.%m.%d'),tmp.strftime('%Y%m%d')]
|
datetime_augment_dict[tmp] = [tmp.strftime('%Y-%m-%d'),tmp.strftime('%Y_%m_%d'),tmp.strftime('%Y.%m.%d'),tmp.strftime('%Y%m%d')]
|
||||||
print(tmp)
|
print(tmp)
|
||||||
|
|
||||||
if 'experimental_step' in config_dict['integration_mode']:
|
if 'single_experiment' in config_dict['integration_mode']:
|
||||||
output_filename_path = []
|
output_filename_path = []
|
||||||
for datetime_step in datetime_augment_dict.keys():
|
for datetime_step in datetime_augment_dict.keys():
|
||||||
date_str = datetime_step.strftime('%Y-%m-%d')
|
date_str = datetime_step.strftime('%Y-%m-%d')
|
||||||
select_file_keywords = datetime_augment_dict[datetime_step]
|
select_file_keywords = datetime_augment_dict[datetime_step]
|
||||||
dt_step_output_filename_path= create_hdf5_file(date_str, select_file_keywords)
|
|
||||||
|
root_metadata_dict.update({'experiment_startdate': date_str,
|
||||||
|
'experiment_enddate': date_str})
|
||||||
|
dt_step_output_filename_path= create_hdf5_file(date_str, select_file_keywords, root_metadata_dict)
|
||||||
output_filename_path.append(dt_step_output_filename_path)
|
output_filename_path.append(dt_step_output_filename_path)
|
||||||
|
|
||||||
elif 'collection' in config_dict['integration_mode']:
|
elif 'collection' in config_dict['integration_mode']:
|
||||||
@ -79,13 +82,17 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'):
|
|||||||
config_dict['experiment_enddate'] = max(datetime_augment_dict.keys())
|
config_dict['experiment_enddate'] = max(datetime_augment_dict.keys())
|
||||||
startdate = config_dict['experiment_startdate']
|
startdate = config_dict['experiment_startdate']
|
||||||
enddate = config_dict['experiment_enddate']
|
enddate = config_dict['experiment_enddate']
|
||||||
|
root_metadata_dict.update({'experiment_startdate': startdate,
|
||||||
|
'experiment_enddate': enddate})
|
||||||
|
|
||||||
date_str = f'{startdate}_{enddate}'
|
date_str = f'{startdate}_{enddate}'
|
||||||
output_filename_path = create_hdf5_file(date_str, select_file_keywords)
|
output_filename_path = create_hdf5_file(date_str, select_file_keywords, root_metadata_dict)
|
||||||
else:
|
else:
|
||||||
startdate = config_dict['experiment_startdate']
|
startdate = config_dict['experiment_startdate']
|
||||||
enddate = config_dict['experiment_enddate']
|
enddate = config_dict['experiment_enddate']
|
||||||
|
root_metadata_dict.update({'experiment_startdate': startdate,
|
||||||
|
'experiment_enddate': enddate})
|
||||||
date_str = f'{startdate}_{enddate}'
|
date_str = f'{startdate}_{enddate}'
|
||||||
output_filename_path = create_hdf5_file(date_str, select_file_keywords = [])
|
output_filename_path = create_hdf5_file(date_str, select_file_keywords = [], root_metadata = root_metadata_dict)
|
||||||
|
|
||||||
return output_filename_path
|
return output_filename_path
|
@ -114,7 +114,7 @@ def dataframe_to_np_structured_array(df: pd.DataFrame):
|
|||||||
|
|
||||||
def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
|
||||||
|
|
||||||
with open('src/text_data_sources.yaml','r') as stream:
|
with open('src/instruments/text_data_sources.yaml','r') as stream:
|
||||||
try:
|
try:
|
||||||
config_dict = yaml.load(stream, Loader=yaml.FullLoader)
|
config_dict = yaml.load(stream, Loader=yaml.FullLoader)
|
||||||
except yaml.YAMLError as exc:
|
except yaml.YAMLError as exc:
|
||||||
|
Reference in New Issue
Block a user