From 197ad0288aedea2824dc872135fe3c7c92987158 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Tue, 4 Jun 2024 13:37:20 +0200 Subject: [PATCH] Updated file reader and data integration with datastart and dataend properties. --- src/data_integration_lib.py | 19 +++++++++++++------ src/g5505_file_reader.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/data_integration_lib.py b/src/data_integration_lib.py index 336e81f..950f67c 100644 --- a/src/data_integration_lib.py +++ b/src/data_integration_lib.py @@ -39,13 +39,13 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): input_file_dir = config_dict['input_file_directory'] output_dir = config_dict['output_file_directory'] select_dir_keywords = config_dict['instrument_datafolder'] - root_metadata = { + root_metadata_dict = { 'project_name' : config_dict['project_name'], 'experiment_name' : config_dict['experiment_name'], 'user_name' : config_dict['user_name'], } - def create_hdf5_file(date_str, select_file_keywords): + def create_hdf5_file(date_str, select_file_keywords,root_metadata): filename = output_filename(exp_campaign_name, date_str, initials) output_path = os.path.join(output_dir, filename) logging.info("Creating HDF5 file at: %s", output_path) @@ -62,12 +62,15 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): datetime_augment_dict[tmp] = [tmp.strftime('%Y-%m-%d'),tmp.strftime('%Y_%m_%d'),tmp.strftime('%Y.%m.%d'),tmp.strftime('%Y%m%d')] print(tmp) - if 'experimental_step' in config_dict['integration_mode']: + if 'single_experiment' in config_dict['integration_mode']: output_filename_path = [] for datetime_step in datetime_augment_dict.keys(): date_str = datetime_step.strftime('%Y-%m-%d') select_file_keywords = datetime_augment_dict[datetime_step] - dt_step_output_filename_path= create_hdf5_file(date_str, select_file_keywords) + + root_metadata_dict.update({'experiment_startdate': date_str, + 'experiment_enddate': date_str}) + 
+dt_step_output_filename_path= create_hdf5_file(date_str, select_file_keywords, root_metadata_dict) output_filename_path.append(dt_step_output_filename_path) elif 'collection' in config_dict['integration_mode']: @@ -79,13 +82,17 @@ def integrate_data_sources(yaml_config_file_path, log_dir='logs/'): config_dict['experiment_enddate'] = max(datetime_augment_dict.keys()) startdate = config_dict['experiment_startdate'] enddate = config_dict['experiment_enddate'] + root_metadata_dict.update({'experiment_startdate': startdate, + 'experiment_enddate': enddate}) date_str = f'{startdate}_{enddate}' - output_filename_path = create_hdf5_file(date_str, select_file_keywords) + output_filename_path = create_hdf5_file(date_str, select_file_keywords, root_metadata_dict) else: startdate = config_dict['experiment_startdate'] enddate = config_dict['experiment_enddate'] + root_metadata_dict.update({'experiment_startdate': startdate, + 'experiment_enddate': enddate}) date_str = f'{startdate}_{enddate}' - output_filename_path = create_hdf5_file(date_str, select_file_keywords = []) + output_filename_path = create_hdf5_file(date_str, select_file_keywords = [], root_metadata = root_metadata_dict) return output_filename_path \ No newline at end of file diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index e237eb2..a253c60 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -114,7 +114,7 @@ def dataframe_to_np_structured_array(df: pd.DataFrame): def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ): - with open('src/text_data_sources.yaml','r') as stream: + with open('src/instruments/text_data_sources.yaml','r') as stream: try: config_dict = yaml.load(stream, Loader=yaml.FullLoader) except yaml.YAMLError as exc: