Synch with remote repo
@@ -1,223 +1,223 @@

import sys
import os
import pandas as pd
import collections
import yaml

#root_dir = os.path.abspath(os.curdir)
#sys.path.append(root_dir)
import utils.g5505_utils as utils


def read_acsm_files_as_dict(filename: str, instruments_dir: str = None, work_with_copy: bool = True):
    # If instruments_dir is not provided, use the default path relative to the module directory
    if not instruments_dir:
        # Assuming the instruments folder is one level up from the source module directory
        module_dir = os.path.dirname(__file__)
        instruments_dir = os.path.join(module_dir, '..')

    # Normalize the path (resolves any '..' in the path)
    instrument_configs_path = os.path.abspath(os.path.join(instruments_dir, 'dictionaries', 'ACSM_TOFWARE.yaml'))

    with open(instrument_configs_path, 'r') as stream:
        try:
            config_dict = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
    # Verify whether the file can be read by the available instrument configurations.
    #if not any(key in filename.replace(os.sep,'/') for key in config_dict.keys()):
    #    return {}

    # TODO: this may be prone to error if the assumed folder structure is not compliant

    description_dict = config_dict.get('table_header', {})

    file_encoding = config_dict['config_text_reader'].get('file_encoding', 'utf-8')
    separator = config_dict['config_text_reader'].get('separator', None)
    table_header = config_dict['config_text_reader'].get('table_header', None)
    timestamp_variables = config_dict['config_text_reader'].get('timestamp', [])
    datetime_format = config_dict['config_text_reader'].get('datetime_format', [])

    # Read the header as a dictionary and detect where the data table starts
    header_dict = {}
    data_start = False
    # Work with a copy of the file for safety
    if work_with_copy:
        tmp_filename = utils.make_file_copy(source_file_path=filename)
    else:
        tmp_filename = filename

    if not isinstance(table_header, list):
        table_header = [table_header]
        file_encoding = [file_encoding]
        separator = [separator]

    with open(tmp_filename, 'rb') as f:
        table_preamble = []
        for line_number, line in enumerate(f):

            for tb_idx, tb in enumerate(table_header):
                if tb in line.decode(file_encoding[tb_idx]):
                    break

            if tb in line.decode(file_encoding[tb_idx]):
                list_of_substrings = line.decode(file_encoding[tb_idx]).split(separator[tb_idx].replace('\\t', '\t'))

                # Count occurrences of each substring
                substring_counts = collections.Counter(list_of_substrings)
                data_start = True
                # Generate column names, appending an index only for repeated substrings
                column_names = [f"{i}_{name.strip()}" if substring_counts[name] > 1 else name.strip() for i, name in enumerate(list_of_substrings)]

                #column_names = [str(i)+'_'+name.strip() for i, name in enumerate(list_of_substrings)]
                #column_names = []
                #for i, name in enumerate(list_of_substrings):
                #    column_names.append(str(i)+'_'+name)

                #print(line_number, len(column_names), '\n')
                break
            # Subdivide the line into words and join them with a single space.
            # I assume this produces a cleaner line that contains no stray separator characters (\t, \r) or extra spaces.
            list_of_substrings = line.decode(file_encoding[tb_idx]).split()
            # TODO: ideally we should use a multiline string, but the YAML parser does not recognize \n as a special character
            #line = ' '.join(list_of_substrings+['\n'])
            #line = ' '.join(list_of_substrings)
            table_preamble.append(' '.join([item for item in list_of_substrings]))  # += new_line

    # TODO: it does not work with separator as None; fix for RGA
    try:
        df = pd.read_csv(tmp_filename,
                         delimiter=separator[tb_idx].replace('\\t', '\t'),
                         header=line_number,
                         #encoding='latin-1',
                         encoding=file_encoding[tb_idx],
                         names=column_names,
                         skip_blank_lines=True)

        df_numerical_attrs = df.select_dtypes(include='number')
        df_categorical_attrs = df.select_dtypes(exclude='number')
        numerical_variables = [item for item in df_numerical_attrs.columns]

        # Consolidate the separate 'date' and 'time' columns specified in text_data_source.yaml into a single timestamp column
        if timestamp_variables:
            #df_categorical_attrs['timestamps'] = [' '.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index]
            #df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]

            #df_categorical_attrs['timestamps'] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)
            timestamps_name = ' '.join(timestamp_variables)
            df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)

            valid_indices = []
            if datetime_format:
                df_categorical_attrs[timestamps_name] = pd.to_datetime(df_categorical_attrs[timestamps_name], format=datetime_format, errors='coerce')
                valid_indices = df_categorical_attrs.dropna(subset=[timestamps_name]).index
                df_categorical_attrs = df_categorical_attrs.loc[valid_indices, :]
                df_numerical_attrs = df_numerical_attrs.loc[valid_indices, :]

                df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamps_name].dt.strftime(config_dict['default']['desired_format'])
                startdate = df_categorical_attrs[timestamps_name].min()
                enddate = df_categorical_attrs[timestamps_name].max()

                df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamps_name].astype(str)
                #header_dict.update({'startdate':startdate,'enddate':enddate})
                header_dict['startdate'] = str(startdate)
                header_dict['enddate'] = str(enddate)

            if len(timestamp_variables) > 1:
                df_categorical_attrs = df_categorical_attrs.drop(columns=timestamp_variables)

        #df_categorical_attrs.reindex(drop=True)
        #df_numerical_attrs.reindex(drop=True)

        categorical_variables = [item for item in df_categorical_attrs.columns]
        ####
        #elif 'RGA' in filename:
        #    df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Time(s)' : 'timestamps'})

        ###
        file_dict = {}
        path_tail, path_head = os.path.split(tmp_filename)

        file_dict['name'] = path_head
        # TODO: review this header dictionary; it may not be the best way to represent header data
        file_dict['attributes_dict'] = header_dict
        file_dict['datasets'] = []
        ####

        df = pd.concat((df_categorical_attrs, df_numerical_attrs), axis=1)

        #if numerical_variables:
        dataset = {}
        dataset['name'] = 'data_table'  #_numerical_variables'
        dataset['data'] = utils.convert_dataframe_to_np_structured_array(df)  #df_numerical_attrs.to_numpy()
        dataset['shape'] = dataset['data'].shape
        dataset['dtype'] = type(dataset['data'])
        #dataset['data_units'] = file_obj['wave']['data_units']
        #
        # Create attribute descriptions based on description_dict
        dataset['attributes'] = {}

        # Annotate column headers if description_dict is non-empty
        if description_dict:
            for column_name in df.columns:
                column_attr_dict = description_dict.get(column_name,
                                                        {'note': 'there was no description available. Review instrument files.'})
                dataset['attributes'].update({column_name: utils.convert_attrdict_to_np_structured_array(column_attr_dict)})

        #try:
        #    dataset['attributes'] = description_dict['table_header'].copy()
        #    for key in description_dict['table_header'].keys():
        #        if not key in numerical_variables:
        #            dataset['attributes'].pop(key)  # delete key
        #        else:
        #            dataset['attributes'][key] = utils.parse_attribute(dataset['attributes'][key])
        #    if timestamps_name in categorical_variables:
        #        dataset['attributes'][timestamps_name] = utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})
        #except ValueError as err:
        #    print(err)

        # Represent string values as fixed-length strings in the HDF5 file; these need to be
        # decoded back to str when read. This gives better control than variable-length strings,
        # at the expense of flexibility.
        # https://docs.h5py.org/en/stable/strings.html

        if table_preamble:
            #header_dict["table_preamble"] = utils.convert_string_to_bytes(table_preamble)
            tp_dataset = {}
            tp_dataset['name'] = "table_preamble"
            tp_dataset['data'] = utils.convert_string_to_bytes(table_preamble)
            tp_dataset['shape'] = tp_dataset['data'].shape
            tp_dataset['dtype'] = type(tp_dataset['data'])
            tp_dataset['attributes'] = {}
            file_dict['datasets'].append(tp_dataset)

        file_dict['datasets'].append(dataset)

        #if categorical_variables:
        #    dataset = {}
        #    dataset['name'] = 'table_categorical_variables'
        #    dataset['data'] = dataframe_to_np_structured_array(df_categorical_attrs)  #df_categorical_attrs.loc[:,categorical_variables].to_numpy()
        #    dataset['shape'] = dataset['data'].shape
        #    dataset['dtype'] = type(dataset['data'])
        #    if timestamps_name in categorical_variables:
        #        dataset['attributes'] = {timestamps_name: utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})}
        #    file_dict['datasets'].append(dataset)
    except:
        return {}

    return file_dict
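For orientation, here is a minimal usage sketch of read_acsm_files_as_dict. The data path and file name are hypothetical, and instruments_dir is left at its default so the configuration is resolved relative to the module:

from instruments.readers.acsm_tofware_reader import read_acsm_files_as_dict

# Hypothetical ACSM TOFWARE export; the path is illustrative only.
file_dict = read_acsm_files_as_dict('data/ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt')
if file_dict:  # an empty dict means the file could not be parsed
    print(file_dict['name'])             # file name without its directory part
    print(file_dict['attributes_dict'])  # e.g. {'startdate': '...', 'enddate': '...'}
    for ds in file_dict['datasets']:     # 'table_preamble' (if present) and 'data_table'
        print(ds['name'], ds['shape'])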
@@ -1,112 +1,112 @@

default:
  file_encoding : 'utf-8'
  separator : 'None'
  table_header : 'None'
  desired_format: '%Y-%m-%d %H:%M:%S.%f'

RGA:
  table_header : 'Time(s) Channel#1 Channel#2 Channel#3 Channel#4 Channel#5 Channel#6 Channel#7 Channel#8'
  separator : '\t'
  link_to_description: 'dictionaries/RGA.yaml'

Pressure:
  table_header : 'Date Time Vapore-Pressure 1 in Vapore-Pressure 2 in Baratron 1 in Baratron 2 in Baratron 3 in Baratron 4 in Temp. Ice-Sample in Temp. Heated-Sample in Temp. Cooler 1 in Temp. Cooler 2 in Flow Gas 1 in Pressure Chamber in X in Y in Z in None in Temp. Sealing in Flow Ice-Sample in'
  separator : '\t'
  timestamp: ['Date','Time']
  datetime_format: '%d.%m.%Y %H:%M:%S'
  link_to_description: 'dictionaries/Preassure.yaml'

Humidity_Sensors:
  table_header : 'Date Time RH1[%] RH2[%] RH3[%] RH4[%] RH5[%] RH6[%] RH7[%] RH8[%] T1[°C] T2[°C] T3[°C] T4[°C] T5[°C] T6[°C] T7[°C] T8[°C] DP1[°C] DP2[°C] DP3[°C] DP4[°C] DP5[°C] DP6[°C] DP7[°C] DP8[°C]'
  separator : '\t'
  file_encoding : 'latin-1'
  timestamp: ['Date','Time']
  datetime_format: '%d.%m.%Y %H:%M:%S'
  link_to_description: 'dictionaries/Humidity_Sensors.yaml'

HONO: #ICAD/HONO:
  table_header : 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) HONO (ppb) HONO Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) O4 (ppb) O4 Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource'
  separator : '\t'
  file_encoding : 'latin-1'
  timestamp: ['Start Date/Time (UTC)']
  datetime_format: '%Y-%m-%d %H:%M:%S.%f'
  link_to_description: 'dictionaries/ICAD_HONO.yaml'

NO2: #ICAD/NO2:
  table_header : 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource'
  separator : '\t'
  file_encoding : 'latin-1'
  timestamp: ['Start Date/Time (UTC)']
  datetime_format: '%Y-%m-%d %H:%M:%S.%f'
  link_to_description: 'dictionaries/ICAD_NO2.yaml'

Lopap:
  #table_header : 'Date;Time;Ch1;490.1;500.2;510.0;520.0;530.1;540.0;550.7;603.2;700.3;800.0;Ch2;500.5;510.3;520.5;530.7;540.8;550.5;550.8;560.9;570.9;581.2;586.2;591.2;596.1;601.1;606.4;611.3;'
  table_header : 'Date;Time;Ch1;'
  separator : ';'
  file_encoding : 'latin-1'
  timestamp: ['Date','Time']
  datetime_format: '%d.%m.%Y %H:%M:%S'
  link_to_description: 'dictionaries/Lopap.yaml'

T200_NOx:
  table_header : 'Date Time NO NO2 NOx'
  separator : '\t'
  file_encoding : 'latin-1'
  timestamp: ['Date','Time']
  datetime_format: '%d.%m.%Y %H:%M:%S'
  link_to_description: 'dictionaries/T200_NOx.yaml'

T360U_CO2:
  table_header : 'Date Time CO2'
  separator : '\t'
  file_encoding : 'latin-1'
  timestamp: ['Date','Time']
  datetime_format: '%d.%m.%Y %H:%M:%S'
  link_to_description: 'dictionaries/T360U_CO2.yaml'

smps:
  table_header: 'Sample # Date Start Time Sample Temp (C) Sample Pressure (kPa) Relative Humidity (%) Mean Free Path (m) Gas Viscosity (Pa*s) Diameter Midpoint (nm) 15.7 16.3 16.8 17.5 18.1 18.8 19.5 20.2 20.9 21.7 22.5 23.3 24.1 25.0 25.9 26.9 27.9 28.9 30.0 31.1 32.2 33.4 34.6 35.9 37.2 38.5 40.0 41.4 42.9 44.5 46.1 47.8 49.6 51.4 53.3 55.2 57.3 59.4 61.5 63.8 66.1 68.5 71.0 73.7 76.4 79.1 82.0 85.1 88.2 91.4 94.7 98.2 101.8 105.5 109.4 113.4 117.6 121.9 126.3 131.0 135.8 140.7 145.9 151.2 156.8 162.5 168.5 174.7 181.1 187.7 194.6 201.7 209.1 216.7 224.7 232.9 241.4 250.3 259.5 269.0 278.8 289.0 299.6 310.6 322.0 333.8 346.0 358.7 371.8 385.4 399.5 414.2 429.4 445.1 461.4 478.3 495.8 514.0 532.8 552.3 572.5 593.5 615.3 637.8 Scan Time (s) Retrace Time (s) Scan Resolution (Hz) Scans Per Sample Sheath Flow (L/min) Aerosol Flow (L/min) Bypass Flow (L/min) Low Voltage (V) High Voltage (V) Lower Size (nm) Upper Size (nm) Density (g/cm³) td + 0.5 (s) tf (s) D50 (nm) Neutralizer'
  separator : '\t'
  file_encoding : 'latin-1'
  timestamp: ['Date','Start Time']
  datetime_format: '%d/%m/%Y %H:%M:%S'
  link_to_description: 'dictionaries/smps.yaml'

gas:
  table_header : 'Date_Time HoribaNO HoribaNOy Thermo42C_NO Thermo42C_NOx APHA370 CH4 APHA370THC HygroclipRH HygroclipT ML9850SO2 ozone49c PAMrh PAMt xxxal xxxal xxxal xxxal ThermoCouple0 ThermoCouple1 ThermoCouple2 ThermoCouple3 xxxTC xxxTC xxxTC xxxTC xxxTC xxxTC xxxTC xxxTC xxxTC xxxTC xxxTC xxxTC CPC xxx LicorH2Odelta LicorCO2delta xxx 2BO2 xxx xxx HoribaCO xxx'
  separator : '\t'
  file_encoding : 'utf-8'
  timestamp: ['Date_Time']
  datetime_format: '%Y.%m.%d %H:%M:%S'
  link_to_description: 'dictionaries/gas.yaml'

ACSM_TOFWARE:
  table_header:
  #txt:
  - 't_base VaporizerTemp_C HeaterBias_V FlowRefWave FlowRate_mb FlowRate_ccs FilamentEmission_mA Detector_V AnalogInput06_V ABRefWave ABsamp ABCorrFact'
  - 't_start_Buf,Chl_11000,NH4_11000,SO4_11000,NO3_11000,Org_11000,SO4_48_11000,SO4_62_11000,SO4_82_11000,SO4_81_11000,SO4_98_11000,NO3_30_11000,Org_60_11000,Org_43_11000,Org_44_11000'
  #csv:
  - "X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 X32 X33 X34 X35 X36 X37 X38 X39 X40 X41 X42 X43 X44 X45 X46 X47 X48 X49 X50 X51 X52 X53 X54 X55 X56 X57 X58 X59 X60 X61 X62 X63 X64 X65 X66 X67 X68 X69 X70 X71 X72 X73 X74 X75 X76 X77 X78 X79 X80 X81 X82 X83 X84 X85 X86 X87 X88 X89 X90 X91 X92 X93 X94 X95 X96 X97 X98 X99 X100 X101 X102 X103 X104 X105 X106 X107 X108 X109 X110 X111 X112 X113 X114 X115 X116 X117 X118 X119 X120 X121 X122 X123 X124 X125 X126 X127 X128 X129 X130 X131 X132 X133 X134 X135 X136 X137 X138 X139 X140 X141 X142 X143 X144 X145 X146 X147 X148 X149 X150 X151 X152 X153 X154 X155 X156 X157 X158 X159 X160 X161 X162 X163 X164 X165 X166 X167 X168 X169 X170 X171 X172 X173 X174 X175 X176 X177 X178 X179 X180 X181 X182 X183 X184 X185 X186 X187 X188 X189 X190 X191 X192 X193 X194 X195 X196 X197 X198 X199 X200 X201 X202 X203 X204 X205 X206 X207 X208 X209 X210 X211 X212 X213 X214 X215 X216 X217 X218 X219"
  - "X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 X32 X33 X34 X35 X36 X37 X38 X39 X40 X41 X42 X43 X44 X45 X46 X47 X48 X49 X50 X51 X52 X53 X54 X55 X56 X57 X58 X59 X60 X61 X62 X63 X64 X65 X66 X67 X68 X69 X70 X71 X72 X73 X74 X75 X76 X77 X78 X79 X80 X81 X82 X83 X84 X85 X86 X87 X88 X89 X90 X91 X92 X93 X94 X95 X96 X97 X98 X99 X100 X101 X102 X103 X104 X105 X106 X107 X108 X109 X110 X111 X112 X113 X114 X115 X116 X117 X118 X119 X120 X121 X122 X123 X124 X125 X126 X127 X128 X129 X130 X131 X132 X133 X134 X135 X136 X137 X138 X139 X140 X141 X142 X143 X144 X145 X146 X147 X148 X149 X150 X151 X152 X153 X154 X155 X156 X157 X158 X159 X160 X161 X162 X163 X164 X165 X166 X167 X168 X169 X170 X171 X172 X173 X174 X175 X176 X177 X178 X179 X180 X181 X182 X183 X184 X185 X186 X187 X188 X189 X190 X191 X192 X193 X194 X195 X196 X197 X198 X199 X200 X201 X202 X203 X204 X205 X206 X207 X208 X209 X210 X211 X212 X213 X214 X215 X216 X217 X218 X219"
  - 'MSS_base'
  - 'tseries'
  separator:
  #txt:
  - "\t"
  - ","
  #csv:
  - "\t"
  - "\t"
  - "None"
  - "None"
  file_encoding:
  #txt:
  - "utf-8"
  - "utf-8"
  #csv:
  - "utf-8"
  - "utf-8"
  - "utf-8"
  - "utf-8"
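In the ACSM_TOFWARE entry above, table_header, separator and file_encoding are parallel lists: position i of each list describes one file variant, and the readers use the index of the matched header line (tb_idx) to pick the matching separator and encoding. A small sketch of that lookup, assuming this is the config_text_reader.yaml that read_txt_files_as_dict loads (the literal path below is an assumption):

import yaml

with open('instruments/readers/config_text_reader.yaml', 'r') as stream:  # assumed location of this config file
    config_dict = yaml.load(stream, Loader=yaml.FullLoader)

acsm_cfg = config_dict['ACSM_TOFWARE']
for tb_idx, tb in enumerate(acsm_cfg['table_header']):
    sep = acsm_cfg['separator'][tb_idx]       # '\t', ',' or 'None', aligned with the header entry
    enc = acsm_cfg['file_encoding'][tb_idx]   # all 'utf-8' for ACSM_TOFWARE
    print(tb_idx, repr(sep), enc, tb[:40])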
@@ -1,80 +1,80 @@

import os
import sys
#root_dir = os.path.abspath(os.curdir)
#sys.path.append(root_dir)

from instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
from instruments.readers.g5505_text_reader import read_txt_files_as_dict


file_extensions = ['.ibw','.txt','.dat','.h5','.TXT','.csv','.pkl','.json','.yaml']

# Define the instruments directory (modify this as needed or set to None)
default_instruments_dir = None  # or provide an absolute path

file_readers = {
    'ibw': lambda a1: read_xps_ibw_file_as_dict(a1),
    'txt': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'TXT': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'dat': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    #'ACSM_TOFWARE_txt': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    #'ACSM_TOFWARE_csv': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False)
}

# Add new "instrument reader (Data flagging app data)"

from instruments.readers.acsm_tofware_reader import read_acsm_files_as_dict
file_extensions.append('.txt')
file_readers.update({'ACSM_TOFWARE_txt' : lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False)})

file_extensions.append('.csv')
file_readers.update({'ACSM_TOFWARE_csv' : lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False)})

from instruments.readers.flag_reader import read_jsonflag_as_dict
file_extensions.append('.json')
file_readers.update({'ACSM_TOFWARE_flags_json' : lambda x: read_jsonflag_as_dict(x)})


def compute_filereader_key_from_path(hdf5_file_path):
    """Constructs the key 'instrumentname_ext' based on hdf5_file_path, structured as
    /instrumentname/to/filename.ext, which accesses the file reader that should be used to read such a file.

    Parameters
    ----------
    hdf5_file_path : str
        _description_

    Returns
    -------
    _type_
        _description_
    """

    parts = hdf5_file_path.strip('/').split('/')

    # Extract the filename and its extension
    filename, file_extension = os.path.splitext(parts[-1])

    # Extract the first directory directly under the root directory '/' in the hdf5 file
    subfolder_name = parts[0] if len(parts) > 1 else ""

    # Remove the leading dot from the file extension
    file_extension = file_extension.lstrip('.')

    # Construct the resulting string
    full_string = f"{subfolder_name}_{file_extension}"

    return full_string, file_extension


def select_file_reader(path):
    full_string, extension = compute_filereader_key_from_path(path)

    # First, try to match the full string
    if full_string in file_readers:
        return file_readers[full_string]

    # If no match, try to match the reader using only the extension
    if extension in file_readers:
        return file_readers[extension]

    # Default case if no reader is found
    return lambda x : None
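With the registry functions above in scope, reader selection works as in this short, hypothetical example (the group and file names are made up):

# The first path component names the instrument; the extension picks the variant.
key, ext = compute_filereader_key_from_path('/ACSM_TOFWARE/2024/ACSM_JFJ_meta.txt')
print(key, ext)   # -> 'ACSM_TOFWARE_txt', 'txt'

reader = select_file_reader('/ACSM_TOFWARE/2024/ACSM_JFJ_meta.txt')  # matches the 'ACSM_TOFWARE_txt' entry
file_dict = reader('path/to/source/ACSM_JFJ_meta.txt')
# Keys without a dedicated entry fall back to the bare extension ('txt', 'ibw', ...);
# if nothing matches, a lambda returning None is used.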
@@ -1,39 +1,39 @@

import os
import json

#root_dir = os.path.abspath(os.curdir)
#sys.path.append(root_dir)
#print(__file__)

#from instruments.readers import set_dima_path as configpath
#configpath.set_dima_path()

from utils import g5505_utils


def read_jsonflag_as_dict(path_to_file):

    file_dict = {}
    path_tail, path_head = os.path.split(path_to_file)

    file_dict['name'] = path_head
    # TODO: review this header dictionary, it may not be the best way to represent header data
    file_dict['attributes_dict'] = {}
    file_dict['datasets'] = []

    try:
        with open(path_to_file, 'r') as stream:
            flag = json.load(stream)  #, Loader=json.FullLoader)
    except (FileNotFoundError, json.JSONDecodeError) as exc:
        print(exc)

    dataset = {}
    dataset['name'] = 'data_table'  #_numerical_variables'
    dataset['data'] = g5505_utils.convert_attrdict_to_np_structured_array(flag)  #df_numerical_attrs.to_numpy()
    dataset['shape'] = dataset['data'].shape
    dataset['dtype'] = type(dataset['data'])

    file_dict['datasets'].append(dataset)

    return file_dict
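A minimal sketch of the dictionary returned by read_jsonflag_as_dict; the flag file name is hypothetical:

from instruments.readers.flag_reader import read_jsonflag_as_dict

file_dict = read_jsonflag_as_dict('flags/ACSM_TOFWARE_flag_001.json')  # hypothetical path
print(file_dict['name'])             # 'ACSM_TOFWARE_flag_001.json'
print(file_dict['attributes_dict'])  # {} (flag files carry no header attributes)
ds = file_dict['datasets'][0]
print(ds['name'], ds['shape'])       # 'data_table' and the shape of the structured array built from the JSON dict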
@@ -1,239 +1,239 @@

import sys
import os
import pandas as pd
import collections
import yaml

# Import project modules
root_dir = os.path.abspath(os.curdir)
sys.path.append(root_dir)

import utils.g5505_utils as utils


def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with_copy: bool = True):
    # If instruments_dir is not provided, use the default path relative to the module directory
    if not instruments_dir:
        # Assuming the instruments folder is one level up from the source module directory
        module_dir = os.path.dirname(__file__)
        instruments_dir = os.path.join(module_dir, '..')

    # Normalize the path (resolves any '..' in the path)
    instrument_configs_path = os.path.abspath(os.path.join(instruments_dir, 'readers', 'config_text_reader.yaml'))

    with open(instrument_configs_path, 'r') as stream:
        try:
            config_dict = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
    # Verify whether the file can be read by the available instrument configurations.
    #if not any(key in filename.replace(os.sep,'/') for key in config_dict.keys()):
    #    return {}

    # TODO: this may be prone to error if the assumed folder structure is not compliant
    file_encoding = config_dict['default']['file_encoding']  #'utf-8'
    separator = config_dict['default']['separator']
    table_header = config_dict['default']['table_header']

    for key in config_dict.keys():
        if key.replace('/', os.sep) in filename:
            file_encoding = config_dict[key].get('file_encoding', file_encoding)
            separator = config_dict[key].get('separator', separator)
            table_header = config_dict[key].get('table_header', table_header)
            timestamp_variables = config_dict[key].get('timestamp', [])
            datetime_format = config_dict[key].get('datetime_format', [])

            description_dict = {}
            link_to_description = config_dict[key].get('link_to_description', '').replace('/', os.sep)

            if link_to_description:
                path = os.path.join(instruments_dir, link_to_description)
                try:
                    with open(path, 'r') as stream:
                        description_dict = yaml.load(stream, Loader=yaml.FullLoader)
                except (FileNotFoundError, yaml.YAMLError) as exc:
                    print(exc)
    #if 'None' in table_header:
    #    return {}

    # Read the header as a dictionary and detect where the data table starts
    header_dict = {}
    data_start = False
    # Work with a copy of the file for safety
    if work_with_copy:
        tmp_filename = utils.make_file_copy(source_file_path=filename)
    else:
        tmp_filename = filename

    #with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f:

    if not isinstance(table_header, list):
        table_header = [table_header]
        file_encoding = [file_encoding]
        separator = [separator]

    with open(tmp_filename, 'rb') as f:
        table_preamble = []
        for line_number, line in enumerate(f):

            for tb_idx, tb in enumerate(table_header):
                if tb in line.decode(file_encoding[tb_idx]):
                    break

            if tb in line.decode(file_encoding[tb_idx]):
                list_of_substrings = line.decode(file_encoding[tb_idx]).split(separator[tb_idx].replace('\\t', '\t'))

                # Count occurrences of each substring
                substring_counts = collections.Counter(list_of_substrings)
                data_start = True
                # Generate column names, appending an index only for repeated substrings
                column_names = [f"{i}_{name.strip()}" if substring_counts[name] > 1 else name.strip() for i, name in enumerate(list_of_substrings)]

                #column_names = [str(i)+'_'+name.strip() for i, name in enumerate(list_of_substrings)]
                #column_names = []
                #for i, name in enumerate(list_of_substrings):
                #    column_names.append(str(i)+'_'+name)

                #print(line_number, len(column_names), '\n')
                break
            # Subdivide the line into words and join them with a single space.
            # I assume this produces a cleaner line that contains no stray separator characters (\t, \r) or extra spaces.
            list_of_substrings = line.decode(file_encoding[tb_idx]).split()
            # TODO: ideally we should use a multiline string, but the YAML parser does not recognize \n as a special character
            #line = ' '.join(list_of_substrings+['\n'])
            #line = ' '.join(list_of_substrings)
            table_preamble.append(' '.join([item for item in list_of_substrings]))  # += new_line

    # TODO: it does not work with separator as None; fix for RGA
    try:
        df = pd.read_csv(tmp_filename,
                         delimiter=separator[tb_idx].replace('\\t', '\t'),
                         header=line_number,
                         #encoding='latin-1',
                         encoding=file_encoding[tb_idx],
                         names=column_names,
                         skip_blank_lines=True)

        df_numerical_attrs = df.select_dtypes(include='number')
        df_categorical_attrs = df.select_dtypes(exclude='number')
        numerical_variables = [item for item in df_numerical_attrs.columns]

        # Consolidate the separate 'date' and 'time' columns specified in text_data_source.yaml into a single timestamp column
        if timestamp_variables:
            #df_categorical_attrs['timestamps'] = [' '.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index]
            #df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]

            #df_categorical_attrs['timestamps'] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)
            timestamps_name = ' '.join(timestamp_variables)
            df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)

            valid_indices = []
            if datetime_format:
                df_categorical_attrs[timestamps_name] = pd.to_datetime(df_categorical_attrs[timestamps_name], format=datetime_format, errors='coerce')
                valid_indices = df_categorical_attrs.dropna(subset=[timestamps_name]).index
                df_categorical_attrs = df_categorical_attrs.loc[valid_indices, :]
                df_numerical_attrs = df_numerical_attrs.loc[valid_indices, :]

                df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamps_name].dt.strftime(config_dict['default']['desired_format'])
                startdate = df_categorical_attrs[timestamps_name].min()
                enddate = df_categorical_attrs[timestamps_name].max()

                df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamps_name].astype(str)
                #header_dict.update({'startdate':startdate,'enddate':enddate})
                header_dict['startdate'] = str(startdate)
                header_dict['enddate'] = str(enddate)

            if len(timestamp_variables) > 1:
                df_categorical_attrs = df_categorical_attrs.drop(columns=timestamp_variables)

        #df_categorical_attrs.reindex(drop=True)
        #df_numerical_attrs.reindex(drop=True)

        categorical_variables = [item for item in df_categorical_attrs.columns]
        ####
        #elif 'RGA' in filename:
        #    df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Time(s)' : 'timestamps'})

        ###
        file_dict = {}
        path_tail, path_head = os.path.split(tmp_filename)

        file_dict['name'] = path_head
        # TODO: review this header dictionary; it may not be the best way to represent header data
        file_dict['attributes_dict'] = header_dict
        file_dict['datasets'] = []
        ####

        df = pd.concat((df_categorical_attrs, df_numerical_attrs), axis=1)

        #if numerical_variables:
        dataset = {}
        dataset['name'] = 'data_table'  #_numerical_variables'
        dataset['data'] = utils.convert_dataframe_to_np_structured_array(df)  #df_numerical_attrs.to_numpy()
        dataset['shape'] = dataset['data'].shape
        dataset['dtype'] = type(dataset['data'])
        #dataset['data_units'] = file_obj['wave']['data_units']
        #
        # Create attribute descriptions based on description_dict
        dataset['attributes'] = {}

        # Annotate column headers if description_dict is non-empty
        if description_dict:
            for column_name in df.columns:
                column_attr_dict = description_dict['table_header'].get(column_name,
                                                                        {'note': 'there was no description available. Review instrument files.'})
                dataset['attributes'].update({column_name: utils.convert_attrdict_to_np_structured_array(column_attr_dict)})

        #try:
        #    dataset['attributes'] = description_dict['table_header'].copy()
        #    for key in description_dict['table_header'].keys():
        #        if not key in numerical_variables:
        #            dataset['attributes'].pop(key)  # delete key
        #        else:
        #            dataset['attributes'][key] = utils.parse_attribute(dataset['attributes'][key])
        #    if timestamps_name in categorical_variables:
        #        dataset['attributes'][timestamps_name] = utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})
        #except ValueError as err:
        #    print(err)

        # Represent string values as fixed-length strings in the HDF5 file; these need to be
        # decoded back to str when read. This gives better control than variable-length strings,
        # at the expense of flexibility.
        # https://docs.h5py.org/en/stable/strings.html

        if table_preamble:
            #header_dict["table_preamble"] = utils.convert_string_to_bytes(table_preamble)
            tp_dataset = {}
            tp_dataset['name'] = "table_preamble"
            tp_dataset['data'] = utils.convert_string_to_bytes(table_preamble)
            tp_dataset['shape'] = tp_dataset['data'].shape
            tp_dataset['dtype'] = type(tp_dataset['data'])
            tp_dataset['attributes'] = {}
            file_dict['datasets'].append(tp_dataset)

        file_dict['datasets'].append(dataset)

        #if categorical_variables:
        #    dataset = {}
        #    dataset['name'] = 'table_categorical_variables'
        #    dataset['data'] = dataframe_to_np_structured_array(df_categorical_attrs)  #df_categorical_attrs.loc[:,categorical_variables].to_numpy()
        #    dataset['shape'] = dataset['data'].shape
        #    dataset['dtype'] = type(dataset['data'])
        #    if timestamps_name in categorical_variables:
        #        dataset['attributes'] = {timestamps_name: utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})}
        #    file_dict['datasets'].append(dataset)
    except:
        return {}

    return file_dict
import sys
import os
import pandas as pd
import collections
import yaml

# Import project modules
root_dir = os.path.abspath(os.curdir)
sys.path.append(root_dir)

import utils.g5505_utils as utils


def read_txt_files_as_dict(filename: str, instruments_dir: str = None, work_with_copy: bool = True):
    # If instruments_dir is not provided, use the default path relative to the module directory
    if not instruments_dir:
        # Assuming the instruments folder is one level up from the source module directory
        module_dir = os.path.dirname(__file__)
        instruments_dir = os.path.join(module_dir, '..')

    # Normalize the path (resolves any '..' in the path)
    instrument_configs_path = os.path.abspath(os.path.join(instruments_dir, 'readers', 'config_text_reader.yaml'))

    with open(instrument_configs_path, 'r') as stream:
        try:
            config_dict = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
    # Verify whether the file can be read by the available instrument configurations.
    #if not any(key in filename.replace(os.sep,'/') for key in config_dict.keys()):
    #    return {}

    # TODO: this may be prone to error if the assumed folder structure is not compliant
    file_encoding = config_dict['default']['file_encoding'] #'utf-8'
    separator = config_dict['default']['separator']
    table_header = config_dict['default']['table_header']

    # Initialize per-file settings so they are defined even if no instrument-specific section matches the filename below.
    timestamp_variables = []
    datetime_format = []
    description_dict = {}

    for key in config_dict.keys():
        if key.replace('/', os.sep) in filename:
            file_encoding = config_dict[key].get('file_encoding', file_encoding)
            separator = config_dict[key].get('separator', separator)
            table_header = config_dict[key].get('table_header', table_header)
            timestamp_variables = config_dict[key].get('timestamp', [])
            datetime_format = config_dict[key].get('datetime_format', [])

            description_dict = {}
            link_to_description = config_dict[key].get('link_to_description', '').replace('/', os.sep)

            if link_to_description:
                path = os.path.join(instruments_dir, link_to_description)
                try:
                    with open(path, 'r') as stream:
                        description_dict = yaml.load(stream, Loader=yaml.FullLoader)
                except (FileNotFoundError, yaml.YAMLError) as exc:
                    print(exc)
    #if 'None' in table_header:
    #    return {}
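
    # Illustrative note (an assumption about the YAML layout, inferred from how config_dict is used in this
    # function, not taken verbatim from config_text_reader.yaml): the loaded dictionary is expected to resemble
    #   {'default': {'file_encoding': 'utf-8', 'separator': ',', 'table_header': '...', 'desired_format': '...'},
    #    '<instrument folder>': {'table_header': '...', 'timestamp': ['Date', 'Time'],
    #                            'datetime_format': '%Y-%m-%d %H:%M:%S', 'link_to_description': '...'}}
    # where a key matching part of the file path overrides the defaults.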
    # Read the header as a dictionary and detect where the data table starts
    header_dict = {}
    data_start = False
    # Work with a copy of the file for safety
    if work_with_copy:
        tmp_filename = utils.make_file_copy(source_file_path=filename)
    else:
        tmp_filename = filename

    #with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f:

    if not isinstance(table_header, list):
        table_header = [table_header]
        file_encoding = [file_encoding]
        separator = [separator]

    with open(tmp_filename, 'rb') as f:
        table_preamble = []
        for line_number, line in enumerate(f):

            for tb_idx, tb in enumerate(table_header):
                if tb in line.decode(file_encoding[tb_idx]):
                    break

            if tb in line.decode(file_encoding[tb_idx]):
                list_of_substrings = line.decode(file_encoding[tb_idx]).split(separator[tb_idx].replace('\\t', '\t'))

                # Count occurrences of each substring
                substring_counts = collections.Counter(list_of_substrings)
                data_start = True
                # Generate column names with an appended index only for repeated substrings
                column_names = [f"{i}_{name.strip()}" if substring_counts[name] > 1 else name.strip() for i, name in enumerate(list_of_substrings)]

                #column_names = [str(i)+'_'+name.strip() for i, name in enumerate(list_of_substrings)]
                #column_names = []
                #for i, name in enumerate(list_of_substrings):
                #    column_names.append(str(i)+'_'+name)

                #print(line_number, len(column_names), '\n')
                break
            # Subdivide the line into words and join them by a single space.
            # I assume this produces a cleaner line without stray separator characters (\t, \r) or extra spaces.
            list_of_substrings = line.decode(file_encoding[tb_idx]).split()
            # TODO: ideally we should use a multiline string, but the yaml parser does not recognize \n as a special character
            #line = ' '.join(list_of_substrings+['\n'])
            #line = ' '.join(list_of_substrings)
            table_preamble.append(' '.join([item for item in list_of_substrings])) # += new_line

    # TODO: it does not work with separator as None :(. Fix for RGA.
    try:
        df = pd.read_csv(tmp_filename,
                         delimiter=separator[tb_idx].replace('\\t', '\t'),
                         header=line_number,
                         #encoding='latin-1',
                         encoding=file_encoding[tb_idx],
                         names=column_names,
                         skip_blank_lines=True)

        df_numerical_attrs = df.select_dtypes(include='number')
        df_categorical_attrs = df.select_dtypes(exclude='number')
        numerical_variables = [item for item in df_numerical_attrs.columns]

        # Consolidate the separate date and time columns specified in the instrument configuration (config_text_reader.yaml) into a single timestamp column
        if timestamp_variables:
            #df_categorical_attrs['timestamps'] = [' '.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index]
            #df_categorical_attrs['timestamps'] = [ df_categorical_attrs.loc[i,'0_Date']+' '+df_categorical_attrs.loc[i,'1_Time'] for i in df.index]

            #df_categorical_attrs['timestamps'] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)
            timestamps_name = ' '.join(timestamp_variables)
            df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamp_variables].astype(str).agg(' '.join, axis=1)
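            # Illustrative note (not in the original source): for timestamp_variables = ['Date', 'Time'],
            # a row with Date '2022-01-31' and Time '12:00:00' is joined into a single 'Date Time'
            # column holding '2022-01-31 12:00:00', which is parsed into a datetime below.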

            valid_indices = []
            if datetime_format:
                df_categorical_attrs[timestamps_name] = pd.to_datetime(df_categorical_attrs[timestamps_name], format=datetime_format, errors='coerce')
                valid_indices = df_categorical_attrs.dropna(subset=[timestamps_name]).index
                df_categorical_attrs = df_categorical_attrs.loc[valid_indices, :]
                df_numerical_attrs = df_numerical_attrs.loc[valid_indices, :]

                df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamps_name].dt.strftime(config_dict['default']['desired_format'])
                startdate = df_categorical_attrs[timestamps_name].min()
                enddate = df_categorical_attrs[timestamps_name].max()

                df_categorical_attrs[timestamps_name] = df_categorical_attrs[timestamps_name].astype(str)
                #header_dict.update({'stastrrtdate':startdate,'enddate':enddate})
                header_dict['startdate'] = str(startdate)
                header_dict['enddate'] = str(enddate)

            if len(timestamp_variables) > 1:
                df_categorical_attrs = df_categorical_attrs.drop(columns=timestamp_variables)

            #df_categorical_attrs.reindex(drop=True)
            #df_numerical_attrs.reindex(drop=True)

        categorical_variables = [item for item in df_categorical_attrs.columns]
        ####
        #elif 'RGA' in filename:
        #    df_categorical_attrs = df_categorical_attrs.rename(columns={'0_Time(s)' : 'timestamps'})

        ###
        file_dict = {}
        path_tail, path_head = os.path.split(tmp_filename)

        file_dict['name'] = path_head
        # TODO: review this header dictionary, it may not be the best way to represent header data
        file_dict['attributes_dict'] = header_dict
        file_dict['datasets'] = []
        ####

        df = pd.concat((df_categorical_attrs, df_numerical_attrs), axis=1)

        #if numerical_variables:
        dataset = {}
        dataset['name'] = 'data_table' #_numerical_variables'
        dataset['data'] = utils.convert_dataframe_to_np_structured_array(df) #df_numerical_attrs.to_numpy()
        dataset['shape'] = dataset['data'].shape
        dataset['dtype'] = type(dataset['data'])
        #dataset['data_units'] = file_obj['wave']['data_units']
        #
        # Create attribute descriptions based on description_dict
        dataset['attributes'] = {}

        # Annotate column headers if description_dict is non empty
        if description_dict:
            for column_name in df.columns:
                column_attr_dict = description_dict['table_header'].get(column_name,
                                                                        {'note': 'there was no description available. Review instrument files.'})
                dataset['attributes'].update({column_name: utils.convert_attrdict_to_np_structured_array(column_attr_dict)})

        #try:
        #    dataset['attributes'] = description_dict['table_header'].copy()
        #    for key in description_dict['table_header'].keys():
        #        if not key in numerical_variables:
        #            dataset['attributes'].pop(key) # delete key
        #        else:
        #            dataset['attributes'][key] = utils.parse_attribute(dataset['attributes'][key])
        #    if timestamps_name in categorical_variables:
        #        dataset['attributes'][timestamps_name] = utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})
        #except ValueError as err:
        #    print(err)

        # Represent string values as fixed-length strings in the HDF5 file; these need to be decoded
        # back to str when read. Fixed-length strings provide better control than variable-length
        # strings, at the expense of flexibility.
        # https://docs.h5py.org/en/stable/strings.html

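        # Illustrative sketch (an assumption, not part of this module): utils.convert_dataframe_to_np_structured_array
        # is expected to map object/str columns to fixed-length byte fields, roughly like
        #   max_len = df[col].astype(str).str.len().max()
        #   field = (col, f'S{max_len}')   # numpy fixed-length byte-string dtype
        # so that h5py writes them as fixed-length strings (see the link above).
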
        if table_preamble:
            #header_dict["table_preamble"] = utils.convert_string_to_bytes(table_preamble)
            tp_dataset = {}
            tp_dataset['name'] = "table_preamble"
            tp_dataset['data'] = utils.convert_string_to_bytes(table_preamble)
            tp_dataset['shape'] = tp_dataset['data'].shape
            tp_dataset['dtype'] = type(tp_dataset['data'])
            tp_dataset['attributes'] = {}
            file_dict['datasets'].append(tp_dataset)

        file_dict['datasets'].append(dataset)

        #if categorical_variables:
        #    dataset = {}
        #    dataset['name'] = 'table_categorical_variables'
        #    dataset['data'] = dataframe_to_np_structured_array(df_categorical_attrs) #df_categorical_attrs.loc[:,categorical_variables].to_numpy()
        #    dataset['shape'] = dataset['data'].shape
        #    dataset['dtype'] = type(dataset['data'])
        #    if timestamps_name in categorical_variables:
        #        dataset['attributes'] = {timestamps_name: utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})}
        #    file_dict['datasets'].append(dataset)
    except Exception as exc:
        # Report the error instead of silently swallowing it, then fall back to an empty dict.
        print(exc)
        return {}

    return file_dict
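

# Minimal usage sketch (illustrative; the path below is hypothetical and not part of the original module):
if __name__ == '__main__':
    example_dict = read_txt_files_as_dict('data/RGA/example_measurement.txt', work_with_copy=False)
    if example_dict:
        print(example_dict['name'], [ds['name'] for ds in example_dict['datasets']])
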
@ -1,79 +1,79 @@
import os
from igor2.binarywave import load as loadibw

def read_xps_ibw_file_as_dict(filename):
    """
    Reads IBW files from the Multiphase Chemistry Group, which contain XPS spectra and acquisition settings,
    and formats the data into a dictionary with the structure {datasets: list of datasets}. Each dataset in the
    list has the following structure:

    {
        'name': 'name',
        'data': data_array,
        'data_units': 'units',
        'shape': data_shape,
        'dtype': data_type
    }

    Parameters
    ----------
    filename : str
        The IBW filename from the Multiphase Chemistry Group beamline.

    Returns
    -------
    file_dict : dict
        A dictionary containing the datasets from the IBW file.

    Raises
    ------
    ValueError
        If the input IBW file is not a valid IBW file.

    """

    file_obj = loadibw(filename)

    required_keys = ['wData','data_units','dimension_units','note']
    if sum([item in required_keys for item in file_obj['wave'].keys()]) < len(required_keys):
        raise ValueError('This is not a valid xps ibw file. It does not satisfy minimum adimissibility criteria.')

    file_dict = {}
    path_tail, path_head = os.path.split(filename)

    # Group name and attributes
    file_dict['name'] = path_head
    file_dict['attributes_dict'] = {}

    # Convert notes of bytes class to string class and split string into a list of elements separated by '\r'.
    notes_list = file_obj['wave']['note'].decode("utf-8").split('\r')
    exclude_list = ['Excitation Energy']
    for item in notes_list:
        if '=' in item:
            key, value = tuple(item.split('='))
            # TODO: check if value can be converted into a numeric type. Now all values are string type
            if not key in exclude_list:
                file_dict['attributes_dict'][key] = value

    # TODO: talk to Thorsten to see if there is an easier way to access the below attributes
    dimension_labels = file_obj['wave']['dimension_units'].decode("utf-8").split(']')
    file_dict['attributes_dict']['dimension_units'] = [item+']' for item in dimension_labels[0:len(dimension_labels)-1]]

    # Datasets and their attributes

    file_dict['datasets'] = []

    dataset = {}
    dataset['name'] = 'spectrum'
    dataset['data'] = file_obj['wave']['wData']
    dataset['data_units'] = file_obj['wave']['data_units']
    dataset['shape'] = dataset['data'].shape
    dataset['dtype'] = type(dataset['data'])

    # TODO: include energy axis dataset

    file_dict['datasets'].append(dataset)


import os
from igor2.binarywave import load as loadibw

def read_xps_ibw_file_as_dict(filename):
    """
    Reads IBW files from the Multiphase Chemistry Group, which contain XPS spectra and acquisition settings,
    and formats the data into a dictionary with the structure {datasets: list of datasets}. Each dataset in the
    list has the following structure:

    {
        'name': 'name',
        'data': data_array,
        'data_units': 'units',
        'shape': data_shape,
        'dtype': data_type
    }

    Parameters
    ----------
    filename : str
        The IBW filename from the Multiphase Chemistry Group beamline.

    Returns
    -------
    file_dict : dict
        A dictionary containing the datasets from the IBW file.

    Raises
    ------
    ValueError
        If the input IBW file is not a valid IBW file.

    """

    file_obj = loadibw(filename)

    required_keys = ['wData', 'data_units', 'dimension_units', 'note']
    if sum([item in required_keys for item in file_obj['wave'].keys()]) < len(required_keys):
        raise ValueError('This is not a valid XPS IBW file. It does not satisfy the minimum admissibility criteria.')

    file_dict = {}
    path_tail, path_head = os.path.split(filename)

    # Group name and attributes
    file_dict['name'] = path_head
    file_dict['attributes_dict'] = {}

    # Decode the notes from bytes to str and split them into a list of entries separated by '\r'.
    notes_list = file_obj['wave']['note'].decode("utf-8").split('\r')
    exclude_list = ['Excitation Energy']
    for item in notes_list:
        if '=' in item:
            key, value = tuple(item.split('='))
            # TODO: check whether value can be converted into a numeric type. For now all values are kept as strings.
            if key not in exclude_list:
                file_dict['attributes_dict'][key] = value
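
    # Illustrative note (not in the original source): an entry such as "Pass Energy=20" is split at '='
    # into key 'Pass Energy' and value '20'; values stay as strings (see the TODO above), and entries
    # whose key appears in exclude_list are skipped.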

    # TODO: talk to Thorsten to see if there is an easier way to access the below attributes
    dimension_labels = file_obj['wave']['dimension_units'].decode("utf-8").split(']')
    file_dict['attributes_dict']['dimension_units'] = [item+']' for item in dimension_labels[0:len(dimension_labels)-1]]
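    # Illustrative note (not in the original source): a decoded dimension_units string such as '[eV][counts]'
    # splits on ']' into ['[eV', '[counts', ''], and re-appending ']' to all but the last element
    # recovers ['[eV]', '[counts]'].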

    # Datasets and their attributes

    file_dict['datasets'] = []

    dataset = {}
    dataset['name'] = 'spectrum'
    dataset['data'] = file_obj['wave']['wData']
    dataset['data_units'] = file_obj['wave']['data_units']
    dataset['shape'] = dataset['data'].shape
    dataset['dtype'] = type(dataset['data'])

    # TODO: include energy axis dataset

    file_dict['datasets'].append(dataset)

    return file_dict
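

# Minimal usage sketch (illustrative; the .ibw path is hypothetical and not part of the original module):
if __name__ == '__main__':
    xps_dict = read_xps_ibw_file_as_dict('example_spectrum.ibw')
    for ds in xps_dict['datasets']:
        print(ds['name'], ds['shape'], ds['data_units'])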