diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index f069e63..c4769b5 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -6,7 +6,7 @@ from igor2.binarywave import load as loadibw import src.g5505_utils as utils - +import yaml import h5py ROOT_DIR = os.path.abspath(os.curdir) @@ -75,33 +75,25 @@ def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_n def read_txt_files_as_dict(filename : str ): + with open('src/text_data_sources.yaml','r') as stream: + try: + config_dict = yaml.load(stream, Loader=yaml.FullLoader) + except yaml.YAMLError as exc: + print(exc) #TODO: this may be prone to error if assumed folder structure is non compliant - file_encoding = 'utf-8' - if 'RGA' in filename: #TODO: it does not work with separator as none :(. fix for RGA - #end_of_header = 'Channel, Mass(amu), Name, Cal Factor, Noise Floor, CEM Status', - table_header = 'Time(s) Channel#1 Channel#2 Channel#3 Channel#4 Channel#5 Channel#6 Channel#7 Channel#8' - separator = None - elif 'Pressure' in filename: - table_header = 'Date Time Vapore-Pressure 1 in Vapore-Pressure 2 in Baratron 1 in Baratron 2 in Baratron 3 in Baratron 4 in Temp. Ice-Sample in Temp. Heated-Sample in Temp. Cooler 1 in Temp. Cooler 2 in Flow Gas 1 in Pressure Chamber in X in Y in Z in None in Temp. Sealing in Flow Ice-Sample in' - separator = '\t' - #elif 'gas' in filename: - # end_of_header = 'Date_Time HoribaNO HoribaNOy Thermo42C_NO Thermo42C_NOx APHA370 CH4' - elif 'Humidity_Sensors' in filename: - table_header = 'Date Time RH1[%] RH2[%] RH3[%] RH4[%] RH5[%] RH6[%] RH7[%] RH8[%] T1[°C] T2[°C] T3[°C] T4[°C] T5[°C] T6[°C] T7[°C] T8[°C] DP1[°C] DP2[°C] DP3[°C] DP4[°C] DP5[°C] DP6[°C] DP7[°C] DP8[°C]' - separator = '\t' - file_encoding = 'latin-1' - elif 'ICAD' in filename and 'HONO' in filename: - table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource' - separator = '\t' - file_encoding = 'latin-1' - elif 'ICAD' in filename and 'NO2' in filename: - table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource' - separator = '\t' - file_encoding = 'latin-1' - else: - return {} - #raise ValueError('intrument_folder must be set as a either "RGA" or "Pressure"') + file_encoding = config_dict['default']['file_encoding'] #'utf-8' + separator = config_dict['default']['separator'] + table_header = config_dict['default']['table_header'] + + for key in config_dict.keys(): + if key.replace('/',os.sep) in filename: + file_encoding = config_dict[key].get('file_encoding',file_encoding) + separator = config_dict[key].get('separator',separator).replace('\\t','\t') + table_header = config_dict[key].get('table_header',table_header) + break + #if 'None' in table_header: + # return {} # Read header as a dictionary and detect where data table starts header_dict = {}