# Default first line of the variable-description section of the header.
_VAR_DESC_MARKER = "Days from the file reference point (start_time)"


def _as_text(line):
    """Return *line* as ``str``.

    ``bytes`` are decoded as UTF-8 and undecodable bytes are dropped, so a
    stray non-UTF-8 character in a header cannot abort parsing.  ``str``
    input is returned unchanged.
    """
    if isinstance(line, bytes):
        return line.decode('utf-8', errors='ignore')
    return line


def split_header(header_lines, var_desc_marker=_VAR_DESC_MARKER):
    """Split raw header lines into preamble, variable block and key-value block.

    Args:
        header_lines: Sequence of header lines, each ``bytes`` or ``str``.
        var_desc_marker: Exact (stripped) text of the line that opens the
            variable-description section.

    Returns:
        A ``(part1, part2, part3)`` tuple holding the *original*, unmodified
        lines:

        * ``part1`` - every line before the marker line,
        * ``part2`` - the marker line up to, but excluding, the first
          following line that contains ``':'``,
        * ``part3`` - that first ``':'`` line and everything after it.

    Raises:
        ValueError: If no line equals ``var_desc_marker`` after stripping.
    """
    # Decode once, with one error policy, and reuse the text for both the
    # marker lookup and the ':' scan below.
    text_lines = [_as_text(line) for line in header_lines]
    stripped_lines = [text.strip() for text in text_lines]

    try:
        var_start_idx = stripped_lines.index(var_desc_marker)
    except ValueError:
        raise ValueError("Expected variable description marker not found.") from None

    # Part 3 starts at the first line containing ':' at or after the marker.
    # NOTE(review): this assumes no variable description itself contains ':'
    # - confirm against the files this reader must support.
    kv_start_idx = len(text_lines)
    for idx in range(var_start_idx, len(text_lines)):
        if ':' in text_lines[idx]:
            kv_start_idx = idx
            break

    part1 = header_lines[:var_start_idx]
    part2 = list(header_lines[var_start_idx:kv_start_idx])
    part3 = list(header_lines[kv_start_idx:])

    return part1, part2, part3


def extract_var_descriptions(part2):
    """Return the variable-description lines found in the variable block.

    The block is read positionally: ``part2[1]`` holds the number of
    variables, ``part2[2]`` holds one whitespace-separated numeric value per
    variable, and the descriptions occupy ``part2[4]`` onwards.

    Args:
        part2: Variable block as returned by ``split_header`` (lines may be
            ``bytes`` or ``str``).

    Returns:
        A list with the original (undecoded) description lines, one per
        variable.

    Raises:
        RuntimeError: If the number of values on ``part2[2]`` differs from the
            variable count given on ``part2[1]``.
        ValueError: If the count is not an integer or a value on ``part2[2]``
            is not numeric.
        IndexError: If ``part2`` is shorter than the declared layout.
    """
    count_line = _as_text(part2[1])
    values_line = _as_text(part2[2])

    nvars = int(count_line.strip())
    # Parse as float so non-numeric content is still rejected, then compare
    # the NUMBER of values with nvars.  Comparing their sum only works when
    # every value is exactly 1.
    # NOTE(review): values other than 1 are accepted here - confirm that the
    # caller handles them as intended.
    per_var_values = [float(token) for token in values_line.split()]
    if len(per_var_values) != nvars:
        raise RuntimeError(f'Inconsistent lines. Check lines {count_line} and {values_line}')

    first_description_idx = 4
    return [part2[idx] for idx in range(first_description_idx, first_description_idx + nvars)]
+ #filename_parts = attribute_value.split('.') + + # Extract the datetime strings + start_str = attribute_value.strip() + #end_str = filename_parts[2] + + # Parse into datetime objects + start_date = datetime.strptime(start_str, "%Y%m%d%H%M%S") + #end_date = datetime.strptime(end_str, "%Y%m%d%H%M%S") + + #start_date_str = f"{date_header[0]}-{date_header[1]}-{date_header[2]}" + #start_date = datetime.strptime(start_date_str, "%Y-%m-%d") # Extract number of dependent variables from line 10 - num_dep_vars = int(lines[9].split()[0]) + #num_dep_vars = int(lines[9].split()[0]) # Get variable names: start_time + vars from lines 13 to 13+num_dep_vars-1 (zero-indexed: 12 to 12+num_dep_vars) - vars_list = ["start_time"] + [lines[i].strip() for i in range(12, 12 + num_dep_vars)] + vars_list = table_header.decode(encoding="utf-8").strip().split() #["start_time"] + [lines[i].strip() for i in range(12, 12 + num_dep_vars)] # Get the last line of the header (data column names) dat_head_line = lines[header_length - 1] @@ -79,10 +160,13 @@ def read_nasa_ames_as_dict(filename, instruments_dir: str = None, work_with_copy # Create header metadata dictionary header_metadata_dict = { 'header_length': header_length, - 'start_date': start_date_str, - 'num_dep_vars': num_dep_vars, + 'start_date': start_str, + #'num_dep_vars': num_dep_vars, 'variable_names': vars_list, - 'raw_header': file_header + 'variable_descriptions' : var_descriptions, + 'raw_header_part1': part1, + 'raw_header_part2': part2, + 'raw_header_part3': part3 }