import os
import sys
import subprocess

import yaml

#root_dir = os.path.abspath(os.curdir)
#sys.path.append(root_dir)

#try:
#    from dima.instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
#    from dima.instruments.readers.g5505_text_reader import read_txt_files_as_dict
#except ModuleNotFoundError as e:
#    print(e)

from instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
from instruments.readers.g5505_text_reader import read_txt_files_as_dict
from instruments.readers.acsm_tofware_reader import read_acsm_files_as_dict
from instruments.readers.acsm_flag_reader import read_jsonflag_as_dict
from instruments.readers.nasa_ames_reader import read_nasa_ames_as_dict
from instruments.readers.structured_file_reader import read_structured_file_as_dict

# File suffixes (including the leading dot) listed by this module.
# The YAML short suffix is spelled '.yml' so it follows the same dotted
# convention as every other entry.
file_extensions = ['.ibw', '.txt', '.dat', '.h5', '.TXT', '.csv', '.pkl', '.json', '.yaml', '.yml', '.nas']

# Define the instruments directory (modify this as needed or set to None)
default_instruments_dir = None  # or provide an absolute path

# Maps a reader key to a one-argument callable that reads a file.
# Keys are either a bare extension ('txt') or '<instrumentfolder>_<ext>'
# ('ACSM_TOFWARE_txt'); see select_file_reader for the lookup order.
# The lambdas look up default_instruments_dir when they are called, so
# reassigning that module-level variable later affects subsequent reads.
file_readers = {
    'ibw': lambda a1: read_xps_ibw_file_as_dict(a1),
    'txt': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'dat': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'csv': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'yaml': lambda a1: read_structured_file_as_dict(a1),
    'yml': lambda a1: read_structured_file_as_dict(a1),
    'json': lambda a1: read_structured_file_as_dict(a1),
    'ACSM_TOFWARE_txt': lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False),
    'ACSM_TOFWARE_csv': lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False),
    'ACSM_TOFWARE_flags_json': lambda x: read_jsonflag_as_dict(x),
    'ACSM_TOFWARE_nas': lambda x: read_nasa_ames_as_dict(x),
}

file_readers.update({
    'CEDOAS_txt': lambda x: read_txt_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False),
})

REGISTRY_FILE = "registry.yaml"  #os.path.join(os.path.dirname(__file__), "registry.yaml")


def load_registry():
    """Load the instrument entries from the registry file.

    The registry is looked up as ``REGISTRY_FILE`` inside the directory that
    contains this module.

    Returns
    -------
    object
        Whatever is stored under the top-level ``"instruments"`` key of the
        YAML document (find_reader iterates over it as a sequence of dicts).

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist.
    KeyError
        If the document has no ``"instruments"`` key.
    """
    module_dir = os.path.dirname(__file__)
    # Normalize the path (resolves any '..' in the path)
    registry_path = os.path.abspath(os.path.join(module_dir, REGISTRY_FILE))

    # Explicit encoding so the result does not depend on the platform locale.
    with open(registry_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)["instruments"]


def find_reader(instrument_folder, file_extension):
    """Look up the reader script and dictionary registered for an instrument.

    Parameters
    ----------
    instrument_folder : str
        Value compared against each entry's ``"instrumentFolderName"``.
    file_extension : str
        Extension compared against the comma-separated values of each
        entry's ``"fileExtension"`` field.

    Returns
    -------
    tuple
        ``(fileReaderPath, InstrumentDictionaryPath)`` of the first matching
        registry entry, or ``(None, None)`` when no entry matches.
    """
    for entry in load_registry():
        if entry["instrumentFolderName"] != instrument_folder:
            continue
        # Tolerate spaces around the commas, e.g. "txt, csv".
        registered = [ext.strip() for ext in entry["fileExtension"].split(',')]
        if file_extension in registered:
            return entry["fileReaderPath"], entry["InstrumentDictionaryPath"]

    return None, None  # Not found


def compute_filereader_key_from_path(hdf5_file_path):
    """Construct the key 'instrumentname_ext' from an HDF5-style path.

    The path is expected to look like ``/instrumentname/to/filename.ext``.
    The resulting key is what select_file_reader uses to pick a reader.

    Parameters
    ----------
    hdf5_file_path : str
        Slash-separated path; leading and trailing slashes are ignored.

    Returns
    -------
    tuple of (str, str)
        ``(full_string, file_extension)`` where ``file_extension`` is the
        lower-cased extension of the last path component without its leading
        dot, and ``full_string`` is ``f"{subfolder}_{file_extension}"``.
        ``subfolder`` is the first path component, or an empty string when
        the path consists of a single component.
    """
    parts = hdf5_file_path.strip('/').split('/')

    # Extract the filename and its extension
    _filename, file_extension = os.path.splitext(parts[-1])

    # Lower-case and remove the leading dot from the file extension
    file_extension = file_extension.lower().lstrip('.')

    # Extract the first directory directly under the root directory '/' in the hdf5 file
    subfolder_name = parts[0] if len(parts) > 1 else ""

    # Construct the resulting string
    full_string = f"{subfolder_name}_{file_extension}"

    return full_string, file_extension


def select_file_reader(path):
    """Return the reader callable registered in ``file_readers`` for *path*.

    The instrument-specific key ('<folder>_<ext>') takes precedence over the
    bare-extension key. When neither is registered, a callable that accepts
    one argument and returns ``None`` is returned.
    """
    full_string, extension = compute_filereader_key_from_path(path)

    # First, try to match the full string
    if full_string in file_readers:
        return file_readers[full_string]

    # If no match, try to match the reader using only the extension
    if extension in file_readers:
        return file_readers[extension]

    # Default case if no reader is found
    return lambda x: None


def run_reader(hdf5_file_path, src_file_path, dst_group_name):
    """Run the registered reader script for *dst_group_name* in a subprocess.

    The instrument folder and file extension are derived from
    ``dst_group_name``; the matching reader script is taken from the registry
    (see find_reader) and executed as
    ``<python> <reader> <hdf5_file_path> <src_file_path> <instrument_folder>``.

    Parameters
    ----------
    hdf5_file_path : str
        Passed through to the reader script as its first argument.
    src_file_path : str
        Passed through to the reader script as its second argument.
    dst_group_name : str
        Path of the form ``/instrumentname/to/filename.ext`` used to select
        the reader.

    Raises
    ------
    FileNotFoundError
        If the registry references a reader script that does not exist.
    """
    try:
        this_file_path = os.path.abspath(__file__)
    except NameError:
        print("Error: __file__ is not available. Ensure the script is being run from a file.")
        print("[Notice] Path to DIMA package may not be resolved properly.")
        this_file_path = os.getcwd()  # Use current directory or specify a default

    project_path = os.path.normpath(os.path.join(this_file_path, "..", '..'))  # Move up to project root

    full_string, file_extension = compute_filereader_key_from_path(dst_group_name)

    # full_string is always '<folder>_<ext>', so strip exactly that suffix.
    # Splitting on '_' and removing the first token equal to the extension
    # would corrupt folder names that contain the extension as a token
    # (e.g. 'csv_data' + 'csv') and fail for extensions containing '_'.
    instrument_folder = full_string[:len(full_string) - len(file_extension) - 1]

    # The dictionary path is currently not forwarded to the reader script.
    reader_path, _dict_path = find_reader(instrument_folder, file_extension)

    if not reader_path:
        print(f'There is no file reader available to process files in {instrument_folder}.')
        return

    reader_path = os.path.normpath(os.path.join(project_path, reader_path))
    if not os.path.exists(reader_path):
        raise FileNotFoundError(f"File reader {reader_path} not found for key {full_string}. Verify the reader is properly referenced in registry.yaml.")

    print(f'Attempting to run {reader_path}')

    # Use the interpreter running this process so the reader sees the same
    # environment; fall back to 'python' if it cannot be determined.
    command = [sys.executable or "python", reader_path, hdf5_file_path, src_file_path, instrument_folder]
    #if dict_path:
    #    args.append(dict_path)
    print(f"Running: {command}")
    output = subprocess.run(command, capture_output=True)#, check=True)
    print('Subprocess output',output.stdout)
    if output.returncode != 0:
        # Surface failures instead of silently discarding stderr.
        print(f'Subprocess exited with return code {output.returncode}:', output.stderr)

    #logging.info(instFoldermsdEnd )