Files
dima/instruments/filereader_registry.py

150 lines
5.9 KiB
Python

import os
import sys
import subprocess
import yaml
#root_dir = os.path.abspath(os.curdir)
#sys.path.append(root_dir)
#try:
# from dima.instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
# from dima.instruments.readers.g5505_text_reader import read_txt_files_as_dict
#except ModuleNotFoundError as e:
# print(e)
from instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
from instruments.readers.g5505_text_reader import read_txt_files_as_dict
from instruments.readers.acsm_tofware_reader import read_acsm_files_as_dict
from instruments.readers.acsm_flag_reader import read_jsonflag_as_dict
from instruments.readers.nasa_ames_reader import read_nasa_ames_as_dict
from instruments.readers.structured_file_reader import read_structured_file_as_dict
# File extensions, each written with its leading dot, that this module
# advertises as readable. NOTE(review): presumably consumed by callers to
# filter input files by suffix - confirm against the call sites.
file_extensions = [
    '.ibw', '.txt', '.dat', '.h5', '.TXT', '.csv', '.pkl',
    '.json', '.yaml', '.yml', '.nas',
]

# Define the instruments directory (modify this as needed or set to None)
default_instruments_dir = None  # or provide an absolute path

# Dispatch table mapping a reader key to a one-argument callable.
# Keys are either a bare lowercase extension ('txt') or the combined form
# '<top-level folder>_<extension>' produced by compute_filereader_key_from_path;
# select_file_reader tries the combined key first and the bare extension second.
# The entries stay lambdas on purpose: the reader functions and
# default_instruments_dir are looked up when the reader is called, so
# reassigning default_instruments_dir after import still takes effect.
file_readers = {
    'ibw': lambda a1: read_xps_ibw_file_as_dict(a1),
    'txt': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'dat': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'csv': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
    'yaml': lambda a1: read_structured_file_as_dict(a1),
    'yml': lambda a1: read_structured_file_as_dict(a1),
    'json': lambda a1: read_structured_file_as_dict(a1),
    'ACSM_TOFWARE_txt': lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False),
    'ACSM_TOFWARE_csv': lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False),
    'ACSM_TOFWARE_flags_json': lambda x: read_jsonflag_as_dict(x),
    'ACSM_TOFWARE_nas': lambda x: read_nasa_ames_as_dict(x),
}

# Name of the registry file; load_registry resolves it relative to this module.
REGISTRY_FILE = "registry.yaml"  # os.path.join(os.path.dirname(__file__), "registry.yaml")
def load_registry():
    """Load the instrument entries from the registry YAML file.

    The file named by ``REGISTRY_FILE`` is looked up in the directory that
    contains this module and parsed with ``yaml.safe_load``.

    Returns
    -------
    object
        The value stored under the top-level ``"instruments"`` key.
        ``find_reader`` iterates it and indexes each item by string keys, so
        it is presumably a list of mappings - confirm against registry.yaml.

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist next to this module.
    KeyError
        If the parsed document has no ``"instruments"`` key.
    """
    module_dir = os.path.dirname(__file__)
    # Normalize the path (resolves any '..' in the path)
    registry_path = os.path.abspath(os.path.join(module_dir, REGISTRY_FILE))
    # Explicit UTF-8 so parsing does not depend on the platform's default
    # text encoding (e.g. cp1252 on Windows).
    with open(registry_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)["instruments"]
def find_reader(instrument_folder, file_extension):
    """Look up the reader script and dictionary registered for an instrument.

    The registry returned by ``load_registry`` is scanned in order and the
    first entry whose ``instrumentFolderName`` equals ``instrument_folder``
    and whose comma-separated ``fileExtension`` list contains
    ``file_extension`` wins.

    Parameters
    ----------
    instrument_folder : str
        Instrument folder name to match exactly.
    file_extension : str
        Extension to match, written the same way as in the registry
        (``run_reader`` passes it lowercase and without the leading dot).

    Returns
    -------
    tuple
        ``(fileReaderPath, InstrumentDictionaryPath)`` of the matching entry,
        or ``(None, None)`` when no entry matches.
    """
    registry = load_registry()
    for entry in registry:
        if entry["instrumentFolderName"] != instrument_folder:
            continue
        # Trim each token so a hand-edited list such as "txt, csv" still
        # matches "csv"; without trimming the token would be " csv".
        registered = [ext.strip() for ext in entry["fileExtension"].split(',')]
        if file_extension in registered:
            return entry["fileReaderPath"], entry["InstrumentDictionaryPath"]
    return None, None  # Not found
def compute_filereader_key_from_path(hdf5_file_path):
    """Build the file-reader lookup key for a path inside an HDF5 file.

    The path is expected to look like ``/instrumentname/to/filename.ext``.
    The key joins the first path component and the lowercase, dot-less
    extension of the last component with an underscore.

    Parameters
    ----------
    hdf5_file_path : str
        Slash-separated path; leading and trailing slashes are ignored.

    Returns
    -------
    tuple of (str, str)
        ``(key, extension)`` where ``key`` is ``'<first component>_<extension>'``
        and ``extension`` is lowercase without its leading dot. The first
        component is the empty string when the path has a single component,
        and the extension is empty when the last component has none.
    """
    segments = hdf5_file_path.strip('/').split('/')

    # Only the extension of the final component matters; its stem is unused.
    _, dotted_ext = os.path.splitext(segments[-1])
    ext = dotted_ext.lstrip('.').lower()

    # A lone component is treated as a file name with no instrument folder.
    top_folder = "" if len(segments) <= 1 else segments[0]

    return f"{top_folder}_{ext}", ext
def select_file_reader(path):
    """Return the reader callable registered for ``path``.

    Lookup order in ``file_readers``:

    1. the combined ``'<folder>_<extension>'`` key,
    2. the bare extension.

    Parameters
    ----------
    path : str
        Path passed to ``compute_filereader_key_from_path``.

    Returns
    -------
    callable
        The matching one-argument reader, or a fallback callable that
        accepts one argument and returns ``None`` when nothing matches.
    """
    combined_key, ext = compute_filereader_key_from_path(path)

    # Most specific key first, generic extension second.
    for candidate in (combined_key, ext):
        if candidate in file_readers:
            return file_readers[candidate]

    # Default case if no reader is found
    return lambda x: None
def run_reader(hdf5_file_path, src_file_path, dst_group_name):
    """Run the registered reader script for a destination group in a subprocess.

    The instrument folder and file extension are derived from
    ``dst_group_name``, the matching reader script is looked up with
    ``find_reader``, and the script is executed as
    ``<python> <reader> <hdf5_file_path> <src_file_path> <instrument_folder>``.
    The captured standard output is printed; a failing reader does not raise,
    but its standard error is printed too.

    Parameters
    ----------
    hdf5_file_path : str
        Forwarded to the reader script as its first argument.
    src_file_path : str
        Forwarded to the reader script as its second argument.
    dst_group_name : str
        Path of the form ``/instrumentname/to/filename.ext`` used to select
        the reader.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If the registry references a reader script that does not exist.
    """
    try:
        this_file_path = os.path.abspath(__file__)
    except NameError:
        print("Error: __file__ is not available. Ensure the script is being run from a file.")
        print("[Notice] Path to DIMA package may not be resolved properly.")
        this_file_path = os.getcwd()  # Use current directory or specify a default

    # Move up to project root
    project_path = os.path.normpath(os.path.join(this_file_path, "..", '..'))

    full_string, file_extension = compute_filereader_key_from_path(dst_group_name)
    # full_string is always "<instrument_folder>_<file_extension>", so cut the
    # trailing "_<file_extension>" off. Splitting on "_" and removing the
    # extension token would drop the wrong token for a folder like "txt_data"
    # and raise ValueError for an extension that itself contains "_".
    instrument_folder = full_string[:len(full_string) - len(file_extension) - 1]

    reader_path, _dict_path = find_reader(instrument_folder, file_extension)
    if not reader_path:
        print(f'There is no file reader available to process files in {instrument_folder}.')
        return

    reader_path = os.path.normpath(os.path.join(project_path, reader_path))
    if not os.path.exists(reader_path):
        raise FileNotFoundError(f"File reader {reader_path} not found for key {full_string}. Verify the reader is properly referenced in registry.yaml.")

    print(f'Attempting to run {reader_path}')
    # Use the interpreter running this process so the reader sees the same
    # environment and installed packages; fall back to "python" on PATH when
    # the interpreter path is unavailable.
    command = [sys.executable or "python", reader_path, hdf5_file_path, src_file_path, instrument_folder]
    #if dict_path:
    #    args.append(dict_path)
    print(f"Running: {command}")
    output = subprocess.run(command, capture_output=True)  # , check=True)
    print('Subprocess output',output.stdout)
    if output.returncode != 0:
        # Best effort only: report the failure without raising.
        print('Subprocess error output', output.stderr)
#logging.info(instFoldermsdEnd )