Update all file readers with a command-line interface so that they can be run as subprocesses. Also add registry.yaml to decouple the code from user-based instrument adaptations or extensions.

This commit is contained in:
2025-02-24 17:27:12 +01:00
parent bb48cfa0cd
commit 92a2560ed7
7 changed files with 495 additions and 85 deletions

View File

@ -1,15 +1,20 @@
import os
import sys
import subprocess
import yaml
#root_dir = os.path.abspath(os.curdir)
#sys.path.append(root_dir)
try:
from dima.instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
from dima.instruments.readers.g5505_text_reader import read_txt_files_as_dict
#try:
# from dima.instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
# from dima.instruments.readers.g5505_text_reader import read_txt_files_as_dict
except ModuleNotFoundError:
from instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
from instruments.readers.g5505_text_reader import read_txt_files_as_dict
#except ModuleNotFoundError as e:
# print(e)
from instruments.readers.xps_ibw_reader import read_xps_ibw_file_as_dict
from instruments.readers.g5505_text_reader import read_txt_files_as_dict
from instruments.readers.acsm_tofware_reader import read_acsm_files_as_dict
from instruments.readers.acsm_flag_reader import read_jsonflag_as_dict
file_extensions = ['.ibw','.txt','.dat','.h5','.TXT','.csv','.pkl','.json','.yaml']
@ -19,24 +24,37 @@ default_instruments_dir = None # or provide an absolute path
# Dispatch table from a reader key to a one-argument callable that reads the given file.
# The keys visible here are either a bare extension ('ibw', 'txt', 'dat', 'csv') or an
# instrument-specific key that ends in the extension (e.g. 'ACSM_TOFWARE_txt').
# NOTE(review): this span comes from a rendered diff with +/- markers and indentation
# stripped; old and new lines are interleaved — confirm against the real file.
file_readers = {
'ibw': lambda a1: read_xps_ibw_file_as_dict(a1),
'txt': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
# 'TXT': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
'dat': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
# NOTE(review): the next two lines (a 'csv' entry without a trailing comma, then the closing
# brace) look like the pre-change version of the dict ending — TODO confirm.
'csv': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False)
}
# NOTE(review): the remaining lines look like the post-change version, which adds the
# instrument-specific ACSM_TOFWARE readers directly to the dict — TODO confirm.
'csv': lambda a1: read_txt_files_as_dict(a1, instruments_dir=default_instruments_dir, work_with_copy=False),
'ACSM_TOFWARE_txt' : lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False),
'ACSM_TOFWARE_csv' : lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False),
'ACSM_TOFWARE_flags_json' : lambda x: read_jsonflag_as_dict(x)}
# Add new "instrument reader (Data flagging app data)"
REGISTRY_FILE = "registry.yaml" #os.path.join(os.path.dirname(__file__), "registry.yaml")
from instruments.readers.acsm_tofware_reader import read_acsm_files_as_dict
file_extensions.append('.txt')
file_readers.update({'ACSM_TOFWARE_txt' : lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False)})
# Load the instrument registry: open the YAML file named by REGISTRY_FILE, resolved
# relative to this module's directory, and return the value under its "instruments" key.
# NOTE(review): this span comes from a rendered diff with +/- markers and indentation
# stripped; the lines flagged below look like removed-side lines interleaved with the
# function body — confirm against the real file.
def load_registry():
# NOTE(review): presumably removed-side residue (old '.csv' reader registration), not part
# of load_registry — TODO confirm.
file_extensions.append('.csv')
file_readers.update({'ACSM_TOFWARE_csv' : lambda x: read_acsm_files_as_dict(x, instruments_dir=default_instruments_dir, work_with_copy=False)})
# Directory that contains this module.
module_dir = os.path.dirname(__file__)
# Parent of the module directory; not referenced again in the visible lines.
instruments_dir = os.path.join(module_dir, '..')
# NOTE(review): presumably removed-side residue (old '.json' flag reader registration), not
# part of load_registry — TODO confirm.
from instruments.readers.flag_reader import read_jsonflag_as_dict
file_extensions.append('.json')
file_readers.update({'ACSM_TOFWARE_flags_json' : lambda x: read_jsonflag_as_dict(x)})
# Normalize the path (resolves any '..' in the path)
registry_path = os.path.abspath(os.path.join(module_dir,REGISTRY_FILE))
with open(registry_path, "r") as file:
# Raises KeyError if the parsed YAML mapping has no "instruments" key.
return yaml.safe_load(file)["instruments"]
def find_reader(instrument_folder, file_extension):
    """Look up the reader registered for an instrument folder and file extension.

    Scans the entries returned by ``load_registry()`` in order and returns the
    first one whose ``instrumentFolderName`` and ``fileExtension`` both match.

    Returns:
        A ``(fileReaderPath, InstrumentDictionaryPath)`` tuple taken from the
        matching registry entry, or ``(None, None)`` when no entry matches.
    """
    for record in load_registry():
        # Check the folder first so the extension is only read for candidate entries.
        if record["instrumentFolderName"] != instrument_folder:
            continue
        if record["fileExtension"] != file_extension:
            continue
        return record["fileReaderPath"], record["InstrumentDictionaryPath"]

    # Not found
    return None, None
def compute_filereader_key_from_path(hdf5_file_path):
"""Constructs the key 'instrumentname_ext' based on hdf5_file_path, structured as
/instrumentname/to/filename.ext, which access the file reader that should be used to read such a file.
@ -81,4 +99,45 @@ def select_file_reader(path):
return file_readers[extension]
# Default case if no reader is found
return lambda x : None
return lambda x : None
def run_reader(hdf5_file_path, src_file_path, dst_group_name):
    """Run the registered file reader for ``dst_group_name`` as a subprocess.

    The reader key and file extension are obtained from
    ``compute_filereader_key_from_path(dst_group_name)``. The instrument folder
    name is that key without its extension part, and ``find_reader`` maps the
    (folder, extension) pair to a reader script path relative to the project
    root. When a reader is found it is launched as::

        <python> <reader_path> <hdf5_file_path> <src_file_path> <instrument_folder>

    Args:
        hdf5_file_path: Passed through to the reader script as its first argument.
        src_file_path: Passed through to the reader script as its second argument.
        dst_group_name: Path-like name used to derive the reader key.

    Returns:
        None. The reader's captured stdout is printed; stderr and the exit code
        are printed as well when the reader exits with a non-zero status.

    Raises:
        FileNotFoundError: If the registry references a reader script that does
            not exist on disk.
    """
    try:
        this_file_path = os.path.abspath(__file__)
    except NameError:
        print("Error: __file__ is not available. Ensure the script is being run from a file.")
        print("[Notice] Path to DIMA package may not be resolved properly.")
        this_file_path = os.getcwd()  # Use current directory or specify a default

    # Move up to project root
    project_path = os.path.normpath(os.path.join(this_file_path, "..", ".."))

    full_string, file_extension = compute_filereader_key_from_path(dst_group_name)

    # Strip the trailing "_<ext>" rather than removing the first part equal to the
    # extension: list.remove() would mangle folder names such as "txt_logger".
    suffix = f"_{file_extension}"
    if full_string.endswith(suffix):
        instrument_folder = full_string[:-len(suffix)]
    else:
        # Key does not follow the "<folder>_<ext>" layout; keep the previous behavior.
        key_parts = full_string.split("_")
        key_parts.remove(file_extension)
        instrument_folder = "_".join(key_parts)

    # dict_path is looked up but is not currently forwarded to the reader script.
    reader_path, dict_path = find_reader(instrument_folder, file_extension)

    if not reader_path:
        print(f'There is no file reader available to process files in {instrument_folder}.')
        return

    reader_path = os.path.normpath(os.path.join(project_path, reader_path))
    if not os.path.exists(reader_path):
        raise FileNotFoundError(f"File reader {reader_path} not found for key {full_string}. Verify the reader is properly referenced in registry.yaml.")
    print(f'Attempting to run {reader_path}')

    # Use the interpreter running this process so the reader sees the same environment
    # (virtualenv, installed packages); a bare "python" depends on PATH.
    interpreter = sys.executable or "python"
    command = [interpreter, reader_path, hdf5_file_path, src_file_path, instrument_folder]

    print(f"Running: {command}")
    output = subprocess.run(command, capture_output=True)
    print('Subprocess output',output.stdout)
    # Output is captured and check=True is not used, so report failures explicitly
    # instead of letting them pass unnoticed.
    if output.returncode != 0:
        print(f'Subprocess exited with code {output.returncode}. Error output', output.stderr)