improve error handling on startup

- configuration errors on startup are no longer collected in the
  'errors' attribute, but logged immediately and counted by a new
  'logError' method.
- generalConfig.raise_config_errors may be set to raise these errors
  immediately instead of logging and counting them
- Raise immediately on unhandled errors. Some of them were caught
  before, which may lead to strange follow-up errors.
- do not start the poller thread when the error count is non-zero
- as before, simple connection failures do not prevent startup,
  but now more severe errors (e.g. a bad uri) will stop the server

Change-Id: I4f84ee8e86cf2925108c0fe92eaab796bb731091
Reviewed-on: https://forge.frm2.tum.de/review/c/secop/frappy/+/37776
Tested-by: Jenkins Automated Tests <pedersen+jenkins@frm2.tum.de>
Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
This commit is contained in:
2025-11-05 17:44:56 +01:00
parent 8291976c5d
commit 943995b1d7
6 changed files with 120 additions and 103 deletions

View File

@@ -175,6 +175,9 @@ class IOBase(Communicator):
if repr(e) != self._last_error: if repr(e) != self._last_error:
self._last_error = repr(e) self._last_error = repr(e)
self.log.error(self._last_error) self.log.error(self._last_error)
if not isinstance(e, CommunicationFailedError):
# when this happens on startup, assume it is not worth to continue
self.secNode.error_count += 1
raise SilentError(repr(e)) from e raise SilentError(repr(e)) from e
return self.is_connected return self.is_connected

View File

@@ -32,7 +32,7 @@ from frappy.datatypes import ArrayOf, BoolType, EnumType, FloatRange, \
NoneOr NoneOr
from frappy.errors import BadValueError, CommunicationFailedError, ConfigError, \ from frappy.errors import BadValueError, CommunicationFailedError, ConfigError, \
ProgrammingError, SECoPError, secop_error, RangeError ProgrammingError, SECoPError, secop_error, RangeError
from frappy.lib import formatException, mkthread, UniqueObject from frappy.lib import formatException, mkthread, UniqueObject, generalConfig
from frappy.params import Accessible, Command, Parameter, Limit, PREDEFINED_ACCESSIBLES from frappy.params import Accessible, Command, Parameter, Limit, PREDEFINED_ACCESSIBLES
from frappy.properties import HasProperties, Property from frappy.properties import HasProperties, Property
from frappy.logging import RemoteLogHandler from frappy.logging import RemoteLogHandler
@@ -339,7 +339,6 @@ class Module(HasAccessibles):
self.updateLock = threading.RLock() # for announceUpdate self.updateLock = threading.RLock() # for announceUpdate
self.polledModules = [] # modules polled by thread started in self.startModules self.polledModules = [] # modules polled by thread started in self.startModules
self.attachedModules = {} self.attachedModules = {}
self.errors = []
self._isinitialized = False self._isinitialized = False
self._initfailed = False self._initfailed = False
self.updateCallback = srv.dispatcher.announce_update self.updateCallback = srv.dispatcher.announce_update
@@ -367,7 +366,7 @@ class Module(HasAccessibles):
else: else:
self.setProperty(key, value) self.setProperty(key, value)
except BadValueError: except BadValueError:
self.errors.append(f'{key}: value {value!r} does not match {self.propertyDict[key].datatype!r}!') self.logError(f'{key}: value {value!r} does not match {self.propertyDict[key].datatype!r}!')
# 3) set automatic properties # 3) set automatic properties
mycls, = self.__class__.__bases__ # skip the wrapper class mycls, = self.__class__.__bases__ # skip the wrapper class
@@ -394,7 +393,7 @@ class Module(HasAccessibles):
# 3) complain about names not found as accessible or property names # 3) complain about names not found as accessible or property names
if cfgdict: if cfgdict:
self.errors.append( self.logError(
f"{', '.join(cfgdict.keys())} does not exist (use one of" f"{', '.join(cfgdict.keys())} does not exist (use one of"
f" {', '.join(list(self.accessibles) + list(self.propertyDict))})") f" {', '.join(list(self.accessibles) + list(self.propertyDict))})")
@@ -410,18 +409,17 @@ class Module(HasAccessibles):
self.applyMainUnit(mainunit) self.applyMainUnit(mainunit)
# 6) check complete configuration of * properties # 6) check complete configuration of * properties
if not self.errors: try:
self.checkProperties()
except ProgrammingError:
raise
except SECoPError as e:
self.logError(str(e))
for aname, aobj in self.accessibles.items():
try: try:
self.checkProperties() aobj.checkProperties()
except ConfigError as e: except SECoPError as e:
self.errors.append(str(e)) self.logError(f'{aname}: {e}')
for aname, aobj in self.accessibles.items():
try:
aobj.checkProperties()
except (ConfigError, ProgrammingError) as e:
self.errors.append(f'{aname}: {e}')
# if self.errors:
# raise ConfigError(self.errors)
# helper cfg-editor # helper cfg-editor
def __iter__(self): def __iter__(self):
@@ -459,9 +457,9 @@ class Module(HasAccessibles):
accessible.datatype(cfg[propname]) accessible.datatype(cfg[propname])
accessible.setProperty(propname, propvalue) accessible.setProperty(propname, propvalue)
except KeyError: except KeyError:
self.errors.append(f"'{name}' has no property '{propname}'") self.logError(f"'{name}' has no property '{propname}'")
except BadValueError as e: except SECoPError as e:
self.errors.append(f'{name}.{propname}: {str(e)}') self.logError(type(e)(f'{name}.{propname}: {e}'))
if isinstance(accessible, Parameter): if isinstance(accessible, Parameter):
self._handle_writes(name, accessible) self._handle_writes(name, accessible)
@@ -475,17 +473,17 @@ class Module(HasAccessibles):
basepname = pname.rpartition('_')[0] basepname = pname.rpartition('_')[0]
baseparam = self.parameters.get(basepname) baseparam = self.parameters.get(basepname)
if not baseparam: if not baseparam:
self.errors.append(f'limit {pname!r} is given, but not {basepname!r}') self.logError(f'limit {pname!r} is given, but not {basepname!r}')
return return
if baseparam.datatype is None: if baseparam.datatype is None:
return # an error will be reported on baseparam return # an error will be reported on baseparam
pobj.set_datatype(baseparam.datatype) pobj.set_datatype(baseparam.datatype)
if not pobj.hasDatatype(): if not pobj.hasDatatype():
self.errors.append(f'{pname} needs a datatype') self.logError(f'{pname} needs a datatype')
return return
if pobj.value is None: if pobj.value is None:
if pobj.needscfg: if pobj.needscfg:
self.errors.append(f'{pname!r} has no default value and was not given in config!') self.logError(f'{pname!r} has no default value and was not given in config!')
if pobj.default is None: if pobj.default is None:
# we do not want to call the setter for this parameter for now, # we do not want to call the setter for this parameter for now,
# this should happen on the first read # this should happen on the first read
@@ -610,8 +608,6 @@ class Module(HasAccessibles):
registers it in the server for waiting registers it in the server for waiting
<timeout> defaults to 30 seconds <timeout> defaults to 30 seconds
""" """
# we do not need self.errors any longer. should we delete it?
# del self.errors
if self.polledModules: if self.polledModules:
self.__poller = mkthread(self.__pollThread, self.polledModules, start_events.get_trigger()) self.__poller = mkthread(self.__pollThread, self.polledModules, start_events.get_trigger())
self.startModuleDone = True self.startModuleDone = True
@@ -863,3 +859,15 @@ class Module(HasAccessibles):
raise RangeError(f'{pname} below {pname}_min') raise RangeError(f'{pname} below {pname}_min')
if value > max_: if value > max_:
raise RangeError(f'{pname} above {pname}_max') raise RangeError(f'{pname} above {pname}_max')
def logError(self, error):
"""log error or raise, depending on generalConfig settings
:param error: an exception or a str (considered as ConfigError)
to be used during startup
"""
if generalConfig.raise_config_errors:
raise ConfigError(error) if isinstance(error, str) else error
self.log.error(str(error))
self.secNode.error_count += 1

View File

@@ -23,7 +23,6 @@
import inspect import inspect
from frappy.datatypes import ArrayOf, BoolType, CommandType, DataType, \ from frappy.datatypes import ArrayOf, BoolType, CommandType, DataType, \
DataTypeType, DefaultType, EnumType, FloatRange, NoneOr, OrType, StringType, \ DataTypeType, DefaultType, EnumType, FloatRange, NoneOr, OrType, StringType, \
StructOf, TextType, TupleOf, ValueType StructOf, TextType, TupleOf, ValueType
@@ -342,7 +341,9 @@ class Parameter(Accessible):
try: try:
self.datatype.setProperty(key, value) self.datatype.setProperty(key, value)
except KeyError: except KeyError:
raise ProgrammingError(f'cannot set {key} on parameter with datatype {type(self.datatype).__name__}') from None raise ProgrammingError(
f'cannot set {key} on parameter with datatype'
f' {type(self.datatype).__name__}') from None
except BadValueError as e: except BadValueError as e:
raise ProgrammingError(f'property {key}: {str(e)}') from None raise ProgrammingError(f'property {key}: {str(e)}') from None

View File

@@ -20,20 +20,16 @@
# ***************************************************************************** # *****************************************************************************
import time import time
import traceback
from collections import OrderedDict from collections import OrderedDict
from frappy.dynamic import Pinata from frappy.dynamic import Pinata
from frappy.errors import ConfigError, NoSuchModuleError, NoSuchParameterError from frappy.errors import NoSuchModuleError, NoSuchParameterError, SECoPError, \
from frappy.lib import get_class ConfigError, ProgrammingError
from frappy.lib import get_class, generalConfig
from frappy.version import get_version from frappy.version import get_version
from frappy.modules import Module from frappy.modules import Module
class InitFailed(Exception):
pass
class SecNode: class SecNode:
"""Managing the modules. """Managing the modules.
@@ -42,6 +38,7 @@ class SecNode:
- get_module(modulename) returns the requested module or None if there is - get_module(modulename) returns the requested module or None if there is
no suitable configuration on the server no suitable configuration on the server
""" """
raise_config_errors = False # collect catchable errors instead of raising
def __init__(self, name, logger, options, srv): def __init__(self, name, logger, options, srv):
self.equipment_id = options.pop('equipment_id', name) self.equipment_id = options.pop('equipment_id', name)
@@ -50,11 +47,7 @@ class SecNode:
self.modules = {} self.modules = {}
self.log = logger self.log = logger
self.srv = srv self.srv = srv
# set of modules that failed creation self.error_count = 0 # count catchable errors during initialization
# self.failed_modules = set()
# list of errors that occured during initialization
self.errors = []
self.traceback_counter = 0
self.name = name self.name = name
def add_secnode_property(self, prop, value): def add_secnode_property(self, prop, value):
@@ -62,6 +55,18 @@ class SecNode:
in the description.""" in the description."""
self.nodeprops[prop] = value self.nodeprops[prop] = value
def logError(self, error):
"""log error or raise, depending on generalConfig settings
:param error: an exception or a str (considered as ConfigError)
to be used during startup
"""
if generalConfig.raise_config_errors:
raise ConfigError(error) if isinstance(error, str) else error
self.log.error(str(error))
self.error_count += 1
def get_secnode_property(self, prop): def get_secnode_property(self, prop):
"""Get SECNode property. """Get SECNode property.
@@ -80,24 +85,20 @@ class SecNode:
# also call earlyInit on the modules # also call earlyInit on the modules
self.log.debug('initializing module %r', modulename) self.log.debug('initializing module %r', modulename)
# try:
modobj.earlyInit() modobj.earlyInit()
if not modobj.earlyInitDone: if not modobj.earlyInitDone:
self.errors.append(f'{modobj.earlyInit.__qualname__} was not ' self.logError(ProgrammingError(
f'called, probably missing super call') f'module {modulename}: '
'Module.earlyInit was not called, probably missing super call'))
modobj.earlyInitDone = True
modobj.initModule() modobj.initModule()
if not modobj.initModuleDone: if not modobj.initModuleDone:
self.errors.append(f'{modobj.initModule.__qualname__} was not ' self.logError(ProgrammingError(
f'called, probably missing super call') f'module {modulename}: '
# except Exception as e: 'Module.initModule was not called, probably missing super call'))
# if self.traceback_counter == 0: modobj.initModuleDone = True
# self.log.exception(traceback.format_exc())
# self.traceback_counter += 1
# self.errors.append(f'error initializing {modulename}: {e!r}')
# modobj._initfailed = True
# raise InitFailed('try to access erroneous module') from e
modobj._isinitialized = True modobj._isinitialized = True
self.log.info('initialized module %r', modulename) self.log.debug('initialized module %r', modulename)
return modobj return modobj
def get_module_instance(self, modulename): def get_module_instance(self, modulename):
@@ -121,35 +122,22 @@ class SecNode:
raise NoSuchModuleError(f'Module {modulename!r} does not exist on ' raise NoSuchModuleError(f'Module {modulename!r} does not exist on '
f'this SEC-Node!') f'this SEC-Node!')
opts = dict(opts) opts = dict(opts)
# pymodule = None
classname = opts.pop('cls') classname = opts.pop('cls')
try: # pylint: disable=no-else-return try:
if isinstance(classname, str): if isinstance(classname, str):
# pymodule = classname.rpartition('.')[0]
# if pymodule in self.failed_modules:
# # creation has failed already once, do not try again
# return None
cls = get_class(classname) cls = get_class(classname)
else: else:
# pymodule = classname.__module__
# if pymodule in self.failed_modules:
# # creation has failed already once, do not try again
# return None
cls = classname cls = classname
if not issubclass(cls, Module): if not issubclass(cls, Module):
self.errors.append(f'{cls.__name__} is not a Module') self.logError(f'{cls.__name__} is not a Module')
return None return None
except Exception as e: except AttributeError as e:
if str(e) == 'no such class': if str(e) == 'no such class':
self.errors.append(f'{classname} not found') self.logError(f'{classname} not found')
return None return None
raise raise
else: modobj = cls(modulename, self.log.parent.getChild(modulename),
modobj = cls(modulename, self.log.parent.getChild(modulename), opts, self.srv)
opts, self.srv)
for errtxt in modobj.errors:
self.errors.append(' ' + errtxt)
self.add_module(modobj, modulename)
return modobj return modobj
def create_modules(self): def create_modules(self):
@@ -180,9 +168,19 @@ class SecNode:
self.log.info('Pinata %s found %d modules', self.log.info('Pinata %s found %d modules',
modname, len(pinata_modules)) modname, len(pinata_modules))
todos.extend(pinata_modules) todos.extend(pinata_modules)
# inititalize all modules # initialize all modules
for modulename in self.modules: for modname in self.modules:
self.get_module(modulename) modobj = self.get_module(modname)
# check attached modules for existence
# normal properties are retrieved too, but this does not harm
for prop in modobj.propertyDict:
try:
getattr(modobj, prop)
except SECoPError as e:
if self.raise_config_errors:
raise
self.error_count += 1
modobj.logError(e)
def export_accessibles(self, modobj): def export_accessibles(self, modobj):
self.log.debug('export_accessibles(%r)', modobj.name) self.log.debug('export_accessibles(%r)', modobj.name)
@@ -198,9 +196,9 @@ class SecNode:
def build_descriptive_data(self): def build_descriptive_data(self):
modules = {} modules = {}
result = {'modules': modules} result = {'modules': modules}
for modulename in list(self.modules): for modulename in self.modules:
modobj = self.get_module(modulename) modobj = self.get_module(modulename)
if not modobj or not modobj.export: if not modobj.export:
continue continue
# some of these need rework ! # some of these need rework !
mod_desc = {'accessibles': self.export_accessibles(modobj)} mod_desc = {'accessibles': self.export_accessibles(modobj)}
@@ -260,7 +258,7 @@ class SecNode:
now = time.time() now = time.time()
deadline = now + 0.5 # should be long enough for most read functions to finish deadline = now + 0.5 # should be long enough for most read functions to finish
for mod in self.modules.values(): for mod in self.modules.values():
mod.joinPollThread(max(0, deadline - now)) mod.joinPollThread(max(0.0, deadline - now))
now = time.time() now = time.time()
for name in self._getSortedModules(): for name in self._getSortedModules():
self.modules[name].shutdownModule() self.modules[name].shutdownModule()

View File

@@ -31,7 +31,7 @@ import time
import mlzlog import mlzlog
from frappy.config import load_config from frappy.config import load_config
from frappy.errors import ConfigError from frappy.errors import ConfigError, ProgrammingError
from frappy.lib import formatException, generalConfig, get_class, mkthread from frappy.lib import formatException, generalConfig, get_class, mkthread
from frappy.lib.multievent import MultiEvent from frappy.lib.multievent import MultiEvent
from frappy.logging import init_remote_logging from frappy.logging import init_remote_logging
@@ -39,6 +39,8 @@ from frappy.params import PREDEFINED_ACCESSIBLES
from frappy.secnode import SecNode from frappy.secnode import SecNode
from frappy.protocol.discovery import UDPListener from frappy.protocol.discovery import UDPListener
generalConfig.set_default('raise_config_errors', False)
try: try:
from daemon import DaemonContext from daemon import DaemonContext
try: try:
@@ -299,33 +301,35 @@ class Server:
for k in list(opts): for k in list(opts):
self.secnode.add_secnode_property(k, opts.pop(k)) self.secnode.add_secnode_property(k, opts.pop(k))
self.secnode.create_modules() try:
# initialize modules by calling self.secnode.get_module for all of them self.secnode.create_modules()
# this is done in build_descriptive_data even for unexported modules # initialize modules by calling self.secnode.get_module for all of them
self.secnode.build_descriptive_data() # this is done in build_descriptive_data even for unexported modules
# =========== All modules are initialized =========== self.secnode.build_descriptive_data()
# =========== All modules are initialized ===========
# all errors from initialization process if not self._testonly:
errors = self.secnode.errors start_events = MultiEvent(default_timeout=30)
for modname, modobj in self.secnode.modules.items():
if not self._testonly: # do not start pollers when we have errors already # startModule must return either a timeout value or None (default 30 sec)
start_events = MultiEvent(default_timeout=30) start_events.name = f'module {modname}'
for modname, modobj in self.secnode.modules.items(): if self.secnode.error_count:
# startModule must return either a timeout value or None (default 30 sec) # do not start poll thread. check for startModuleDone only
start_events.name = f'module {modname}' modobj.polledModules = []
modobj.startModule(start_events) modobj.startModule(start_events)
if not modobj.startModuleDone: if not modobj.startModuleDone:
errors.append(f'{modobj.startModule.__qualname__} was not called, probably missing super call') self.secnode.log_error(ProgrammingError(
f'module {modname}: '
if errors: 'Module.startModule was not called, probably missing super call'))
for errtxt in errors: except Exception as e:
for line in errtxt.split('\n'): if self.secnode.error_count:
self.log.error(line) raise type(e)(
# print a list of config errors to stderr f'{e.args[0]} - see also {self.secnode.error_count}'
sys.stderr.write('\n'.join(errors)) ' errors logged above') from e
sys.stderr.write('\n') raise
if self.secnode.error_count:
self.log.error('%d errors during initialisation', self.secnode.error_count)
sys.exit(1) sys.exit(1)
if self._testonly: if self._testonly:
return return
self.log.info('waiting for modules being started') self.log.info('waiting for modules being started')
@@ -334,6 +338,9 @@ class Server:
# some timeout happened # some timeout happened
for name in start_events.waiting_for(): for name in start_events.waiting_for():
self.log.warning('timeout when starting %s', name) self.log.warning('timeout when starting %s', name)
if self.secnode.error_count:
self.log.error('%d errors during startup', self.secnode.error_count)
sys.exit(1)
self.log.info('all modules started') self.log.info('all modules started')
history_path = os.environ.get('FRAPPY_HISTORY') history_path = os.environ.get('FRAPPY_HISTORY')
if history_path: if history_path:

View File

@@ -41,7 +41,7 @@ class DispatcherStub:
# -> we have to inhibit the 'omit unchanged update' feature # -> we have to inhibit the 'omit unchanged update' feature
def __init__(self, updates): def __init__(self, updates):
generalConfig.testinit(omit_unchanged_within=0) generalConfig.testinit(omit_unchanged_within=0, raise_config_errors=True)
self.updates = updates self.updates = updates
def announce_update(self, moduleobj, pobj): def announce_update(self, moduleobj, pobj):
@@ -66,7 +66,7 @@ logger = LoggerStub()
class ServerStub: class ServerStub:
def __init__(self, updates): def __init__(self, updates):
self.dispatcher = DispatcherStub(updates) self.dispatcher = DispatcherStub(updates)
self.secnode = None self.secnode = type('SecNodeStub', (), {'raise_config_errors': True})
class DummyMultiEvent(threading.Event): class DummyMultiEvent(threading.Event):