improve error handling on startup

- configuration errors on startup are no longer collected in the
  'errors' attribute, but logged immediately and counted in a new
  'logError' method.
- generalConfig may be configured to raise the errors immediately
  instead
- Raise immediately on unhandled errors. Some of them were catched
  before which may lead to strange follow up errors.
- do not start poller thread, when the error count is non-zero
- as before, simple connection failures are not preventing startup,
  but now more severe errors (bad uri) will stop the server

Change-Id: I4f84ee8e86cf2925108c0fe92eaab796bb731091
Reviewed-on: https://forge.frm2.tum.de/review/c/secop/frappy/+/37776
Tested-by: Jenkins Automated Tests <pedersen+jenkins@frm2.tum.de>
Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
This commit is contained in:
2025-11-05 17:44:56 +01:00
parent 8291976c5d
commit 943995b1d7
6 changed files with 120 additions and 103 deletions

View File

@@ -175,6 +175,9 @@ class IOBase(Communicator):
if repr(e) != self._last_error:
self._last_error = repr(e)
self.log.error(self._last_error)
if not isinstance(e, CommunicationFailedError):
# when this happens on startup, assume it is not worth to continue
self.secNode.error_count += 1
raise SilentError(repr(e)) from e
return self.is_connected

View File

@@ -32,7 +32,7 @@ from frappy.datatypes import ArrayOf, BoolType, EnumType, FloatRange, \
NoneOr
from frappy.errors import BadValueError, CommunicationFailedError, ConfigError, \
ProgrammingError, SECoPError, secop_error, RangeError
from frappy.lib import formatException, mkthread, UniqueObject
from frappy.lib import formatException, mkthread, UniqueObject, generalConfig
from frappy.params import Accessible, Command, Parameter, Limit, PREDEFINED_ACCESSIBLES
from frappy.properties import HasProperties, Property
from frappy.logging import RemoteLogHandler
@@ -339,7 +339,6 @@ class Module(HasAccessibles):
self.updateLock = threading.RLock() # for announceUpdate
self.polledModules = [] # modules polled by thread started in self.startModules
self.attachedModules = {}
self.errors = []
self._isinitialized = False
self._initfailed = False
self.updateCallback = srv.dispatcher.announce_update
@@ -367,7 +366,7 @@ class Module(HasAccessibles):
else:
self.setProperty(key, value)
except BadValueError:
self.errors.append(f'{key}: value {value!r} does not match {self.propertyDict[key].datatype!r}!')
self.logError(f'{key}: value {value!r} does not match {self.propertyDict[key].datatype!r}!')
# 3) set automatic properties
mycls, = self.__class__.__bases__ # skip the wrapper class
@@ -394,7 +393,7 @@ class Module(HasAccessibles):
# 3) complain about names not found as accessible or property names
if cfgdict:
self.errors.append(
self.logError(
f"{', '.join(cfgdict.keys())} does not exist (use one of"
f" {', '.join(list(self.accessibles) + list(self.propertyDict))})")
@@ -410,18 +409,17 @@ class Module(HasAccessibles):
self.applyMainUnit(mainunit)
# 6) check complete configuration of * properties
if not self.errors:
try:
self.checkProperties()
except ConfigError as e:
self.errors.append(str(e))
except ProgrammingError:
raise
except SECoPError as e:
self.logError(str(e))
for aname, aobj in self.accessibles.items():
try:
aobj.checkProperties()
except (ConfigError, ProgrammingError) as e:
self.errors.append(f'{aname}: {e}')
# if self.errors:
# raise ConfigError(self.errors)
except SECoPError as e:
self.logError(f'{aname}: {e}')
# helper cfg-editor
def __iter__(self):
@@ -459,9 +457,9 @@ class Module(HasAccessibles):
accessible.datatype(cfg[propname])
accessible.setProperty(propname, propvalue)
except KeyError:
self.errors.append(f"'{name}' has no property '{propname}'")
except BadValueError as e:
self.errors.append(f'{name}.{propname}: {str(e)}')
self.logError(f"'{name}' has no property '{propname}'")
except SECoPError as e:
self.logError(type(e)(f'{name}.{propname}: {e}'))
if isinstance(accessible, Parameter):
self._handle_writes(name, accessible)
@@ -475,17 +473,17 @@ class Module(HasAccessibles):
basepname = pname.rpartition('_')[0]
baseparam = self.parameters.get(basepname)
if not baseparam:
self.errors.append(f'limit {pname!r} is given, but not {basepname!r}')
self.logError(f'limit {pname!r} is given, but not {basepname!r}')
return
if baseparam.datatype is None:
return # an error will be reported on baseparam
pobj.set_datatype(baseparam.datatype)
if not pobj.hasDatatype():
self.errors.append(f'{pname} needs a datatype')
self.logError(f'{pname} needs a datatype')
return
if pobj.value is None:
if pobj.needscfg:
self.errors.append(f'{pname!r} has no default value and was not given in config!')
self.logError(f'{pname!r} has no default value and was not given in config!')
if pobj.default is None:
# we do not want to call the setter for this parameter for now,
# this should happen on the first read
@@ -610,8 +608,6 @@ class Module(HasAccessibles):
registers it in the server for waiting
<timeout> defaults to 30 seconds
"""
# we do not need self.errors any longer. should we delete it?
# del self.errors
if self.polledModules:
self.__poller = mkthread(self.__pollThread, self.polledModules, start_events.get_trigger())
self.startModuleDone = True
@@ -863,3 +859,15 @@ class Module(HasAccessibles):
raise RangeError(f'{pname} below {pname}_min')
if value > max_:
raise RangeError(f'{pname} above {pname}_max')
def logError(self, error):
"""log error or raise, depending on generalConfig settings
:param error: an exception or a str (considered as ConfigError)
to be used during startup
"""
if generalConfig.raise_config_errors:
raise ConfigError(error) if isinstance(error, str) else error
self.log.error(str(error))
self.secNode.error_count += 1

View File

@@ -23,7 +23,6 @@
import inspect
from frappy.datatypes import ArrayOf, BoolType, CommandType, DataType, \
DataTypeType, DefaultType, EnumType, FloatRange, NoneOr, OrType, StringType, \
StructOf, TextType, TupleOf, ValueType
@@ -342,7 +341,9 @@ class Parameter(Accessible):
try:
self.datatype.setProperty(key, value)
except KeyError:
raise ProgrammingError(f'cannot set {key} on parameter with datatype {type(self.datatype).__name__}') from None
raise ProgrammingError(
f'cannot set {key} on parameter with datatype'
f' {type(self.datatype).__name__}') from None
except BadValueError as e:
raise ProgrammingError(f'property {key}: {str(e)}') from None

View File

@@ -20,20 +20,16 @@
# *****************************************************************************
import time
import traceback
from collections import OrderedDict
from frappy.dynamic import Pinata
from frappy.errors import ConfigError, NoSuchModuleError, NoSuchParameterError
from frappy.lib import get_class
from frappy.errors import NoSuchModuleError, NoSuchParameterError, SECoPError, \
ConfigError, ProgrammingError
from frappy.lib import get_class, generalConfig
from frappy.version import get_version
from frappy.modules import Module
class InitFailed(Exception):
pass
class SecNode:
"""Managing the modules.
@@ -42,6 +38,7 @@ class SecNode:
- get_module(modulename) returns the requested module or None if there is
no suitable configuration on the server
"""
raise_config_errors = False # collect catchable errors instead of raising
def __init__(self, name, logger, options, srv):
self.equipment_id = options.pop('equipment_id', name)
@@ -50,11 +47,7 @@ class SecNode:
self.modules = {}
self.log = logger
self.srv = srv
# set of modules that failed creation
# self.failed_modules = set()
# list of errors that occured during initialization
self.errors = []
self.traceback_counter = 0
self.error_count = 0 # count catchable errors during initialization
self.name = name
def add_secnode_property(self, prop, value):
@@ -62,6 +55,18 @@ class SecNode:
in the description."""
self.nodeprops[prop] = value
def logError(self, error):
"""log error or raise, depending on generalConfig settings
:param error: an exception or a str (considered as ConfigError)
to be used during startup
"""
if generalConfig.raise_config_errors:
raise ConfigError(error) if isinstance(error, str) else error
self.log.error(str(error))
self.error_count += 1
def get_secnode_property(self, prop):
"""Get SECNode property.
@@ -80,24 +85,20 @@ class SecNode:
# also call earlyInit on the modules
self.log.debug('initializing module %r', modulename)
# try:
modobj.earlyInit()
if not modobj.earlyInitDone:
self.errors.append(f'{modobj.earlyInit.__qualname__} was not '
f'called, probably missing super call')
self.logError(ProgrammingError(
f'module {modulename}: '
'Module.earlyInit was not called, probably missing super call'))
modobj.earlyInitDone = True
modobj.initModule()
if not modobj.initModuleDone:
self.errors.append(f'{modobj.initModule.__qualname__} was not '
f'called, probably missing super call')
# except Exception as e:
# if self.traceback_counter == 0:
# self.log.exception(traceback.format_exc())
# self.traceback_counter += 1
# self.errors.append(f'error initializing {modulename}: {e!r}')
# modobj._initfailed = True
# raise InitFailed('try to access erroneous module') from e
self.logError(ProgrammingError(
f'module {modulename}: '
'Module.initModule was not called, probably missing super call'))
modobj.initModuleDone = True
modobj._isinitialized = True
self.log.info('initialized module %r', modulename)
self.log.debug('initialized module %r', modulename)
return modobj
def get_module_instance(self, modulename):
@@ -121,35 +122,22 @@ class SecNode:
raise NoSuchModuleError(f'Module {modulename!r} does not exist on '
f'this SEC-Node!')
opts = dict(opts)
# pymodule = None
classname = opts.pop('cls')
try: # pylint: disable=no-else-return
try:
if isinstance(classname, str):
# pymodule = classname.rpartition('.')[0]
# if pymodule in self.failed_modules:
# # creation has failed already once, do not try again
# return None
cls = get_class(classname)
else:
# pymodule = classname.__module__
# if pymodule in self.failed_modules:
# # creation has failed already once, do not try again
# return None
cls = classname
if not issubclass(cls, Module):
self.errors.append(f'{cls.__name__} is not a Module')
self.logError(f'{cls.__name__} is not a Module')
return None
except Exception as e:
except AttributeError as e:
if str(e) == 'no such class':
self.errors.append(f'{classname} not found')
self.logError(f'{classname} not found')
return None
raise
else:
modobj = cls(modulename, self.log.parent.getChild(modulename),
opts, self.srv)
for errtxt in modobj.errors:
self.errors.append(' ' + errtxt)
self.add_module(modobj, modulename)
return modobj
def create_modules(self):
@@ -180,9 +168,19 @@ class SecNode:
self.log.info('Pinata %s found %d modules',
modname, len(pinata_modules))
todos.extend(pinata_modules)
# inititalize all modules
for modulename in self.modules:
self.get_module(modulename)
# initialize all modules
for modname in self.modules:
modobj = self.get_module(modname)
# check attached modules for existence
# normal properties are retrieved too, but this does not harm
for prop in modobj.propertyDict:
try:
getattr(modobj, prop)
except SECoPError as e:
if self.raise_config_errors:
raise
self.error_count += 1
modobj.logError(e)
def export_accessibles(self, modobj):
self.log.debug('export_accessibles(%r)', modobj.name)
@@ -198,9 +196,9 @@ class SecNode:
def build_descriptive_data(self):
modules = {}
result = {'modules': modules}
for modulename in list(self.modules):
for modulename in self.modules:
modobj = self.get_module(modulename)
if not modobj or not modobj.export:
if not modobj.export:
continue
# some of these need rework !
mod_desc = {'accessibles': self.export_accessibles(modobj)}
@@ -260,7 +258,7 @@ class SecNode:
now = time.time()
deadline = now + 0.5 # should be long enough for most read functions to finish
for mod in self.modules.values():
mod.joinPollThread(max(0, deadline - now))
mod.joinPollThread(max(0.0, deadline - now))
now = time.time()
for name in self._getSortedModules():
self.modules[name].shutdownModule()

View File

@@ -31,7 +31,7 @@ import time
import mlzlog
from frappy.config import load_config
from frappy.errors import ConfigError
from frappy.errors import ConfigError, ProgrammingError
from frappy.lib import formatException, generalConfig, get_class, mkthread
from frappy.lib.multievent import MultiEvent
from frappy.logging import init_remote_logging
@@ -39,6 +39,8 @@ from frappy.params import PREDEFINED_ACCESSIBLES
from frappy.secnode import SecNode
from frappy.protocol.discovery import UDPListener
generalConfig.set_default('raise_config_errors', False)
try:
from daemon import DaemonContext
try:
@@ -299,33 +301,35 @@ class Server:
for k in list(opts):
self.secnode.add_secnode_property(k, opts.pop(k))
try:
self.secnode.create_modules()
# initialize modules by calling self.secnode.get_module for all of them
# this is done in build_descriptive_data even for unexported modules
self.secnode.build_descriptive_data()
# =========== All modules are initialized ===========
# all errors from initialization process
errors = self.secnode.errors
if not self._testonly: # do not start pollers when we have errors already
if not self._testonly:
start_events = MultiEvent(default_timeout=30)
for modname, modobj in self.secnode.modules.items():
# startModule must return either a timeout value or None (default 30 sec)
start_events.name = f'module {modname}'
if self.secnode.error_count:
# do not start poll thread. check for startModuleDone only
modobj.polledModules = []
modobj.startModule(start_events)
if not modobj.startModuleDone:
errors.append(f'{modobj.startModule.__qualname__} was not called, probably missing super call')
if errors:
for errtxt in errors:
for line in errtxt.split('\n'):
self.log.error(line)
# print a list of config errors to stderr
sys.stderr.write('\n'.join(errors))
sys.stderr.write('\n')
self.secnode.log_error(ProgrammingError(
f'module {modname}: '
'Module.startModule was not called, probably missing super call'))
except Exception as e:
if self.secnode.error_count:
raise type(e)(
f'{e.args[0]} - see also {self.secnode.error_count}'
' errors logged above') from e
raise
if self.secnode.error_count:
self.log.error('%d errors during initialisation', self.secnode.error_count)
sys.exit(1)
if self._testonly:
return
self.log.info('waiting for modules being started')
@@ -334,6 +338,9 @@ class Server:
# some timeout happened
for name in start_events.waiting_for():
self.log.warning('timeout when starting %s', name)
if self.secnode.error_count:
self.log.error('%d errors during startup', self.secnode.error_count)
sys.exit(1)
self.log.info('all modules started')
history_path = os.environ.get('FRAPPY_HISTORY')
if history_path:

View File

@@ -41,7 +41,7 @@ class DispatcherStub:
# -> we have to inhibit the 'omit unchanged update' feature
def __init__(self, updates):
generalConfig.testinit(omit_unchanged_within=0)
generalConfig.testinit(omit_unchanged_within=0, raise_config_errors=True)
self.updates = updates
def announce_update(self, moduleobj, pobj):
@@ -66,7 +66,7 @@ logger = LoggerStub()
class ServerStub:
def __init__(self, updates):
self.dispatcher = DispatcherStub(updates)
self.secnode = None
self.secnode = type('SecNodeStub', (), {'raise_config_errors': True})
class DummyMultiEvent(threading.Event):