improve error handling

in the current implementation, in case polling results in an error,
an error_update is generated. But on activate, for this parameter
a normal update will be generated, indicating that this value is ok.

With this change, parameters have an additonal attribue 'readerror',
which save the error generated on the last read, or None, if the
read was successful.

In addition, subsequent repeated errors are only reported the first
time.

Change-Id: I39f18d17debadd8aa5b904e59998d2172f3f11b9
Reviewed-on: https://forge.frm2.tum.de/review/c/sine2020/secop/playground/+/21974
Tested-by: JenkinsCodeReview <bjoern_pedersen@frm2.tum.de>
Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
This commit is contained in:
2019-12-06 15:14:16 +01:00
parent 94a539da90
commit 44eeea1159
7 changed files with 105 additions and 86 deletions

View File

@@ -137,9 +137,10 @@ class ChangeWrapper:
class CmdHandlerBase: class CmdHandlerBase:
"""generic command handler""" """generic command handler"""
# def __init__(self, group, ...): def __init__(self, group):
# init must at least initialize self.group, which is is relevant for calling the # group is used for calling the proper analyze_<group> and change_<group> methods
# proper analyze_<group> and change_<group> methods self.group = group
self.parameters = {}
def parse_reply(self, reply): def parse_reply(self, reply):
"""return values from a raw reply""" """return values from a raw reply"""
@@ -159,7 +160,7 @@ class CmdHandlerBase:
If changecmd is given, it is prepended before the query. If changecmd is given, it is prepended before the query.
""" """
querycmd = self.make_query(module) querycmd = self.make_query(module)
reply = module.sendRecv((changecmd + querycmd)) reply = module.sendRecv(changecmd + querycmd)
return self.parse_reply(reply) return self.parse_reply(reply)
def send_change(self, module, *values): def send_change(self, module, *values):
@@ -173,8 +174,16 @@ class CmdHandlerBase:
module.sendRecv(changecmd) # ignore result module.sendRecv(changecmd) # ignore result
return self.send_command(module) return self.send_command(module)
def get_read_func(self, pname): def get_read_func(self, modclass, pname):
"""returns the read function passed to the metaclass""" """returns the read function passed to the metaclass
and registers the parameter in this handler
"""
if not modclass in self.parameters:
self.parameters[modclass] = []
# Make sure that parameters from different module classes are not mixed
# (not sure if this might happen)
self.parameters[modclass].append(pname)
return self.read return self.read
def read(self, module): def read(self, module):
@@ -182,10 +191,21 @@ class CmdHandlerBase:
overwrite with None if not used overwrite with None if not used
""" """
# do a read of the current hw values try:
reply = self.send_command(module) # do a read of the current hw values
# convert them to parameters reply = self.send_command(module)
getattr(module, 'analyze_' + self.group)(*reply) # convert them to parameters
getattr(module, 'analyze_' + self.group)(*reply)
for pname in self.parameters[module.__class__]:
if module.parameters[pname].readerror:
# clear errors on parameters, which were not updated.
# this will also inform all activated clients
setattr(module, pname, getattr(module, pname))
except Exception as e:
# set all parameters of this handler to error
for pname in self.parameters[module.__class__]:
module.setError(pname, e)
raise
return Done # parameters should be updated already return Done # parameters should be updated already
def get_write_func(self, pname): def get_write_func(self, pname):
@@ -213,7 +233,6 @@ class CmdHandlerBase:
return wfunc return wfunc
class CmdHandler(CmdHandlerBase): class CmdHandler(CmdHandlerBase):
"""more evolved command handler """more evolved command handler
@@ -237,7 +256,7 @@ class CmdHandler(CmdHandlerBase):
querycmd: the command for a query, may contain named formats for cmdargs querycmd: the command for a query, may contain named formats for cmdargs
replyfmt: the format for reading the reply with some scanf like behaviour replyfmt: the format for reading the reply with some scanf like behaviour
""" """
self.group = group super().__init__(group)
self.querycmd = querycmd self.querycmd = querycmd
self.replyfmt = CmdParser(replyfmt) self.replyfmt = CmdParser(replyfmt)

View File

@@ -112,7 +112,7 @@ class ModuleMeta(PropertyMeta):
# skip commands for now # skip commands for now
continue continue
rfunc = attrs.get('read_' + pname, None) rfunc = attrs.get('read_' + pname, None)
handler = pobj.handler.get_read_func(pname) if pobj.handler else None handler = pobj.handler.get_read_func(newtype, pname) if pobj.handler else None
if handler: if handler:
if rfunc: if rfunc:
raise ProgrammingError("parameter '%s' can not have a handler " raise ProgrammingError("parameter '%s' can not have a handler "
@@ -129,14 +129,15 @@ class ModuleMeta(PropertyMeta):
def wrapped_rfunc(self, pname=pname, rfunc=rfunc): def wrapped_rfunc(self, pname=pname, rfunc=rfunc):
if rfunc: if rfunc:
self.log.debug("rfunc(%s): call %r" % (pname, rfunc)) self.log.debug("calling %r" % rfunc)
try: try:
value = rfunc(self) value = rfunc(self)
self.log.debug("rfunc(%s) returned %r" % (pname, value))
if value is Done: # the setter is already triggered if value is Done: # the setter is already triggered
return getattr(self, pname) return getattr(self, pname)
except Exception as e: except Exception as e:
pobj = self.accessibles[pname] self.log.debug("rfunc(%s) failed %r" % (pname, e))
self.DISPATCHER.announce_update_error(self, pname, pobj, e) self.setError(pname, e)
raise raise
else: else:
# return cached value # return cached value
@@ -193,7 +194,7 @@ class ModuleMeta(PropertyMeta):
if (not EVENT_ONLY_ON_CHANGED_VALUES) or (value != pobj.value): if (not EVENT_ONLY_ON_CHANGED_VALUES) or (value != pobj.value):
pobj.value = value pobj.value = value
# also send notification # also send notification
if self.accessibles[pname].export: if pobj.export:
self.log.debug('%s is now %r' % (pname, value)) self.log.debug('%s is now %r' % (pname, value))
self.DISPATCHER.announce_update(self, pname, pobj) self.DISPATCHER.announce_update(self, pname, pobj)

View File

@@ -28,9 +28,8 @@ from collections import OrderedDict
from secop.datatypes import EnumType, FloatRange, BoolType, IntRange, \ from secop.datatypes import EnumType, FloatRange, BoolType, IntRange, \
StringType, TupleOf, get_datatype, ArrayOf, TextType StringType, TupleOf, get_datatype, ArrayOf, TextType
from secop.errors import ConfigError, ProgrammingError from secop.errors import ConfigError, ProgrammingError, SECoPError
from secop.lib import formatException, \ from secop.lib import formatException, formatExtendedStack, mkthread
formatExtendedStack, mkthread
from secop.lib.enum import Enum from secop.lib.enum import Enum
from secop.metaclass import ModuleMeta from secop.metaclass import ModuleMeta
from secop.params import PREDEFINED_ACCESSIBLES, Command, Override, Parameter, Parameters, Commands from secop.params import PREDEFINED_ACCESSIBLES, Command, Override, Parameter, Parameters, Commands
@@ -180,33 +179,34 @@ class Module(HasProperties, metaclass=ModuleMeta):
# 4) complain if a Parameter entry has no default value and # 4) complain if a Parameter entry has no default value and
# is not specified in cfgdict # is not specified in cfgdict
for k, v in self.parameters.items(): for pname, pobj in self.parameters.items():
if k not in cfgdict: if pname not in cfgdict:
if v.default is None: if pobj.default is None:
if not v.poll: if not pobj.poll:
raise ConfigError('Module %s: Parameter %r has no default ' raise ConfigError('Module %s: Parameter %r has no default '
'value and was not given in config!' % 'value and was not given in config!' %
(self.name, k)) (self.name, pname))
cfgdict[k] = v.datatype.default # we do not want to call the setter for this parameter for now,
# this should happen on the first read
pobj.readerror = ConfigError('not initialized')
# above error will be triggered on activate after startup,
# when not all hardware parameters are read because of startup timeout
pobj.value = pobj.datatype(pobj.datatype.default)
else: else:
cfgdict[k] = v.default cfgdict[pname] = pobj.default
# 5) 'apply' config: # 5) 'apply' config:
# pass values through the datatypes and store as attributes # pass values through the datatypes and store as attributes
for k, v in list(cfgdict.items()): for k, v in list(cfgdict.items()):
# apply datatype, complain if type does not fit
datatype = self.parameters[k].datatype
try: try:
v = datatype(v) # this checks also for the proper datatype
self.parameters[k].value = v # note: this will NOT call write_* methods!
setattr(self, k, v)
except (ValueError, TypeError): except (ValueError, TypeError):
self.log.exception(formatExtendedStack()) self.log.exception(formatExtendedStack())
raise raise
# raise ConfigError('Module %s: config parameter %r:\n%r' % # raise ConfigError('Module %s: config parameter %r:\n%r' %
# (self.name, k, e)) # (self.name, k, e))
# note: this will NOT call write_* methods!
if k != 'value':
setattr(self, k, v)
cfgdict.pop(k) cfgdict.pop(k)
# Modify units AFTER applying the cfgdict # Modify units AFTER applying the cfgdict
@@ -227,6 +227,14 @@ class Module(HasProperties, metaclass=ModuleMeta):
def __getitem__(self, item): def __getitem__(self, item):
return self.accessibles.__getitem__(item) return self.accessibles.__getitem__(item)
def setError(self, pname, exception):
"""sets a parameter to a read error state
the error will be cleared when the parameter is set
"""
pobj = self.parameters[pname]
self.DISPATCHER.announce_update_error(self, pname, pobj, exception)
def isBusy(self): def isBusy(self):
'''helper function for treating substates of BUSY correctly''' '''helper function for treating substates of BUSY correctly'''
# defined even for non drivable (used for dynamic polling) # defined even for non drivable (used for dynamic polling)
@@ -250,6 +258,15 @@ class Module(HasProperties, metaclass=ModuleMeta):
self.log.debug('empty %s.startModule()' % self.__class__.__name__) self.log.debug('empty %s.startModule()' % self.__class__.__name__)
started_callback() started_callback()
def pollOne(self, pname):
"""call read function and handle error logging"""
try:
getattr(self, 'read_' + pname)()
except SECoPError as e:
self.log.error(str(e))
except Exception as e:
self.log.error(formatException())
class Readable(Module): class Readable(Module):
"""Basic readable Module """Basic readable Module
@@ -322,13 +339,7 @@ class Readable(Module):
continue continue
if nr % abs(int(pobj.poll)) == 0: if nr % abs(int(pobj.poll)) == 0:
# pollParams every 'pobj.pollParams' iteration # pollParams every 'pobj.pollParams' iteration
rfunc = getattr(self, 'read_' + pname, None) self.pollOne(pname)
if rfunc:
try:
rfunc() # pylint: disable = not-callable
except Exception: # really all!
# XXX: Error-handling: send error_update !
pass
return False return False
@@ -384,13 +395,7 @@ class Drivable(Writable):
nr % abs(int(pobj.poll))) == 0: nr % abs(int(pobj.poll))) == 0:
# poll always if pobj.poll is negative and fastpoll (i.e. Module is busy) # poll always if pobj.poll is negative and fastpoll (i.e. Module is busy)
# otherwise poll every 'pobj.poll' iteration # otherwise poll every 'pobj.poll' iteration
rfunc = getattr(self, 'read_' + pname, None) self.pollOne(pname)
if rfunc:
try:
rfunc() # pylint: disable = not-callable
except Exception: # really all!
# XXX: Error-handling: send error_update !
pass
return fastpoll return fastpoll
def do_stop(self): def do_stop(self):

View File

@@ -116,9 +116,6 @@ class Parameter(Accessible):
ValueType(), export=False, default=None, mandatory=False, settable=False), ValueType(), export=False, default=None, mandatory=False, settable=False),
} }
value = None
timestamp = None
def __init__(self, description, datatype, ctr=None, unit=None, **kwds): def __init__(self, description, datatype, ctr=None, unit=None, **kwds):
if ctr is not None: if ctr is not None:
@@ -152,6 +149,7 @@ class Parameter(Accessible):
# internal caching: value and timestamp of last change... # internal caching: value and timestamp of last change...
self.value = self.default self.value = self.default
self.timestamp = 0 self.timestamp = 0
self.readerror = None # if not None, indicates that last read was not successful
def export_value(self): def export_value(self):
return self.datatype.export_value(self.value) return self.datatype.export_value(self.value)

View File

@@ -36,7 +36,7 @@ Usage examples:
import time import time
from threading import Event from threading import Event
from heapq import heapify, heapreplace from heapq import heapify, heapreplace
from secop.lib import mkthread, formatException from secop.lib import mkthread
from secop.errors import ProgrammingError from secop.errors import ProgrammingError
# poll types: # poll types:
@@ -137,8 +137,7 @@ class Poller(PollerBase):
# later, they will be converted to heaps # later, they will be converted to heaps
for pname, pobj in module.parameters.items(): for pname, pobj in module.parameters.items():
polltype = pobj.poll polltype = pobj.poll
rfunc = getattr(module, 'read_' + pname, None) if not polltype:
if not polltype or not rfunc:
continue continue
if not hasattr(module, 'pollinterval'): if not hasattr(module, 'pollinterval'):
raise ProgrammingError("module %s must have a pollinterval" raise ProgrammingError("module %s must have a pollinterval"
@@ -159,7 +158,7 @@ class Poller(PollerBase):
handlers.add(pobj.handler) handlers.add(pobj.handler)
# placeholders 0 are used for due, lastdue and idx # placeholders 0 are used for due, lastdue and idx
self.queues[polltype].append((0, 0, self.queues[polltype].append((0, 0,
(0, module, pobj, rfunc, factors[polltype]))) (0, module, pobj, pname, factors[polltype])))
def poll_next(self, polltype): def poll_next(self, polltype):
'''try to poll next item '''try to poll next item
@@ -177,7 +176,7 @@ class Poller(PollerBase):
due, lastdue, pollitem = queue[0] due, lastdue, pollitem = queue[0]
if now < due: if now < due:
return due return due
_, module, pobj, rfunc, factor = pollitem _, module, pobj, pname, factor = pollitem
if polltype == DYNAMIC and not module.isBusy(): if polltype == DYNAMIC and not module.isBusy():
interval = module.pollinterval # effective interval interval = module.pollinterval # effective interval
@@ -187,11 +186,7 @@ class Poller(PollerBase):
mininterval = interval mininterval = interval
due = max(lastdue + interval, pobj.timestamp + interval * 0.5) due = max(lastdue + interval, pobj.timestamp + interval * 0.5)
if now >= due: if now >= due:
try: module.pollOne(pname)
rfunc()
except Exception: # really all. errors are handled within rfunc
# TODO: filter repeated errors and log just statistics
module.log.error(formatException())
done = True done = True
lastdue = due lastdue = due
due = max(lastdue + mininterval, now + min(self.maxwait, mininterval * 0.5)) due = max(lastdue + mininterval, now + min(self.maxwait, mininterval * 0.5))
@@ -217,16 +212,13 @@ class Poller(PollerBase):
return return
# do all polls once and, at the same time, insert due info # do all polls once and, at the same time, insert due info
for _, queue in sorted(self.queues.items()): # do SLOW polls first for _, queue in sorted(self.queues.items()): # do SLOW polls first
for idx, (_, _, (_, module, pobj, rfunc, factor)) in enumerate(queue): for idx, (_, _, (_, module, pobj, pname, factor)) in enumerate(queue):
lastdue = time.time() lastdue = time.time()
try: module.pollOne(pname)
rfunc()
except Exception: # really all. errors are handled within rfunc
module.log.error(formatException())
due = lastdue + min(self.maxwait, module.pollinterval * factor) due = lastdue + min(self.maxwait, module.pollinterval * factor)
# in python 3 comparing tuples need some care, as not all objects # in python 3 comparing tuples need some care, as not all objects
# are comparable. Inserting a unique idx solves the problem. # are comparable. Inserting a unique idx solves the problem.
queue[idx] = (due, lastdue, (idx, module, pobj, rfunc, factor)) queue[idx] = (due, lastdue, (idx, module, pobj, pname, factor))
heapify(queue) heapify(queue)
started_callback() # signal end of startup started_callback() # signal end of startup
nregular = len(self.queues[REGULAR]) nregular = len(self.queues[REGULAR])

View File

@@ -50,6 +50,16 @@ from secop.protocol.messages import COMMANDREPLY, DESCRIPTIONREPLY, \
HEARTBEATREPLY, IDENTREPLY, IDENTREQUEST, READREPLY, WRITEREPLY HEARTBEATREPLY, IDENTREPLY, IDENTREQUEST, READREPLY, WRITEREPLY
def make_update(modulename, pobj):
if pobj.readerror:
return (ERRORPREFIX + EVENTREPLY, '%s:%s' % (modulename, pobj.export),
# error-report !
[pobj.readerror.name, repr(pobj.readerror), dict(t=pobj.timestamp)])
return (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
[pobj.export_value(), dict(t=pobj.timestamp)])
class Dispatcher: class Dispatcher:
def __init__(self, name, logger, options, srv): def __init__(self, name, logger, options, srv):
@@ -94,9 +104,8 @@ class Dispatcher:
"""called by modules param setters to notify subscribers of new values """called by modules param setters to notify subscribers of new values
""" """
# argument pname is no longer used here - should we remove it? # argument pname is no longer used here - should we remove it?
msg = (EVENTREPLY, '%s:%s' % (moduleobj.name, pobj.export), pobj.readerror = None
[pobj.export_value(), dict(t=pobj.timestamp)]) self.broadcast_event(make_update(moduleobj.name, pobj))
self.broadcast_event(msg)
def announce_update_error(self, moduleobj, pname, pobj, err): def announce_update_error(self, moduleobj, pname, pobj, err):
"""called by modules param setters/getters to notify subscribers """called by modules param setters/getters to notify subscribers
@@ -106,10 +115,11 @@ class Dispatcher:
# argument pname is no longer used here - should we remove it? # argument pname is no longer used here - should we remove it?
if not isinstance(err, SECoPError): if not isinstance(err, SECoPError):
err = InternalError(err) err = InternalError(err)
msg = (ERRORPREFIX + EVENTREPLY, '%s:%s' % (moduleobj.name, pobj.export), if str(err) == str(pobj.readerror):
# error-report ! return # do not send updates for repeated errors
[err.name, repr(err), dict(t=currenttime())]) pobj.readerror = err
self.broadcast_event(msg) pobj.timestamp = currenttime() # indicates the first time this error appeared
self.broadcast_event(make_update(moduleobj.name, pobj))
def subscribe(self, conn, eventname): def subscribe(self, conn, eventname):
self._subscriptions.setdefault(eventname, set()).add(conn) self._subscriptions.setdefault(eventname, set()).add(conn)
@@ -364,20 +374,11 @@ class Dispatcher:
for modulename, pname in modules: for modulename, pname in modules:
moduleobj = self._modules.get(modulename, None) moduleobj = self._modules.get(modulename, None)
if pname: if pname:
pobj = moduleobj.accessibles[pname] conn.queue_async_reply(make_update(modulename, moduleobj.parameters[pname]))
updmsg = (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
[pobj.export_value(), dict(t=pobj.timestamp)])
conn.queue_async_reply(updmsg)
continue continue
for pobj in moduleobj.accessibles.values(): for pobj in moduleobj.accessibles.values():
if not isinstance(pobj, Parameter): if isinstance(pobj, Parameter) and pobj.export:
continue conn.queue_async_reply(make_update(modulename, pobj))
if not pobj.export:
continue
# can not use announce_update here, as this will send to all clients
updmsg = (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
[pobj.export_value(), dict(t=pobj.timestamp)])
conn.queue_async_reply(updmsg)
return (ENABLEEVENTSREPLY, specifier, None) if specifier else (ENABLEEVENTSREPLY, None, None) return (ENABLEEVENTSREPLY, specifier, None) if specifier else (ENABLEEVENTSREPLY, None, None)
def handle_deactivate(self, conn, specifier, data): def handle_deactivate(self, conn, specifier, data):

View File

@@ -157,6 +157,9 @@ class Module:
def isBusy(self): def isBusy(self):
return self.is_busy return self.is_busy
def pollOne(self, pname):
getattr(self, 'read_' + pname)()
def __repr__(self): def __repr__(self):
rdict = self.__dict__.copy() rdict = self.__dict__.copy()
rdict.pop('parameters') rdict.pop('parameters')