improve error handling

in the current implementation, in case polling results in an error,
an error_update is generated. But on activate, for this parameter
a normal update will be generated, indicating that this value is ok.

With this change, parameters have an additonal attribue 'readerror',
which save the error generated on the last read, or None, if the
read was successful.

In addition, subsequent repeated errors are only reported the first
time.

Change-Id: I39f18d17debadd8aa5b904e59998d2172f3f11b9
Reviewed-on: https://forge.frm2.tum.de/review/c/sine2020/secop/playground/+/21974
Tested-by: JenkinsCodeReview <bjoern_pedersen@frm2.tum.de>
Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
This commit is contained in:
zolliker 2019-12-06 15:14:16 +01:00
parent 94a539da90
commit 44eeea1159
7 changed files with 105 additions and 86 deletions

View File

@ -137,9 +137,10 @@ class ChangeWrapper:
class CmdHandlerBase:
"""generic command handler"""
# def __init__(self, group, ...):
# init must at least initialize self.group, which is is relevant for calling the
# proper analyze_<group> and change_<group> methods
def __init__(self, group):
# group is used for calling the proper analyze_<group> and change_<group> methods
self.group = group
self.parameters = {}
def parse_reply(self, reply):
"""return values from a raw reply"""
@ -159,7 +160,7 @@ class CmdHandlerBase:
If changecmd is given, it is prepended before the query.
"""
querycmd = self.make_query(module)
reply = module.sendRecv((changecmd + querycmd))
reply = module.sendRecv(changecmd + querycmd)
return self.parse_reply(reply)
def send_change(self, module, *values):
@ -173,8 +174,16 @@ class CmdHandlerBase:
module.sendRecv(changecmd) # ignore result
return self.send_command(module)
def get_read_func(self, pname):
"""returns the read function passed to the metaclass"""
def get_read_func(self, modclass, pname):
"""returns the read function passed to the metaclass
and registers the parameter in this handler
"""
if not modclass in self.parameters:
self.parameters[modclass] = []
# Make sure that parameters from different module classes are not mixed
# (not sure if this might happen)
self.parameters[modclass].append(pname)
return self.read
def read(self, module):
@ -182,10 +191,21 @@ class CmdHandlerBase:
overwrite with None if not used
"""
# do a read of the current hw values
reply = self.send_command(module)
# convert them to parameters
getattr(module, 'analyze_' + self.group)(*reply)
try:
# do a read of the current hw values
reply = self.send_command(module)
# convert them to parameters
getattr(module, 'analyze_' + self.group)(*reply)
for pname in self.parameters[module.__class__]:
if module.parameters[pname].readerror:
# clear errors on parameters, which were not updated.
# this will also inform all activated clients
setattr(module, pname, getattr(module, pname))
except Exception as e:
# set all parameters of this handler to error
for pname in self.parameters[module.__class__]:
module.setError(pname, e)
raise
return Done # parameters should be updated already
def get_write_func(self, pname):
@ -213,7 +233,6 @@ class CmdHandlerBase:
return wfunc
class CmdHandler(CmdHandlerBase):
"""more evolved command handler
@ -237,7 +256,7 @@ class CmdHandler(CmdHandlerBase):
querycmd: the command for a query, may contain named formats for cmdargs
replyfmt: the format for reading the reply with some scanf like behaviour
"""
self.group = group
super().__init__(group)
self.querycmd = querycmd
self.replyfmt = CmdParser(replyfmt)

View File

@ -112,7 +112,7 @@ class ModuleMeta(PropertyMeta):
# skip commands for now
continue
rfunc = attrs.get('read_' + pname, None)
handler = pobj.handler.get_read_func(pname) if pobj.handler else None
handler = pobj.handler.get_read_func(newtype, pname) if pobj.handler else None
if handler:
if rfunc:
raise ProgrammingError("parameter '%s' can not have a handler "
@ -129,14 +129,15 @@ class ModuleMeta(PropertyMeta):
def wrapped_rfunc(self, pname=pname, rfunc=rfunc):
if rfunc:
self.log.debug("rfunc(%s): call %r" % (pname, rfunc))
self.log.debug("calling %r" % rfunc)
try:
value = rfunc(self)
self.log.debug("rfunc(%s) returned %r" % (pname, value))
if value is Done: # the setter is already triggered
return getattr(self, pname)
except Exception as e:
pobj = self.accessibles[pname]
self.DISPATCHER.announce_update_error(self, pname, pobj, e)
self.log.debug("rfunc(%s) failed %r" % (pname, e))
self.setError(pname, e)
raise
else:
# return cached value
@ -193,7 +194,7 @@ class ModuleMeta(PropertyMeta):
if (not EVENT_ONLY_ON_CHANGED_VALUES) or (value != pobj.value):
pobj.value = value
# also send notification
if self.accessibles[pname].export:
if pobj.export:
self.log.debug('%s is now %r' % (pname, value))
self.DISPATCHER.announce_update(self, pname, pobj)

View File

@ -28,9 +28,8 @@ from collections import OrderedDict
from secop.datatypes import EnumType, FloatRange, BoolType, IntRange, \
StringType, TupleOf, get_datatype, ArrayOf, TextType
from secop.errors import ConfigError, ProgrammingError
from secop.lib import formatException, \
formatExtendedStack, mkthread
from secop.errors import ConfigError, ProgrammingError, SECoPError
from secop.lib import formatException, formatExtendedStack, mkthread
from secop.lib.enum import Enum
from secop.metaclass import ModuleMeta
from secop.params import PREDEFINED_ACCESSIBLES, Command, Override, Parameter, Parameters, Commands
@ -180,33 +179,34 @@ class Module(HasProperties, metaclass=ModuleMeta):
# 4) complain if a Parameter entry has no default value and
# is not specified in cfgdict
for k, v in self.parameters.items():
if k not in cfgdict:
if v.default is None:
if not v.poll:
for pname, pobj in self.parameters.items():
if pname not in cfgdict:
if pobj.default is None:
if not pobj.poll:
raise ConfigError('Module %s: Parameter %r has no default '
'value and was not given in config!' %
(self.name, k))
cfgdict[k] = v.datatype.default
(self.name, pname))
# we do not want to call the setter for this parameter for now,
# this should happen on the first read
pobj.readerror = ConfigError('not initialized')
# above error will be triggered on activate after startup,
# when not all hardware parameters are read because of startup timeout
pobj.value = pobj.datatype(pobj.datatype.default)
else:
cfgdict[k] = v.default
cfgdict[pname] = pobj.default
# 5) 'apply' config:
# pass values through the datatypes and store as attributes
for k, v in list(cfgdict.items()):
# apply datatype, complain if type does not fit
datatype = self.parameters[k].datatype
try:
v = datatype(v)
self.parameters[k].value = v
# this checks also for the proper datatype
# note: this will NOT call write_* methods!
setattr(self, k, v)
except (ValueError, TypeError):
self.log.exception(formatExtendedStack())
raise
# raise ConfigError('Module %s: config parameter %r:\n%r' %
# (self.name, k, e))
# note: this will NOT call write_* methods!
if k != 'value':
setattr(self, k, v)
cfgdict.pop(k)
# Modify units AFTER applying the cfgdict
@ -227,6 +227,14 @@ class Module(HasProperties, metaclass=ModuleMeta):
def __getitem__(self, item):
return self.accessibles.__getitem__(item)
def setError(self, pname, exception):
"""sets a parameter to a read error state
the error will be cleared when the parameter is set
"""
pobj = self.parameters[pname]
self.DISPATCHER.announce_update_error(self, pname, pobj, exception)
def isBusy(self):
'''helper function for treating substates of BUSY correctly'''
# defined even for non drivable (used for dynamic polling)
@ -250,6 +258,15 @@ class Module(HasProperties, metaclass=ModuleMeta):
self.log.debug('empty %s.startModule()' % self.__class__.__name__)
started_callback()
def pollOne(self, pname):
"""call read function and handle error logging"""
try:
getattr(self, 'read_' + pname)()
except SECoPError as e:
self.log.error(str(e))
except Exception as e:
self.log.error(formatException())
class Readable(Module):
"""Basic readable Module
@ -322,13 +339,7 @@ class Readable(Module):
continue
if nr % abs(int(pobj.poll)) == 0:
# pollParams every 'pobj.pollParams' iteration
rfunc = getattr(self, 'read_' + pname, None)
if rfunc:
try:
rfunc() # pylint: disable = not-callable
except Exception: # really all!
# XXX: Error-handling: send error_update !
pass
self.pollOne(pname)
return False
@ -384,13 +395,7 @@ class Drivable(Writable):
nr % abs(int(pobj.poll))) == 0:
# poll always if pobj.poll is negative and fastpoll (i.e. Module is busy)
# otherwise poll every 'pobj.poll' iteration
rfunc = getattr(self, 'read_' + pname, None)
if rfunc:
try:
rfunc() # pylint: disable = not-callable
except Exception: # really all!
# XXX: Error-handling: send error_update !
pass
self.pollOne(pname)
return fastpoll
def do_stop(self):

View File

@ -116,9 +116,6 @@ class Parameter(Accessible):
ValueType(), export=False, default=None, mandatory=False, settable=False),
}
value = None
timestamp = None
def __init__(self, description, datatype, ctr=None, unit=None, **kwds):
if ctr is not None:
@ -152,6 +149,7 @@ class Parameter(Accessible):
# internal caching: value and timestamp of last change...
self.value = self.default
self.timestamp = 0
self.readerror = None # if not None, indicates that last read was not successful
def export_value(self):
return self.datatype.export_value(self.value)

View File

@ -36,7 +36,7 @@ Usage examples:
import time
from threading import Event
from heapq import heapify, heapreplace
from secop.lib import mkthread, formatException
from secop.lib import mkthread
from secop.errors import ProgrammingError
# poll types:
@ -137,8 +137,7 @@ class Poller(PollerBase):
# later, they will be converted to heaps
for pname, pobj in module.parameters.items():
polltype = pobj.poll
rfunc = getattr(module, 'read_' + pname, None)
if not polltype or not rfunc:
if not polltype:
continue
if not hasattr(module, 'pollinterval'):
raise ProgrammingError("module %s must have a pollinterval"
@ -159,7 +158,7 @@ class Poller(PollerBase):
handlers.add(pobj.handler)
# placeholders 0 are used for due, lastdue and idx
self.queues[polltype].append((0, 0,
(0, module, pobj, rfunc, factors[polltype])))
(0, module, pobj, pname, factors[polltype])))
def poll_next(self, polltype):
'''try to poll next item
@ -177,7 +176,7 @@ class Poller(PollerBase):
due, lastdue, pollitem = queue[0]
if now < due:
return due
_, module, pobj, rfunc, factor = pollitem
_, module, pobj, pname, factor = pollitem
if polltype == DYNAMIC and not module.isBusy():
interval = module.pollinterval # effective interval
@ -187,11 +186,7 @@ class Poller(PollerBase):
mininterval = interval
due = max(lastdue + interval, pobj.timestamp + interval * 0.5)
if now >= due:
try:
rfunc()
except Exception: # really all. errors are handled within rfunc
# TODO: filter repeated errors and log just statistics
module.log.error(formatException())
module.pollOne(pname)
done = True
lastdue = due
due = max(lastdue + mininterval, now + min(self.maxwait, mininterval * 0.5))
@ -217,16 +212,13 @@ class Poller(PollerBase):
return
# do all polls once and, at the same time, insert due info
for _, queue in sorted(self.queues.items()): # do SLOW polls first
for idx, (_, _, (_, module, pobj, rfunc, factor)) in enumerate(queue):
for idx, (_, _, (_, module, pobj, pname, factor)) in enumerate(queue):
lastdue = time.time()
try:
rfunc()
except Exception: # really all. errors are handled within rfunc
module.log.error(formatException())
module.pollOne(pname)
due = lastdue + min(self.maxwait, module.pollinterval * factor)
# in python 3 comparing tuples need some care, as not all objects
# are comparable. Inserting a unique idx solves the problem.
queue[idx] = (due, lastdue, (idx, module, pobj, rfunc, factor))
queue[idx] = (due, lastdue, (idx, module, pobj, pname, factor))
heapify(queue)
started_callback() # signal end of startup
nregular = len(self.queues[REGULAR])

View File

@ -50,6 +50,16 @@ from secop.protocol.messages import COMMANDREPLY, DESCRIPTIONREPLY, \
HEARTBEATREPLY, IDENTREPLY, IDENTREQUEST, READREPLY, WRITEREPLY
def make_update(modulename, pobj):
if pobj.readerror:
return (ERRORPREFIX + EVENTREPLY, '%s:%s' % (modulename, pobj.export),
# error-report !
[pobj.readerror.name, repr(pobj.readerror), dict(t=pobj.timestamp)])
return (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
[pobj.export_value(), dict(t=pobj.timestamp)])
class Dispatcher:
def __init__(self, name, logger, options, srv):
@ -94,9 +104,8 @@ class Dispatcher:
"""called by modules param setters to notify subscribers of new values
"""
# argument pname is no longer used here - should we remove it?
msg = (EVENTREPLY, '%s:%s' % (moduleobj.name, pobj.export),
[pobj.export_value(), dict(t=pobj.timestamp)])
self.broadcast_event(msg)
pobj.readerror = None
self.broadcast_event(make_update(moduleobj.name, pobj))
def announce_update_error(self, moduleobj, pname, pobj, err):
"""called by modules param setters/getters to notify subscribers
@ -106,10 +115,11 @@ class Dispatcher:
# argument pname is no longer used here - should we remove it?
if not isinstance(err, SECoPError):
err = InternalError(err)
msg = (ERRORPREFIX + EVENTREPLY, '%s:%s' % (moduleobj.name, pobj.export),
# error-report !
[err.name, repr(err), dict(t=currenttime())])
self.broadcast_event(msg)
if str(err) == str(pobj.readerror):
return # do not send updates for repeated errors
pobj.readerror = err
pobj.timestamp = currenttime() # indicates the first time this error appeared
self.broadcast_event(make_update(moduleobj.name, pobj))
def subscribe(self, conn, eventname):
self._subscriptions.setdefault(eventname, set()).add(conn)
@ -364,20 +374,11 @@ class Dispatcher:
for modulename, pname in modules:
moduleobj = self._modules.get(modulename, None)
if pname:
pobj = moduleobj.accessibles[pname]
updmsg = (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
[pobj.export_value(), dict(t=pobj.timestamp)])
conn.queue_async_reply(updmsg)
conn.queue_async_reply(make_update(modulename, moduleobj.parameters[pname]))
continue
for pobj in moduleobj.accessibles.values():
if not isinstance(pobj, Parameter):
continue
if not pobj.export:
continue
# can not use announce_update here, as this will send to all clients
updmsg = (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
[pobj.export_value(), dict(t=pobj.timestamp)])
conn.queue_async_reply(updmsg)
if isinstance(pobj, Parameter) and pobj.export:
conn.queue_async_reply(make_update(modulename, pobj))
return (ENABLEEVENTSREPLY, specifier, None) if specifier else (ENABLEEVENTSREPLY, None, None)
def handle_deactivate(self, conn, specifier, data):

View File

@ -157,6 +157,9 @@ class Module:
def isBusy(self):
return self.is_busy
def pollOne(self, pname):
getattr(self, 'read_' + pname)()
def __repr__(self):
rdict = self.__dict__.copy()
rdict.pop('parameters')