improve error handling

in the current implementation, in case polling results in an error, an error_update is generated. But on activate, for this parameter a normal update will be generated, indicating that this value is ok. With this change, parameters have an additonal attribue 'readerror', which save the error generated on the last read, or None, if the read was successful. In addition, subsequent repeated errors are only reported the first time. Change-Id: I39f18d17debadd8aa5b904e59998d2172f3f11b9 Reviewed-on: https://forge.frm2.tum.de/review/c/sine2020/secop/playground/+/21974 Tested-by: JenkinsCodeReview <bjoern_pedersen@frm2.tum.de> Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
2019-12-06 15:14:16 +01:00
parent 94a539da90
commit 44eeea1159
7 changed files with 105 additions and 86 deletions
--- a/secop/commandhandler.py
+++ b/secop/commandhandler.py
@ -137,9 +137,10 @@ class ChangeWrapper:
 class CmdHandlerBase:
    """generic command handler"""

-    # def __init__(self, group, ...):
-    #     init must at least initialize self.group, which is is relevant for calling the
-    # proper analyze_<group> and change_<group> methods
+    def __init__(self, group):
+        # group is used for calling the proper analyze_<group> and change_<group> methods
+        self.group = group
+        self.parameters = {}

    def parse_reply(self, reply):
        """return values from a raw reply"""
@ -159,7 +160,7 @@ class CmdHandlerBase:
        If changecmd is given, it is prepended before the query.
        """
        querycmd = self.make_query(module)
-        reply = module.sendRecv((changecmd + querycmd))
+        reply = module.sendRecv(changecmd + querycmd)
        return self.parse_reply(reply)

    def send_change(self, module, *values):
@ -173,8 +174,16 @@ class CmdHandlerBase:
        module.sendRecv(changecmd) # ignore result
        return self.send_command(module)

-    def get_read_func(self, pname):
-        """returns the read function passed to the metaclass"""
+    def get_read_func(self, modclass, pname):
+        """returns the read function passed to the metaclass
+
+        and registers the parameter in this handler
+        """
+        if not modclass in self.parameters:
+            self.parameters[modclass] = []
+        # Make sure that parameters from different module classes are not mixed
+        # (not sure if this might happen)
+        self.parameters[modclass].append(pname)
        return self.read

    def read(self, module):
@ -182,10 +191,21 @@ class CmdHandlerBase:

        overwrite with None if not used
        """
-        # do a read of the current hw values
-        reply = self.send_command(module)
-        # convert them to parameters
-        getattr(module, 'analyze_' + self.group)(*reply)
+        try:
+            # do a read of the current hw values
+            reply = self.send_command(module)
+            # convert them to parameters
+            getattr(module, 'analyze_' + self.group)(*reply)
+            for pname in self.parameters[module.__class__]:
+                if module.parameters[pname].readerror:
+                    # clear errors on parameters, which were not updated.
+                    # this will also inform all activated clients
+                    setattr(module, pname, getattr(module, pname))
+        except Exception as e:
+            # set all parameters of this handler to error
+            for pname in self.parameters[module.__class__]:
+                module.setError(pname, e)
+            raise
        return Done # parameters should be updated already

    def get_write_func(self, pname):
@ -213,7 +233,6 @@ class CmdHandlerBase:
        return wfunc


-
 class CmdHandler(CmdHandlerBase):
    """more evolved command handler

@ -237,7 +256,7 @@ class CmdHandler(CmdHandlerBase):
        querycmd:  the command for a query, may contain named formats for cmdargs
        replyfmt:  the format for reading the reply with some scanf like behaviour
        """
-        self.group = group
+        super().__init__(group)
        self.querycmd = querycmd
        self.replyfmt = CmdParser(replyfmt)

--- a/secop/metaclass.py
+++ b/secop/metaclass.py
@ -112,7 +112,7 @@ class ModuleMeta(PropertyMeta):
                # skip commands for now
                continue
            rfunc = attrs.get('read_' + pname, None)
-            handler = pobj.handler.get_read_func(pname) if pobj.handler else None
+            handler = pobj.handler.get_read_func(newtype, pname) if pobj.handler else None
            if handler:
                if rfunc:
                    raise ProgrammingError("parameter '%s' can not have a handler "
@ -129,14 +129,15 @@ class ModuleMeta(PropertyMeta):

                def wrapped_rfunc(self, pname=pname, rfunc=rfunc):
                    if rfunc:
-                        self.log.debug("rfunc(%s): call %r" % (pname, rfunc))
+                        self.log.debug("calling %r" % rfunc)
                        try:
                            value = rfunc(self)
+                            self.log.debug("rfunc(%s) returned %r" % (pname, value))
                            if value is Done: # the setter is already triggered
                                return getattr(self, pname)
                        except Exception as e:
-                            pobj = self.accessibles[pname]
-                            self.DISPATCHER.announce_update_error(self, pname, pobj, e)
+                            self.log.debug("rfunc(%s) failed %r" % (pname, e))
+                            self.setError(pname, e)
                            raise
                    else:
                        # return cached value
@ -193,7 +194,7 @@ class ModuleMeta(PropertyMeta):
                if (not EVENT_ONLY_ON_CHANGED_VALUES) or (value != pobj.value):
                    pobj.value = value
                    # also send notification
-                    if self.accessibles[pname].export:
+                    if pobj.export:
                        self.log.debug('%s is now %r' % (pname, value))
                        self.DISPATCHER.announce_update(self, pname, pobj)

--- a/secop/modules.py
+++ b/secop/modules.py
@ -28,9 +28,8 @@ from collections import OrderedDict

 from secop.datatypes import EnumType, FloatRange, BoolType, IntRange, \
    StringType, TupleOf, get_datatype, ArrayOf, TextType
-from secop.errors import ConfigError, ProgrammingError
-from secop.lib import formatException, \
-    formatExtendedStack, mkthread
+from secop.errors import ConfigError, ProgrammingError, SECoPError
+from secop.lib import formatException, formatExtendedStack, mkthread
 from secop.lib.enum import Enum
 from secop.metaclass import ModuleMeta
 from secop.params import PREDEFINED_ACCESSIBLES, Command, Override, Parameter, Parameters, Commands
@ -180,33 +179,34 @@ class Module(HasProperties, metaclass=ModuleMeta):

        # 4) complain if a Parameter entry has no default value and
        #    is not specified in cfgdict
-        for k, v in self.parameters.items():
-            if k not in cfgdict:
-                if v.default is None:
-                    if not v.poll:
+        for pname, pobj in self.parameters.items():
+            if pname not in cfgdict:
+                if pobj.default is None:
+                    if not pobj.poll:
                        raise ConfigError('Module %s: Parameter %r has no default '
                                          'value and was not given in config!' %
-                                          (self.name, k))
-                    cfgdict[k] = v.datatype.default
+                                          (self.name, pname))
+                    # we do not want to call the setter for this parameter for now,
+                    # this should happen on the first read
+                    pobj.readerror = ConfigError('not initialized')
+                    # above error will be triggered on activate after startup,
+                    # when not all hardware parameters are read because of startup timeout
+                    pobj.value = pobj.datatype(pobj.datatype.default)
                else:
-                    cfgdict[k] = v.default
+                    cfgdict[pname] = pobj.default

        # 5) 'apply' config:
        #    pass values through the datatypes and store as attributes
        for k, v in list(cfgdict.items()):
-            # apply datatype, complain if type does not fit
-            datatype = self.parameters[k].datatype
            try:
-                v = datatype(v)
-                self.parameters[k].value = v
+                # this checks also for the proper datatype
+                # note: this will NOT call write_* methods!
+                setattr(self, k, v)
            except (ValueError, TypeError):
                self.log.exception(formatExtendedStack())
                raise
                # raise ConfigError('Module %s: config parameter %r:\n%r' %
                #                   (self.name, k, e))
-            # note: this will NOT call write_* methods!
-            if k != 'value':
-                setattr(self, k, v)
            cfgdict.pop(k)

        # Modify units AFTER applying the cfgdict
@ -227,6 +227,14 @@ class Module(HasProperties, metaclass=ModuleMeta):
    def __getitem__(self, item):
        return self.accessibles.__getitem__(item)

+    def setError(self, pname, exception):
+        """sets a parameter to a read error state
+
+        the error will be cleared when the parameter is set
+        """
+        pobj = self.parameters[pname]
+        self.DISPATCHER.announce_update_error(self, pname, pobj, exception)
+
    def isBusy(self):
        '''helper function for treating substates of BUSY correctly'''
        # defined even for non drivable (used for dynamic polling)
@ -250,6 +258,15 @@ class Module(HasProperties, metaclass=ModuleMeta):
        self.log.debug('empty %s.startModule()' % self.__class__.__name__)
        started_callback()

+    def pollOne(self, pname):
+        """call read function and handle error logging"""
+        try:
+            getattr(self, 'read_' + pname)()
+        except SECoPError as e:
+            self.log.error(str(e))
+        except Exception as e:
+            self.log.error(formatException())
+

 class Readable(Module):
    """Basic readable Module
@ -322,13 +339,7 @@ class Readable(Module):
                continue
            if nr % abs(int(pobj.poll)) == 0:
                # pollParams every 'pobj.pollParams' iteration
-                rfunc = getattr(self, 'read_' + pname, None)
-                if rfunc:
-                    try:
-                        rfunc()  # pylint: disable = not-callable
-                    except Exception:  # really all!
-                        # XXX: Error-handling: send error_update !
-                        pass
+                self.pollOne(pname)
        return False


@ -384,13 +395,7 @@ class Drivable(Writable):
                    nr % abs(int(pobj.poll))) == 0:
                # poll always if pobj.poll is negative and fastpoll (i.e. Module is busy)
                # otherwise poll every 'pobj.poll' iteration
-                rfunc = getattr(self, 'read_' + pname, None)
-                if rfunc:
-                    try:
-                        rfunc()  # pylint: disable = not-callable
-                    except Exception:  # really all!
-                        # XXX: Error-handling: send error_update !
-                        pass
+                self.pollOne(pname)
        return fastpoll

    def do_stop(self):
--- a/secop/params.py
+++ b/secop/params.py
@ -116,9 +116,6 @@ class Parameter(Accessible):
                                 ValueType(), export=False, default=None, mandatory=False, settable=False),
    }

-    value = None
-    timestamp = None
-
    def __init__(self, description, datatype, ctr=None, unit=None, **kwds):

        if ctr is not None:
@ -152,6 +149,7 @@ class Parameter(Accessible):
        # internal caching: value and timestamp of last change...
        self.value = self.default
        self.timestamp = 0
+        self.readerror = None # if not None, indicates that last read was not successful

    def export_value(self):
        return self.datatype.export_value(self.value)
--- a/secop/poller.py
+++ b/secop/poller.py
@ -36,7 +36,7 @@ Usage examples:
 import time
 from threading import Event
 from heapq import heapify, heapreplace
-from secop.lib import mkthread, formatException
+from secop.lib import mkthread
 from secop.errors import ProgrammingError

 # poll types:
@ -137,8 +137,7 @@ class Poller(PollerBase):
        # later, they will be converted to heaps
        for pname, pobj in module.parameters.items():
            polltype = pobj.poll
-            rfunc = getattr(module, 'read_' + pname, None)
-            if not polltype or not rfunc:
+            if not polltype:
                continue
            if not hasattr(module, 'pollinterval'):
                raise ProgrammingError("module %s must have a pollinterval"
@ -159,7 +158,7 @@ class Poller(PollerBase):
                handlers.add(pobj.handler)
            # placeholders 0 are used for due, lastdue and idx
            self.queues[polltype].append((0, 0,
-                (0, module, pobj, rfunc, factors[polltype])))
+                (0, module, pobj, pname, factors[polltype])))

    def poll_next(self, polltype):
        '''try to poll next item
@ -177,7 +176,7 @@ class Poller(PollerBase):
            due, lastdue, pollitem = queue[0]
            if now < due:
                return due
-            _, module, pobj, rfunc, factor = pollitem
+            _, module, pobj, pname, factor = pollitem

            if polltype == DYNAMIC and not module.isBusy():
                interval = module.pollinterval   # effective interval
@ -187,11 +186,7 @@ class Poller(PollerBase):
                mininterval = interval
            due = max(lastdue + interval, pobj.timestamp + interval * 0.5)
            if now >= due:
-                try:
-                    rfunc()
-                except Exception: # really all. errors are handled within rfunc
-                    # TODO: filter repeated errors and log just statistics
-                    module.log.error(formatException())
+                module.pollOne(pname)
                done = True
                lastdue = due
                due = max(lastdue + mininterval, now + min(self.maxwait, mininterval * 0.5))
@ -217,16 +212,13 @@ class Poller(PollerBase):
            return
        # do all polls once and, at the same time, insert due info
        for _, queue in sorted(self.queues.items()): # do SLOW polls first
-            for idx, (_, _, (_, module, pobj, rfunc, factor)) in enumerate(queue):
+            for idx, (_, _, (_, module, pobj, pname, factor)) in enumerate(queue):
                lastdue = time.time()
-                try:
-                    rfunc()
-                except Exception: # really all. errors are handled within rfunc
-                    module.log.error(formatException())
+                module.pollOne(pname)
                due = lastdue + min(self.maxwait, module.pollinterval * factor)
                # in python 3 comparing tuples need some care, as not all objects
                # are comparable. Inserting a unique idx solves the problem.
-                queue[idx] = (due, lastdue, (idx, module, pobj, rfunc, factor))
+                queue[idx] = (due, lastdue, (idx, module, pobj, pname, factor))
            heapify(queue)
        started_callback() # signal end of startup
        nregular = len(self.queues[REGULAR])
--- a/secop/protocol/dispatcher.py
+++ b/secop/protocol/dispatcher.py
@ -50,6 +50,16 @@ from secop.protocol.messages import COMMANDREPLY, DESCRIPTIONREPLY, \
    HEARTBEATREPLY, IDENTREPLY, IDENTREQUEST, READREPLY, WRITEREPLY


+
+def make_update(modulename, pobj):
+    if pobj.readerror:
+        return (ERRORPREFIX + EVENTREPLY, '%s:%s' % (modulename, pobj.export),
+               # error-report !
+               [pobj.readerror.name, repr(pobj.readerror), dict(t=pobj.timestamp)])
+    return (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
+               [pobj.export_value(), dict(t=pobj.timestamp)])
+
+
 class Dispatcher:

    def __init__(self, name, logger, options, srv):
@ -94,9 +104,8 @@ class Dispatcher:
        """called by modules param setters to notify subscribers of new values
        """
        # argument pname is no longer used here - should we remove it?
-        msg = (EVENTREPLY, '%s:%s' % (moduleobj.name, pobj.export),
-               [pobj.export_value(), dict(t=pobj.timestamp)])
-        self.broadcast_event(msg)
+        pobj.readerror = None
+        self.broadcast_event(make_update(moduleobj.name, pobj))

    def announce_update_error(self, moduleobj, pname, pobj, err):
        """called by modules param setters/getters to notify subscribers
@ -106,10 +115,11 @@ class Dispatcher:
        # argument pname is no longer used here - should we remove it?
        if not isinstance(err, SECoPError):
            err = InternalError(err)
-        msg = (ERRORPREFIX + EVENTREPLY, '%s:%s' % (moduleobj.name, pobj.export),
-               # error-report !
-               [err.name, repr(err), dict(t=currenttime())])
-        self.broadcast_event(msg)
+        if str(err) == str(pobj.readerror):
+            return # do not send updates for repeated errors
+        pobj.readerror = err
+        pobj.timestamp = currenttime() # indicates the first time this error appeared
+        self.broadcast_event(make_update(moduleobj.name, pobj))

    def subscribe(self, conn, eventname):
        self._subscriptions.setdefault(eventname, set()).add(conn)
@ -364,20 +374,11 @@ class Dispatcher:
        for modulename, pname in modules:
            moduleobj = self._modules.get(modulename, None)
            if pname:
-                pobj = moduleobj.accessibles[pname]
-                updmsg = (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
-                          [pobj.export_value(), dict(t=pobj.timestamp)])
-                conn.queue_async_reply(updmsg)
+                conn.queue_async_reply(make_update(modulename, moduleobj.parameters[pname]))
                continue
            for pobj in moduleobj.accessibles.values():
-                if not isinstance(pobj, Parameter):
-                    continue
-                if not pobj.export:
-                    continue
-                # can not use announce_update here, as this will send to all clients
-                updmsg = (EVENTREPLY, '%s:%s' % (modulename, pobj.export),
-                          [pobj.export_value(), dict(t=pobj.timestamp)])
-                conn.queue_async_reply(updmsg)
+                if isinstance(pobj, Parameter) and pobj.export:
+                    conn.queue_async_reply(make_update(modulename, pobj))
        return (ENABLEEVENTSREPLY, specifier, None) if specifier else (ENABLEEVENTSREPLY, None, None)

    def handle_deactivate(self, conn, specifier, data):
--- a/test/test_poller.py
+++ b/test/test_poller.py
@ -157,6 +157,9 @@ class Module:
    def isBusy(self):
        return self.is_busy

+    def pollOne(self, pname):
+        getattr(self, 'read_' + pname)()
+
    def __repr__(self):
        rdict = self.__dict__.copy()
        rdict.pop('parameters')