improve poller error handling

- repeated errors on poller are only once logged (per poll
  function / read_* method)
- during exception handling, silent=True on a SECoP error indicates
  that the error is already logged
+ fix the name of HardwareError
+ add test for consistency of SECoPErrors
+ catch socket.timeout in AsynTcp

Change-Id: I9df6c775cc19553b22a4d6e39591092adf7ff9a1
Reviewed-on: https://forge.frm2.tum.de/review/c/sine2020/secop/playground/+/28139
Tested-by: Jenkins Automated Tests <pedersen+jenkins@frm2.tum.de>
Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
Reviewed-by: Enrico Faulhaber <enrico.faulhaber@frm2.tum.de>
This commit is contained in:
zolliker 2022-04-11 18:45:56 +02:00
parent 2159753398
commit 7316632fa6
5 changed files with 84 additions and 57 deletions

View File

@ -17,12 +17,14 @@
# #
# Module authors: # Module authors:
# Enrico Faulhaber <enrico.faulhaber@frm2.tum.de> # Enrico Faulhaber <enrico.faulhaber@frm2.tum.de>
# Markus Zolliker <markus.zolliker@psi.ch>
# #
# ***************************************************************************** # *****************************************************************************
"""Define (internal) SECoP Errors""" """Define (internal) SECoP Errors"""
class SECoPError(RuntimeError): class SECoPError(RuntimeError):
silent = False # silent = True indicates that the error is already logged
def __init__(self, *args, **kwds): def __init__(self, *args, **kwds):
super().__init__() super().__init__()
@ -32,7 +34,8 @@ class SECoPError(RuntimeError):
def __repr__(self): def __repr__(self):
args = ', '.join(map(repr, self.args)) args = ', '.join(map(repr, self.args))
kwds = ', '.join(['%s=%r' % i for i in list(self.__dict__.items())]) kwds = ', '.join(['%s=%r' % i for i in list(self.__dict__.items())
if i[0] != 'silent'])
res = [] res = []
if args: if args:
res.append(args) res.append(args)
@ -102,14 +105,6 @@ class CommunicationFailedError(SECoPError):
pass pass
class SilentError(SECoPError):
pass
class CommunicationSilentError(SilentError, CommunicationFailedError):
name = 'CommunicationFailed'
class IsBusyError(SECoPError): class IsBusyError(SECoPError):
pass pass
@ -123,7 +118,7 @@ class DisabledError(SECoPError):
class HardwareError(SECoPError): class HardwareError(SECoPError):
pass name = 'HardwareError'
def make_secop_error(name, text): def make_secop_error(name, text):

View File

@ -31,8 +31,7 @@ import threading
from secop.lib.asynconn import AsynConn, ConnectionClosed from secop.lib.asynconn import AsynConn, ConnectionClosed
from secop.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, IntRange, \ from secop.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, IntRange, \
StringType, TupleOf, ValueType StringType, TupleOf, ValueType
from secop.errors import CommunicationFailedError, CommunicationSilentError, \ from secop.errors import CommunicationFailedError, ConfigError, ProgrammingError
ConfigError, ProgrammingError
from secop.modules import Attached, Command, \ from secop.modules import Attached, Command, \
Communicator, Done, Module, Parameter, Property Communicator, Done, Module, Parameter, Property
from secop.lib import generalConfig from secop.lib import generalConfig
@ -42,6 +41,10 @@ generalConfig.set_default('legacy_hasiodev', False)
HEX_CODE = re.compile(r'[0-9a-fA-F][0-9a-fA-F]$') HEX_CODE = re.compile(r'[0-9a-fA-F][0-9a-fA-F]$')
class SilentError(CommunicationFailedError):
silent = True
class HasIO(Module): class HasIO(Module):
"""Mixin for modules using a communicator""" """Mixin for modules using a communicator"""
io = Attached() io = Attached()
@ -151,11 +154,10 @@ class IOBase(Communicator):
self.callCallbacks() self.callCallbacks()
return Done return Done
except Exception as e: except Exception as e:
if str(e) == self._last_error: if str(e) != self._last_error:
raise CommunicationSilentError(str(e)) from e self._last_error = str(e)
self._last_error = str(e) self.log.error(self._last_error)
self.log.error(self._last_error) raise SilentError(repr(e)) from e
raise
return Done return Done
def write_is_connected(self, value): def write_is_connected(self, value):
@ -249,10 +251,10 @@ class StringIO(IOBase):
""" """
command = command.encode(self.encoding) command = command.encode(self.encoding)
if not self.is_connected: if not self.is_connected:
# do not try to reconnect here
# read_is_connected is doing this when called by its poller
self.read_is_connected() # try to reconnect self.read_is_connected() # try to reconnect
if not self._conn: raise SilentError('disconnected') from None
self.log.debug('can not connect to %r' % self.uri)
raise CommunicationSilentError('can not connect to %r' % self.uri)
try: try:
with self._lock: with self._lock:
# read garbage and wait before send # read garbage and wait before send
@ -279,11 +281,12 @@ class StringIO(IOBase):
self.comLog('< %s', reply) self.comLog('< %s', reply)
return reply return reply
except Exception as e: except Exception as e:
if str(e) == self._last_error: if self._conn is None:
raise CommunicationSilentError(str(e)) from None raise SilentError('disconnected') from None
self._last_error = str(e) if repr(e) != self._last_error:
self.log.error(self._last_error) self._last_error = str(e)
raise self.log.error(self._last_error)
raise SilentError(repr(e)) from e
@Command(ArrayOf(StringType()), result=ArrayOf(StringType())) @Command(ArrayOf(StringType()), result=ArrayOf(StringType()))
def multicomm(self, commands): def multicomm(self, commands):
@ -357,9 +360,10 @@ class BytesIO(IOBase):
def communicate(self, request, replylen): # pylint: disable=arguments-differ def communicate(self, request, replylen): # pylint: disable=arguments-differ
"""send a request and receive (at least) <replylen> bytes as reply""" """send a request and receive (at least) <replylen> bytes as reply"""
if not self.is_connected: if not self.is_connected:
# do not try to reconnect here
# read_is_connected is doing this when called by its poller
self.read_is_connected() # try to reconnect self.read_is_connected() # try to reconnect
if not self._conn: raise SilentError('disconnected') from None
raise CommunicationSilentError('can not connect to %r' % self.uri)
try: try:
with self._lock: with self._lock:
# read garbage and wait before send # read garbage and wait before send
@ -378,11 +382,12 @@ class BytesIO(IOBase):
self.comLog('< %s', hexify(reply)) self.comLog('< %s', hexify(reply))
return self.getFullReply(request, reply) return self.getFullReply(request, reply)
except Exception as e: except Exception as e:
if str(e) == self._last_error: if self._conn is None:
raise CommunicationSilentError(str(e)) from None raise SilentError('disconnected') from None
self._last_error = str(e) if repr(e) != self._last_error:
self.log.error(self._last_error) self._last_error = str(e)
raise self.log.error(self._last_error)
raise SilentError(repr(e)) from e
@Command((ArrayOf(TupleOf(BLOBType(), IntRange(0)))), result=ArrayOf(BLOBType())) @Command((ArrayOf(TupleOf(BLOBType(), IntRange(0)))), result=ArrayOf(BLOBType()))
def multicomm(self, requests): def multicomm(self, requests):

View File

@ -167,7 +167,7 @@ class AsynTcp(AsynConn):
uri = uri[6:] uri = uri[6:]
try: try:
self.connection = tcpSocket(uri, self.defaultport, self.timeout) self.connection = tcpSocket(uri, self.defaultport, self.timeout)
except (ConnectionRefusedError, socket.gaierror) as e: except (ConnectionRefusedError, socket.gaierror, socket.timeout) as e:
# indicate that retrying might make sense # indicate that retrying might make sense
raise CommunicationFailedError(str(e)) from None raise CommunicationFailedError(str(e)) from None

View File

@ -31,7 +31,7 @@ from functools import wraps
from secop.datatypes import ArrayOf, BoolType, EnumType, FloatRange, \ from secop.datatypes import ArrayOf, BoolType, EnumType, FloatRange, \
IntRange, StatusType, StringType, TextType, TupleOf, DiscouragedConversion IntRange, StatusType, StringType, TextType, TupleOf, DiscouragedConversion
from secop.errors import BadValueError, ConfigError, \ from secop.errors import BadValueError, ConfigError, \
ProgrammingError, SECoPError, SilentError, secop_error ProgrammingError, SECoPError, secop_error
from secop.lib import formatException, mkthread, UniqueObject, generalConfig from secop.lib import formatException, mkthread, UniqueObject, generalConfig
from secop.lib.enum import Enum from secop.lib.enum import Enum
from secop.params import Accessible, Command, Parameter from secop.params import Accessible, Command, Parameter
@ -233,7 +233,7 @@ class PollInfo:
self.interval = pollinterval self.interval = pollinterval
self.last_main = 0 self.last_main = 0
self.last_slow = 0 self.last_slow = 0
self.last_error = None self.last_error = {} # dict [<name of poll func>] of (None or str(last exception))
self.polled_parameters = [] self.polled_parameters = []
self.fast_flag = False self.fast_flag = False
self.trigger_event = trigger_event self.trigger_event = trigger_event
@ -633,12 +633,19 @@ class Module(HasAccessibles):
"""call read method with proper error handling""" """call read method with proper error handling"""
try: try:
rfunc() rfunc()
except SilentError: self.pollInfo.last_error[rfunc.__name__] = None
pass except Exception as e:
except SECoPError as e: name = rfunc.__name__
self.log.error(str(e)) if str(e) != self.pollInfo.last_error.get(name):
except Exception: self.pollInfo.last_error[name] = str(e)
self.log.error(formatException()) if isinstance(e, SECoPError):
if e.silent:
self.log.debug('%s: %s', name, str(e))
else:
self.log.error('%s: %s', name, str(e))
else:
# uncatched error: this is more serious
self.log.error('%s: %s', name, formatException())
def __pollThread(self, modules, started_callback): def __pollThread(self, modules, started_callback):
"""poll thread body """poll thread body
@ -693,25 +700,14 @@ class Module(HasAccessibles):
pinfo = mobj.pollInfo pinfo = mobj.pollInfo
if now > pinfo.last_main + pinfo.interval: if now > pinfo.last_main + pinfo.interval:
pinfo.last_main = (now // pinfo.interval) * pinfo.interval pinfo.last_main = (now // pinfo.interval) * pinfo.interval
try: mobj.callPollFunc(mobj.doPoll)
mobj.doPoll()
pinfo.last_error = None
except Exception as e:
if str(e) != str(pinfo.last_error) and not isinstance(e, SilentError):
mobj.log.error('doPoll: %r', e)
pinfo.last_error = e
now = time.time() now = time.time()
# find ONE due slow poll and call it # find ONE due slow poll and call it
loop = True loop = True
while loop: # loops max. 2 times, when to_poll is at end while loop: # loops max. 2 times, when to_poll is at end
for mobj, rfunc, pobj in to_poll: for mobj, rfunc, pobj in to_poll:
if now > pobj.timestamp + mobj.slowinterval * 0.5: if now > pobj.timestamp + mobj.slowinterval * 0.5:
try: mobj.callPollFunc(rfunc)
prev_err = pobj.readerror
rfunc()
except Exception as e:
if not isinstance(e, SilentError) and str(pobj.readerror) != str(prev_err):
mobj.log.error('%s: %r', pobj.name, e)
loop = False # one poll done loop = False # one poll done
break break
else: else:
@ -744,10 +740,11 @@ class Module(HasAccessibles):
try: try:
self.log.debug('initialize parameter %s', pname) self.log.debug('initialize parameter %s', pname)
wfunc(value) wfunc(value)
except SilentError:
pass
except SECoPError as e: except SECoPError as e:
self.log.error(str(e)) if e.silent:
self.log.debug('%s: %s', pname, str(e))
else:
self.log.error('%s: %s', pname, str(e))
except Exception: except Exception:
self.log.error(formatException()) self.log.error(formatException())
if started_callback: if started_callback:

30
test/test_errors.py Normal file
View File

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
# *****************************************************************************
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Module authors:
# Markus Zolliker <markus.zolliker@psi.ch>
#
# *****************************************************************************
"""test data types."""
import secop.errors
def test_errors():
"""check consistence of secop.errors.EXCEPTIONS"""
for e in secop.errors.EXCEPTIONS.values():
assert secop.errors.EXCEPTIONS[e().name] == e