improve error handling during polls
improve the mechanism which avoids duplicated error messages in the log file: - extend to errors in doPoll - trigger error messages every day, so that they appear in every logfile + add missing space in some error message Change-Id: Icfaa06b43ee53f477483bab1f84def832fd0b977 Reviewed-on: https://forge.frm2.tum.de/review/c/secop/frappy/+/37432 Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch> Tested-by: Jenkins Automated Tests <pedersen+jenkins@frm2.tum.de>
This commit is contained in:
@@ -33,7 +33,6 @@ class SECoPError(RuntimeError):
|
||||
clsname2class = {} # needed to convert error reports back to classes
|
||||
name = 'InternalError'
|
||||
name2class = {}
|
||||
report_error = True
|
||||
raising_methods = None
|
||||
|
||||
def __init_subclass__(cls):
|
||||
@@ -76,7 +75,7 @@ class SECoPError(RuntimeError):
|
||||
if mlist and stripped:
|
||||
mlist = mlist[:-1] # do not pop, as this would change self.raising_methods
|
||||
prefix = '' if self.name2class.get(self.name) == type(self) else type(self).__name__
|
||||
prefix += ''.join(' in ' + m for m in mlist).strip()
|
||||
prefix = (prefix + ''.join(' in ' + m for m in mlist)).strip()
|
||||
if prefix:
|
||||
return f'{prefix}: {super().__str__()}'
|
||||
return super().__str__()
|
||||
|
||||
48
frappy/io.py
48
frappy/io.py
@@ -30,7 +30,7 @@ import time
|
||||
from frappy.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, \
|
||||
IntRange, StringType, StructOf, TupleOf, ValueType
|
||||
from frappy.errors import CommunicationFailedError, ConfigError, \
|
||||
ProgrammingError, SilentCommunicationFailedError as SilentError
|
||||
ProgrammingError, SECoPError, SilentCommunicationFailedError as SilentError
|
||||
from frappy.lib import generalConfig
|
||||
from frappy.lib.asynconn import AsynConn, ConnectionClosed
|
||||
from frappy.modules import Attached, Command, Communicator, Module, \
|
||||
@@ -125,7 +125,7 @@ class IOBase(Communicator):
|
||||
|
||||
_reconnectCallbacks = None
|
||||
_conn = None
|
||||
_last_error = None
|
||||
_last_error = None # this is None only until the first connection success
|
||||
_lock = None
|
||||
_last_connect_attempt = 0
|
||||
|
||||
@@ -167,10 +167,10 @@ class IOBase(Communicator):
|
||||
try:
|
||||
self.connectStart()
|
||||
if self._last_error:
|
||||
# we do not get here before the first connect success
|
||||
self.log.info('connected')
|
||||
self._last_error = 'connected'
|
||||
self.callCallbacks()
|
||||
return self.is_connected
|
||||
self._last_error = 'connected'
|
||||
except Exception as e:
|
||||
if repr(e) != self._last_error:
|
||||
self._last_error = repr(e)
|
||||
@@ -309,6 +309,7 @@ class StringIO(IOBase):
|
||||
"""
|
||||
command = command.encode(self.encoding)
|
||||
self.check_connection()
|
||||
new_error = 'no error' # in case of success (must not be None)
|
||||
try:
|
||||
with self._lock:
|
||||
# read garbage and wait before send
|
||||
@@ -337,12 +338,19 @@ class StringIO(IOBase):
|
||||
self.comLog('< %s', reply)
|
||||
return reply
|
||||
except Exception as e:
|
||||
if self._conn is None:
|
||||
raise SilentError('disconnected') from None
|
||||
if repr(e) != self._last_error:
|
||||
self._last_error = repr(e)
|
||||
self.log.error(self._last_error)
|
||||
raise SilentError(repr(e)) from e
|
||||
new_error = 'disconnected' if self._conn is None else repr(e)
|
||||
if new_error != self._last_error:
|
||||
# suppress subsequent equal error messages
|
||||
# this is in addition to the mechanism in Module.callPollFunc
|
||||
# as the same error would appear potentially in a lot of
|
||||
# methods
|
||||
if isinstance(e, SECoPError):
|
||||
self.log.error(new_error)
|
||||
else:
|
||||
self.log.exception(new_error)
|
||||
raise SilentError(new_error) from e
|
||||
finally:
|
||||
self._last_error = new_error
|
||||
|
||||
@Command(StringType())
|
||||
def writeline(self, command):
|
||||
@@ -472,6 +480,7 @@ class BytesIO(IOBase):
|
||||
def communicate(self, request, replylen): # pylint: disable=arguments-differ
|
||||
"""send a request and receive (at least) <replylen> bytes as reply"""
|
||||
self.check_connection()
|
||||
new_error = 'no error' # in case of success (must not be None)
|
||||
try:
|
||||
with self._lock:
|
||||
# read garbage and wait before send
|
||||
@@ -490,12 +499,19 @@ class BytesIO(IOBase):
|
||||
self.comLog('< %s', hexify(reply))
|
||||
return self.getFullReply(request, reply)
|
||||
except Exception as e:
|
||||
if self._conn is None:
|
||||
raise SilentError('disconnected') from None
|
||||
if repr(e) != self._last_error:
|
||||
self._last_error = str(e)
|
||||
self.log.error(self._last_error)
|
||||
raise SilentError(repr(e)) from e
|
||||
new_error = 'disconnected' if self._conn is None else repr(e)
|
||||
if new_error != self._last_error:
|
||||
# suppress subsequent equal error messages
|
||||
# this is in addition to the mechanism in Module.callPollFunc
|
||||
# as the same error would appear potentially in a lot of
|
||||
# methods
|
||||
if isinstance(e, SECoPError):
|
||||
self.log.error(new_error)
|
||||
else:
|
||||
self.log.exception(new_error)
|
||||
raise SilentError(new_error) from e
|
||||
finally:
|
||||
self._last_error = new_error
|
||||
|
||||
@Command(StructOf(requests=ArrayOf(TupleOf(BLOBType(), IntRange(0), FloatRange(0, unit='s')))),
|
||||
result=ArrayOf(BLOBType()))
|
||||
|
||||
@@ -244,7 +244,7 @@ class PollInfo:
|
||||
self.interval = pollinterval
|
||||
self.last_main = 0
|
||||
self.last_slow = 0
|
||||
self.pending_errors = set()
|
||||
self.pending_errors = {}
|
||||
self.polled_parameters = []
|
||||
self.fast_flag = False
|
||||
self.trigger_event = trigger_event
|
||||
@@ -531,7 +531,6 @@ class Module(HasAccessibles):
|
||||
pobj.value = value
|
||||
if err:
|
||||
if secop_error(err) == pobj.readerror:
|
||||
err.report_error = False
|
||||
return # no updates for repeated errors
|
||||
err = secop_error(err)
|
||||
value_err = value, err
|
||||
@@ -666,29 +665,31 @@ class Module(HasAccessibles):
|
||||
self.pollInfo.interval = fast_interval if flag else self.pollinterval
|
||||
self.pollInfo.trigger()
|
||||
|
||||
def callPollFunc(self, rfunc, raise_com_failed=False):
|
||||
def callPollFunc(self, rfunc, pollname=None, raise_com_failed=False):
|
||||
"""call read method with proper error handling"""
|
||||
try:
|
||||
name = pollname or rfunc.__name__
|
||||
rfunc()
|
||||
if rfunc.__name__ in self.pollInfo.pending_errors:
|
||||
self.log.info('%s: o.k.', rfunc.__name__)
|
||||
self.pollInfo.pending_errors.discard(rfunc.__name__)
|
||||
if self.pollInfo.pending_errors.pop(name, None):
|
||||
self.log.info('%s: o.k.', name)
|
||||
except Exception as e:
|
||||
if getattr(e, 'report_error', True):
|
||||
name = rfunc.__name__
|
||||
self.pollInfo.pending_errors.add(name) # trigger o.k. message after error is resolved
|
||||
if isinstance(e, SECoPError):
|
||||
e.raising_methods.append(name)
|
||||
if e.silent:
|
||||
self.log.debug('%s', e.format(False))
|
||||
else:
|
||||
self.log.error('%s', e.format(False))
|
||||
if raise_com_failed and isinstance(e, CommunicationFailedError):
|
||||
raise
|
||||
else:
|
||||
# not a SECoPError: this is proabably a programming error
|
||||
prev = self.pollInfo.pending_errors.get(name)
|
||||
if isinstance(e, SECoPError):
|
||||
if pollname:
|
||||
e.raising_methods.append(pollname)
|
||||
self.log.debug('%s failed with %r', pollname, e)
|
||||
if raise_com_failed and isinstance(e, CommunicationFailedError):
|
||||
raise
|
||||
efmt = None if e.silent else e.format(False)
|
||||
if efmt != prev:
|
||||
self.log.error('%s', efmt)
|
||||
else:
|
||||
# not a SECoPError: this is proabably a programming error
|
||||
efmt = repr(e)
|
||||
if efmt != prev:
|
||||
# we want to log the traceback
|
||||
self.log.error('%s', formatException())
|
||||
self.log.exception('%s', efmt)
|
||||
self.pollInfo.pending_errors[name] = efmt
|
||||
|
||||
def __pollThread(self, modules, started_callback):
|
||||
"""poll thread body
|
||||
@@ -742,8 +743,19 @@ class Module(HasAccessibles):
|
||||
if not polled_modules: # no polls needed - exit thread
|
||||
return
|
||||
to_poll = ()
|
||||
report_day = time.localtime().tm_min
|
||||
while modules: # modules will be cleared on shutdown
|
||||
now = time.time()
|
||||
today = time.localtime().tm_min
|
||||
if today != report_day:
|
||||
report_day = today
|
||||
for mobj in modules:
|
||||
pending = mobj.pollInfo.pending_errors
|
||||
if pending:
|
||||
self.log.info('%d pending errors', len(pending))
|
||||
# this will trigger again logging these errors
|
||||
# or logging o.k. on success
|
||||
pending.update((k, 'x') for k in pending)
|
||||
wait_time = 999
|
||||
for mobj in modules:
|
||||
pinfo = mobj.pollInfo
|
||||
@@ -763,7 +775,7 @@ class Module(HasAccessibles):
|
||||
pinfo.last_main = (now // pinfo.interval) * pinfo.interval
|
||||
except ZeroDivisionError:
|
||||
pinfo.last_main = now
|
||||
mobj.callPollFunc(mobj.doPoll)
|
||||
mobj.callPollFunc(mobj.doPoll, f'{mobj.name}.doPoll')
|
||||
now = time.time()
|
||||
# find ONE due slow poll and call it
|
||||
loop = True
|
||||
|
||||
Reference in New Issue
Block a user