improve error handling during polls

improve the mechanism which avoids duplicated error messages
in the log file:

- extend to errors in doPoll
- trigger error messages every day, so that they appear in every
  logfile

+ add missing space in some error message

Change-Id: Icfaa06b43ee53f477483bab1f84def832fd0b977
Reviewed-on: https://forge.frm2.tum.de/review/c/secop/frappy/+/37432
Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
Tested-by: Jenkins Automated Tests <pedersen+jenkins@frm2.tum.de>
This commit is contained in:
2025-09-16 17:24:35 +02:00
parent 943995b1d7
commit 9fe040e9d3
3 changed files with 66 additions and 39 deletions

View File

@@ -33,7 +33,6 @@ class SECoPError(RuntimeError):
clsname2class = {} # needed to convert error reports back to classes
name = 'InternalError'
name2class = {}
report_error = True
raising_methods = None
def __init_subclass__(cls):
@@ -76,7 +75,7 @@ class SECoPError(RuntimeError):
if mlist and stripped:
mlist = mlist[:-1] # do not pop, as this would change self.raising_methods
prefix = '' if self.name2class.get(self.name) == type(self) else type(self).__name__
prefix += ''.join(' in ' + m for m in mlist).strip()
prefix = (prefix + ''.join(' in ' + m for m in mlist)).strip()
if prefix:
return f'{prefix}: {super().__str__()}'
return super().__str__()

View File

@@ -30,7 +30,7 @@ import time
from frappy.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, \
IntRange, StringType, StructOf, TupleOf, ValueType
from frappy.errors import CommunicationFailedError, ConfigError, \
ProgrammingError, SilentCommunicationFailedError as SilentError
ProgrammingError, SECoPError, SilentCommunicationFailedError as SilentError
from frappy.lib import generalConfig
from frappy.lib.asynconn import AsynConn, ConnectionClosed
from frappy.modules import Attached, Command, Communicator, Module, \
@@ -125,7 +125,7 @@ class IOBase(Communicator):
_reconnectCallbacks = None
_conn = None
_last_error = None
_last_error = None # this is None only until the first connection success
_lock = None
_last_connect_attempt = 0
@@ -167,10 +167,10 @@ class IOBase(Communicator):
try:
self.connectStart()
if self._last_error:
# we do not get here before the first connect success
self.log.info('connected')
self._last_error = 'connected'
self.callCallbacks()
return self.is_connected
self._last_error = 'connected'
except Exception as e:
if repr(e) != self._last_error:
self._last_error = repr(e)
@@ -309,6 +309,7 @@ class StringIO(IOBase):
"""
command = command.encode(self.encoding)
self.check_connection()
new_error = 'no error' # in case of success (must not be None)
try:
with self._lock:
# read garbage and wait before send
@@ -337,12 +338,19 @@ class StringIO(IOBase):
self.comLog('< %s', reply)
return reply
except Exception as e:
if self._conn is None:
raise SilentError('disconnected') from None
if repr(e) != self._last_error:
self._last_error = repr(e)
self.log.error(self._last_error)
raise SilentError(repr(e)) from e
new_error = 'disconnected' if self._conn is None else repr(e)
if new_error != self._last_error:
# suppress subsequent equal error messages
# this is in addition to the mechanism in Module.callPollFunc
# as the same error would appear potentially in a lot of
# methods
if isinstance(e, SECoPError):
self.log.error(new_error)
else:
self.log.exception(new_error)
raise SilentError(new_error) from e
finally:
self._last_error = new_error
@Command(StringType())
def writeline(self, command):
@@ -472,6 +480,7 @@ class BytesIO(IOBase):
def communicate(self, request, replylen): # pylint: disable=arguments-differ
"""send a request and receive (at least) <replylen> bytes as reply"""
self.check_connection()
new_error = 'no error' # in case of success (must not be None)
try:
with self._lock:
# read garbage and wait before send
@@ -490,12 +499,19 @@ class BytesIO(IOBase):
self.comLog('< %s', hexify(reply))
return self.getFullReply(request, reply)
except Exception as e:
if self._conn is None:
raise SilentError('disconnected') from None
if repr(e) != self._last_error:
self._last_error = str(e)
self.log.error(self._last_error)
raise SilentError(repr(e)) from e
new_error = 'disconnected' if self._conn is None else repr(e)
if new_error != self._last_error:
# suppress subsequent equal error messages
# this is in addition to the mechanism in Module.callPollFunc
# as the same error would appear potentially in a lot of
# methods
if isinstance(e, SECoPError):
self.log.error(new_error)
else:
self.log.exception(new_error)
raise SilentError(new_error) from e
finally:
self._last_error = new_error
@Command(StructOf(requests=ArrayOf(TupleOf(BLOBType(), IntRange(0), FloatRange(0, unit='s')))),
result=ArrayOf(BLOBType()))

View File

@@ -244,7 +244,7 @@ class PollInfo:
self.interval = pollinterval
self.last_main = 0
self.last_slow = 0
self.pending_errors = set()
self.pending_errors = {}
self.polled_parameters = []
self.fast_flag = False
self.trigger_event = trigger_event
@@ -531,7 +531,6 @@ class Module(HasAccessibles):
pobj.value = value
if err:
if secop_error(err) == pobj.readerror:
err.report_error = False
return # no updates for repeated errors
err = secop_error(err)
value_err = value, err
@@ -666,29 +665,31 @@ class Module(HasAccessibles):
self.pollInfo.interval = fast_interval if flag else self.pollinterval
self.pollInfo.trigger()
def callPollFunc(self, rfunc, raise_com_failed=False):
def callPollFunc(self, rfunc, pollname=None, raise_com_failed=False):
"""call read method with proper error handling"""
try:
name = pollname or rfunc.__name__
rfunc()
if rfunc.__name__ in self.pollInfo.pending_errors:
self.log.info('%s: o.k.', rfunc.__name__)
self.pollInfo.pending_errors.discard(rfunc.__name__)
if self.pollInfo.pending_errors.pop(name, None):
self.log.info('%s: o.k.', name)
except Exception as e:
if getattr(e, 'report_error', True):
name = rfunc.__name__
self.pollInfo.pending_errors.add(name) # trigger o.k. message after error is resolved
if isinstance(e, SECoPError):
e.raising_methods.append(name)
if e.silent:
self.log.debug('%s', e.format(False))
else:
self.log.error('%s', e.format(False))
if raise_com_failed and isinstance(e, CommunicationFailedError):
raise
else:
# not a SECoPError: this is proabably a programming error
prev = self.pollInfo.pending_errors.get(name)
if isinstance(e, SECoPError):
if pollname:
e.raising_methods.append(pollname)
self.log.debug('%s failed with %r', pollname, e)
if raise_com_failed and isinstance(e, CommunicationFailedError):
raise
efmt = None if e.silent else e.format(False)
if efmt != prev:
self.log.error('%s', efmt)
else:
# not a SECoPError: this is proabably a programming error
efmt = repr(e)
if efmt != prev:
# we want to log the traceback
self.log.error('%s', formatException())
self.log.exception('%s', efmt)
self.pollInfo.pending_errors[name] = efmt
def __pollThread(self, modules, started_callback):
"""poll thread body
@@ -742,8 +743,19 @@ class Module(HasAccessibles):
if not polled_modules: # no polls needed - exit thread
return
to_poll = ()
report_day = time.localtime().tm_min
while modules: # modules will be cleared on shutdown
now = time.time()
today = time.localtime().tm_min
if today != report_day:
report_day = today
for mobj in modules:
pending = mobj.pollInfo.pending_errors
if pending:
self.log.info('%d pending errors', len(pending))
# this will trigger again logging these errors
# or logging o.k. on success
pending.update((k, 'x') for k in pending)
wait_time = 999
for mobj in modules:
pinfo = mobj.pollInfo
@@ -763,7 +775,7 @@ class Module(HasAccessibles):
pinfo.last_main = (now // pinfo.interval) * pinfo.interval
except ZeroDivisionError:
pinfo.last_main = now
mobj.callPollFunc(mobj.doPoll)
mobj.callPollFunc(mobj.doPoll, f'{mobj.name}.doPoll')
now = time.time()
# find ONE due slow poll and call it
loop = True