From 9fe040e9d3612cba3fba2d794ee6093a368d9195 Mon Sep 17 00:00:00 2001 From: Markus Zolliker Date: Tue, 16 Sep 2025 17:24:35 +0200 Subject: [PATCH] improve error handling during polls improve the mechanism which avoids duplicated error messages in the log file: - extend to errors in doPoll - trigger error messages every day, so that they appear in every logfile + add missing space in some error message Change-Id: Icfaa06b43ee53f477483bab1f84def832fd0b977 Reviewed-on: https://forge.frm2.tum.de/review/c/secop/frappy/+/37432 Reviewed-by: Markus Zolliker Tested-by: Jenkins Automated Tests --- frappy/errors.py | 3 +-- frappy/io.py | 48 ++++++++++++++++++++++++++------------- frappy/modulebase.py | 54 +++++++++++++++++++++++++++----------------- 3 files changed, 66 insertions(+), 39 deletions(-) diff --git a/frappy/errors.py b/frappy/errors.py index 801ed5f9..b9eb8af9 100644 --- a/frappy/errors.py +++ b/frappy/errors.py @@ -33,7 +33,6 @@ class SECoPError(RuntimeError): clsname2class = {} # needed to convert error reports back to classes name = 'InternalError' name2class = {} - report_error = True raising_methods = None def __init_subclass__(cls): @@ -76,7 +75,7 @@ class SECoPError(RuntimeError): if mlist and stripped: mlist = mlist[:-1] # do not pop, as this would change self.raising_methods prefix = '' if self.name2class.get(self.name) == type(self) else type(self).__name__ - prefix += ''.join(' in ' + m for m in mlist).strip() + prefix = (prefix + ''.join(' in ' + m for m in mlist)).strip() if prefix: return f'{prefix}: {super().__str__()}' return super().__str__() diff --git a/frappy/io.py b/frappy/io.py index 5a4d6fdd..cb52d083 100644 --- a/frappy/io.py +++ b/frappy/io.py @@ -30,7 +30,7 @@ import time from frappy.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, \ IntRange, StringType, StructOf, TupleOf, ValueType from frappy.errors import CommunicationFailedError, ConfigError, \ - ProgrammingError, SilentCommunicationFailedError as SilentError + ProgrammingError, SECoPError, SilentCommunicationFailedError as SilentError from frappy.lib import generalConfig from frappy.lib.asynconn import AsynConn, ConnectionClosed from frappy.modules import Attached, Command, Communicator, Module, \ @@ -125,7 +125,7 @@ class IOBase(Communicator): _reconnectCallbacks = None _conn = None - _last_error = None + _last_error = None # this is None only until the first connection success _lock = None _last_connect_attempt = 0 @@ -167,10 +167,10 @@ class IOBase(Communicator): try: self.connectStart() if self._last_error: + # we do not get here before the first connect success self.log.info('connected') - self._last_error = 'connected' self.callCallbacks() - return self.is_connected + self._last_error = 'connected' except Exception as e: if repr(e) != self._last_error: self._last_error = repr(e) @@ -309,6 +309,7 @@ class StringIO(IOBase): """ command = command.encode(self.encoding) self.check_connection() + new_error = 'no error' # in case of success (must not be None) try: with self._lock: # read garbage and wait before send @@ -337,12 +338,19 @@ class StringIO(IOBase): self.comLog('< %s', reply) return reply except Exception as e: - if self._conn is None: - raise SilentError('disconnected') from None - if repr(e) != self._last_error: - self._last_error = repr(e) - self.log.error(self._last_error) - raise SilentError(repr(e)) from e + new_error = 'disconnected' if self._conn is None else repr(e) + if new_error != self._last_error: + # suppress subsequent equal error messages + # this is in addition to the mechanism in Module.callPollFunc + # as the same error would appear potentially in a lot of + # methods + if isinstance(e, SECoPError): + self.log.error(new_error) + else: + self.log.exception(new_error) + raise SilentError(new_error) from e + finally: + self._last_error = new_error @Command(StringType()) def writeline(self, command): @@ -472,6 +480,7 @@ class BytesIO(IOBase): def communicate(self, request, replylen): # pylint: disable=arguments-differ """send a request and receive (at least) bytes as reply""" self.check_connection() + new_error = 'no error' # in case of success (must not be None) try: with self._lock: # read garbage and wait before send @@ -490,12 +499,19 @@ class BytesIO(IOBase): self.comLog('< %s', hexify(reply)) return self.getFullReply(request, reply) except Exception as e: - if self._conn is None: - raise SilentError('disconnected') from None - if repr(e) != self._last_error: - self._last_error = str(e) - self.log.error(self._last_error) - raise SilentError(repr(e)) from e + new_error = 'disconnected' if self._conn is None else repr(e) + if new_error != self._last_error: + # suppress subsequent equal error messages + # this is in addition to the mechanism in Module.callPollFunc + # as the same error would appear potentially in a lot of + # methods + if isinstance(e, SECoPError): + self.log.error(new_error) + else: + self.log.exception(new_error) + raise SilentError(new_error) from e + finally: + self._last_error = new_error @Command(StructOf(requests=ArrayOf(TupleOf(BLOBType(), IntRange(0), FloatRange(0, unit='s')))), result=ArrayOf(BLOBType())) diff --git a/frappy/modulebase.py b/frappy/modulebase.py index 3bce234b..92acc9c4 100644 --- a/frappy/modulebase.py +++ b/frappy/modulebase.py @@ -244,7 +244,7 @@ class PollInfo: self.interval = pollinterval self.last_main = 0 self.last_slow = 0 - self.pending_errors = set() + self.pending_errors = {} self.polled_parameters = [] self.fast_flag = False self.trigger_event = trigger_event @@ -531,7 +531,6 @@ class Module(HasAccessibles): pobj.value = value if err: if secop_error(err) == pobj.readerror: - err.report_error = False return # no updates for repeated errors err = secop_error(err) value_err = value, err @@ -666,29 +665,31 @@ class Module(HasAccessibles): self.pollInfo.interval = fast_interval if flag else self.pollinterval self.pollInfo.trigger() - def callPollFunc(self, rfunc, raise_com_failed=False): + def callPollFunc(self, rfunc, pollname=None, raise_com_failed=False): """call read method with proper error handling""" try: + name = pollname or rfunc.__name__ rfunc() - if rfunc.__name__ in self.pollInfo.pending_errors: - self.log.info('%s: o.k.', rfunc.__name__) - self.pollInfo.pending_errors.discard(rfunc.__name__) + if self.pollInfo.pending_errors.pop(name, None): + self.log.info('%s: o.k.', name) except Exception as e: - if getattr(e, 'report_error', True): - name = rfunc.__name__ - self.pollInfo.pending_errors.add(name) # trigger o.k. message after error is resolved - if isinstance(e, SECoPError): - e.raising_methods.append(name) - if e.silent: - self.log.debug('%s', e.format(False)) - else: - self.log.error('%s', e.format(False)) - if raise_com_failed and isinstance(e, CommunicationFailedError): - raise - else: - # not a SECoPError: this is proabably a programming error + prev = self.pollInfo.pending_errors.get(name) + if isinstance(e, SECoPError): + if pollname: + e.raising_methods.append(pollname) + self.log.debug('%s failed with %r', pollname, e) + if raise_com_failed and isinstance(e, CommunicationFailedError): + raise + efmt = None if e.silent else e.format(False) + if efmt != prev: + self.log.error('%s', efmt) + else: + # not a SECoPError: this is proabably a programming error + efmt = repr(e) + if efmt != prev: # we want to log the traceback - self.log.error('%s', formatException()) + self.log.exception('%s', efmt) + self.pollInfo.pending_errors[name] = efmt def __pollThread(self, modules, started_callback): """poll thread body @@ -742,8 +743,19 @@ class Module(HasAccessibles): if not polled_modules: # no polls needed - exit thread return to_poll = () + report_day = time.localtime().tm_min while modules: # modules will be cleared on shutdown now = time.time() + today = time.localtime().tm_min + if today != report_day: + report_day = today + for mobj in modules: + pending = mobj.pollInfo.pending_errors + if pending: + self.log.info('%d pending errors', len(pending)) + # this will trigger again logging these errors + # or logging o.k. on success + pending.update((k, 'x') for k in pending) wait_time = 999 for mobj in modules: pinfo = mobj.pollInfo @@ -763,7 +775,7 @@ class Module(HasAccessibles): pinfo.last_main = (now // pinfo.interval) * pinfo.interval except ZeroDivisionError: pinfo.last_main = now - mobj.callPollFunc(mobj.doPoll) + mobj.callPollFunc(mobj.doPoll, f'{mobj.name}.doPoll') now = time.time() # find ONE due slow poll and call it loop = True