improve error handling during polls

improve the mechanism that avoids duplicate error messages
in the log file:

- extend to errors in doPoll
- re-trigger pending error messages every day, so that they appear
  in every logfile

+ add a missing space in some error messages

Change-Id: Icfaa06b43ee53f477483bab1f84def832fd0b977
Reviewed-on: https://forge.frm2.tum.de/review/c/secop/frappy/+/37432
Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch>
Tested-by: Jenkins Automated Tests <pedersen+jenkins@frm2.tum.de>
This commit is contained in:
2025-09-16 17:24:35 +02:00
parent 943995b1d7
commit 9fe040e9d3
3 changed files with 66 additions and 39 deletions

View File

@@ -33,7 +33,6 @@ class SECoPError(RuntimeError):
clsname2class = {} # needed to convert error reports back to classes clsname2class = {} # needed to convert error reports back to classes
name = 'InternalError' name = 'InternalError'
name2class = {} name2class = {}
report_error = True
raising_methods = None raising_methods = None
def __init_subclass__(cls): def __init_subclass__(cls):
@@ -76,7 +75,7 @@ class SECoPError(RuntimeError):
if mlist and stripped: if mlist and stripped:
mlist = mlist[:-1] # do not pop, as this would change self.raising_methods mlist = mlist[:-1] # do not pop, as this would change self.raising_methods
prefix = '' if self.name2class.get(self.name) == type(self) else type(self).__name__ prefix = '' if self.name2class.get(self.name) == type(self) else type(self).__name__
prefix += ''.join(' in ' + m for m in mlist).strip() prefix = (prefix + ''.join(' in ' + m for m in mlist)).strip()
if prefix: if prefix:
return f'{prefix}: {super().__str__()}' return f'{prefix}: {super().__str__()}'
return super().__str__() return super().__str__()

View File

@@ -30,7 +30,7 @@ import time
from frappy.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, \ from frappy.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, \
IntRange, StringType, StructOf, TupleOf, ValueType IntRange, StringType, StructOf, TupleOf, ValueType
from frappy.errors import CommunicationFailedError, ConfigError, \ from frappy.errors import CommunicationFailedError, ConfigError, \
ProgrammingError, SilentCommunicationFailedError as SilentError ProgrammingError, SECoPError, SilentCommunicationFailedError as SilentError
from frappy.lib import generalConfig from frappy.lib import generalConfig
from frappy.lib.asynconn import AsynConn, ConnectionClosed from frappy.lib.asynconn import AsynConn, ConnectionClosed
from frappy.modules import Attached, Command, Communicator, Module, \ from frappy.modules import Attached, Command, Communicator, Module, \
@@ -125,7 +125,7 @@ class IOBase(Communicator):
_reconnectCallbacks = None _reconnectCallbacks = None
_conn = None _conn = None
_last_error = None _last_error = None # this is None only until the first connection success
_lock = None _lock = None
_last_connect_attempt = 0 _last_connect_attempt = 0
@@ -167,10 +167,10 @@ class IOBase(Communicator):
try: try:
self.connectStart() self.connectStart()
if self._last_error: if self._last_error:
# we do not get here before the first connect success
self.log.info('connected') self.log.info('connected')
self._last_error = 'connected'
self.callCallbacks() self.callCallbacks()
return self.is_connected self._last_error = 'connected'
except Exception as e: except Exception as e:
if repr(e) != self._last_error: if repr(e) != self._last_error:
self._last_error = repr(e) self._last_error = repr(e)
@@ -309,6 +309,7 @@ class StringIO(IOBase):
""" """
command = command.encode(self.encoding) command = command.encode(self.encoding)
self.check_connection() self.check_connection()
new_error = 'no error' # in case of success (must not be None)
try: try:
with self._lock: with self._lock:
# read garbage and wait before send # read garbage and wait before send
@@ -337,12 +338,19 @@ class StringIO(IOBase):
self.comLog('< %s', reply) self.comLog('< %s', reply)
return reply return reply
except Exception as e: except Exception as e:
if self._conn is None: new_error = 'disconnected' if self._conn is None else repr(e)
raise SilentError('disconnected') from None if new_error != self._last_error:
if repr(e) != self._last_error: # suppress subsequent equal error messages
self._last_error = repr(e) # this is in addition to the mechanism in Module.callPollFunc
self.log.error(self._last_error) # as the same error would appear potentially in a lot of
raise SilentError(repr(e)) from e # methods
if isinstance(e, SECoPError):
self.log.error(new_error)
else:
self.log.exception(new_error)
raise SilentError(new_error) from e
finally:
self._last_error = new_error
@Command(StringType()) @Command(StringType())
def writeline(self, command): def writeline(self, command):
@@ -472,6 +480,7 @@ class BytesIO(IOBase):
def communicate(self, request, replylen): # pylint: disable=arguments-differ def communicate(self, request, replylen): # pylint: disable=arguments-differ
"""send a request and receive (at least) <replylen> bytes as reply""" """send a request and receive (at least) <replylen> bytes as reply"""
self.check_connection() self.check_connection()
new_error = 'no error' # in case of success (must not be None)
try: try:
with self._lock: with self._lock:
# read garbage and wait before send # read garbage and wait before send
@@ -490,12 +499,19 @@ class BytesIO(IOBase):
self.comLog('< %s', hexify(reply)) self.comLog('< %s', hexify(reply))
return self.getFullReply(request, reply) return self.getFullReply(request, reply)
except Exception as e: except Exception as e:
if self._conn is None: new_error = 'disconnected' if self._conn is None else repr(e)
raise SilentError('disconnected') from None if new_error != self._last_error:
if repr(e) != self._last_error: # suppress subsequent equal error messages
self._last_error = str(e) # this is in addition to the mechanism in Module.callPollFunc
self.log.error(self._last_error) # as the same error would appear potentially in a lot of
raise SilentError(repr(e)) from e # methods
if isinstance(e, SECoPError):
self.log.error(new_error)
else:
self.log.exception(new_error)
raise SilentError(new_error) from e
finally:
self._last_error = new_error
@Command(StructOf(requests=ArrayOf(TupleOf(BLOBType(), IntRange(0), FloatRange(0, unit='s')))), @Command(StructOf(requests=ArrayOf(TupleOf(BLOBType(), IntRange(0), FloatRange(0, unit='s')))),
result=ArrayOf(BLOBType())) result=ArrayOf(BLOBType()))

View File

@@ -244,7 +244,7 @@ class PollInfo:
self.interval = pollinterval self.interval = pollinterval
self.last_main = 0 self.last_main = 0
self.last_slow = 0 self.last_slow = 0
self.pending_errors = set() self.pending_errors = {}
self.polled_parameters = [] self.polled_parameters = []
self.fast_flag = False self.fast_flag = False
self.trigger_event = trigger_event self.trigger_event = trigger_event
@@ -531,7 +531,6 @@ class Module(HasAccessibles):
pobj.value = value pobj.value = value
if err: if err:
if secop_error(err) == pobj.readerror: if secop_error(err) == pobj.readerror:
err.report_error = False
return # no updates for repeated errors return # no updates for repeated errors
err = secop_error(err) err = secop_error(err)
value_err = value, err value_err = value, err
@@ -666,29 +665,31 @@ class Module(HasAccessibles):
self.pollInfo.interval = fast_interval if flag else self.pollinterval self.pollInfo.interval = fast_interval if flag else self.pollinterval
self.pollInfo.trigger() self.pollInfo.trigger()
def callPollFunc(self, rfunc, raise_com_failed=False): def callPollFunc(self, rfunc, pollname=None, raise_com_failed=False):
"""call read method with proper error handling""" """call read method with proper error handling"""
try: try:
name = pollname or rfunc.__name__
rfunc() rfunc()
if rfunc.__name__ in self.pollInfo.pending_errors: if self.pollInfo.pending_errors.pop(name, None):
self.log.info('%s: o.k.', rfunc.__name__) self.log.info('%s: o.k.', name)
self.pollInfo.pending_errors.discard(rfunc.__name__)
except Exception as e: except Exception as e:
if getattr(e, 'report_error', True): prev = self.pollInfo.pending_errors.get(name)
name = rfunc.__name__ if isinstance(e, SECoPError):
self.pollInfo.pending_errors.add(name) # trigger o.k. message after error is resolved if pollname:
if isinstance(e, SECoPError): e.raising_methods.append(pollname)
e.raising_methods.append(name) self.log.debug('%s failed with %r', pollname, e)
if e.silent: if raise_com_failed and isinstance(e, CommunicationFailedError):
self.log.debug('%s', e.format(False)) raise
else: efmt = None if e.silent else e.format(False)
self.log.error('%s', e.format(False)) if efmt != prev:
if raise_com_failed and isinstance(e, CommunicationFailedError): self.log.error('%s', efmt)
raise else:
else: # not a SECoPError: this is proabably a programming error
# not a SECoPError: this is proabably a programming error efmt = repr(e)
if efmt != prev:
# we want to log the traceback # we want to log the traceback
self.log.error('%s', formatException()) self.log.exception('%s', efmt)
self.pollInfo.pending_errors[name] = efmt
def __pollThread(self, modules, started_callback): def __pollThread(self, modules, started_callback):
"""poll thread body """poll thread body
@@ -742,8 +743,19 @@ class Module(HasAccessibles):
if not polled_modules: # no polls needed - exit thread if not polled_modules: # no polls needed - exit thread
return return
to_poll = () to_poll = ()
report_day = time.localtime().tm_min
while modules: # modules will be cleared on shutdown while modules: # modules will be cleared on shutdown
now = time.time() now = time.time()
today = time.localtime().tm_min
if today != report_day:
report_day = today
for mobj in modules:
pending = mobj.pollInfo.pending_errors
if pending:
self.log.info('%d pending errors', len(pending))
# this will trigger again logging these errors
# or logging o.k. on success
pending.update((k, 'x') for k in pending)
wait_time = 999 wait_time = 999
for mobj in modules: for mobj in modules:
pinfo = mobj.pollInfo pinfo = mobj.pollInfo
@@ -763,7 +775,7 @@ class Module(HasAccessibles):
pinfo.last_main = (now // pinfo.interval) * pinfo.interval pinfo.last_main = (now // pinfo.interval) * pinfo.interval
except ZeroDivisionError: except ZeroDivisionError:
pinfo.last_main = now pinfo.last_main = now
mobj.callPollFunc(mobj.doPoll) mobj.callPollFunc(mobj.doPoll, f'{mobj.name}.doPoll')
now = time.time() now = time.time()
# find ONE due slow poll and call it # find ONE due slow poll and call it
loop = True loop = True