improve error handling during polls
improve the mechanism which avoids duplicated error messages in the log file: - extend to errors in doPoll - trigger error messages every day, so that they appear in every logfile + add missing space in some error message Change-Id: Icfaa06b43ee53f477483bab1f84def832fd0b977 Reviewed-on: https://forge.frm2.tum.de/review/c/secop/frappy/+/37432 Reviewed-by: Markus Zolliker <markus.zolliker@psi.ch> Tested-by: Jenkins Automated Tests <pedersen+jenkins@frm2.tum.de>
This commit is contained in:
@@ -33,7 +33,6 @@ class SECoPError(RuntimeError):
|
|||||||
clsname2class = {} # needed to convert error reports back to classes
|
clsname2class = {} # needed to convert error reports back to classes
|
||||||
name = 'InternalError'
|
name = 'InternalError'
|
||||||
name2class = {}
|
name2class = {}
|
||||||
report_error = True
|
|
||||||
raising_methods = None
|
raising_methods = None
|
||||||
|
|
||||||
def __init_subclass__(cls):
|
def __init_subclass__(cls):
|
||||||
@@ -76,7 +75,7 @@ class SECoPError(RuntimeError):
|
|||||||
if mlist and stripped:
|
if mlist and stripped:
|
||||||
mlist = mlist[:-1] # do not pop, as this would change self.raising_methods
|
mlist = mlist[:-1] # do not pop, as this would change self.raising_methods
|
||||||
prefix = '' if self.name2class.get(self.name) == type(self) else type(self).__name__
|
prefix = '' if self.name2class.get(self.name) == type(self) else type(self).__name__
|
||||||
prefix += ''.join(' in ' + m for m in mlist).strip()
|
prefix = (prefix + ''.join(' in ' + m for m in mlist)).strip()
|
||||||
if prefix:
|
if prefix:
|
||||||
return f'{prefix}: {super().__str__()}'
|
return f'{prefix}: {super().__str__()}'
|
||||||
return super().__str__()
|
return super().__str__()
|
||||||
|
|||||||
48
frappy/io.py
48
frappy/io.py
@@ -30,7 +30,7 @@ import time
|
|||||||
from frappy.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, \
|
from frappy.datatypes import ArrayOf, BLOBType, BoolType, FloatRange, \
|
||||||
IntRange, StringType, StructOf, TupleOf, ValueType
|
IntRange, StringType, StructOf, TupleOf, ValueType
|
||||||
from frappy.errors import CommunicationFailedError, ConfigError, \
|
from frappy.errors import CommunicationFailedError, ConfigError, \
|
||||||
ProgrammingError, SilentCommunicationFailedError as SilentError
|
ProgrammingError, SECoPError, SilentCommunicationFailedError as SilentError
|
||||||
from frappy.lib import generalConfig
|
from frappy.lib import generalConfig
|
||||||
from frappy.lib.asynconn import AsynConn, ConnectionClosed
|
from frappy.lib.asynconn import AsynConn, ConnectionClosed
|
||||||
from frappy.modules import Attached, Command, Communicator, Module, \
|
from frappy.modules import Attached, Command, Communicator, Module, \
|
||||||
@@ -125,7 +125,7 @@ class IOBase(Communicator):
|
|||||||
|
|
||||||
_reconnectCallbacks = None
|
_reconnectCallbacks = None
|
||||||
_conn = None
|
_conn = None
|
||||||
_last_error = None
|
_last_error = None # this is None only until the first connection success
|
||||||
_lock = None
|
_lock = None
|
||||||
_last_connect_attempt = 0
|
_last_connect_attempt = 0
|
||||||
|
|
||||||
@@ -167,10 +167,10 @@ class IOBase(Communicator):
|
|||||||
try:
|
try:
|
||||||
self.connectStart()
|
self.connectStart()
|
||||||
if self._last_error:
|
if self._last_error:
|
||||||
|
# we do not get here before the first connect success
|
||||||
self.log.info('connected')
|
self.log.info('connected')
|
||||||
self._last_error = 'connected'
|
|
||||||
self.callCallbacks()
|
self.callCallbacks()
|
||||||
return self.is_connected
|
self._last_error = 'connected'
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if repr(e) != self._last_error:
|
if repr(e) != self._last_error:
|
||||||
self._last_error = repr(e)
|
self._last_error = repr(e)
|
||||||
@@ -309,6 +309,7 @@ class StringIO(IOBase):
|
|||||||
"""
|
"""
|
||||||
command = command.encode(self.encoding)
|
command = command.encode(self.encoding)
|
||||||
self.check_connection()
|
self.check_connection()
|
||||||
|
new_error = 'no error' # in case of success (must not be None)
|
||||||
try:
|
try:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
# read garbage and wait before send
|
# read garbage and wait before send
|
||||||
@@ -337,12 +338,19 @@ class StringIO(IOBase):
|
|||||||
self.comLog('< %s', reply)
|
self.comLog('< %s', reply)
|
||||||
return reply
|
return reply
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self._conn is None:
|
new_error = 'disconnected' if self._conn is None else repr(e)
|
||||||
raise SilentError('disconnected') from None
|
if new_error != self._last_error:
|
||||||
if repr(e) != self._last_error:
|
# suppress subsequent equal error messages
|
||||||
self._last_error = repr(e)
|
# this is in addition to the mechanism in Module.callPollFunc
|
||||||
self.log.error(self._last_error)
|
# as the same error would appear potentially in a lot of
|
||||||
raise SilentError(repr(e)) from e
|
# methods
|
||||||
|
if isinstance(e, SECoPError):
|
||||||
|
self.log.error(new_error)
|
||||||
|
else:
|
||||||
|
self.log.exception(new_error)
|
||||||
|
raise SilentError(new_error) from e
|
||||||
|
finally:
|
||||||
|
self._last_error = new_error
|
||||||
|
|
||||||
@Command(StringType())
|
@Command(StringType())
|
||||||
def writeline(self, command):
|
def writeline(self, command):
|
||||||
@@ -472,6 +480,7 @@ class BytesIO(IOBase):
|
|||||||
def communicate(self, request, replylen): # pylint: disable=arguments-differ
|
def communicate(self, request, replylen): # pylint: disable=arguments-differ
|
||||||
"""send a request and receive (at least) <replylen> bytes as reply"""
|
"""send a request and receive (at least) <replylen> bytes as reply"""
|
||||||
self.check_connection()
|
self.check_connection()
|
||||||
|
new_error = 'no error' # in case of success (must not be None)
|
||||||
try:
|
try:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
# read garbage and wait before send
|
# read garbage and wait before send
|
||||||
@@ -490,12 +499,19 @@ class BytesIO(IOBase):
|
|||||||
self.comLog('< %s', hexify(reply))
|
self.comLog('< %s', hexify(reply))
|
||||||
return self.getFullReply(request, reply)
|
return self.getFullReply(request, reply)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self._conn is None:
|
new_error = 'disconnected' if self._conn is None else repr(e)
|
||||||
raise SilentError('disconnected') from None
|
if new_error != self._last_error:
|
||||||
if repr(e) != self._last_error:
|
# suppress subsequent equal error messages
|
||||||
self._last_error = str(e)
|
# this is in addition to the mechanism in Module.callPollFunc
|
||||||
self.log.error(self._last_error)
|
# as the same error would appear potentially in a lot of
|
||||||
raise SilentError(repr(e)) from e
|
# methods
|
||||||
|
if isinstance(e, SECoPError):
|
||||||
|
self.log.error(new_error)
|
||||||
|
else:
|
||||||
|
self.log.exception(new_error)
|
||||||
|
raise SilentError(new_error) from e
|
||||||
|
finally:
|
||||||
|
self._last_error = new_error
|
||||||
|
|
||||||
@Command(StructOf(requests=ArrayOf(TupleOf(BLOBType(), IntRange(0), FloatRange(0, unit='s')))),
|
@Command(StructOf(requests=ArrayOf(TupleOf(BLOBType(), IntRange(0), FloatRange(0, unit='s')))),
|
||||||
result=ArrayOf(BLOBType()))
|
result=ArrayOf(BLOBType()))
|
||||||
|
|||||||
@@ -244,7 +244,7 @@ class PollInfo:
|
|||||||
self.interval = pollinterval
|
self.interval = pollinterval
|
||||||
self.last_main = 0
|
self.last_main = 0
|
||||||
self.last_slow = 0
|
self.last_slow = 0
|
||||||
self.pending_errors = set()
|
self.pending_errors = {}
|
||||||
self.polled_parameters = []
|
self.polled_parameters = []
|
||||||
self.fast_flag = False
|
self.fast_flag = False
|
||||||
self.trigger_event = trigger_event
|
self.trigger_event = trigger_event
|
||||||
@@ -531,7 +531,6 @@ class Module(HasAccessibles):
|
|||||||
pobj.value = value
|
pobj.value = value
|
||||||
if err:
|
if err:
|
||||||
if secop_error(err) == pobj.readerror:
|
if secop_error(err) == pobj.readerror:
|
||||||
err.report_error = False
|
|
||||||
return # no updates for repeated errors
|
return # no updates for repeated errors
|
||||||
err = secop_error(err)
|
err = secop_error(err)
|
||||||
value_err = value, err
|
value_err = value, err
|
||||||
@@ -666,29 +665,31 @@ class Module(HasAccessibles):
|
|||||||
self.pollInfo.interval = fast_interval if flag else self.pollinterval
|
self.pollInfo.interval = fast_interval if flag else self.pollinterval
|
||||||
self.pollInfo.trigger()
|
self.pollInfo.trigger()
|
||||||
|
|
||||||
def callPollFunc(self, rfunc, raise_com_failed=False):
|
def callPollFunc(self, rfunc, pollname=None, raise_com_failed=False):
|
||||||
"""call read method with proper error handling"""
|
"""call read method with proper error handling"""
|
||||||
try:
|
try:
|
||||||
|
name = pollname or rfunc.__name__
|
||||||
rfunc()
|
rfunc()
|
||||||
if rfunc.__name__ in self.pollInfo.pending_errors:
|
if self.pollInfo.pending_errors.pop(name, None):
|
||||||
self.log.info('%s: o.k.', rfunc.__name__)
|
self.log.info('%s: o.k.', name)
|
||||||
self.pollInfo.pending_errors.discard(rfunc.__name__)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if getattr(e, 'report_error', True):
|
prev = self.pollInfo.pending_errors.get(name)
|
||||||
name = rfunc.__name__
|
|
||||||
self.pollInfo.pending_errors.add(name) # trigger o.k. message after error is resolved
|
|
||||||
if isinstance(e, SECoPError):
|
if isinstance(e, SECoPError):
|
||||||
e.raising_methods.append(name)
|
if pollname:
|
||||||
if e.silent:
|
e.raising_methods.append(pollname)
|
||||||
self.log.debug('%s', e.format(False))
|
self.log.debug('%s failed with %r', pollname, e)
|
||||||
else:
|
|
||||||
self.log.error('%s', e.format(False))
|
|
||||||
if raise_com_failed and isinstance(e, CommunicationFailedError):
|
if raise_com_failed and isinstance(e, CommunicationFailedError):
|
||||||
raise
|
raise
|
||||||
|
efmt = None if e.silent else e.format(False)
|
||||||
|
if efmt != prev:
|
||||||
|
self.log.error('%s', efmt)
|
||||||
else:
|
else:
|
||||||
# not a SECoPError: this is proabably a programming error
|
# not a SECoPError: this is proabably a programming error
|
||||||
|
efmt = repr(e)
|
||||||
|
if efmt != prev:
|
||||||
# we want to log the traceback
|
# we want to log the traceback
|
||||||
self.log.error('%s', formatException())
|
self.log.exception('%s', efmt)
|
||||||
|
self.pollInfo.pending_errors[name] = efmt
|
||||||
|
|
||||||
def __pollThread(self, modules, started_callback):
|
def __pollThread(self, modules, started_callback):
|
||||||
"""poll thread body
|
"""poll thread body
|
||||||
@@ -742,8 +743,19 @@ class Module(HasAccessibles):
|
|||||||
if not polled_modules: # no polls needed - exit thread
|
if not polled_modules: # no polls needed - exit thread
|
||||||
return
|
return
|
||||||
to_poll = ()
|
to_poll = ()
|
||||||
|
report_day = time.localtime().tm_min
|
||||||
while modules: # modules will be cleared on shutdown
|
while modules: # modules will be cleared on shutdown
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
today = time.localtime().tm_min
|
||||||
|
if today != report_day:
|
||||||
|
report_day = today
|
||||||
|
for mobj in modules:
|
||||||
|
pending = mobj.pollInfo.pending_errors
|
||||||
|
if pending:
|
||||||
|
self.log.info('%d pending errors', len(pending))
|
||||||
|
# this will trigger again logging these errors
|
||||||
|
# or logging o.k. on success
|
||||||
|
pending.update((k, 'x') for k in pending)
|
||||||
wait_time = 999
|
wait_time = 999
|
||||||
for mobj in modules:
|
for mobj in modules:
|
||||||
pinfo = mobj.pollInfo
|
pinfo = mobj.pollInfo
|
||||||
@@ -763,7 +775,7 @@ class Module(HasAccessibles):
|
|||||||
pinfo.last_main = (now // pinfo.interval) * pinfo.interval
|
pinfo.last_main = (now // pinfo.interval) * pinfo.interval
|
||||||
except ZeroDivisionError:
|
except ZeroDivisionError:
|
||||||
pinfo.last_main = now
|
pinfo.last_main = now
|
||||||
mobj.callPollFunc(mobj.doPoll)
|
mobj.callPollFunc(mobj.doPoll, f'{mobj.name}.doPoll')
|
||||||
now = time.time()
|
now = time.time()
|
||||||
# find ONE due slow poll and call it
|
# find ONE due slow poll and call it
|
||||||
loop = True
|
loop = True
|
||||||
|
|||||||
Reference in New Issue
Block a user