put try/catch for all monitor callbacks (suggestion Xiaoqiang)

This commit is contained in:
2023-06-30 14:32:48 +02:00
parent 0a965f66ce
commit e910083b1a
4 changed files with 229 additions and 57 deletions

131
Readme.md
View File

@@ -169,6 +169,137 @@ cd /sf/cristallina/applications/mx/zamofing_t/ESB_MX
git checkout master git checkout master
cd /sf/cristallina/applications/mx/zamofing_t/ESB_MX/python/SwissMX cd /sf/cristallina/applications/mx/zamofing_t/ESB_MX/python/SwissMX
git checkout master git checkout master
```
22.6.23 debug segmentation fault
--------------------------------
```
THE CRISTALLINA CONTROL ROOM RUNS NORMALLY ON saresc-cons-05
[saresc-cons-05 ~]$
ulimit -a
ulimit -c unlimited
python -X faulthandler -X tracemalloc -X dev swissmx.py 2>&1 | tee /tmp/swissmx000.log
reset;tail -c+0 -F /tmp/swissmx000.log
python -X faulthandler -X tracemalloc -X importtime -X dev swissmx.py --sim 0xff
python -X faulthandler -X tracemalloc -X dev swissmx.py --sim 0xff
zamofing_t@ganymede:~/Documents/prj/SwissFEL/epics_ioc_modules/ESB_MX/python/SwissMX$
rsync -vai swissmx.py saresc-cons-03:/sf/cristallina/applications/mx/zamofing_t/ESB_MX/python/SwissMX/swissmx_segFault.py
read: https://docs.python.org/3/library/faulthandler.html
generate a coredump
cat /proc/sys/kernel/core_pattern
sudo echo "/tmp/core" > /proc/sys/kernel/core_pattern
ulimit -c unlimited
python -c "import ctypes; ctypes.string_at(0)"
python -X faulthandler -c "import ctypes; ctypes.string_at(0)"
Rene did:
[root@saresc-cons-05 ~]# sysctl -w kernel.core_pattern="/tmp/%e_core_dump.%p"
cat /proc/sys/kernel/core_pattern
/tmp/%e_core_dump.%p
ll /tmp/python_core_dump.*
gdb python /tmp/core
bt (for backtrace)
trying python c code extension:
/home/zamofing_t/Documents/prj/scratch/python/sample_c_extension
Thread 0x00007fd2e16e8700 (most recent call first):
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 620 in get_with_metadata
...
File "/gfa/.mounts/sf_cristallina/applications/mx/zamofing_t/ESB_MX/python/SwissMX/epics_widgets/MotorTweak.py", line 216 in update_label
...
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
```
Coredump 29.6.23 18h40
----------------------
```
grep -c '5000/5000' *
swissmx000.log:57
swissmx001.log:36
swissmx002.log:35
swissmx002.log 35*5000 frames
35*5000/100/60 -> every 29.16min of acquisition a crash
rsync -vai gac-cristall@saresc-cons-05:/tmp/swissmx* ~/Documents/prj/SwissFEL/epics_ioc_modules/ESB_MX/python/SwissMX/log
ll /tmp/swissmx*
ll /tmp/python_core_dump.*
Thread 0x00007fb9b13f7700 (most recent call first):
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 620 in get_with_metadata
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 620 in get_with_metadata
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/ca.py", line 1122 in element_count
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 489 in get
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 620 in get_with_metadata
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 981 in nelm
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/ca.py", line 579 in wrapper
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/ca.py", line 1122 in element_count
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/ca.py", line 549 in wrapper
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/ca.py", line 871 in current_context
File "/sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/pyepics-3.4.3-py3.9.egg/epics/pv.py", line 48 in wrapped
conda activate crmx38
ll /tmp/python_core_dump.*
gdb python /tmp/python_core_dump.21072
Core was generated by `python -X faulthandler -X tracemalloc -X dev swissmx.py'.
Program terminated with signal 11, Segmentation fault.
bt
0 write_thread_id.isra.3 (is_current=0, fd=<optimized out>) at /opt/conda/conda-bld/python-split_1648465063888/work/Python/traceback.c:849
#1 _Py_DumpTracebackThreads () at /opt/conda/conda-bld/python-split_1648465063888/work/Python/traceback.c:914
#2 0x000055c2b7ef6635 in faulthandler_dump_traceback.isra.2 (fd=fd@entry=2, all_threads=1) at /opt/conda/conda-bld/python-split_1648465063888/work/Modules/faulthandler.c:242
#3 0x000055c2b7ef67a3 in faulthandler_fatal_error (signum=11) at /opt/conda/conda-bld/python-split_1648465063888/work/Modules/faulthandler.c:348
#4 <signal handler called>
#5 0x00007fb9c7ed4ef4 in QWidgetTextControl::document() const () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#6 0x00007fb9c7e676e1 in ?? () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#7 0x00007fb9c7e69230 in QLabel::paintEvent(QPaintEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#8 0x00007fb9d7fa16d3 in sipQLabel::paintEvent(QPaintEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/QtWidgets.abi3.so
#9 0x00007fb9c7dcc580 in QWidget::event(QEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#10 0x00007fb9c7e36203 in QFrame::event(QEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#11 0x00007fb9d7fa2d03 in sipQLabel::event(QEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/QtWidgets.abi3.so
#12 0x00007fb9c7da20f1 in QApplicationPrivate::notify_helper(QObject*, QEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#13 0x00007fb9d8097afe in sipQApplication::notify(QObject*, QEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/QtWidgets.abi3.so
#14 0x00007fb9e6cadd62 in QCoreApplication::notifyInternal2(QObject*, QEvent*) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Core.so.5
#15 0x00007fb9c7dc6de6 in QWidgetPrivate::sendPaintEvent(QRegion const&) () from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#16 0x00007fb9c7dc774e in QWidgetPrivate::drawWidget(QPaintDevice*, QRegion const&, QPoint const&, QFlags<QWidgetPrivate::DrawWidgetFlag>, QPainter*, QWidgetRepaintManager*) ()
from /sf/cristallina/applications/conda/envs/crmx38/lib/python3.8/site-packages/PyQt5/../../../libQt5Widgets.so.5
#17 0x00007fb9c7dc8147 in QWidgetPrivate::paintSiblingsRecursive(QPaintDevice*, QList<QObject*> const&, int, QRegion const&, QPoint const&, QFlags<QWidgetPrivate::DrawWidgetFlag>, QPainter*, QWidgetRe
```
Locate monitors and callbacks:
```
grep -n 'def cb_update_img(self):' *.py
swissmx.py:698: self.sigNewCamImg.connect(self.cb_update_img)
swissmx.py:745: def cb_update_img(self):
``` ```

View File

@@ -144,7 +144,7 @@ class AppCfg(QSettings):
if AppCfg.DAQ_PV_CH not in keys: if AppCfg.DAQ_PV_CH not in keys:
dflt.append((AppCfg.DAQ_PV_CH, () )) #list of PVs dflt.append((AppCfg.DAQ_PV_CH, () )) #list of PVs
if AppCfg.DFT_POS_GONIO not in keys: if AppCfg.DFT_POS_GONIO not in keys:
dflt.append((AppCfg.DFT_POS_GONIO, {'mount':(0.,0.,0.,0.),'align':(0.,0.,0.,0.)}))#default positions dflt.append((AppCfg.DFT_POS_GONIO, {'pos_mount':(0.,0.,0.,0.),'pos_align':(0.,0.,0.,0.)}))#default positions
if AppCfg.DFT_POS_BKLGT not in keys: if AppCfg.DFT_POS_BKLGT not in keys:
dflt.append((AppCfg.DFT_POS_BKLGT, {'pos_in': -30000.0, 'pos_out': 1000.0, 'pos_diode': -30000.0}))#default positions dflt.append((AppCfg.DFT_POS_BKLGT, {'pos_in': -30000.0, 'pos_out': 1000.0, 'pos_diode': -30000.0}))#default positions
@@ -363,8 +363,8 @@ verbose bits:
{'name':'set_out', 'title':'use current position as "out"', 'type':'action'}, {'name':'set_out', 'title':'use current position as "out"', 'type':'action'},
]}, ]},
{'name':AppCfg.DFT_POS_GONIO, 'title':'gonio reference positions', 'type':'group', 'expanded':False, 'children':[ {'name':AppCfg.DFT_POS_GONIO, 'title':'gonio reference positions', 'type':'group', 'expanded':False, 'children':[
{'name':'pos_mount', 'title':'Mount position', 'value':dft_pos_gonio.get('mount'), 'type':'str' }, {'name':'pos_mount', 'title':'Mount position', 'value':dft_pos_gonio.get('pos_mount'), 'type':'str' },
{'name':'pos_align', 'title':'Align position', 'value':dft_pos_gonio.get('align'), 'type':'str' }, {'name':'pos_align', 'title':'Align position', 'value':dft_pos_gonio.get('pos_align'), 'type':'str' },
{'name':'set_mount', 'title':'use current position as "mount"', 'type':'action'}, {'name':'set_mount', 'title':'use current position as "mount"', 'type':'action'},
{'name':'set_align', 'title':'use current position as "align"', 'type':'action'}, {'name':'set_align', 'title':'use current position as "align"', 'type':'action'},
]}, ]},

View File

@@ -95,10 +95,13 @@ class MotorTweak(QWidget, Ui_MotorTweak):
def set_val(self, **kw): def set_val(self, **kw):
v = kw['char_value'] try: #pv-monitor-func
_log.debug('updating VAL = {}'.format(v)) v = kw['char_value']
self._val=float(v) # rewrite in case of tweaking _log.debug('updating VAL = {}'.format(v))
self._drive_val.setText(v) self._val=float(v) # rewrite in case of tweaking
self._drive_val.setText(v)
except Exception as e:
_log.critical(f'{e}')
def set_motor_validator(self, **kwargs): def set_motor_validator(self, **kwargs):
@@ -187,33 +190,39 @@ class MotorTweak(QWidget, Ui_MotorTweak):
:return: :return:
''' '''
field = kw['motor_field'] try: #pv-monitor-func
src = kw['source_field'] field = kw['motor_field']
kw['alias'] = self._label src = kw['source_field']
if field != src: kw['alias'] = self._label
return if field != src:
if field == 'VAL': return
self.event_val.emit(self._rec_name, kw) if field == 'VAL':
elif field == 'RBV': self.event_val.emit(self._rec_name, kw)
self.event_rbv.emit(kw['alias'], kw['value'], kw) elif field == 'RBV':
elif field == 'LVIO': self.event_rbv.emit(kw['alias'], kw['value'], kw)
self.event_soft_limit.emit(self._rec_name, kw) elif field == 'LVIO':
elif field == 'HLS': self.event_soft_limit.emit(self._rec_name, kw)
self.event_high_hard_limit.emit(self._rec_name, kw) elif field == 'HLS':
self.event_axis_fault.emit(self._rec_name, kw) self.event_high_hard_limit.emit(self._rec_name, kw)
elif field == 'LVIO': self.event_axis_fault.emit(self._rec_name, kw)
self.event_low_hard_limit.emit(self._rec_name, kw) elif field == 'LVIO':
self.event_axis_fault.emit(self._rec_name, kw) self.event_low_hard_limit.emit(self._rec_name, kw)
elif field == 'STAT': self.event_axis_fault.emit(self._rec_name, kw)
self.event_axis_fault.emit(self._rec_name, kw) elif field == 'STAT':
self.event_axis_fault.emit(self._rec_name, kw)
except Exception as e:
_log.critical(f'{e}')
def update_label(self, **kwargs): def update_label(self, **kwargs):
m = self._motor try: #pv-monitor-func
self.label.setText(self._templates[self._label_style].format(rbv=m.readback)) m = self._motor
self.jog_forward.setToolTip('jog forward at {:.3f} {}/s'.format(m.jog_speed, m.units)) self.label.setText(self._templates[self._label_style].format(rbv=m.readback))
self.jog_reverse.setToolTip('jog reverse at {:.3f} {}/s'.format(m.jog_speed, m.units)) self.jog_forward.setToolTip('jog forward at {:.3f} {}/s'.format(m.jog_speed, m.units))
self.tweak_forward.setToolTip('tweak forward by {:.3f} {}'.format(m.tweak_val, m.units)) self.jog_reverse.setToolTip('jog reverse at {:.3f} {}/s'.format(m.jog_speed, m.units))
self.tweak_reverse.setToolTip('tweak reverse by {:.3f} {}'.format(m.tweak_val, m.units)) self.tweak_forward.setToolTip('tweak forward by {:.3f} {}'.format(m.tweak_val, m.units))
self.tweak_reverse.setToolTip('tweak reverse by {:.3f} {}'.format(m.tweak_val, m.units))
except Exception as e:
_log.critical(f'{e}')
def update_jog_speed(self, event): def update_jog_speed(self, event):
m = self._motor m = self._motor

View File

@@ -717,30 +717,33 @@ class WndSwissMx(QMainWindow, Ui_MainWindow):
cam.run(self.cb_new_frame_pv) cam.run(self.cb_new_frame_pv)
def cb_new_frame_pv(self, **kwargs): def cb_new_frame_pv(self, **kwargs):
#thrd=threading.current_thread() try: #pv-monitor-func
#_log.debug(f'thread:{thrd.getName()}, {thrd.native_id}') #thrd=threading.current_thread()
#_log.debug(f"{kwargs['timestamp']}") #_log.debug(f'thread:{thrd.getName()}, {thrd.native_id}')
#_log.debug(f"{kwargs['timestamp']}")
app=QApplication.instance() app=QApplication.instance()
cam=app._camera cam=app._camera
sz=cam._sz sz=cam._sz
if kwargs['count']==sz[0]*sz[1]: if kwargs['count']==sz[0]*sz[1]:
pic=kwargs['value'].reshape(sz[::-1]) pic=kwargs['value'].reshape(sz[::-1])
else: else:
sz=app._camera.update_size() sz=app._camera.update_size()
pic=kwargs['value'].reshape(sz[::-1]) pic=kwargs['value'].reshape(sz[::-1])
if pic.dtype==np.int16: if pic.dtype==np.int16:
pic.dtype=np.uint16 pic.dtype=np.uint16
camera.epics_cam.set_fiducial(pic, 255) camera.epics_cam.set_fiducial(pic, 255)
cam._pic=pic cam._pic=pic
cam._timestamp=kwargs['timestamp'] cam._timestamp=kwargs['timestamp']
try: try:
cam.process() cam.process()
except AttributeError as e: except AttributeError as e:
pass pass
# self._goImg.setImage(cam._pic) caused some deadlocks. # self._goImg.setImage(cam._pic) caused some deadlocks.
# therefore try to update the image with signals instead # therefore try to update the image with signals instead
self.sigNewCamImg.emit() self.sigNewCamImg.emit()
except Exception as e:
_log.critical(f'{e}')
def cb_update_img(self): def cb_update_img(self):
#thrd=threading.current_thread() #thrd=threading.current_thread()
@@ -748,22 +751,43 @@ class WndSwissMx(QMainWindow, Ui_MainWindow):
app=QApplication.instance() app=QApplication.instance()
cam=app._camera cam=app._camera
self._goImg.setImage(cam._pic) self._goImg.setImage(cam._pic)
#vb.setRange(QRectF(-1300,-1100,1400,1200)) #vb.setRange(QRectF(-1300,-1100,1400,1200))
#force a segmentation fault
#try:
# dbgSegFault=self._dbgSegFault
#except AttributeError as e:
# dbgSegFault=self._dbgSegFault=0
#_log.warning(f'dbgSegFault:{dbgSegFault}')
#if dbgSegFault>100:
# _log.critical('Xforce a segmentation fault')
# import ctypes
# ctypes.string_at(0)
#self._dbgSegFault+=1
def cb_new_frame_sim(self, **kwargs): def cb_new_frame_sim(self, **kwargs):
app=QApplication.instance() app=QApplication.instance()
sim=app._camera._sim sim=app._camera._sim
imgSeq=sim['imgSeq'] imgSeq=sim['imgSeq']
idx=sim['imgIdx'] idx=sim['imgIdx']
sim['imgIdx']=(idx+1)%imgSeq.shape[0] sim['imgIdx']=(idx+1)
imgIdx=idx%imgSeq.shape[0]
# _log.info('simulated idx:{}'.format(idx)) # _log.info('simulated idx:{}'.format(idx))
pic=imgSeq[idx] pic=imgSeq[imgIdx]
self._goImg.setImage(pic) self._goImg.setImage(pic)
delay=500 # ms -> 2fps delay=500 # ms -> 2fps
QtCore.QTimer.singleShot(delay, self.cb_new_frame_sim) QtCore.QTimer.singleShot(delay, self.cb_new_frame_sim)
#force a segmentation fault
#_log.warning(f'imgIdx:{imgIdx},idx:{idx}')
#if idx==20:
# _log.critical('force a segmentation fault')
# import ctypes
# ctypes.string_at(0)
def load_stylesheet(self): def load_stylesheet(self):
with open("swissmx.css", "r") as sheet: with open("swissmx.css", "r") as sheet:
self.setStyleSheet(sheet.read()) self.setStyleSheet(sheet.read())
@@ -969,6 +993,14 @@ class WndSwissMx(QMainWindow, Ui_MainWindow):
] ]
qutilities.add_item_to_toolbox(toolbox,"Fast Stage",widget_list=widgets) qutilities.add_item_to_toolbox(toolbox,"Fast Stage",widget_list=widgets)
#pv-monitor-func
#_log.info('modify monitors')
#for w in widgets[1:-1]: #ignore last item
# for k,pv in w._motor._pvs.items():
# pv.auto_monitor=False
# for k,pv in w._motor._pvs.items():
# pv.auto_monitor=True
def build_group_collimator(self, toolbox): def build_group_collimator(self, toolbox):
pfx=QApplication.instance()._cfg.value(AppCfg.GBL_DEV_PREFIX)[1] pfx=QApplication.instance()._cfg.value(AppCfg.GBL_DEV_PREFIX)[1]
c=QWidget() c=QWidget()