fix: improve integration with feedback from the beamline

This commit is contained in:
2026-01-27 09:59:40 +01:00
parent b818181da2
commit b19bfb7ca4
4 changed files with 113 additions and 47 deletions

View File

@@ -0,0 +1,38 @@
# Overview
Integration module for Eiger detectors at the cSAXS beamline with JungfrauJoch backend.
There are currently two supported Eiger detectors:
- EIGER 1.5M
- EIGER 9M
This module provides a base integration for both detectors. A short list of useful
information is also provided below.
## JungfrauJoch Service
The JungfrauJoch web UI is available at http://sls-jfjoch-001:8080. It is an interface
to the broker, which runs on sls-jfjoch-001.psi.ch. The writer service runs on
xbl-daq-34.psi.ch. Permission to access these machines and to run systemctl or
journalctl commands can be requested from the Infrastructure and Services group in AWI.
Beamline scientists need to check if they have the necessary permissions to connect
to these machines and run the commands below.
Useful commands for the broker service on sls-jfjoch-001.psi.ch:
- sudo systemctl status jfjoch_broker # Check status
- sudo systemctl start jfjoch_broker # Start service
- sudo systemctl stop jfjoch_broker # Stop service
- sudo systemctl restart jfjoch_broker # Restart service
For the writer service on xbl-daq-34.psi.ch:
- sudo journalctl -u jfjoch_writer -f # streams live logs
- sudo systemctl status jfjoch_writer # Check status
- sudo systemctl start jfjoch_writer # Start service
- sudo systemctl stop jfjoch_writer # Stop service
- sudo systemctl restart jfjoch_writer # Restart service
More information about JungfrauJoch and its API client can be found in the [JungfrauJoch documentation](https://jungfraujoch.readthedocs.io/en/latest/index.html).
### JungfrauJoch API Client
A thin wrapper around the JungfrauJoch API client is provided in [jungfrau_joch_client.py](./jungfrau_joch_client.py).
Details about the specific integration are provided in the code.
## Eiger debugging
For debugging the Eiger hardware, please contact the detector group for support.

View File

@@ -1,28 +1,17 @@
"""
Generic integration of JungfrauJoch backend with Eiger detectors
for the cSAXS beamline at the Swiss Light Source.
The WEB UI is available on http://sls-jfjoch-001:8080
Integration module for Eiger detectors at the cSAXS beamline with JungfrauJoch backend.
NOTE: this may not be the best place to store this information. It should be migrated to
beamline documentation for debugging of Eiger & JungfrauJoch.
A few notes on setup and operation of the Eiger detectors through the JungfrauJoch broker:
The JungfrauJoch server for cSAXS runs on sls-jfjoch-001.psi.ch
User with sufficient rights may use:
- sudo systemctl restart jfjoch_broker
- sudo systemctl status jfjoch_broker
to check and/or restart the broker for the JungfrauJoch server.
Some extra notes for setting up the detector:
- If the energy on JFJ is set via DetectorSettings, the variable in DatasetSettings will be ignored
- Changes in energy may take time, good to implement logic that only resets energy if needed.
- For the Eiger, the frame_time_us in DetectorSettings is ignored, only the frame_time_us in
the DatasetSettings is relevant
- The bit_depth will be adjusted automatically based on the exp_time. Here, we need to ensure
that subsequent triggers properly
consider the readout_time of the boards. For Jungfrau detectors, the difference between
count_time_us and frame_time_us is the readout_time of the boards. For the Eiger, this needs
to be taken into account during the integration.
that subsequent triggers properly consider the readout_time of the boards. For the Eiger detectors
at cSAXS, a readout time of 20us is configured through the JungfrauJoch deployment config. This
setting is sufficiently large for the detectors if they run in parallel mode.
- beam_center and detector settings are required input arguments; they may therefore be set to
wrong (placeholder) values just so that acquisitions can start. Please keep this in mind.
@@ -39,9 +28,6 @@ Hardware related notes:
- cd power_control_user/
- ./on
- ./off
Further information that may be relevant for debugging:
JungfrauJoch - one needs to connect to the jfj-server (sls-jfjoch-001)
"""
from __future__ import annotations
@@ -266,6 +252,7 @@ class Eiger(PSIDeviceBase):
# JFJ adds _master.h5 automatically
path = os.path.relpath(self._full_path, start="/sls/x12sa/data").removesuffix("_master.h5")
# path = os.path.relpath(self._full_path, start="/sls/x12sa/data")
data_settings = DatasetSettings(
image_time_us=int(frame_time_us * 1e6), # This is currently ignored
@@ -311,11 +298,20 @@ class Eiger(PSIDeviceBase):
def wait_for_complete():
start_time = time.time()
timeout = 20
for _ in range(timeout):
while time.time() - start_time < timeout:
if self.jfj_client.wait_for_idle(
timeout=1, request_timeout=10, raise_on_timeout=False
timeout=1, request_timeout=1, raise_on_timeout=False
):
logger.info(f"Device {self.name} completed acquisition.")
# TODO: add a check that the data acquisition finished successfully
statistics: MeasurementStatistics = (
self.jfj_client.api.statistics_data_collection_get(_request_timeout=5)
)
broker_status = self.jfj_client.jjf_state
logger.info(
f"Device {self.name} completed acquisition. \n \n"
f"Broker status: \n{yaml.dump(broker_status.to_dict(), indent=4)} \n \n"
f"statistics: \n{yaml.dump(statistics.to_dict(), indent=4)}"
)
return
logger.info(
f"Device {self.name} running loop to wait for complete, time elapsed: {time.time() - start_time}."
@@ -323,8 +319,11 @@ class Eiger(PSIDeviceBase):
statistics: MeasurementStatistics = self.jfj_client.api.statistics_data_collection_get(
_request_timeout=5
)
broker_status = self.jfj_client.jjf_state
raise TimeoutError(
f"Timeout after waiting for device {self.name} to complete for {time.time()-start_time:.2f}s, measurement statistics: {yaml.dump(statistics.to_dict(), indent=4)}"
f"Timeout after waiting for device {self.name} to complete for {time.time()-start_time:.2f}s \n \n"
f"Broker status: \n{yaml.dump(broker_status.to_dict(), indent=4)} \n \n"
f"Measurement statistics: \n{yaml.dump(statistics.to_dict(), indent=4)}"
)
status = self.task_handler.submit_task(wait_for_complete, run=True)
@@ -337,7 +336,11 @@ class Eiger(PSIDeviceBase):
def on_stop(self) -> None:
"""Called when the device is stopped."""
self.jfj_client.stop(
request_timeout=0.5
) # Call should not block more than 0.5 seconds to stop all devices...
self.jfj_client.stop(request_timeout=0.5)
self.task_handler.shutdown()
def on_destroy(self):
"""Called when the device is destroyed."""
self.jfj_preview_client.stop()
self.on_stop()
return super().on_destroy()

View File

@@ -1,4 +1,4 @@
"""Module with client interface for the Jungfrau Joch detector API"""
"""Module with a thin client wrapper around the Jungfrau Joch detector API"""
from __future__ import annotations
@@ -29,8 +29,14 @@ class JungfrauJochClientError(Exception):
"""Base class for exceptions in this module."""
"Inactive", "Idle", "Busy", "Measuring", "Pedestal", "Error"
class DetectorState(str, enum.Enum):
"""Possible Detector states for Jungfrau Joch detector"""
"""
Enum states of the BrokerStatus state. The pydantic model validates at runtime,
so we keep the possible states here for a convenient overview and access.
"""
INACTIVE = "Inactive"
IDLE = "Idle"
@@ -41,13 +47,13 @@ class DetectorState(str, enum.Enum):
class JungfrauJochClient:
"""Thin wrapper around the Jungfrau Joch API client.
"""
Jungfrau Joch API client wrapper. It provides a few thin wrappers around the API client
that allow connecting, initialising, waiting for state changes, setting settings, and starting and stopping acquisitions.
sudo systemctl restart jfjoch_broker
sudo systemctl status jfjoch_broker
It looks as if the detector is not being stopped properly.
One module remains running, how can we restart the detector?
Args:
host (str): Hostname of the Jungfrau Joch broker service. Default is "http://sls-jfjoch-001:8080"
parent (Device, optional): Parent ophyd device, used for logging purposes.
"""
def __init__(
@@ -61,33 +67,35 @@ class JungfrauJochClient:
@property
def jjf_state(self) -> BrokerStatus:
"""Get the status of JungfrauJoch"""
"""Broker status of JungfrauJoch."""
response = self.api.status_get()
return BrokerStatus(**response.to_dict())
@property
def initialised(self) -> bool:
"""Check if jfj is connected and ready to receive commands"""
return self._initialised
@initialised.setter
def initialised(self, value: bool) -> None:
"""Set the connected status"""
self._initialised = value
# TODO this is not correct, as it may be that the state is INACTIVE. Models are not in sync...
# REMOVE all model enums, as most of the validation takes place in the Pydantic models, i.e. BrokerStatus here.
@property
def detector_state(self) -> DetectorState:
"""Get the status of JungfrauJoch"""
"""Detector state of JungfrauJoch."""
return DetectorState(self.jjf_state.state)
def connect_and_initialise(self, timeout: int = 10, **kwargs) -> None:
"""Check if JungfrauJoch is connected and ready to receive commands"""
"""
Connect and initialise the JungfrauJoch detector. The detector must be in
IDLE state to become initialised. This is a blocking call.
Args:
timeout (int): Timeout in seconds for the initialisation and waiting for IDLE state.
"""
status = self.detector_state
if status != DetectorState.IDLE:
self.api.initialize_post() # This is a blocking call....
self.wait_for_idle(timeout, request_timeout=timeout) # Blocking call
self.api.initialize_post()
self.wait_for_idle(timeout, request_timeout=timeout)
self.initialised = True
def set_detector_settings(self, settings: dict | DetectorSettings, timeout: int = 10) -> None:
@@ -114,9 +122,12 @@ class JungfrauJochClient:
raise TimeoutError(f"Timeout while setting detector settings for {self._parent_name}")
except Exception:
content = traceback.format_exc()
raise JungfrauJochClientError(
logger.error(
f"Error while setting detector settings for {self._parent_name}: {content}"
)
raise JungfrauJochClientError(
f"Error while setting detector settings for parent device {self._parent_name}."
)
def start(self, settings: dict | DatasetSettings, request_timeout: float = 10) -> None:
"""Start the measurement. DatasetSettings must be provided, and JungfrauJoch must be in IDLE state.
@@ -142,14 +153,21 @@ class JungfrauJochClient:
dataset_settings=settings, _request_timeout=request_timeout
)
except requests.exceptions.Timeout:
content = traceback.format_exc()
logger.error(
f"TimeoutError in JungfrauJochClient for parent device {self._parent_name} during 'start' call: {content}"
)
raise TimeoutError(
f"TimeoutError in JungfrauJochClient for parent device {self._parent_name} for 'start' call"
)
except Exception:
content = traceback.format_exc()
raise JungfrauJochClientError(
logger.error(
f"Error in JungfrauJochClient for parent device {self._parent_name} during 'start' call: {content}"
)
raise JungfrauJochClientError(
f"Error in JungfrauJochClient for parent device {self._parent_name} during 'start' post."
)
def stop(self, request_timeout: float = 0.5) -> None:
"""Stop the acquisition, this only logs errors and is not raising."""
@@ -188,6 +206,10 @@ class JungfrauJochClient:
try:
self.api.wait_till_done_post(timeout=timeout, _request_timeout=request_timeout)
except requests.exceptions.Timeout:
content = traceback.format_exc()
logger.debug(
f"HTTP request timeout in wait_for_idle for {self._parent_name}: {content}"
)
if raise_on_timeout:
raise TimeoutError(
f"HTTP request timeout in wait_for_idle for {self._parent_name}."

View File

@@ -92,7 +92,7 @@ tag_decoders = {
}
def tag_hook(tag: int):
def tag_hook(decoder, tag: int):
"""Decode Dectris-specific CBOR tags. Tags without an entry in tag_decoders are returned unchanged."""
tag_decoder = tag_decoders.get(tag.tag)
return tag_decoder(tag) if tag_decoder else tag
@@ -167,7 +167,7 @@ class JungfrauJochPreview:
"""Stop the ZMQ update loop and wait for the thread to finish."""
self._shutdown_event.set()
if self._zmq_thread:
self._zmq_thread.join()
self._zmq_thread.join(timeout=1.0)
def _zmq_update_loop(self):
"""Zmq update loop with polling for new data. The loop runs at maximum 10 Hz."""
@@ -180,6 +180,9 @@ class JungfrauJochPreview:
# Happens when ZMQ partially delivers the multipart message
pass
except zmq.error.Again:
logger.debug(
f"ZMQ Again exception, receive queue is empty for JFJ preview at {self.url}."
)
# Happens when receive queue is empty
time.sleep(0.1) # NOTE: Change sleep time to control polling rate