fix: improve integration with feedback from the beamline
This commit is contained in:
38
csaxs_bec/devices/jungfraujoch/README.MD
Normal file
38
csaxs_bec/devices/jungfraujoch/README.MD
Normal file
@@ -0,0 +1,38 @@
|
||||
# Overview
|
||||
Integration module for Eiger detectors at the cSAXS beamline with JungfrauJoch backend.
|
||||
There are currently two supported Eiger detectors:
|
||||
- EIGER 1.5M
|
||||
- EIGER 9M
|
||||
|
||||
This module provides a base integration for both detectors. A short list of useful
|
||||
information is also provided below.
|
||||
|
||||
## JungfrauJoch Service
|
||||
The JungfrauJoch WEB UI is available on http://sls-jfjoch-001:8080. This is an interface
|
||||
to the broker which runs on sls-jfjoch-001.psi.ch. The writer service runs on
|
||||
xbl-daq-34.psi.ch. Permissions to get access to these machines and run systemctl or
|
||||
journalctl commands can be requested with the Infrastructure and Services group in AWI.
|
||||
Beamline scientists need to check if they have the necessary permissions to connect
|
||||
to these machines and run the commands below.
|
||||
|
||||
Useful commands for the broker service on sls-jfjoch-001.psi.ch:
|
||||
- sudo systemctl status jfjoch_broker # Check status
|
||||
- sudo systemctl start jfjoch_broker # Start service
|
||||
- sudo systemctl stop jfjoch_broker # Stop service
|
||||
- sudo systemctl restart jfjoch_broker # Restart service
|
||||
|
||||
For the writer service on xbl-daq-34.psi.ch:
|
||||
- sudo journalctl -u jfjoch_writer -f # streams live logs
|
||||
- sudo systemctl status jfjoch_writer # Check status
|
||||
- sudo systemctl start jfjoch_writer # Start service
|
||||
- sudo systemctl stop jfjoch_writer # Stop service
|
||||
- sudo systemctl restart jfjoch_writer # Restart service
|
||||
|
||||
More information about the JungfrauJoch and API client can be found at: (https://jungfraujoch.readthedocs.io/en/latest/index.html)
|
||||
|
||||
### JungfrauJoch API Client
|
||||
A thin wrapper for the JungfrauJoch API client is provided in the [jungfrau_joch_client](./jungfrau_joch_client.py).
|
||||
Details about the specific integration are provided in the code.
|
||||
|
||||
## Eiger debugging
|
||||
For debugging the Eiger hardware, please contact the detector group for support.
|
||||
@@ -1,28 +1,17 @@
|
||||
"""
|
||||
Generic integration of JungfrauJoch backend with Eiger detectors
|
||||
for the cSAXS beamline at the Swiss Light Source.
|
||||
|
||||
The WEB UI is available on http://sls-jfjoch-001:8080
|
||||
Integration module for Eiger detectors at the cSAXS beamline with JungfrauJoch backend.
|
||||
|
||||
NOTE: this may not be the best place to store this information. It should be migrated to
|
||||
beamline documentation for debugging of Eiger & JungfrauJoch.
|
||||
A few notes on setup and operation of the Eiger detectors through the JungfrauJoch broker:
|
||||
|
||||
The JungfrauJoch server for cSAXS runs on sls-jfjoch-001.psi.ch
|
||||
User with sufficient rights may use:
|
||||
- sudo systemctl restart jfjoch_broker
|
||||
- sudo systemctl status jfjoch_broker
|
||||
to check and/or restart the broker for the JungfrauJoch server.
|
||||
|
||||
Some extra notes for setting up the detector:
|
||||
- If the energy on JFJ is set via DetectorSettings, the variable in DatasetSettings will be ignored
|
||||
- Changes in energy may take time, good to implement logic that only resets energy if needed.
|
||||
- For the Eiger, the frame_time_us in DetectorSettings is ignored, only the frame_time_us in
|
||||
the DatasetSettings is relevant
|
||||
- The bit_depth will be adjusted automatically based on the exp_time. Here, we need to ensure
|
||||
that subsequent triggers properly
|
||||
consider the readout_time of the boards. For Jungfrau detectors, the difference between
|
||||
count_time_us and frame_time_us is the readout_time of the boards. For the Eiger, this needs
|
||||
to be taken into account during the integration.
|
||||
that subsequent triggers properly consider the readout_time of the boards. For the Eiger detectors
|
||||
at cSAXS, a readout time of 20us is configured through the JungfrauJoch deployment config. This
|
||||
setting is sufficiently large for the detectors if they run in parallel mode.
|
||||
- beam_center and detector settings are required input arguments, thus, they may be set to wrong
|
||||
values for acquisitions to start. Please keep this in mind.
|
||||
|
||||
@@ -39,9 +28,6 @@ Hardware related notes:
|
||||
- cd power_control_user/
|
||||
- ./on
|
||||
- ./off
|
||||
|
||||
Further information that may be relevant for debugging:
|
||||
JungfrauJoch - one needs to connect to the jfj-server (sls-jfjoch-001)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -266,6 +252,7 @@ class Eiger(PSIDeviceBase):
|
||||
|
||||
# JFJ adds _master.h5 automatically
|
||||
path = os.path.relpath(self._full_path, start="/sls/x12sa/data").removesuffix("_master.h5")
|
||||
# path = os.path.relpath(self._full_path, start="/sls/x12sa/data")
|
||||
|
||||
data_settings = DatasetSettings(
|
||||
image_time_us=int(frame_time_us * 1e6), # This is currently ignored
|
||||
@@ -311,11 +298,20 @@ class Eiger(PSIDeviceBase):
|
||||
def wait_for_complete():
|
||||
start_time = time.time()
|
||||
timeout = 20
|
||||
for _ in range(timeout):
|
||||
while time.time() - start_time < timeout:
|
||||
if self.jfj_client.wait_for_idle(
|
||||
timeout=1, request_timeout=10, raise_on_timeout=False
|
||||
timeout=1, request_timeout=1, raise_on_timeout=False
|
||||
):
|
||||
logger.info(f"Device {self.name} completed acquisition.")
|
||||
# TODO add check if data acquisition finished in success
|
||||
statistics: MeasurementStatistics = (
|
||||
self.jfj_client.api.statistics_data_collection_get(_request_timeout=5)
|
||||
)
|
||||
broker_status = self.jfj_client.jjf_state
|
||||
logger.info(
|
||||
f"Device {self.name} completed acquisition. \n \n"
|
||||
f"Broker status: \n{yaml.dump(broker_status.to_dict(), indent=4)} \n \n"
|
||||
f"statistics: \n{yaml.dump(statistics.to_dict(), indent=4)}"
|
||||
)
|
||||
return
|
||||
logger.info(
|
||||
f"Device {self.name} running loop to wait for complete, time elapsed: {time.time() - start_time}."
|
||||
@@ -323,8 +319,11 @@ class Eiger(PSIDeviceBase):
|
||||
statistics: MeasurementStatistics = self.jfj_client.api.statistics_data_collection_get(
|
||||
_request_timeout=5
|
||||
)
|
||||
broker_status = self.jfj_client.jjf_state
|
||||
raise TimeoutError(
|
||||
f"Timeout after waiting for device {self.name} to complete for {time.time()-start_time:.2f}s, measurement statistics: {yaml.dump(statistics.to_dict(), indent=4)}"
|
||||
f"Timeout after waiting for device {self.name} to complete for {time.time()-start_time:.2f}s \n \n"
|
||||
f"Broker status: \n{yaml.dump(broker_status.to_dict(), indent=4)} \n \n"
|
||||
f"Measurement statistics: \n{yaml.dump(statistics.to_dict(), indent=4)}"
|
||||
)
|
||||
|
||||
status = self.task_handler.submit_task(wait_for_complete, run=True)
|
||||
@@ -337,7 +336,11 @@ class Eiger(PSIDeviceBase):
|
||||
|
||||
def on_stop(self) -> None:
|
||||
"""Called when the device is stopped."""
|
||||
self.jfj_client.stop(
|
||||
request_timeout=0.5
|
||||
) # Call should not block more than 0.5 seconds to stop all devices...
|
||||
self.jfj_client.stop(request_timeout=0.5)
|
||||
self.task_handler.shutdown()
|
||||
|
||||
def on_destroy(self):
|
||||
"""Called when the device is destroyed."""
|
||||
self.jfj_preview_client.stop()
|
||||
self.on_stop()
|
||||
return super().on_destroy()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Module with client interface for the Jungfrau Joch detector API"""
|
||||
"""Module with a thin client wrapper around the Jungfrau Joch detector API"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -29,8 +29,14 @@ class JungfrauJochClientError(Exception):
|
||||
"""Base class for exceptions in this module."""
|
||||
|
||||
|
||||
"Inactive", "Idle", "Busy", "Measuring", "Pedestal", "Error"
|
||||
|
||||
|
||||
class DetectorState(str, enum.Enum):
|
||||
"""Possible Detector states for Jungfrau Joch detector"""
|
||||
"""
|
||||
Enum states of the BrokerStatus state. The pydantic model validates in runtime,
|
||||
thus we keep the possible states here for a convenient overview and access.
|
||||
"""
|
||||
|
||||
INACTIVE = "Inactive"
|
||||
IDLE = "Idle"
|
||||
@@ -41,13 +47,13 @@ class DetectorState(str, enum.Enum):
|
||||
|
||||
|
||||
class JungfrauJochClient:
|
||||
"""Thin wrapper around the Jungfrau Joch API client.
|
||||
"""
|
||||
Jungfrau Joch API client wrapper. It provides a few thin wrappers around the API client,
|
||||
that allow to connect, initialise, wait for state changes, set settings, start and stop acquisitions.
|
||||
|
||||
sudo systemctl restart jfjoch_broker
|
||||
sudo systemctl status jfjoch_broker
|
||||
|
||||
It looks as if the detector is not being stopped properly.
|
||||
One module remains running, how can we restart the detector?
|
||||
Args:
|
||||
host (str): Hostname of the Jungfrau Joch broker service. Default is "http://sls-jfjoch-001:8080"
|
||||
parent (Device, optional): Parent ophyd device, used for logging purposes.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -61,33 +67,35 @@ class JungfrauJochClient:
|
||||
|
||||
@property
|
||||
def jjf_state(self) -> BrokerStatus:
|
||||
"""Get the status of JungfrauJoch"""
|
||||
"""Broker status of JungfrauJoch."""
|
||||
response = self.api.status_get()
|
||||
return BrokerStatus(**response.to_dict())
|
||||
|
||||
@property
|
||||
def initialised(self) -> bool:
|
||||
"""Check if jfj is connected and ready to receive commands"""
|
||||
return self._initialised
|
||||
|
||||
@initialised.setter
|
||||
def initialised(self, value: bool) -> None:
|
||||
"""Set the connected status"""
|
||||
self._initialised = value
|
||||
|
||||
# TODO this is not correct, as it may be that the state in INACTIVE. Models are not in sync...
|
||||
# REMOVE all model enums as most of the validation takes place in the Pydantic models, i.e. BrokerStatus here..
|
||||
@property
|
||||
def detector_state(self) -> DetectorState:
|
||||
"""Get the status of JungfrauJoch"""
|
||||
"""Detector state of JungfrauJoch."""
|
||||
return DetectorState(self.jjf_state.state)
|
||||
|
||||
def connect_and_initialise(self, timeout: int = 10, **kwargs) -> None:
|
||||
"""Check if JungfrauJoch is connected and ready to receive commands"""
|
||||
"""
|
||||
Connect and initialise the JungfrauJoch detector. The detector must be in
|
||||
IDLE state to become initialised. This is a blocking call.
|
||||
|
||||
Args:
|
||||
timeout (int): Timeout in seconds for the initialisation and waiting for IDLE state.
|
||||
"""
|
||||
status = self.detector_state
|
||||
if status != DetectorState.IDLE:
|
||||
self.api.initialize_post() # This is a blocking call....
|
||||
self.wait_for_idle(timeout, request_timeout=timeout) # Blocking call
|
||||
self.api.initialize_post()
|
||||
self.wait_for_idle(timeout, request_timeout=timeout)
|
||||
self.initialised = True
|
||||
|
||||
def set_detector_settings(self, settings: dict | DetectorSettings, timeout: int = 10) -> None:
|
||||
@@ -114,9 +122,12 @@ class JungfrauJochClient:
|
||||
raise TimeoutError(f"Timeout while setting detector settings for {self._parent_name}")
|
||||
except Exception:
|
||||
content = traceback.format_exc()
|
||||
raise JungfrauJochClientError(
|
||||
logger.error(
|
||||
f"Error while setting detector settings for {self._parent_name}: {content}"
|
||||
)
|
||||
raise JungfrauJochClientError(
|
||||
f"Error while setting detector settings for parent device {self._parent_name}."
|
||||
)
|
||||
|
||||
def start(self, settings: dict | DatasetSettings, request_timeout: float = 10) -> None:
|
||||
"""Start the mesaurement. DatasetSettings must be provided, and JungfrauJoch must be in IDLE state.
|
||||
@@ -142,14 +153,21 @@ class JungfrauJochClient:
|
||||
dataset_settings=settings, _request_timeout=request_timeout
|
||||
)
|
||||
except requests.exceptions.Timeout:
|
||||
content = traceback.format_exc()
|
||||
logger.error(
|
||||
f"TimeoutError in JungfrauJochClient for parent device {self._parent_name} during 'start' call: {content}"
|
||||
)
|
||||
raise TimeoutError(
|
||||
f"TimeoutError in JungfrauJochClient for parent device {self._parent_name} for 'start' call"
|
||||
)
|
||||
except Exception:
|
||||
content = traceback.format_exc()
|
||||
raise JungfrauJochClientError(
|
||||
logger.error(
|
||||
f"Error in JungfrauJochClient for parent device {self._parent_name} during 'start' call: {content}"
|
||||
)
|
||||
raise JungfrauJochClientError(
|
||||
f"Error in JungfrauJochClient for parent device {self._parent_name} during 'start' post."
|
||||
)
|
||||
|
||||
def stop(self, request_timeout: float = 0.5) -> None:
|
||||
"""Stop the acquisition, this only logs errors and is not raising."""
|
||||
@@ -188,6 +206,10 @@ class JungfrauJochClient:
|
||||
try:
|
||||
self.api.wait_till_done_post(timeout=timeout, _request_timeout=request_timeout)
|
||||
except requests.exceptions.Timeout:
|
||||
content = traceback.format_exc()
|
||||
logger.debug(
|
||||
f"HTTP request timeout in wait_for_idle for {self._parent_name}: {content}"
|
||||
)
|
||||
if raise_on_timeout:
|
||||
raise TimeoutError(
|
||||
f"HTTP request timeout in wait_for_idle for {self._parent_name}."
|
||||
|
||||
@@ -92,7 +92,7 @@ tag_decoders = {
|
||||
}
|
||||
|
||||
|
||||
def tag_hook(tag: int):
|
||||
def tag_hook(decoder, tag: int):
|
||||
"""Get the decoder for Dectris specific CBOR tags. tag must be in tag_decoders."""
|
||||
tag_decoder = tag_decoders.get(tag.tag)
|
||||
return tag_decoder(tag) if tag_decoder else tag
|
||||
@@ -167,7 +167,7 @@ class JungfrauJochPreview:
|
||||
"""Stop the ZMQ update loop and wait for the thread to finish."""
|
||||
self._shutdown_event.set()
|
||||
if self._zmq_thread:
|
||||
self._zmq_thread.join()
|
||||
self._zmq_thread.join(timeout=1.0)
|
||||
|
||||
def _zmq_update_loop(self):
|
||||
"""Zmq update loop with polling for new data. The loop runs at maximum 10 Hz."""
|
||||
@@ -180,6 +180,9 @@ class JungfrauJochPreview:
|
||||
# Happens when ZMQ partially delivers the multipart message
|
||||
pass
|
||||
except zmq.error.Again:
|
||||
logger.debug(
|
||||
f"ZMQ Again exception, receive queue is empty for JFJ preview at {self.url}."
|
||||
)
|
||||
# Happens when receive queue is empty
|
||||
time.sleep(0.1) # NOTE: Change sleep time to control polling rate
|
||||
|
||||
|
||||
Reference in New Issue
Block a user