From b19bfb7ca412bdbb4bc4295e3ff68461fcfa907a Mon Sep 17 00:00:00 2001 From: appel_c Date: Tue, 27 Jan 2026 09:59:40 +0100 Subject: [PATCH] fix: improve integration with feedback from the beamline --- csaxs_bec/devices/jungfraujoch/README.MD | 38 ++++++++++++ csaxs_bec/devices/jungfraujoch/eiger.py | 55 +++++++++-------- .../jungfraujoch/jungfrau_joch_client.py | 60 +++++++++++++------ .../jungfraujoch/jungfraujoch_preview.py | 7 ++- 4 files changed, 113 insertions(+), 47 deletions(-) create mode 100644 csaxs_bec/devices/jungfraujoch/README.MD diff --git a/csaxs_bec/devices/jungfraujoch/README.MD b/csaxs_bec/devices/jungfraujoch/README.MD new file mode 100644 index 0000000..e03961c --- /dev/null +++ b/csaxs_bec/devices/jungfraujoch/README.MD @@ -0,0 +1,38 @@ +# Overview +Integration module for Eiger detectors at the cSAXS beamline with JungfrauJoch backend. +There are currently two supported Eiger detectors: +- EIGER 1.5M +- EIGER 9M + +This module provides a base integration for both detectors. A short list of useful +information is also provided below. + +## JungfrauJoch Service +The JungfrauJoch WEB UI is available at http://sls-jfjoch-001:8080. This is an interface +to the broker which runs on sls-jfjoch-001.psi.ch. The writer service runs on +xbl-daq-34.psi.ch. Permissions to get access to these machines and run systemctl or +journalctl commands can be requested from the Infrastructure and Services group in AWI. +Beamline scientists need to check if they have the necessary permissions to connect +to these machines and run the commands below. 
+ +Useful commands for the broker service on sls-jfjoch-001.psi.ch: +- sudo systemctl status jfjoch_broker # Check status +- sudo systemctl start jfjoch_broker # Start service +- sudo systemctl stop jfjoch_broker # Stop service +- sudo systemctl restart jfjoch_broker # Restart service + +For the writer service on xbl-daq-34.psi.ch: +- sudo journalctl -u jfjoch_writer -f # Stream live logs +- sudo systemctl status jfjoch_writer # Check status +- sudo systemctl start jfjoch_writer # Start service +- sudo systemctl stop jfjoch_writer # Stop service +- sudo systemctl restart jfjoch_writer # Restart service + +More information about JungfrauJoch and its API client can be found at: <https://jungfraujoch.readthedocs.io/en/latest/index.html> + +### JungfrauJoch API Client +A thin wrapper for the JungfrauJoch API client is provided in the [jungfrau_joch_client](./jungfrau_joch_client.py). +Details about the specific integration are provided in the code. + +## Eiger debugging +For debugging the Eiger hardware, please contact the detector group for support. \ No newline at end of file diff --git a/csaxs_bec/devices/jungfraujoch/eiger.py b/csaxs_bec/devices/jungfraujoch/eiger.py index 15294fa..f8273de 100644 --- a/csaxs_bec/devices/jungfraujoch/eiger.py +++ b/csaxs_bec/devices/jungfraujoch/eiger.py @@ -1,28 +1,17 @@ """ -Generic integration of JungfrauJoch backend with Eiger detectors -for the cSAXS beamline at the Swiss Light Source. -The WEB UI is available on http://sls-jfjoch-001:8080 +Integration module for Eiger detectors at the cSAXS beamline with JungfrauJoch backend. -NOTE: this may not be the best place to store this information. It should be migrated to -beamline documentation for debugging of Eiger & JungfrauJoch. 
+A few notes on setup and operation of the Eiger detectors through the JungfrauJoch broker: -The JungfrauJoch server for cSAXS runs on sls-jfjoch-001.psi.ch -User with sufficient rights may use: -- sudo systemctl restart jfjoch_broker -- sudo systemctl status jfjoch_broker -to check and/or restart the broker for the JungfrauJoch server. - -Some extra notes for setting up the detector: - If the energy on JFJ is set via DetectorSettings, the variable in DatasetSettings will be ignored - Changes in energy may take time, good to implement logic that only resets energy if needed. - For the Eiger, the frame_time_us in DetectorSettings is ignored, only the frame_time_us in the DatasetSettings is relevant - The bit_depth will be adjusted automatically based on the exp_time. Here, we need to ensure - that subsequent triggers properly - consider the readout_time of the boards. For Jungfrau detectors, the difference between - count_time_us and frame_time_us is the readout_time of the boards. For the Eiger, this needs - to be taken into account during the integration. + that subsequent triggers properly consider the readout_time of the boards. For the Eiger detectors + at cSAXS, a readout time of 20us is configured through the JungfrauJoch deployment config. This + setting is sufficiently large for the detectors if they run in parallel mode. - beam_center and detector settings are required input arguments, thus, they may be set to wrong values for acquisitions to start. Please keep this in mind. 
@@ -39,9 +28,6 @@ Hardware related notes: - cd power_control_user/ - ./on - ./off - -Further information that may be relevant for debugging: -JungfrauJoch - one needs to connect to the jfj-server (sls-jfjoch-001) """ from __future__ import annotations @@ -266,6 +252,7 @@ class Eiger(PSIDeviceBase): # JFJ adds _master.h5 automatically path = os.path.relpath(self._full_path, start="/sls/x12sa/data").removesuffix("_master.h5") + # path = os.path.relpath(self._full_path, start="/sls/x12sa/data") data_settings = DatasetSettings( image_time_us=int(frame_time_us * 1e6), # This is currently ignored @@ -311,11 +298,20 @@ class Eiger(PSIDeviceBase): def wait_for_complete(): start_time = time.time() timeout = 20 - for _ in range(timeout): + while time.time() - start_time < timeout: if self.jfj_client.wait_for_idle( - timeout=1, request_timeout=10, raise_on_timeout=False + timeout=1, request_timeout=1, raise_on_timeout=False ): - logger.info(f"Device {self.name} completed acquisition.") + # TODO add check if data acquisition finished in success + statistics: MeasurementStatistics = ( + self.jfj_client.api.statistics_data_collection_get(_request_timeout=5) + ) + broker_status = self.jfj_client.jjf_state + logger.info( + f"Device {self.name} completed acquisition. \n \n" + f"Broker status: \n{yaml.dump(broker_status.to_dict(), indent=4)} \n \n" + f"statistics: \n{yaml.dump(statistics.to_dict(), indent=4)}" + ) return logger.info( f"Device {self.name} running loop to wait for complete, time elapsed: {time.time() - start_time}." 
@@ -323,8 +319,11 @@ class Eiger(PSIDeviceBase): statistics: MeasurementStatistics = self.jfj_client.api.statistics_data_collection_get( _request_timeout=5 ) + broker_status = self.jfj_client.jjf_state raise TimeoutError( - f"Timeout after waiting for device {self.name} to complete for {time.time()-start_time:.2f}s, measurement statistics: {yaml.dump(statistics.to_dict(), indent=4)}" + f"Timeout after waiting for device {self.name} to complete for {time.time()-start_time:.2f}s \n \n" + f"Broker status: \n{yaml.dump(broker_status.to_dict(), indent=4)} \n \n" + f"Measurement statistics: \n{yaml.dump(statistics.to_dict(), indent=4)}" ) status = self.task_handler.submit_task(wait_for_complete, run=True) @@ -337,7 +336,11 @@ class Eiger(PSIDeviceBase): def on_stop(self) -> None: """Called when the device is stopped.""" - self.jfj_client.stop( - request_timeout=0.5 - ) # Call should not block more than 0.5 seconds to stop all devices... + self.jfj_client.stop(request_timeout=0.5) self.task_handler.shutdown() + + def on_destroy(self): + """Called when the device is destroyed.""" + self.jfj_preview_client.stop() + self.on_stop() + return super().on_destroy() diff --git a/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py b/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py index bddc4eb..da15049 100644 --- a/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py +++ b/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py @@ -1,4 +1,4 @@ -"""Module with client interface for the Jungfrau Joch detector API""" +"""Module with a thin client wrapper around the Jungfrau Joch detector API""" from __future__ import annotations @@ -29,8 +29,14 @@ class JungfrauJochClientError(Exception): """Base class for exceptions in this module.""" +"Inactive", "Idle", "Busy", "Measuring", "Pedestal", "Error" + + class DetectorState(str, enum.Enum): - """Possible Detector states for Jungfrau Joch detector""" + """ + Enum states of the BrokerStatus state. 
The pydantic model validates at runtime, + thus we keep the possible states here for a convenient overview and access. + """ INACTIVE = "Inactive" IDLE = "Idle" @@ -41,13 +47,13 @@ class DetectorState(str, enum.Enum): class JungfrauJochClient: - """Thin wrapper around the Jungfrau Joch API client. + """ + Jungfrau Joch API client wrapper. It provides a few thin wrappers around the API client, + that allow connecting, initialising, waiting for state changes, setting settings, and starting and stopping acquisitions. - sudo systemctl restart jfjoch_broker - sudo systemctl status jfjoch_broker - - It looks as if the detector is not being stopped properly. - One module remains running, how can we restart the detector? + Args: + host (str): Hostname of the Jungfrau Joch broker service. Default is "http://sls-jfjoch-001:8080" + parent (Device, optional): Parent ophyd device, used for logging purposes. """ def __init__( @@ -61,33 +67,35 @@ class JungfrauJochClient: @property def jjf_state(self) -> BrokerStatus: - """Get the status of JungfrauJoch""" + """Broker status of JungfrauJoch.""" response = self.api.status_get() return BrokerStatus(**response.to_dict()) @property def initialised(self) -> bool: - """Check if jfj is connected and ready to receive commands""" return self._initialised @initialised.setter def initialised(self, value: bool) -> None: - """Set the connected status""" self._initialised = value - # TODO this is not correct, as it may be that the state in INACTIVE. Models are not in sync... - # REMOVE all model enums as most of the validation takes place in the Pydantic models, i.e. BrokerStatus here.. @property def detector_state(self) -> DetectorState: - """Get the status of JungfrauJoch""" + """Detector state of JungfrauJoch.""" return DetectorState(self.jjf_state.state) def connect_and_initialise(self, timeout: int = 10, **kwargs) -> None: - """Check if JungfrauJoch is connected and ready to receive commands""" + """ + Connect and initialise the JungfrauJoch detector. 
The detector must be in + IDLE state to become initialised. This is a blocking call. + + Args: + timeout (int): Timeout in seconds for the initialisation and waiting for IDLE state. + """ status = self.detector_state if status != DetectorState.IDLE: - self.api.initialize_post() # This is a blocking call.... - self.wait_for_idle(timeout, request_timeout=timeout) # Blocking call + self.api.initialize_post() + self.wait_for_idle(timeout, request_timeout=timeout) self.initialised = True def set_detector_settings(self, settings: dict | DetectorSettings, timeout: int = 10) -> None: @@ -114,9 +122,12 @@ class JungfrauJochClient: raise TimeoutError(f"Timeout while setting detector settings for {self._parent_name}") except Exception: content = traceback.format_exc() - raise JungfrauJochClientError( + logger.error( f"Error while setting detector settings for {self._parent_name}: {content}" ) + raise JungfrauJochClientError( + f"Error while setting detector settings for parent device {self._parent_name}." + ) def start(self, settings: dict | DatasetSettings, request_timeout: float = 10) -> None: """Start the mesaurement. DatasetSettings must be provided, and JungfrauJoch must be in IDLE state. @@ -142,14 +153,21 @@ class JungfrauJochClient: dataset_settings=settings, _request_timeout=request_timeout ) except requests.exceptions.Timeout: + content = traceback.format_exc() + logger.error( + f"TimeoutError in JungfrauJochClient for parent device {self._parent_name} during 'start' call: {content}" + ) raise TimeoutError( f"TimeoutError in JungfrauJochClient for parent device {self._parent_name} for 'start' call" ) except Exception: content = traceback.format_exc() - raise JungfrauJochClientError( + logger.error( f"Error in JungfrauJochClient for parent device {self._parent_name} during 'start' call: {content}" ) + raise JungfrauJochClientError( + f"Error in JungfrauJochClient for parent device {self._parent_name} during 'start' post." 
+ ) def stop(self, request_timeout: float = 0.5) -> None: """Stop the acquisition, this only logs errors and is not raising.""" @@ -188,6 +206,10 @@ class JungfrauJochClient: try: self.api.wait_till_done_post(timeout=timeout, _request_timeout=request_timeout) except requests.exceptions.Timeout: + content = traceback.format_exc() + logger.debug( + f"HTTP request timeout in wait_for_idle for {self._parent_name}: {content}" + ) if raise_on_timeout: raise TimeoutError( f"HTTP request timeout in wait_for_idle for {self._parent_name}." diff --git a/csaxs_bec/devices/jungfraujoch/jungfraujoch_preview.py b/csaxs_bec/devices/jungfraujoch/jungfraujoch_preview.py index 3aeb6f4..7f60950 100644 --- a/csaxs_bec/devices/jungfraujoch/jungfraujoch_preview.py +++ b/csaxs_bec/devices/jungfraujoch/jungfraujoch_preview.py @@ -92,7 +92,7 @@ tag_decoders = { } -def tag_hook(tag: int): +def tag_hook(decoder, tag: int): """Get the decoder for Dectris specific CBOR tags. tag must be in tag_decoders.""" tag_decoder = tag_decoders.get(tag.tag) return tag_decoder(tag) if tag_decoder else tag @@ -167,7 +167,7 @@ class JungfrauJochPreview: """Stop the ZMQ update loop and wait for the thread to finish.""" self._shutdown_event.set() if self._zmq_thread: - self._zmq_thread.join() + self._zmq_thread.join(timeout=1.0) def _zmq_update_loop(self): """Zmq update loop with polling for new data. The loop runs at maximum 10 Hz.""" @@ -180,6 +180,9 @@ class JungfrauJochPreview: # Happens when ZMQ partially delivers the multipart message pass except zmq.error.Again: + logger.debug( + f"ZMQ Again exception, receive queue is empty for JFJ preview at {self.url}." + ) # Happens when receive queue is empty time.sleep(0.1) # NOTE: Change sleep time to control polling rate