refactor(jungfrau-joch-client): Improve wait_till_idle method

This commit is contained in:
2025-09-10 15:14:52 +02:00
parent 390ca9e500
commit 19a95d325b
3 changed files with 62 additions and 56 deletions

View File

@@ -209,19 +209,19 @@ class DDG1(PSIDeviceBase, DelayGeneratorCSAXS):
The 20ms sleep was added to ensure that the event status is not polled too frequently,
and to give the device time to process the previous command. This was found empirically
to be necessary to avoid missing events.
IMPORTANT: Do not remove the sleeps or try to optimize this logic. It appears to be a
fragile balance between polling frequency and device processing time. This also applies
in between start/stop of polling. Please also consider that there is a sleep in on_trigger,
which may likewise be necessary to keep the HW from becoming unavailable/unstable.
"""
self.state.proc_status.put(1, use_complete=True)
if (
self._poll_thread_run_event.wait(timeout=0.02)
and not self._poll_thread_kill_event.is_set()
):
time.sleep(0.02) # 20ms delay for processing, important for not missing events
if self._poll_thread_kill_event.is_set() or not self._poll_thread_run_event.is_set():
return
self.state.event_status.get(use_monitor=False)
if (
self._poll_thread_run_event.wait(timeout=0.02)
and not self._poll_thread_kill_event.is_set()
):
if self._poll_thread_kill_event.is_set() or not self._poll_thread_run_event.is_set():
return
time.sleep(0.02) # 20ms delay for processing, important for not missing events
def _start_polling(self) -> None:
"""Start the polling loop in the background thread."""
@@ -267,12 +267,12 @@ class DDG1(PSIDeviceBase, DelayGeneratorCSAXS):
If we don't then subsequent triggers may reach the DDG too early, and will be ignored. To
avoid this, we've added the option to specify a delay via add_delay, default here is 50ms.
"""
# Keep the sleep here for software trigger mode, as a 20ms delay between subsequent commands
# to the HW is necessary to avoid crashes and missing events.
time.sleep(0.02)
# Stop polling, poll once manually to ensure that the register is clean
self._stop_polling()
self._poll_thread_poll_loop_done.wait(timeout=1)
# IMPORTANT: Keep this sleep, as it is necessary to prevent the HW from becoming
# unresponsive. This was found empirically and appears to be required.
time.sleep(0.02)
# Prepare the MCS card for the next software trigger
mcs = self.device_manager.devices.get("mcs", None)

View File

@@ -245,13 +245,13 @@ class Eiger(PSIDeviceBase):
prep_time = start_time - time.time()
logger.info(f"Prepared information for eiger to start acquisition in {prep_time:.2f}s")
self.jfj_client.start(settings=data_settings)
start_call_returns = time.time() - start_time - prep_time
logger.info(f"Start Rest call from JFJ took {start_call_returns:.2f}s")
sleep_time = 0.5
time.sleep(sleep_time)
logger.info(
f"Eiger {self.name} staged and ready for acquisition; with additional sleep of {sleep_time:.2f}s"
)
# start_call_returns = time.time() - start_time - prep_time
# logger.info(f"Start Rest call from JFJ took {start_call_returns:.2f}s")
# sleep_time = 0.5
# time.sleep(sleep_time)
# logger.info(
# f"Eiger {self.name} staged and ready for acquisition; with additional sleep of {sleep_time:.2f}s"
# )
def on_unstage(self) -> DeviceStatus:
"""Called while unstaging the device."""
@@ -277,22 +277,7 @@ class Eiger(PSIDeviceBase):
def wait_for_complete():
timeout = 10
for _ in range(timeout):
try:
self.jfj_client.wait_till_done(timeout=1, _request_timeout=5)
except (
JungfrauJochClientError
): # Means that timeout was triggered, and not _request_timeout
continue
except TimeoutError: # Timeout exception from wait_till_done
content = traceback.format_exc()
raise TimeoutError(f"Timeout for request during complete call: {content}")
except Exception as e: # This should actually never occur..
raise ValueError(f"Error in complete for {self.name}, exception: {e}") from e
else:
# How can I check if the acquisition was successful?
# If not successful, we have to raise an error here!
# For instance if packages were lost..
if self.jfj_client.wait_for_idle(timeout=1, request_timeout=10):
break
status = self.task_handler.submit_task(wait_for_complete, run=True)

View File

@@ -85,7 +85,7 @@ class JungfrauJochClient:
status = self.detector_state
if status != DetectorState.IDLE:
self.api.initialize_post() # This is a blocking call....
self.wait_till_done(timeout, **kwargs) # Blocking call
self.wait_for_idle(timeout, request_timeout=timeout) # Blocking call
self.initialised = True
def set_detector_settings(self, settings: dict | DetectorSettings, timeout: int = 10) -> None:
@@ -164,32 +164,53 @@ class JungfrauJochClient:
f"Error in JungFrauJochClient for device {self._parent_name} during stop: {content}"
)
def wait_till_done(self, timeout: int = 10, **kwargs) -> None:
def wait_for_idle(self, timeout: int = 10, request_timeout: float | None = None) -> bool:
"""Wait for JungfrauJoch to be in Idle state. Blocking call with timeout.
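Args:
timeout (int): timeout in seconds
request_timeout (float | None): timeout in seconds for the HTTP request; falls back to timeout if None
Returns:
bool: True if the detector is in IDLE state, False if it did not become IDLE within the timeout
Raises:
TimeoutError: if the HTTP request itself times out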
"""
if request_timeout is None:
request_timeout = timeout
try:
self.api.wait_till_done_post_with_http_info(math.ceil(timeout=timeout / 2), **kwargs)
self.api.wait_till_done_post(timeout=timeout, _request_timeout=request_timeout)
except requests.exceptions.Timeout:
raise TimeoutError(
f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call"
)
raise TimeoutError(f"HTTP request timeout in wait_for_idle for {self._parent_name}")
except Exception:
logger.info(
f"Waiting for device {self._parent_name}, jungfrau joch to become IDLE, retry after {timeout/2} seconds"
)
try:
self.api.wait_till_done_post_with_http_info(
timeout=math.floor(timeout / 2), **kwargs
)
except requests.exceptions.Timeout:
raise TimeoutError(
f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call"
)
except Exception:
content = traceback.format_exc()
raise JungfrauJochClientError(
f"JungfrauJoch Error in wait_till_done post for device {self._parent_name}: {content}"
)
content = traceback.format_exc()
logger.debug(f"Waiting for device {self._parent_name} to become IDLE: {content}")
return False
return True
# # TODO improve this method for error handling and reporting...
# def wait_till_done(self, timeout: int = 10, **kwargs) -> None:
# """Wait for JungfrauJoch to be in Idle state. Blocking call with timeout.
# Args:
# timeout (int): timeout in seconds
# """
# try:
# self.api.wait_till_done_post_with_http_info(timeout=math.ceil(timeout / 2), **kwargs)
# except requests.exceptions.Timeout:
# raise TimeoutError(
# f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call"
# )
# except Exception:
# logger.info(
# f"Waiting for device {self._parent_name}, jungfrau joch to become IDLE, retry after {math.ceil(timeout / 2)} seconds"
# )
# try:
# self.api.wait_till_done_post_with_http_info(
# timeout=math.ceil(timeout / 2), **kwargs
# )
# except requests.exceptions.Timeout:
# raise TimeoutError(
# f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call"
# )
# except Exception:
# content = traceback.format_exc()
# raise JungfrauJochClientError(
# f"JungfrauJoch Error in wait_till_done post for device {self._parent_name}: {content}"
# )