diff --git a/csaxs_bec/devices/epics/delay_generator_csaxs/ddg_1.py b/csaxs_bec/devices/epics/delay_generator_csaxs/ddg_1.py index f678741..4990799 100644 --- a/csaxs_bec/devices/epics/delay_generator_csaxs/ddg_1.py +++ b/csaxs_bec/devices/epics/delay_generator_csaxs/ddg_1.py @@ -209,19 +209,19 @@ class DDG1(PSIDeviceBase, DelayGeneratorCSAXS): The 20ms sleep was added to ensure that the event status is not polled too frequently, and to give the device time to process the previous command. This was found empirically to be necessary to avoid missing events. + IMPORTANT: Do not remove sleeps or try to optimize this logic. This seems to be a + fragile balance between polling frequency and device processing time. Also in between + start/stop of polling. Please also consider that there is a sleep in on_trigger and + that this might also be necessary to avoid that HW becomes unavailable/unstable. """ self.state.proc_status.put(1, use_complete=True) - if ( - self._poll_thread_run_event.wait(timeout=0.02) - and not self._poll_thread_kill_event.is_set() - ): + time.sleep(0.02) # 20ms delay for processing, important for not missing events + if self._poll_thread_kill_event.is_set() or not self._poll_thread_run_event.is_set(): return self.state.event_status.get(use_monitor=False) - if ( - self._poll_thread_run_event.wait(timeout=0.02) - and not self._poll_thread_kill_event.is_set() - ): + if self._poll_thread_kill_event.is_set() or not self._poll_thread_run_event.is_set(): return + time.sleep(0.02) # 20ms delay for processing, important for not missing events def _start_polling(self) -> None: """Start the polling loop in the background thread.""" @@ -267,12 +267,12 @@ class DDG1(PSIDeviceBase, DelayGeneratorCSAXS): If we don't then subsequent triggers may reach the DDG too early, and will be ignored. To avoid this, we've added the option to specify a delay via add_delay, default here is 50ms. """ - # Keep sleep here for software trigger mode as 20ms delay between subsequent commands - # to the HW are necessary to avoid crashes and missing events. - time.sleep(0.02) # Stop polling, poll once manually to ensure that the register is clean self._stop_polling() self._poll_thread_poll_loop_done.wait(timeout=1) + # IMPORTANT: Keep this sleep setting, as it is necessary to avoid that the HW + # becomes unresponsive. This was found empirically and seems to be necessary + time.sleep(0.02) # Prepare the MCS card for the next software trigger mcs = self.device_manager.devices.get("mcs", None) diff --git a/csaxs_bec/devices/jungfraujoch/eiger.py b/csaxs_bec/devices/jungfraujoch/eiger.py index 196600b..33e1b10 100644 --- a/csaxs_bec/devices/jungfraujoch/eiger.py +++ b/csaxs_bec/devices/jungfraujoch/eiger.py @@ -245,13 +245,13 @@ class Eiger(PSIDeviceBase): prep_time = start_time - time.time() logger.info(f"Prepared information for eiger to start acquisition in {prep_time:.2f}s") self.jfj_client.start(settings=data_settings) - start_call_returns = time.time() - start_time - prep_time - logger.info(f"Start Rest call from JFJ took {start_call_returns:.2f}s") - sleep_time = 0.5 - time.sleep(sleep_time) - logger.info( - f"Eiger {self.name} staged and ready for acquisition; with additional sleep of {sleep_time:.2f}s" - ) + # start_call_returns = time.time() - start_time - prep_time + # logger.info(f"Start Rest call from JFJ took {start_call_returns:.2f}s") + # sleep_time = 0.5 + # time.sleep(sleep_time) + # logger.info( + # f"Eiger {self.name} staged and ready for acquisition; with additional sleep of {sleep_time:.2f}s" + # ) def on_unstage(self) -> DeviceStatus: """Called while unstaging the device.""" @@ -277,22 +277,7 @@ class Eiger(PSIDeviceBase): def wait_for_complete(): timeout = 10 for _ in range(timeout): - try: - self.jfj_client.wait_till_done(timeout=1, _request_timeout=5) - - except ( - JungfrauJochClientError - ): # Means that timeout was triggered, and not _request_timeout - continue - except TimeoutError: # Timeout exception from wait_till_done - content = traceback.format_exc() - raise TimeoutError(f"Timeout for request during complete call: {content}") - except Exception as e: # This should actually never occur.. - raise ValueError(f"Error in complete for {self.name}, exception: {e}") from e - else: - # How can I check if the aquisition was successful? - # If not successfull, we have to raise an error here! - # For instance if packages were lost.. + if self.jfj_client.wait_for_idle(timeout=1, request_timeout=10): break status = self.task_handler.submit_task(wait_for_complete, run=True) diff --git a/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py b/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py index 7e679b9..fb27e99 100644 --- a/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py +++ b/csaxs_bec/devices/jungfraujoch/jungfrau_joch_client.py @@ -85,7 +85,7 @@ class JungfrauJochClient: status = self.detector_state if status != DetectorState.IDLE: self.api.initialize_post() # This is a blocking call.... - self.wait_till_done(timeout, **kwargs) # Blocking call + self.wait_for_idle(timeout, request_timeout=timeout) # Blocking call self.initialised = True def set_detector_settings(self, settings: dict | DetectorSettings, timeout: int = 10) -> None: @@ -164,32 +164,53 @@ class JungfrauJochClient: f"Error in JungFrauJochClient for device {self._parent_name} during stop: {content}" ) - def wait_till_done(self, timeout: int = 10, **kwargs) -> None: + def wait_for_idle(self, timeout: int = 10, request_timeout: float | None = None) -> bool: """Wait for JungfrauJoch to be in Idle state. Blocking call with timeout. Args: timeout (int): timeout in seconds + Returns: + bool: True if the detector is in IDLE state, False if timeout occurred """ + if request_timeout is None: + request_timeout = timeout try: - self.api.wait_till_done_post_with_http_info(math.ceil(timeout=timeout / 2), **kwargs) + self.api.wait_till_done_post(timeout=timeout, _request_timeout=request_timeout) except requests.exceptions.Timeout: - raise TimeoutError( - f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call" - ) + raise TimeoutError(f"HTTP request timeout in wait_for_idle for {self._parent_name}") except Exception: - logger.info( - f"Waiting for device {self._parent_name}, jungfrau joch to become IDLE, retry after {timeout/2} seconds" - ) - try: - self.api.wait_till_done_post_with_http_info( - timeout=math.floor(timeout / 2), **kwargs - ) - except requests.exceptions.Timeout: - raise TimeoutError( - f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call" - ) - except Exception: - content = traceback.format_exc() - raise JungfrauJochClientError( - f"JungfrauJoch Error in wait_till_done post for device {self._parent_name}: {content}" - ) + content = traceback.format_exc() + logger.debug(f"Waiting for device {self._parent_name} to become IDLE: {content}") + return False + return True + + # # TODO improve this method for error handling and reporting... + # def wait_till_done(self, timeout: int = 10, **kwargs) -> None: + # """Wait for JungfrauJoch to be in Idle state. Blocking call with timeout. + + # Args: + # timeout (int): timeout in seconds + # """ + # try: + # self.api.wait_till_done_post_with_http_info(timeout=math.ceil(timeout / 2), **kwargs) + # except requests.exceptions.Timeout: + # raise TimeoutError( + # f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call" + # ) + # except Exception: + # logger.info( + # f"Waiting for device {self._parent_name}, jungfrau joch to become IDLE, retry after {math.ceil(timeout / 2)} seconds" + # ) + # try: + # self.api.wait_till_done_post_with_http_info( + # timeout=math.ceil(timeout / 2), **kwargs + # ) + # except requests.exceptions.Timeout: + # raise TimeoutError( + # f"Timeout in JungfrauJochClient for parent device {self._parent_name} for 'wait_till_done' call" + # ) + # except Exception: + # content = traceback.format_exc() + # raise JungfrauJochClientError( + # f"JungfrauJoch Error in wait_till_done post for device {self._parent_name}: {content}" + # )