From df40665c5e8e8a7df4053074136f3b0459ebee68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Fr=C3=B6jdh?= Date: Fri, 25 Nov 2022 09:55:51 +0100 Subject: [PATCH] Using a loop to stop (#585) * loop for stop, 10 retries For example, if module A got a "stop" during an acquisition, it stops to an 'Idle' state. If module B gets a "stop" before an acquisition, it will report that it is in 'Idle' state and continue to start the next acquisition, but module B then waits for "ready for trigger" synchronization from module A, which it will never get. Since module B missed the asynchronous stop command, the workaround in the client is to send another "stop" command (up to 10 retries) till it returns Idle. --- RELEASE.txt | 26 +++++++++++++++++---- slsDetectorSoftware/src/Detector.cpp | 34 +++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/RELEASE.txt b/RELEASE.txt index 42ee2aca7..e336ee43d 100755 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -32,11 +32,10 @@ This document describes the differences between v6.1.2 and v6.1.1. kernel or on-board detector server, depending on the current kernel version. - 2. [Eiger] Stop command freezes server (apparent in large detectors) - Stop command sometimes did not receive processing done signal from - firmware. A workaround is implemented in software until fixed in - firmware. After 1 s, it will check status and throw if it is still - running. + 2. [Eiger] Locking for start and stop + Since start and stop each involve several commands within the server itself, + they are locked to ensure they do not disturb each other and cause undefined + behavior. 3. [Eiger] Quad Trimbits or threshold Loading trimbits or setting threshold will throw an incorrect exception @@ -49,6 +48,23 @@ This document describes the differences between v6.1.2 and v6.1.1. Fixed some minor functions returned empty error messages when failing. + Client + ------ + + 1. 
[Eiger] Stop command freezes server (apparent in large detectors)
+       For example, if module A got a "stop" during an acquisition,
+       it stops to an 'Idle' state. If module B gets a "stop" before an
+       acquisition, it will report that it is in 'Idle' state and continue
+       to start the next acquisition, but module B then waits for "ready
+       for trigger" synchronization from module A, which it will never get.
+       Since module B missed the asynchronous stop command, the workaround
+       in the client is to send another "stop" command (up to 10 retries)
+       till it returns Idle.
+
+    2. [Eiger][Jungfrau][Moench][Ctb]
+       A "stop" command will also check for inconsistent 'nextframenumber'
+       between the modules and set it to the max + 1 to ensure that they
+       all start with the same frame number for the next acquisition.
 
 
     2. On-board Detector Server Compatibility
diff --git a/slsDetectorSoftware/src/Detector.cpp b/slsDetectorSoftware/src/Detector.cpp
index 3a4c6d42e..92e680c5d 100644
--- a/slsDetectorSoftware/src/Detector.cpp
+++ b/slsDetectorSoftware/src/Detector.cpp
@@ -780,7 +780,39 @@ void Detector::startDetectorReadout() {
 }
 
 void Detector::stopDetector(Positions pos) {
-    pimpl->Parallel(&Module::stopAcquisition, pos);
+    // Resend "stop" until the selected modules report idle: a module that got
+    // "stop" before its acquisition began can miss it and start anyway.
+    constexpr int maxStopAttempts = 10;
+    // pass RUNNING to avoid default construction of runStatus::IDLE on squash
+    auto status = getDetectorStatus(pos).squash(defs::runStatus::RUNNING);
+    for (int attempt = 0; status != defs::runStatus::IDLE; ++attempt) {
+        // throw only if the last permitted attempt still left them non-idle
+        if (attempt == maxStopAttempts) {
+            throw RuntimeError("Could not stop detector");
+        }
+        pimpl->Parallel(&Module::stopAcquisition, pos);
+        status = getDetectorStatus(pos).squash(defs::runStatus::RUNNING);
+    }
+
+    // if the selected modules disagree on the next frame number, set them all
+    // to max + 1 so that they start the next acquisition with the same one
+    switch (getDetectorType().squash()) {
+    case defs::EIGER:
+    case defs::JUNGFRAU:
+    case defs::MOENCH:
+    case defs::CHIPTESTBOARD: {
+        auto res = getNextFrameNumber(pos);
+        if (!res.equal()) {
+            uint64_t maxVal = 0;
+            for (const auto frameNumber : res) {
+                maxVal = std::max(maxVal, frameNumber);
+            }
+            setNextFrameNumber(maxVal + 1, pos);
+        }
+    } break;
+    default:
+        break;
+    }
 }
 
 Result Detector::getDetectorStatus(Positions pos) const {