AcquisitionDevice: Improve detection of erroneous frames without killing data acquisition
This commit is contained in:
+1
-3
@@ -382,9 +382,7 @@ message FPGAStatus {
|
||||
uint32 git_sha1 = 16;
|
||||
uint32 mailbox_err_reg = 17;
|
||||
uint32 mailbox_status_reg = 18;
|
||||
bool frame_statistics_alignment_err = 21;
|
||||
bool frame_statistics_tlast_err = 22;
|
||||
bool frame_statistics_work_req_err = 23;
|
||||
repeated string host_writer_err = 21;
|
||||
uint64 slowest_head = 24;
|
||||
|
||||
float fpga_temp_degC = 26;
|
||||
|
||||
+46
-46
File diff suppressed because one or more lines are too long
@@ -92,15 +92,18 @@ void AcquisitionDevice::WaitForActionComplete() {
|
||||
Cancel();
|
||||
// this frame is not of any interest, therefore its location can be immediately released
|
||||
SendWorkRequest(c.handle);
|
||||
} else if (c.module_number >= max_modules) {
|
||||
// Module number out of bounds, don't process
|
||||
if (logger != nullptr)
|
||||
logger->Error("Completion with wrong module number data stream {} completion frame number {} module {} handle {} timestamp {} status {}",
|
||||
data_stream, c.frame_number, c.module_number, c.handle, c.timestamp, c.status);
|
||||
SendWorkRequest(c.handle);
|
||||
} else
|
||||
counters.UpdateCounters(&c);
|
||||
|
||||
if (logger != nullptr)
|
||||
logger->Debug("Data stream " + std::to_string(data_stream)
|
||||
+ " completion frame number " + std::to_string(c.frame_number)
|
||||
+ " module " + std::to_string(c.module_number)
|
||||
+ " handle " + std::to_string(c.handle)
|
||||
+ " timestamp " + std::to_string(c.timestamp));
|
||||
logger->Debug("Data stream {} completion frame number {} module {} handle {} timestamp {} status {}",
|
||||
data_stream, c.frame_number, c.module_number, c.handle, c.timestamp, c.status);
|
||||
|
||||
c = work_completion_queue.GetBlocking();
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ Completion parse_hw_completion(uint32_t tmp[16]) {
|
||||
c.handle = tmp[0];
|
||||
c.module_number = tmp[1] & 0xFF;
|
||||
c.packet_count = (tmp[1] & (0xFFFF0000)) >> 16;
|
||||
c.status = (tmp[1] >> 8) & 0xFF;
|
||||
c.data_collection_id = tmp[11] & UINT16_MAX;
|
||||
|
||||
uint64_t detector_frame_number = bit_concat(tmp[2], tmp[3]);
|
||||
@@ -34,7 +35,6 @@ Completion parse_hw_completion(uint32_t tmp[16]) {
|
||||
c.frame_number = detector_frame_number;
|
||||
} else {
|
||||
c.type = Completion::Type::Image;
|
||||
|
||||
if (detector_frame_number == 0)
|
||||
throw JFJochException(JFJochExceptionCategory::HardwareParityError, "Detector frame number cannot be zero");
|
||||
else
|
||||
|
||||
@@ -23,7 +23,8 @@ struct Completion {
|
||||
uint32_t handle;
|
||||
uint16_t packet_count;
|
||||
uint16_t data_collection_id;
|
||||
uint8_t module_number;
|
||||
uint16_t status;
|
||||
uint16_t module_number;
|
||||
};
|
||||
|
||||
Completion parse_hw_completion(uint32_t hw_input[16]);
|
||||
|
||||
@@ -171,6 +171,12 @@ inline void FIFO_check(JFJochProtoBuf::FPGAStatus &fpga_status,
|
||||
fifo_status->set_value(JFJochProtoBuf::FPGAFIFOStatusEnum::PARTIAL);
|
||||
}
|
||||
|
||||
/// Records a named host-writer error when its flag is set in the status register.
///
/// The flag for error index `bit` is read from position (24 + bit) of
/// `status_register`; when that flag is set, `name` is appended to the
/// `host_writer_err` list of `output`. Nothing is appended otherwise.
///
/// @param output          status message that receives the error name
/// @param status_register raw 32-bit status register value to test
/// @param bit             error index, counted from bit 24; only 0..7 fit in the register
/// @param name            label appended to `output` for this error
inline void CheckHostWriterErr(JFJochProtoBuf::FPGAStatus &output, uint32_t status_register,
                               uint32_t bit, const std::string &name) {
    constexpr uint32_t kFirstErrBit = 24;
    constexpr uint32_t kRegisterBits = 32;

    // An index past the top of the register has no flag to test, and shifting
    // by 32 or more would be undefined behaviour.
    if (bit >= kRegisterBits - kFirstErrBit)
        return;

    // Shift an unsigned one so that reaching bit 31 never involves a sign bit.
    const uint32_t mask = uint32_t{1} << (kFirstErrBit + bit);
    if ((status_register & mask) != 0)
        output.add_host_writer_err(name);
}
|
||||
|
||||
JFJochProtoBuf::FPGAStatus FPGAAcquisitionDevice::GetStatus() const {
|
||||
|
||||
ActionStatus status{};
|
||||
@@ -210,9 +216,12 @@ JFJochProtoBuf::FPGAStatus FPGAAcquisitionDevice::GetStatus() const {
|
||||
ret.set_error_packet_len(status.udp_err_len);
|
||||
ret.set_cancel_bit(full_status_register & (1<<2));
|
||||
ret.set_host_writer_idle(full_status_register & (1<<4));
|
||||
ret.set_frame_statistics_alignment_err(full_status_register & (1 << 24));
|
||||
ret.set_frame_statistics_tlast_err(full_status_register & (1 << 25));
|
||||
ret.set_frame_statistics_work_req_err(full_status_register & (1 << 26));
|
||||
CheckHostWriterErr(ret, full_status_register, 0, "Alignment error");
|
||||
CheckHostWriterErr(ret, full_status_register, 1, "TLAST error");
|
||||
CheckHostWriterErr(ret, full_status_register, 2, "Work request parity error");
|
||||
CheckHostWriterErr(ret, full_status_register, 3, "Handle error");
|
||||
CheckHostWriterErr(ret, full_status_register, 4, "Null pointer");
|
||||
CheckHostWriterErr(ret, full_status_register, 5, "Module number exceeded");
|
||||
|
||||
ret.set_mailbox_status_reg(env.mailbox_status_reg);
|
||||
ret.set_mailbox_err_reg(env.mailbox_err_reg);
|
||||
|
||||
@@ -256,7 +256,11 @@ void JFJochReceiver::AcquireThread(uint16_t data_stream) {
|
||||
} catch (const JFJochException &e) {
|
||||
Cancel(e);
|
||||
}
|
||||
|
||||
auto status = acquisition_device[data_stream]->GetStatus();
|
||||
if (!status.host_writer_err().empty()) {
|
||||
for (const auto &i: status.host_writer_err())
|
||||
logger.Error("Device thread {}: host writer error {}", data_stream, i);
|
||||
}
|
||||
logger.Info("Device thread {} done", data_stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -70,6 +70,12 @@ int main(int argc, char **argv) {
|
||||
std::cout << s << std::endl;
|
||||
}
|
||||
|
||||
if (!status.host_writer_err().empty()) {
|
||||
std::cout << "FPGA host writer errors: " << std::endl;
|
||||
for (const auto &i: status.host_writer_err())
|
||||
std::cout << " " << i << std::endl;
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Packet counters - ETH " << status.packets_ether() << std::endl;
|
||||
std::cout << " - UDP " << status.packets_udp() << std::endl;
|
||||
|
||||
Reference in New Issue
Block a user