// Files
// Jungfraujoch/receiver/host/AcquisitionDevice.cpp
//
// 453 lines
// 18 KiB
// C++

// Copyright (2019-2022) Paul Scherrer Institute
// SPDX-License-Identifier: GPL-3.0-or-later
#ifdef JFJOCH_USE_NUMA
#include <numaif.h>
#endif
#include <sys/mman.h>
#include <thread>
#include <bitset>
#include <fstream>
#include <cmath>
#include <sstream>
#include <iomanip>
#include "../../common/JFJochException.h"
#include "AcquisitionDevice.h"
#include "../../common/NetworkAddressConvert.h"
// Maps an anonymous, private, read/write memory region of `size` bytes and
// returns it zero-filled.
//
// When built with JFJOCH_USE_NUMA and `numa_node` is non-negative, the region
// is bound to that NUMA node with mbind(MPOL_BIND, MPOL_MF_STRICT) before it
// is touched.
//
// Throws JFJochException (MemAllocFailed) if the mapping fails, if the node
// index does not fit into the single-word node mask, or if the NUMA policy
// cannot be applied. The mapping is released again on the error paths, so a
// failed call does not leak address space.
void *mmap_acquisition_buffer(size_t size, int16_t numa_node) {
    void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // mmap() reports failure with MAP_FAILED ((void *) -1), never with a null pointer
    if (ret == MAP_FAILED) {
        throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer");
    }
#ifdef JFJOCH_USE_NUMA
    if (numa_node >= 0) {
        unsigned long nodemask = 0;
        constexpr size_t nodemask_bits = sizeof(nodemask) * 8;
        // Validate BEFORE shifting: shifting by >= the width of the type is undefined behaviour
        if (static_cast<size_t>(numa_node) >= nodemask_bits) {
            munmap(ret, size);
            throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Mask too small for NUMA node");
        }
        nodemask = 1UL << numa_node;
        if (mbind(ret, size, MPOL_BIND, &nodemask, nodemask_bits, MPOL_MF_STRICT) == -1) {
            munmap(ret, size);
            throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Cannot apply NUMA policy");
        }
    }
#endif
    // Anonymous mappings already read as zero; writing here presumably serves to
    // fault the pages in under the policy set above - NOTE(review): confirm intent
    memset(ret, 0, size);
    return ret;
}
// Creates a device attached to data stream `in_data_stream`.
// The error frame and the internal packet generator frame are both sized to
// RAW_MODULE_SIZE pixels; logging is disabled until EnableLogging() is called.
AcquisitionDevice::AcquisitionDevice(uint16_t in_data_stream) :
        buffer_err(RAW_MODULE_SIZE), internal_pkt_gen_frame(RAW_MODULE_SIZE) {
    data_stream = in_data_stream;
    logger = nullptr;

    // Default generator pattern: pixel index modulo 65536
    int pixel_index = 0;
    for (auto &pixel : internal_pkt_gen_frame) {
        pixel = pixel_index % 65536;
        ++pixel_index;
    }
}
// Forwards to the online counters: reports whether the given module of the
// given frame is marked as fully collected.
bool AcquisitionDevice::IsFullModuleCollected(size_t frame, uint8_t module_number) const {
    const bool fully_collected = counters.IsFullModuleCollected(frame, module_number);
    return fully_collected;
}
// Forwards to the online counters: returns the buffer handle recorded for the
// given frame/module pair.
uint64_t AcquisitionDevice::GetBufferHandle(size_t frame, uint8_t module_number) const {
    const auto recorded_handle = counters.GetBufferHandle(frame, module_number);
    return recorded_handle;
}
// Populates the action configuration `job` from experiment description `x`
// for this device's data stream.
//
// Sets module count, frame count, the scaled inverse photon energy
// (2^20 / energy in keV, rounded), storage cell count minus one, and the mode
// bit field (MODE_CONV and/or MODE_INTERNAL_PACKET_GEN).
void AcquisitionDevice::FillActionRegister(const DiffractionExperiment& x, ActionConfig &job) {
    job.nmodules = x.GetModulesNum(data_stream);
    job.nframes = x.GetFrameNum();
    job.one_over_energy = std::lround((1<<20)/ x.GetPhotonEnergy_keV());
    job.nstorage_cells = x.GetStorageCellNumber() - 1;

    // Start from a clean mode word and switch on individual features
    job.mode = 0;

    const bool convert_on_fpga = (x.GetDetectorMode() == DetectorMode::Conversion)
                                 && x.GetConversionOnFPGA();
    if (convert_on_fpga) {
        job.mode |= MODE_CONV;
    }

    const bool internal_generator = x.IsUsingInternalPacketGen();
    if (internal_generator) {
        job.mode |= MODE_INTERNAL_PACKET_GEN;
    }
}
// Validates that an acquisition can be prepared and resets the online counters.
//
// Throws JFJochException:
//  - AcquisitionDeviceError if the hardware is not idle,
//  - InputParameterAboveMax if the data stream needs more modules than max_modules.
void AcquisitionDevice::PrepareAction(const DiffractionExperiment &experiment) {
    const bool hardware_idle = HW_IsIdle();
    if (!hardware_idle) {
        throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
                              "Hardware action running prior to start of data acquisition");
    }

    const auto requested_modules = experiment.GetModulesNum(data_stream);
    if (requested_modules > max_modules) {
        throw JFJochException(JFJochExceptionCategory::InputParameterAboveMax,
                              "Number of modules exceeds max possible for FPGA");
    }

    counters.Reset(experiment, data_stream);
}
// Configures the hardware for `experiment`, starts the action and blocks until
// the hardware posts its first completion.
//
// Throws JFJochException:
//  - AcquisitionDeviceError if the hardware is not idle, if a configuration
//    register reads back differently from what was written, or if the first
//    completion is not of type Start,
//  - InputParameterAboveMax if the data stream needs more modules than max_modules.
//
// NOTE(review): if the final completion check throws, the work request thread
// launched below has already been started and is not stopped here - confirm
// that callers handle this.
void AcquisitionDevice::StartAction(const DiffractionExperiment &experiment) {
if (!HW_IsIdle())
throw(JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
"Hardware action running prior to start of data acquisition"));
if (experiment.GetModulesNum(data_stream) > max_modules)
throw(JFJochException(JFJochExceptionCategory::InputParameterAboveMax,
"Number of modules exceeds max possible for FPGA"));
// Pre-fill the error frame (returned by GetFrameBuffer() when no valid handle exists):
// PIXEL_OUT_LOST in conversion mode, -1 otherwise
for (int i = 0; i < RAW_MODULE_SIZE; i++) {
if (experiment.GetDetectorMode() == DetectorMode::Conversion)
buffer_err[i] = PIXEL_OUT_LOST;
else
buffer_err[i] = -1;
}
// Reset per-acquisition bookkeeping
counters.Reset(experiment, data_stream);
completion_vector.Reset(experiment, data_stream);
expected_frames = experiment.GetFrameNum();
// Write the configuration and read it back for verification
ActionConfig cfg_in{}, cfg_out{};
FillActionRegister(experiment, cfg_in);
HW_WriteActionRegister(&cfg_in);
HW_ReadActionRegister(&cfg_out);
// The internal packet generator takes its frame from device buffer 0
if (experiment.IsUsingInternalPacketGen())
CopyInternalPacketGenFrameToDeviceBuffer();
if (cfg_out.mode != cfg_in.mode)
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
"Mismatch between expected and actual values of configuration registers (mode)");
if (cfg_out.nframes != cfg_in.nframes)
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
"Mismatch between expected and actual values of configuration registers (Frames per trigger)");
if (cfg_out.nmodules != cfg_in.nmodules)
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
"Mismatch between expected and actual values of configuration registers (#modules)");
// Ensure internal WR queue is empty
work_request_queue.Clear();
HW_StartAction();
// Background task that forwards queued work requests to the hardware
send_work_request_future = std::async(std::launch::async, &AcquisitionDevice::SendWorkRequestThread, this);
// Offer every mapped buffer to the hardware
for (uint32_t i = 0; i < buffer_device.size(); i++)
SendWorkRequest(i);
// The first completion is expected to be the start marker
auto c = work_completion_queue.GetBlocking();
if (c.type != Completion::Type::Start)
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Mismatch in completion queue");
start_time = std::chrono::system_clock::now();
}
void AcquisitionDevice::CopyInternalPacketGenFrameToDeviceBuffer() {
memcpy(buffer_device[0], internal_pkt_gen_frame.data(),
RAW_MODULE_SIZE * sizeof(uint16_t));
}
// Forwards to the online counters: returns the delay computed for the given
// frame/module pair.
int64_t AcquisitionDevice::CalculateDelay(size_t curr_frame, uint16_t module_number) const {
    const auto delay = counters.CalculateDelay(curr_frame, module_number);
    return delay;
}
// Forwards to the online counters' WaitForFrame() for the given frame/module
// pair. Presumably blocks until that frame has arrived or the acquisition has
// finished - confirm against AcquisitionOnlineCounters.
void AcquisitionDevice::WaitForFrame(size_t curr_frame, uint16_t module_number) const {
counters.WaitForFrame(curr_frame, module_number);
}
// Replaces the internal packet generator frame with the content of `v`.
//
// Throws JFJochException (InputParameterInvalid) unless `v` holds exactly
// RAW_MODULE_SIZE pixels.
void AcquisitionDevice::SetCustomInternalGeneratorFrame(const std::vector<uint16_t> &v) {
    const bool size_matches = (v.size() == RAW_MODULE_SIZE);
    if (!size_matches) {
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Error in size of custom internal generator frame");
    }

    // Element-wise copy; the size check above guarantees RAW_MODULE_SIZE iterations
    size_t position = 0;
    for (const uint16_t pixel : v) {
        internal_pkt_gen_frame[position] = pixel;
        ++position;
    }
}
// Returns a read-only reference to the frame currently used by the internal
// packet generator.
const std::vector<uint16_t> &AcquisitionDevice::GetInternalGeneratorFrame() const {
    const std::vector<uint16_t> &generator_frame = internal_pkt_gen_frame;
    return generator_frame;
}
// Processes work completions until the End completion arrives, then shuts the
// action down and waits for the hardware to become idle again.
void AcquisitionDevice::WaitForActionComplete() {
auto c = work_completion_queue.GetBlocking();
while (c.type != Completion::Type::End) {
if (c.frame_number >= expected_frames) {
// Frame beyond the requested range: ask the hardware to stop collecting
HW_SetCancelDataCollectionBit();
// this frame is not of any interest, therefore its location can be immediately released
SendWorkRequest(c.handle);
} else {
// Frame within range: record it in the counters and the completion log
counters.UpdateCounters(&c);
completion_vector.Add(c);
}
if (logger != nullptr)
logger->Debug("Data stream " + std::to_string(data_stream)
+ " completion frame number " + std::to_string(c.frame_number)
+ " module " + std::to_string(c.module)
+ " handle " + std::to_string(c.handle)
+ " timestamp " + std::to_string(c.timestamp));
c = work_completion_queue.GetBlocking();
}
// For the End completion frame_number is multiplied by 8192 to obtain bytes;
// presumably it carries a packet count with 8192-byte payloads - TODO confirm
bytes_received = c.frame_number * 8192LU;
counters.SetAcquisitionFinished();
end_time = std::chrono::system_clock::now();
// Stop the work request thread before ending the hardware action
EndWorkRequestAndSignalQueues();
HW_EndAction();
// Poll in 1 ms steps until the hardware reports idle
while (!HW_IsIdle())
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
// Sets the cancel bit, queues the end marker for the work request thread and
// waits for that thread to finish.
// The value 0 is free to act as end marker because SendWorkRequest() stores
// every handle as handle + 1.
void AcquisitionDevice::EndWorkRequestAndSignalQueues() {
HW_SetCancelDataCollectionBit();
work_request_queue.Put(0); // 0 = end, this is to ensure that in a priority queue end marker is always first to take
// Blocks until SendWorkRequestThread() returns; get() also rethrows any exception it raised
send_work_request_future.get();
}
// Queues buffer `handle` for submission to the hardware.
// Handles are stored shifted by one so that the value 0 stays reserved for the
// end-of-stream marker consumed by SendWorkRequestThread().
void AcquisitionDevice::SendWorkRequest(uint32_t handle) {
    const uint32_t queue_entry = handle + 1;
    work_request_queue.Put(queue_entry);
}
// Returns the byte count stored by WaitForActionComplete() at the end of the
// acquisition.
uint64_t AcquisitionDevice::GetBytesReceived() const {
    const uint64_t total_bytes = bytes_received;
    return total_bytes;
}
void AcquisitionDevice::SaveStatistics(const DiffractionExperiment &experiment,
JFJochProtoBuf::AcquisitionDeviceStatistics &statistics) const {
statistics.set_bytes_received(GetBytesReceived());
statistics.set_start_timestamp(start_time.time_since_epoch().count());
statistics.set_end_timestamp(end_time.time_since_epoch().count());
completion_vector.FillStatistics(experiment, data_stream, statistics);
*statistics.mutable_fpga_status() = GetStatus();
}
// Forwards to the online counters: returns the head value of the given module.
uint64_t AcquisitionDevice::GetHead(uint8_t module_number) const {
    const auto module_head = counters.GetHead(module_number);
    return module_head;
}
// Forwards to the online counters: returns the slowest head value.
uint64_t AcquisitionDevice::GetSlowestHead() const {
    const auto slowest = counters.GetSlowestHead();
    return slowest;
}
// Reports whether the online counters have been marked as finished
// (see WaitForActionComplete(), which calls SetAcquisitionFinished()).
bool AcquisitionDevice::IsDone() const {
    const bool finished = counters.IsAcquisitionFinished();
    return finished;
}
// Requests cancellation of the running data collection by setting the cancel
// bit in hardware. Does not wait for the action to finish.
void AcquisitionDevice::ActionAbort() {
HW_SetCancelDataCollectionBit();
}
// Returns the pixel data for the given frame/module pair.
// When no valid buffer handle is recorded for the pair, the shared error frame
// is returned instead. Throws std::out_of_range (from vector::at) if the
// recorded handle is outside buffer_device.
const int16_t *AcquisitionDevice::GetFrameBuffer(size_t frame_number, uint16_t module_number) const {
    const auto handle = GetBufferHandle(frame_number, module_number);

    if (handle == HandleNotValid)
        return GetErrorFrameBuffer();

    return reinterpret_cast<int16_t *>(buffer_device.at(handle));
}
// Returns the error frame that stands in for module data without a valid
// buffer; its content is set in StartAction().
const int16_t *AcquisitionDevice::GetErrorFrameBuffer() const {
    const int16_t *error_frame = buffer_err.data();
    return error_frame;
}
// Returns a writable pointer to device buffer `handle`.
// Throws JFJochException (ArrayOutOfBounds) if `handle` is not a valid index
// into buffer_device.
int16_t *AcquisitionDevice::GetDeviceBuffer(size_t handle) {
    const bool in_range = handle < buffer_device.size();
    if (!in_range)
        throw JFJochException(JFJochExceptionCategory::ArrayOutOfBounds, "Handle outside of range");

    return reinterpret_cast<int16_t *>(buffer_device.at(handle));
}
// Loads gain and pedestal calibration constants for this data stream into the
// device buffers.
//
// Buffer layout (buffer 0 is not touched here), with N = modules in this stream:
//   1 + m + 0*N .. 2*N             gain maps G0, G1, G2 of module m
//   1 + m + (3 + g*16 + s) * N     pedestal of gain level g (0..2), storage cell s
// Each gain level therefore reserves 16 storage-cell slots.
// If the experiment requests pixel masking in the FPGA, masked pixels get the
// value 16384 written in place of the pedestal for all three gain levels.
//
// Throws JFJochException (InputParameterInvalid) if the calibration does not
// match the experiment (module or storage cell count), if more storage cells
// are requested than the layout has slots per gain level, or if buffer_device
// is too small for the layout above.
void AcquisitionDevice::InitializeCalibration(const DiffractionExperiment &experiment, const JFCalibration &calib) {
    // Number of storage-cell slots reserved per gain level in the pedestal layout
    constexpr size_t pedestal_gain_stride = 16;
    // Value written instead of a pedestal for masked pixels
    constexpr uint16_t masked_pixel_value = 16384;

    auto offset = experiment.GetFirstModuleOfDataStream(data_stream);

    if (calib.GetModulesNum() != experiment.GetModulesNum())
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Mismatch regarding module count in calibration and experiment description");

    if (calib.GetStorageCellNum() != experiment.GetStorageCellNumber())
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Mismatch regarding storage cell count in calibration and experiment description");

    size_t modules = experiment.GetModulesNum(data_stream);
    const auto storage_cells = experiment.GetStorageCellNumber();
    const size_t cell_slots = (storage_cells > 0) ? static_cast<size_t>(storage_cells) : 0;

    // More cells than slots would make gain level g overwrite the buffers of gain level g+1
    if (cell_slots > pedestal_gain_stride)
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Number of storage cells exceeds pedestal buffer layout");

    // Count buffer groups (each group = one buffer per module) actually addressed below:
    // 3 gain maps, plus - if any pedestal is written - everything up to the last
    // storage cell of gain level 2, i.e. group index 3 + 2*16 + (cells - 1).
    size_t buffer_groups = 3;
    if (cell_slots > 0)
        buffer_groups += 2 * pedestal_gain_stride + cell_slots;

    if (1 + modules * buffer_groups > buffer_device.size())
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Not enough host/FPGA buffers to load all calibration constants");

    // NOTE(review): gain calibration is indexed with the stream-local module
    // number, whereas pedestal and mask below use offset + m - confirm this is intended.
    for (int m = 0; m < modules; m++) {
        calib.GainCalibration(m).ExportG0(buffer_device[1 + m]);
        calib.GainCalibration(m).ExportG1(buffer_device[1 + m + modules]);
        calib.GainCalibration(m).ExportG2(buffer_device[1 + m + modules * 2]);
    }

    // Index of the pedestal buffer for a given gain level / storage cell / module
    const auto pedestal_slot = [&](size_t gain_level, size_t storage_cell, size_t module) -> size_t {
        return 1 + module + (3 + gain_level * pedestal_gain_stride + storage_cell) * modules;
    };

    const bool apply_mask = experiment.GetApplyPixelMaskInFPGA();

    for (int s = 0; s < storage_cells; s++) {
        auto mask = calib.CalculateMask(experiment, s);

        for (int m = 0; m < modules; m++) {
            auto pedestal_g0 = calib.Pedestal(offset + m, 0, s).GetPedestal();
            auto pedestal_g1 = calib.Pedestal(offset + m, 1, s).GetPedestal();
            auto pedestal_g2 = calib.Pedestal(offset + m, 2, s).GetPedestal();

            auto target_g0 = buffer_device[pedestal_slot(0, s, m)];
            auto target_g1 = buffer_device[pedestal_slot(1, s, m)];
            auto target_g2 = buffer_device[pedestal_slot(2, s, m)];

            for (int i = 0; i < RAW_MODULE_SIZE; i++) {
                // The mask is only consulted when masking in the FPGA is enabled
                if (apply_mask && (mask[(offset + m) * RAW_MODULE_SIZE + i] != 0)) {
                    target_g0[i] = masked_pixel_value;
                    target_g1[i] = masked_pixel_value;
                    target_g2[i] = masked_pixel_value;
                } else {
                    target_g0[i] = pedestal_g0[i];
                    target_g1[i] = pedestal_g1[i];
                    target_g2[i] = pedestal_g2[i];
                }
            }
        }
    }
}
void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, size_t h2c_buffer_count, int16_t numa_node) {
try {
for (int i = 0; i < std::max(c2h_buffer_count, h2c_buffer_count); i++)
buffer_device.emplace_back((uint16_t *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node));
} catch (const JFJochException &e) {
UnmapBuffers();
throw;
}
}
// Releases every mapped device buffer and empties buffer_device.
// Clearing the list makes the call safe to repeat: without it the vector would
// keep dangling pointers, and a second call would munmap() address ranges that
// may meanwhile belong to an unrelated mapping.
void AcquisitionDevice::UnmapBuffers() {
    for (auto &buffer: buffer_device) {
        if (buffer != nullptr)
            munmap(buffer, FPGA_BUFFER_LOCATION_SIZE);
    }
    buffer_device.clear();
}
// Body of the background task that forwards queued work requests to the hardware.
// Queue entries are handle + 1; entry 0 is the end marker. After the end marker
// is seen, a final request with handle UINT32_MAX is pushed to the hardware,
// retrying every 10 us until it is accepted.
void AcquisitionDevice::SendWorkRequestThread() {
    const auto retry_pause = std::chrono::microseconds(10);

    for (auto entry = work_request_queue.GetBlocking();
         entry != 0;
         entry = work_request_queue.GetBlocking()) {
        // Preferably use the smallest handle (to reduce buffer size for better TLB usage)
        // So if work request cannot be sent, return handle and check again for the smallest one
        const bool accepted = HW_SendWorkRequest(entry - 1);
        if (!accepted) {
            work_request_queue.Put(entry);
            std::this_thread::sleep_for(retry_pause);
        }
    }

    while (!HW_SendWorkRequest(UINT32_MAX)) {
        std::this_thread::sleep_for(retry_pause);
    }
}
// Hands the buffer of the given frame/module pair back to the hardware.
// The handle is fetched and cleared in the counters; nothing is queued when
// the counters report that no handle was found.
void AcquisitionDevice::FrameBufferRelease(size_t frame_number, uint16_t module_number) {
    const auto released_handle = counters.GetBufferHandleAndClear(frame_number, module_number);

    if (released_handle == AcquisitionOnlineCounters::HandleNotFound)
        return;

    SendWorkRequest(released_handle);
}
// Stores the logger used for debug output (see WaitForActionComplete()).
// The pointer is stored as-is; passing nullptr disables logging.
void AcquisitionDevice::EnableLogging(Logger *in_logger) {
logger = in_logger;
}
// Decodes the state of one FIFO from the packed FIFO status register.
// `pos_empty` and `pos_full` are the bit positions of the FIFO's "empty" and
// "full" flags; "empty" takes precedence, and PARTIAL is returned when neither
// flag is set. Bit positions >= 32 make std::bitset::test throw std::out_of_range.
inline JFJochProtoBuf::FPGAFIFOStatus FIFO_check(uint32_t fifo_register, uint16_t pos_empty, uint16_t pos_full) {
    const std::bitset<32> flags(fifo_register);

    if (flags.test(pos_empty)) {
        return JFJochProtoBuf::FPGAFIFOStatus::EMPTY;
    }
    if (flags.test(pos_full)) {
        return JFJochProtoBuf::FPGAFIFOStatus::FULL;
    }
    return JFJochProtoBuf::FPGAFIFOStatus::PARTIAL;
}
// Reads the hardware status and environment registers and converts them into
// an FPGAStatus message: raw status word, pipeline stall counters, FIFO states,
// packet/error counters, individual error bits of the status word, mailbox
// registers, temperatures, PCIe edge voltages/currents (converted from
// milli-units), PCIe counters and the slowest head of the online counters.
JFJochProtoBuf::FPGAStatus AcquisitionDevice::GetStatus() const {
    ActionStatus status{};
    ActionEnvParams env{};
    HW_GetStatus(&status);
    HW_GetEnvParams(&env);

    JFJochProtoBuf::FPGAStatus ret;

    // --- General status ---
    auto full_status_register = status.ctrl_reg;
    ret.set_full_status_register(full_status_register);
    ret.set_stalls_hbm(status.pipeline_stalls_hbm);
    ret.set_stalls_host(status.pipeline_stalls_host);
    ret.set_max_modules(status.max_modules);
    ret.set_git_sha1(status.git_sha1);

    // --- FIFO states: (empty bit, full bit) pairs within fifo_status ---
    auto &fifo_states = *ret.mutable_fifo_status();
    fifo_states["Conversion input (data)"] = FIFO_check(status.fifo_status, 0, 1);
    fifo_states["Conversion input (cmd)"] = FIFO_check(status.fifo_status, 2, 3);
    fifo_states["UDP"] = FIFO_check(status.fifo_status, 6, 7);
    fifo_states["Work Request"] = FIFO_check(status.fifo_status, 12, 13);
    fifo_states["Work Completion"] = FIFO_check(status.fifo_status, 14, 15);
    fifo_states["Host mem (data)"] = FIFO_check(status.fifo_status, 8, 9);
    fifo_states["Host mem (cmd)"] = FIFO_check(status.fifo_status, 10, 11);
    fifo_states["Data FIFO #8"] = FIFO_check(status.fifo_status, 16, 17);
    fifo_states["Addr FIFO #3"] = FIFO_check(status.fifo_status, 18, 19);

    ret.set_fpga_idle(HW_IsIdle());

    // --- Packet and error counters ---
    ret.set_packets_ether(status.packets_eth);
    ret.set_packets_udp(status.packets_udp);
    ret.set_packets_icmp(status.packets_icmp);
    ret.set_packets_jfjoch(status.packets_processed);
    ret.set_packets_sls(status.packets_sls);
    ret.set_error_eth(status.udp_err_eth);
    ret.set_error_packet_len(status.udp_err_len);

    // --- Error flags taken from single bits of the status word ---
    ret.set_datamover_mm2s_error(full_status_register & (1 << 10));
    ret.set_datamover_s2mm_error(full_status_register & (1 << 11));
    ret.set_frame_statistics_alignment_err(full_status_register & (1 << 24));
    ret.set_frame_statistics_tlast_err(full_status_register & (1 << 25));
    ret.set_frame_statistics_work_req_err(full_status_register & (1 << 26));

    // --- Environment: mailbox, temperature, supply rails ---
    ret.set_mailbox_status_reg(env.mailbox_status_reg);
    ret.set_mailbox_err_reg(env.mailbox_err_reg);
    ret.set_fpga_temp_degc(env.fpga_temp_C);
    ret.set_current_edge_12v_a(static_cast<double>(env.fpga_pcie_12V_I_mA) / 1000.0);
    ret.set_voltage_edge_12v_v(static_cast<double>(env.fpga_pcie_12V_V_mV) / 1000.0);
    ret.set_current_edge_3p3v_a(static_cast<double>(env.fpga_pcie_3p3V_I_mA) / 1000.0);
    ret.set_voltage_edge_3p3v_v(static_cast<double>(env.fpga_pcie_3p3V_V_mV) / 1000.0);

    // --- PCIe and Ethernet ---
    ret.set_pcie_c2h_beats(env.pcie_c2h_beats);
    ret.set_pcie_h2c_beats(env.pcie_h2c_beats);
    ret.set_pcie_c2h_descriptors(env.pcie_c2h_descriptors);
    ret.set_pcie_h2c_descriptors(env.pcie_h2c_descriptors);
    ret.set_pcie_c2h_status(env.pcie_c2h_status);
    ret.set_pcie_h2c_status(env.pcie_h2c_status);
    ret.set_ethernet_rx_aligned(env.ethernet_aligned);

    // --- HBM temperatures and acquisition progress ---
    ret.set_hbm_temp_0_degc(env.hbm_0_temp_C);
    ret.set_hbm_temp_1_degc(env.hbm_1_temp_C);
    ret.set_slowest_head(GetSlowestHead());

    return ret;
}
// Returns the device MAC address reported by the hardware layer, formatted as
// text by MacAddressToStr().
std::string AcquisitionDevice::GetMACAddress() const {
    const auto raw_mac = HW_GetMACAddress();
    return MacAddressToStr(raw_mac);
}
// Returns the device IPv4 address reported by the hardware layer, formatted as
// text by IPv4AddressToStr().
std::string AcquisitionDevice::GetIPv4Address() const {
    const auto raw_ipv4 = HW_GetIPv4Address();
    return IPv4AddressToStr(raw_ipv4);
}
// Reads the action configuration back from the hardware and returns it by value.
ActionConfig AcquisitionDevice::ReadActionRegister() {
    ActionConfig current_config{};
    HW_ReadActionRegister(&current_config);
    return current_config;
}
// Base implementation: always returns -1.
// Presumably -1 means "no NUMA node preference" and derived devices override
// this - confirm against the class declaration.
int32_t AcquisitionDevice::GetNUMANode() const {
    constexpr int32_t default_numa_node = -1;
    return default_numa_node;
}
// Base implementation: always returns UDP port 1234.
uint16_t AcquisitionDevice::GetUDPPort() const {
    constexpr uint16_t default_udp_port = 1234;
    return default_udp_port;
}
// Forwards to the online counters: returns the bunch ID recorded for the given
// frame/module pair.
uint64_t AcquisitionDevice::GetBunchID(size_t curr_frame, uint16_t module_number) const {
    const auto bunch_id = counters.GetBunchID(curr_frame, module_number);
    return bunch_id;
}
// Forwards to the online counters: returns the JF info word recorded for the
// given frame/module pair.
uint32_t AcquisitionDevice::GetJFInfo(size_t curr_frame, uint16_t module_number) const {
    const auto jf_info = counters.GetJFInfo(curr_frame, module_number);
    return jf_info;
}
// Forwards to the online counters: returns the timestamp recorded for the
// given frame/module pair.
uint32_t AcquisitionDevice::GetTimestamp(size_t curr_frame, uint16_t module_number) const {
    const auto frame_timestamp = counters.GetTimestamp(curr_frame, module_number);
    return frame_timestamp;
}