453 lines
18 KiB
C++
453 lines
18 KiB
C++
// Copyright (2019-2022) Paul Scherrer Institute
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#ifdef JFJOCH_USE_NUMA
|
|
#include <numaif.h>
|
|
#endif
|
|
|
|
#include <sys/mman.h>
|
|
#include <thread>
|
|
#include <bitset>
|
|
#include <fstream>
|
|
#include <cmath>
|
|
#include <sstream>
|
|
#include <iomanip>
|
|
|
|
#include "../../common/JFJochException.h"
|
|
#include "AcquisitionDevice.h"
|
|
#include "../../common/NetworkAddressConvert.h"
|
|
|
|
void *mmap_acquisition_buffer(size_t size, int16_t numa_node) {
|
|
void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
if (ret == nullptr) {
|
|
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer");
|
|
}
|
|
#ifdef JFJOCH_USE_NUMA
|
|
if (numa_node >= 0) {
|
|
unsigned long nodemask = 1L << numa_node;;
|
|
if (numa_node > sizeof(nodemask)*8)
|
|
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Mask too small for NUMA node");
|
|
if (mbind(ret, size, MPOL_BIND, &nodemask, sizeof(nodemask)*8, MPOL_MF_STRICT) == -1)
|
|
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Cannot apply NUMA policy");
|
|
}
|
|
#endif
|
|
memset(ret, 0, size);
|
|
return ret;
|
|
}
|
|
|
|
/// Creates the device object for data stream `in_data_stream`.
///
/// Both the error buffer and the internal packet generator frame are sized to
/// RAW_MODULE_SIZE elements; logging is disabled until EnableLogging() is called.
AcquisitionDevice::AcquisitionDevice(uint16_t in_data_stream) :
        buffer_err(RAW_MODULE_SIZE), internal_pkt_gen_frame(RAW_MODULE_SIZE) {
    data_stream = in_data_stream;
    logger = nullptr;

    // Default generator pattern: each element holds its own index modulo 65536
    int pixel = 0;
    while (pixel < RAW_MODULE_SIZE) {
        internal_pkt_gen_frame[pixel] = pixel % 65536;
        ++pixel;
    }
}
|
|
|
|
bool AcquisitionDevice::IsFullModuleCollected(size_t frame, uint8_t module_number) const {
|
|
return counters.IsFullModuleCollected(frame, module_number);
|
|
}
|
|
|
|
uint64_t AcquisitionDevice::GetBufferHandle(size_t frame, uint8_t module_number) const {
|
|
return counters.GetBufferHandle(frame, module_number);
|
|
}
|
|
|
|
void AcquisitionDevice::FillActionRegister(const DiffractionExperiment& x, ActionConfig &job) {
|
|
|
|
job.nmodules = x.GetModulesNum(data_stream);
|
|
job.nframes = x.GetFrameNum();
|
|
job.one_over_energy = std::lround((1<<20)/ x.GetPhotonEnergy_keV());
|
|
job.nstorage_cells = x.GetStorageCellNumber() - 1;
|
|
job.mode = 0;
|
|
|
|
if ((x.GetDetectorMode() == DetectorMode::Conversion) && x.GetConversionOnFPGA())
|
|
job.mode |= MODE_CONV;
|
|
|
|
if (x.IsUsingInternalPacketGen())
|
|
job.mode |= MODE_INTERNAL_PACKET_GEN;
|
|
}
|
|
|
|
void AcquisitionDevice::PrepareAction(const DiffractionExperiment &experiment) {
|
|
if (!HW_IsIdle())
|
|
throw(JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
|
|
"Hardware action running prior to start of data acquisition"));
|
|
|
|
if (experiment.GetModulesNum(data_stream) > max_modules)
|
|
throw(JFJochException(JFJochExceptionCategory::InputParameterAboveMax,
|
|
"Number of modules exceeds max possible for FPGA"));
|
|
|
|
counters.Reset(experiment, data_stream);
|
|
}
|
|
|
|
void AcquisitionDevice::StartAction(const DiffractionExperiment &experiment) {
|
|
if (!HW_IsIdle())
|
|
throw(JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
|
|
"Hardware action running prior to start of data acquisition"));
|
|
|
|
if (experiment.GetModulesNum(data_stream) > max_modules)
|
|
throw(JFJochException(JFJochExceptionCategory::InputParameterAboveMax,
|
|
"Number of modules exceeds max possible for FPGA"));
|
|
|
|
for (int i = 0; i < RAW_MODULE_SIZE; i++) {
|
|
if (experiment.GetDetectorMode() == DetectorMode::Conversion)
|
|
buffer_err[i] = PIXEL_OUT_LOST;
|
|
else
|
|
buffer_err[i] = -1;
|
|
}
|
|
|
|
counters.Reset(experiment, data_stream);
|
|
completion_vector.Reset(experiment, data_stream);
|
|
expected_frames = experiment.GetFrameNum();
|
|
|
|
ActionConfig cfg_in{}, cfg_out{};
|
|
|
|
FillActionRegister(experiment, cfg_in);
|
|
HW_WriteActionRegister(&cfg_in);
|
|
HW_ReadActionRegister(&cfg_out);
|
|
|
|
if (experiment.IsUsingInternalPacketGen())
|
|
CopyInternalPacketGenFrameToDeviceBuffer();
|
|
|
|
if (cfg_out.mode != cfg_in.mode)
|
|
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
|
|
"Mismatch between expected and actual values of configuration registers (mode)");
|
|
if (cfg_out.nframes != cfg_in.nframes)
|
|
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
|
|
"Mismatch between expected and actual values of configuration registers (Frames per trigger)");
|
|
if (cfg_out.nmodules != cfg_in.nmodules)
|
|
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
|
|
"Mismatch between expected and actual values of configuration registers (#modules)");
|
|
|
|
// Ensure internal WR queue is empty
|
|
work_request_queue.Clear();
|
|
|
|
HW_StartAction();
|
|
|
|
send_work_request_future = std::async(std::launch::async, &AcquisitionDevice::SendWorkRequestThread, this);
|
|
|
|
for (uint32_t i = 0; i < buffer_device.size(); i++)
|
|
SendWorkRequest(i);
|
|
|
|
auto c = work_completion_queue.GetBlocking();
|
|
if (c.type != Completion::Type::Start)
|
|
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Mismatch in completion queue");
|
|
|
|
start_time = std::chrono::system_clock::now();
|
|
}
|
|
|
|
void AcquisitionDevice::CopyInternalPacketGenFrameToDeviceBuffer() {
|
|
memcpy(buffer_device[0], internal_pkt_gen_frame.data(),
|
|
RAW_MODULE_SIZE * sizeof(uint16_t));
|
|
}
|
|
|
|
int64_t AcquisitionDevice::CalculateDelay(size_t curr_frame, uint16_t module_number) const {
|
|
return counters.CalculateDelay(curr_frame, module_number);
|
|
}
|
|
|
|
/// Forwards to counters.WaitForFrame() for the given frame and module.
void AcquisitionDevice::WaitForFrame(size_t curr_frame, uint16_t module_number) const {
    counters.WaitForFrame(curr_frame, module_number);
}
|
|
|
|
void AcquisitionDevice::SetCustomInternalGeneratorFrame(const std::vector<uint16_t> &v) {
|
|
if (v.size() != RAW_MODULE_SIZE)
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
|
|
"Error in size of custom internal generator frame");
|
|
for (int i = 0; i < RAW_MODULE_SIZE; i++)
|
|
internal_pkt_gen_frame[i] = v[i];
|
|
}
|
|
|
|
const std::vector<uint16_t> &AcquisitionDevice::GetInternalGeneratorFrame() const {
|
|
return internal_pkt_gen_frame;
|
|
}
|
|
|
|
void AcquisitionDevice::WaitForActionComplete() {
|
|
auto c = work_completion_queue.GetBlocking();
|
|
|
|
while (c.type != Completion::Type::End) {
|
|
if (c.frame_number >= expected_frames) {
|
|
HW_SetCancelDataCollectionBit();
|
|
// this frame is not of any interest, therefore its location can be immediately released
|
|
SendWorkRequest(c.handle);
|
|
} else {
|
|
counters.UpdateCounters(&c);
|
|
completion_vector.Add(c);
|
|
}
|
|
|
|
if (logger != nullptr)
|
|
logger->Debug("Data stream " + std::to_string(data_stream)
|
|
+ " completion frame number " + std::to_string(c.frame_number)
|
|
+ " module " + std::to_string(c.module)
|
|
+ " handle " + std::to_string(c.handle)
|
|
+ " timestamp " + std::to_string(c.timestamp));
|
|
|
|
c = work_completion_queue.GetBlocking();
|
|
}
|
|
bytes_received = c.frame_number * 8192LU;
|
|
|
|
counters.SetAcquisitionFinished();
|
|
|
|
end_time = std::chrono::system_clock::now();
|
|
|
|
EndWorkRequestAndSignalQueues();
|
|
|
|
HW_EndAction();
|
|
|
|
while (!HW_IsIdle())
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
|
}
|
|
|
|
/// Sets the cancel bit, posts the end marker to the work-request queue and waits for
/// the work-request thread to finish.
void AcquisitionDevice::EndWorkRequestAndSignalQueues() {
    HW_SetCancelDataCollectionBit();

    // 0 is the end marker; using 0 ensures that in a priority queue it is always taken first
    work_request_queue.Put(0);

    // Blocks until the task started in StartAction() has returned
    send_work_request_future.get();
}
|
|
|
|
/// Queues buffer `handle` for submission to the hardware.
///
/// The queue stores handle + 1 because the value 0 is used as the end marker;
/// SendWorkRequestThread() subtracts one again before submitting.
void AcquisitionDevice::SendWorkRequest(uint32_t handle) {
    const auto queue_entry = handle + 1;
    work_request_queue.Put(queue_entry);
}
|
|
|
|
uint64_t AcquisitionDevice::GetBytesReceived() const {
|
|
return bytes_received;
|
|
}
|
|
|
|
void AcquisitionDevice::SaveStatistics(const DiffractionExperiment &experiment,
|
|
JFJochProtoBuf::AcquisitionDeviceStatistics &statistics) const {
|
|
|
|
statistics.set_bytes_received(GetBytesReceived());
|
|
statistics.set_start_timestamp(start_time.time_since_epoch().count());
|
|
statistics.set_end_timestamp(end_time.time_since_epoch().count());
|
|
|
|
completion_vector.FillStatistics(experiment, data_stream, statistics);
|
|
*statistics.mutable_fpga_status() = GetStatus();
|
|
}
|
|
|
|
uint64_t AcquisitionDevice::GetHead(uint8_t module_number) const {
|
|
return counters.GetHead(module_number);
|
|
}
|
|
|
|
uint64_t AcquisitionDevice::GetSlowestHead() const {
|
|
return counters.GetSlowestHead();
|
|
}
|
|
|
|
bool AcquisitionDevice::IsDone() const {
|
|
return counters.IsAcquisitionFinished();
|
|
}
|
|
|
|
/// Requests an abort by setting the cancel-data-collection bit in the hardware.
void AcquisitionDevice::ActionAbort() {
    HW_SetCancelDataCollectionBit();
}
|
|
|
|
const int16_t *AcquisitionDevice::GetFrameBuffer(size_t frame_number, uint16_t module_number) const {
|
|
auto handle = GetBufferHandle(frame_number, module_number);
|
|
if (handle != HandleNotValid)
|
|
return (int16_t *) buffer_device.at(handle);
|
|
else
|
|
return GetErrorFrameBuffer();
|
|
}
|
|
|
|
const int16_t *AcquisitionDevice::GetErrorFrameBuffer() const {
|
|
return buffer_err.data();
|
|
}
|
|
|
|
int16_t *AcquisitionDevice::GetDeviceBuffer(size_t handle) {
|
|
if (handle >= buffer_device.size())
|
|
throw JFJochException(JFJochExceptionCategory::ArrayOutOfBounds, "Handle outside of range");
|
|
else
|
|
return (int16_t *) buffer_device.at(handle);
|
|
}
|
|
|
|
void AcquisitionDevice::InitializeCalibration(const DiffractionExperiment &experiment, const JFCalibration &calib) {
|
|
auto offset = experiment.GetFirstModuleOfDataStream(data_stream);
|
|
|
|
if (calib.GetModulesNum() != experiment.GetModulesNum())
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
|
|
"Mismatch regarding module count in calibration and experiment description");
|
|
|
|
if (calib.GetStorageCellNum() != experiment.GetStorageCellNumber())
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
|
|
"Mismatch regarding storage cell count in calibration and experiment description");
|
|
|
|
size_t modules = experiment.GetModulesNum(data_stream);
|
|
|
|
if (1 + modules * (3 + 3 * experiment.GetStorageCellNumber()) > buffer_device.size())
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
|
|
"Not enough host/FPGA buffers to load all calibration constants");
|
|
|
|
for (int m = 0; m < modules; m++) {
|
|
calib.GainCalibration(m).ExportG0(buffer_device[1 + m]);
|
|
calib.GainCalibration(m).ExportG1(buffer_device[1 + m + modules]);
|
|
calib.GainCalibration(m).ExportG2(buffer_device[1 + m + modules * 2]);
|
|
}
|
|
|
|
for (int s = 0; s < experiment.GetStorageCellNumber(); s++) {
|
|
auto mask = calib.CalculateMask(experiment, s);
|
|
for (int m = 0; m < modules; m++) {
|
|
auto pedestal_g0 = calib.Pedestal(offset + m, 0, s).GetPedestal();
|
|
auto pedestal_g1 = calib.Pedestal(offset + m, 1, s).GetPedestal();
|
|
auto pedestal_g2 = calib.Pedestal(offset + m, 2, s).GetPedestal();
|
|
for (int i = 0; i < RAW_MODULE_SIZE; i++) {
|
|
if (experiment.GetApplyPixelMaskInFPGA() && (mask[(offset + m) * RAW_MODULE_SIZE + i] != 0)) {
|
|
buffer_device[1 + m + (3 + 0 * 16 + s) * modules][i] = 16384;
|
|
buffer_device[1 + m + (3 + 1 * 16 + s) * modules][i] = 16384;
|
|
buffer_device[1 + m + (3 + 2 * 16 + s) * modules][i] = 16384;
|
|
} else {
|
|
buffer_device[1 + m + (3 + 0 * 16 + s) * modules][i] = pedestal_g0[i];
|
|
buffer_device[1 + m + (3 + 1 * 16 + s) * modules][i] = pedestal_g1[i];
|
|
buffer_device[1 + m + (3 + 2 * 16 + s) * modules][i] = pedestal_g2[i];
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, size_t h2c_buffer_count, int16_t numa_node) {
|
|
try {
|
|
for (int i = 0; i < std::max(c2h_buffer_count, h2c_buffer_count); i++)
|
|
buffer_device.emplace_back((uint16_t *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node));
|
|
} catch (const JFJochException &e) {
|
|
UnmapBuffers();
|
|
throw;
|
|
}
|
|
}
|
|
|
|
void AcquisitionDevice::UnmapBuffers() {
|
|
for (auto &i: buffer_device)
|
|
if (i != nullptr) munmap(i, FPGA_BUFFER_LOCATION_SIZE);
|
|
}
|
|
|
|
void AcquisitionDevice::SendWorkRequestThread() {
|
|
auto handle = work_request_queue.GetBlocking();
|
|
while (handle != 0) {
|
|
// Preferably use the smallest handle (to reduce buffer size for better TLB usage)
|
|
// So if work request cannot be sent, return handle and check again for the smallest one
|
|
if (!HW_SendWorkRequest(handle - 1)) {
|
|
work_request_queue.Put(handle);
|
|
std::this_thread::sleep_for(std::chrono::microseconds(10));
|
|
}
|
|
handle = work_request_queue.GetBlocking();
|
|
}
|
|
while (!HW_SendWorkRequest(UINT32_MAX))
|
|
std::this_thread::sleep_for(std::chrono::microseconds(10));
|
|
}
|
|
|
|
void AcquisitionDevice::FrameBufferRelease(size_t frame_number, uint16_t module_number) {
|
|
auto handle = counters.GetBufferHandleAndClear(frame_number, module_number);
|
|
if (handle != AcquisitionOnlineCounters::HandleNotFound)
|
|
SendWorkRequest(handle);
|
|
}
|
|
|
|
/// Stores the logger pointer used for debug output; passing nullptr disables
/// the logging done in WaitForActionComplete().
void AcquisitionDevice::EnableLogging(Logger *in_logger) {
    this->logger = in_logger;
}
|
|
|
|
/// Decodes the state of one FIFO from a 32-bit status register.
///
/// @param fifo_register  register value
/// @param pos_empty      bit position of the "empty" flag (checked first)
/// @param pos_full       bit position of the "full" flag
/// @return EMPTY if the empty bit is set, otherwise FULL if the full bit is set,
///         otherwise PARTIAL
inline JFJochProtoBuf::FPGAFIFOStatus FIFO_check(uint32_t fifo_register, uint16_t pos_empty, uint16_t pos_full) {
    const std::bitset<32> flags(fifo_register);

    if (flags.test(pos_empty)) {
        return JFJochProtoBuf::FPGAFIFOStatus::EMPTY;
    }

    return flags.test(pos_full) ? JFJochProtoBuf::FPGAFIFOStatus::FULL
                                : JFJochProtoBuf::FPGAFIFOStatus::PARTIAL;
}
|
|
|
|
/// Reads the hardware status and environment registers and converts them into a
/// JFJochProtoBuf::FPGAStatus message.
///
/// Currents and voltages are divided by 1000 before being stored (source fields are
/// named in mA / mV, destination fields in A / V).
JFJochProtoBuf::FPGAStatus AcquisitionDevice::GetStatus() const {
    ActionStatus status{};
    HW_GetStatus(&status);

    ActionEnvParams env{};
    HW_GetEnvParams(&env);

    JFJochProtoBuf::FPGAStatus ret;

    // --- Control register and pipeline information ---
    const auto ctrl_reg = status.ctrl_reg;
    ret.set_full_status_register(ctrl_reg);

    ret.set_stalls_hbm(status.pipeline_stalls_hbm);
    ret.set_stalls_host(status.pipeline_stalls_host);

    ret.set_max_modules(status.max_modules);
    ret.set_git_sha1(status.git_sha1);

    // --- FIFO states: (empty bit, full bit) positions within fifo_status ---
    auto &fifo_map = *ret.mutable_fifo_status();
    const auto fifo_reg = status.fifo_status;
    fifo_map["Conversion input (data)"] = FIFO_check(fifo_reg, 0, 1);
    fifo_map["Conversion input (cmd)"] = FIFO_check(fifo_reg, 2, 3);
    fifo_map["UDP"] = FIFO_check(fifo_reg, 6, 7);
    fifo_map["Work Request"] = FIFO_check(fifo_reg, 12, 13);
    fifo_map["Work Completion"] = FIFO_check(fifo_reg, 14, 15);
    fifo_map["Host mem (data)"] = FIFO_check(fifo_reg, 8, 9);
    fifo_map["Host mem (cmd)"] = FIFO_check(fifo_reg, 10, 11);
    fifo_map["Data FIFO #8"] = FIFO_check(fifo_reg, 16, 17);
    fifo_map["Addr FIFO #3"] = FIFO_check(fifo_reg, 18, 19);

    ret.set_fpga_idle(HW_IsIdle());

    // --- Packet and error counters ---
    ret.set_packets_ether(status.packets_eth);
    ret.set_packets_udp(status.packets_udp);
    ret.set_packets_icmp(status.packets_icmp);
    ret.set_packets_jfjoch(status.packets_processed);
    ret.set_packets_sls(status.packets_sls);
    ret.set_error_eth(status.udp_err_eth);
    ret.set_error_packet_len(status.udp_err_len);

    // --- Error flags taken from individual bits of the control register ---
    ret.set_datamover_mm2s_error(ctrl_reg & (1 << 10));
    ret.set_datamover_s2mm_error(ctrl_reg & (1 << 11));

    ret.set_frame_statistics_alignment_err(ctrl_reg & (1 << 24));
    ret.set_frame_statistics_tlast_err(ctrl_reg & (1 << 25));
    ret.set_frame_statistics_work_req_err(ctrl_reg & (1 << 26));

    // --- Environment: mailbox, temperature, power ---
    ret.set_mailbox_status_reg(env.mailbox_status_reg);
    ret.set_mailbox_err_reg(env.mailbox_err_reg);

    ret.set_fpga_temp_degc(env.fpga_temp_C);

    const double pcie_12v_current = static_cast<double>(env.fpga_pcie_12V_I_mA) / 1000.0;
    const double pcie_12v_voltage = static_cast<double>(env.fpga_pcie_12V_V_mV) / 1000.0;
    ret.set_current_edge_12v_a(pcie_12v_current);
    ret.set_voltage_edge_12v_v(pcie_12v_voltage);

    const double pcie_3p3v_current = static_cast<double>(env.fpga_pcie_3p3V_I_mA) / 1000.0;
    const double pcie_3p3v_voltage = static_cast<double>(env.fpga_pcie_3p3V_V_mV) / 1000.0;
    ret.set_current_edge_3p3v_a(pcie_3p3v_current);
    ret.set_voltage_edge_3p3v_v(pcie_3p3v_voltage);

    // --- PCIe counters and status ---
    ret.set_pcie_c2h_beats(env.pcie_c2h_beats);
    ret.set_pcie_h2c_beats(env.pcie_h2c_beats);
    ret.set_pcie_c2h_descriptors(env.pcie_c2h_descriptors);
    ret.set_pcie_h2c_descriptors(env.pcie_h2c_descriptors);
    ret.set_pcie_c2h_status(env.pcie_c2h_status);
    ret.set_pcie_h2c_status(env.pcie_h2c_status);

    // --- Ethernet alignment, HBM temperatures, acquisition progress ---
    ret.set_ethernet_rx_aligned(env.ethernet_aligned);
    ret.set_hbm_temp_0_degc(env.hbm_0_temp_C);
    ret.set_hbm_temp_1_degc(env.hbm_1_temp_C);
    ret.set_slowest_head(GetSlowestHead());

    return ret;
}
|
|
|
|
std::string AcquisitionDevice::GetMACAddress() const {
|
|
return MacAddressToStr(HW_GetMACAddress());
|
|
}
|
|
|
|
std::string AcquisitionDevice::GetIPv4Address() const {
|
|
return IPv4AddressToStr(HW_GetIPv4Address());
|
|
}
|
|
|
|
/// Reads the action register from the hardware into a value-initialized ActionConfig
/// and returns it.
ActionConfig AcquisitionDevice::ReadActionRegister() {
    ActionConfig current_config{};

    HW_ReadActionRegister(&current_config);

    return current_config;
}
|
|
|
|
/// Returns the NUMA node of the device; this implementation always returns -1.
int32_t AcquisitionDevice::GetNUMANode() const {
    const int32_t numa_node = -1;
    return numa_node;
}
|
|
|
|
/// Returns the UDP port of the device; this implementation always returns 1234.
uint16_t AcquisitionDevice::GetUDPPort() const {
    const uint16_t udp_port = 1234;
    return udp_port;
}
|
|
|
|
uint64_t AcquisitionDevice::GetBunchID(size_t curr_frame, uint16_t module_number) const {
|
|
return counters.GetBunchID(curr_frame, module_number);
|
|
}
|
|
|
|
uint32_t AcquisitionDevice::GetJFInfo(size_t curr_frame, uint16_t module_number) const {
|
|
return counters.GetJFInfo(curr_frame, module_number);
|
|
}
|
|
|
|
uint32_t AcquisitionDevice::GetTimestamp(size_t curr_frame, uint16_t module_number) const {
|
|
return counters.GetTimestamp(curr_frame, module_number);
|
|
} |