// Copyright (2019-2022) Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-or-later #ifdef JFJOCH_USE_NUMA #include #endif #include #include #include #include #include #include #include #include "../../common/JFJochException.h" #include "AcquisitionDevice.h" #include "../../common/NetworkAddressConvert.h" void *mmap_acquisition_buffer(size_t size, int16_t numa_node) { void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ret == nullptr) { throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer"); } #ifdef JFJOCH_USE_NUMA if (numa_node >= 0) { unsigned long nodemask = 1L << numa_node;; if (numa_node > sizeof(nodemask)*8) throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Mask too small for NUMA node"); if (mbind(ret, size, MPOL_BIND, &nodemask, sizeof(nodemask)*8, MPOL_MF_STRICT) == -1) throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Cannot apply NUMA policy"); } #endif memset(ret, 0, size); return ret; } AcquisitionDevice::AcquisitionDevice(uint16_t in_data_stream) : buffer_err(RAW_MODULE_SIZE), internal_pkt_gen_frame(RAW_MODULE_SIZE) { logger = nullptr; data_stream = in_data_stream; for (int i = 0; i < RAW_MODULE_SIZE; i++) internal_pkt_gen_frame[i] = i % 65536; } bool AcquisitionDevice::IsFullModuleCollected(size_t frame, uint8_t module_number) const { return counters.IsFullModuleCollected(frame, module_number); } uint64_t AcquisitionDevice::GetBufferHandle(size_t frame, uint8_t module_number) const { return counters.GetBufferHandle(frame, module_number); } void AcquisitionDevice::FillActionRegister(const DiffractionExperiment& x, ActionConfig &job) { job.nmodules = x.GetModulesNum(data_stream); job.nframes = x.GetFrameNum(); job.one_over_energy = std::lround((1<<20)/ x.GetPhotonEnergy_keV()); job.nstorage_cells = x.GetStorageCellNumber() - 1; job.mode = fpga_non_blocking_mode ? MODE_NONBLOCKING_ON_WR : 0; if ((x.GetDetectorMode() == DetectorMode::Conversion) && x.GetConversionOnFPGA()) job.mode |= MODE_CONV; if (x.IsUsingInternalPacketGen()) job.mode |= MODE_INTERNAL_PACKET_GEN; } void AcquisitionDevice::PrepareAction(const DiffractionExperiment &experiment) { if (!HW_IsIdle()) throw(JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Hardware action running prior to start of data acquisition")); if (experiment.GetModulesNum(data_stream) > max_modules) throw(JFJochException(JFJochExceptionCategory::InputParameterAboveMax, "Number of modules exceeds max possible for FPGA")); counters.Reset(experiment, data_stream); } void AcquisitionDevice::StartAction(const DiffractionExperiment &experiment) { HW_SetCancelDataCollectionBit(); if (!HW_IsIdle()) throw(JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Hardware action running prior to start of data acquisition")); if (experiment.GetModulesNum(data_stream) > max_modules) throw(JFJochException(JFJochExceptionCategory::InputParameterAboveMax, "Number of modules exceeds max possible for FPGA")); for (int i = 0; i < RAW_MODULE_SIZE; i++) { if (experiment.GetDetectorMode() == DetectorMode::Conversion) buffer_err[i] = PIXEL_OUT_LOST; else buffer_err[i] = -1; } counters.Reset(experiment, data_stream); completion_vector.Reset(experiment, data_stream); expected_frames = experiment.GetFrameNum(); ActionConfig cfg_in{}, cfg_out{}; FillActionRegister(experiment, cfg_in); HW_WriteActionRegister(&cfg_in); HW_ReadActionRegister(&cfg_out); if (experiment.IsUsingInternalPacketGen()) CopyInternalPacketGenFrameToDeviceBuffer(); if (cfg_out.mode != cfg_in.mode) throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Mismatch between expected and actual values of configuration registers (mode)"); if (cfg_out.nframes != cfg_in.nframes) throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Mismatch between expected and actual values of configuration registers (Frames per trigger)"); if (cfg_out.nmodules != cfg_in.nmodules) throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Mismatch between expected and actual values of configuration registers (#modules)"); // Ensure internal WR queue is empty work_request_queue.Clear(); HW_StartAction(); for (uint32_t i = 0; i < buffer_device.size(); i++) SendWorkRequest(i); auto c = work_completion_queue.GetBlocking(); while (c.type != Completion::Type::Start) { c = work_completion_queue.GetBlocking(); } StartSendingWorkRequests(); start_time = std::chrono::system_clock::now(); if (logger) logger->Info("Started"); } void AcquisitionDevice::CopyInternalPacketGenFrameToDeviceBuffer() { memcpy(buffer_device[0], internal_pkt_gen_frame.data(), RAW_MODULE_SIZE * sizeof(uint16_t)); } int64_t AcquisitionDevice::CalculateDelay(size_t curr_frame, uint16_t module_number) const { return counters.CalculateDelay(curr_frame, module_number); } void AcquisitionDevice::WaitForFrame(size_t curr_frame, uint16_t module_number) const { counters.WaitForFrame(curr_frame, module_number); } void AcquisitionDevice::SetCustomInternalGeneratorFrame(const std::vector &v) { if (v.size() != RAW_MODULE_SIZE) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Error in size of custom internal generator frame"); for (int i = 0; i < RAW_MODULE_SIZE; i++) internal_pkt_gen_frame[i] = v[i]; } const std::vector &AcquisitionDevice::GetInternalGeneratorFrame() const { return internal_pkt_gen_frame; } void AcquisitionDevice::WaitForActionComplete() { auto c = work_completion_queue.GetBlocking(); while (c.type != Completion::Type::End) { if (c.frame_number >= expected_frames) { HW_SetCancelDataCollectionBit(); // this frame is not of any interest, therefore its location can be immediately released SendWorkRequest(c.handle); } else { counters.UpdateCounters(&c); completion_vector.Add(c); } if (logger != nullptr) logger->Debug("Data stream " + std::to_string(data_stream) + " completion frame number " + std::to_string(c.frame_number) + " module " + std::to_string(c.module) + " handle " + std::to_string(c.handle) + " timestamp " + std::to_string(c.timestamp)); c = work_completion_queue.GetBlocking(); } bytes_received = c.frame_number * 8192LU; counters.SetAcquisitionFinished(); end_time = std::chrono::system_clock::now(); HW_SetCancelDataCollectionBit(); HW_EndAction(); while (!HW_IsIdle()) std::this_thread::sleep_for(std::chrono::milliseconds(1)); } void AcquisitionDevice::SendWorkRequest(uint32_t handle) { work_request_queue.Put(WorkRequest{ .ptr = buffer_device.at(handle), .handle = handle }); } uint64_t AcquisitionDevice::GetBytesReceived() const { return bytes_received; } void AcquisitionDevice::SaveStatistics(const DiffractionExperiment &experiment, JFJochProtoBuf::AcquisitionDeviceStatistics &statistics) const { statistics.set_bytes_received(GetBytesReceived()); statistics.set_start_timestamp(start_time.time_since_epoch().count()); statistics.set_end_timestamp(end_time.time_since_epoch().count()); completion_vector.FillStatistics(experiment, data_stream, statistics); *statistics.mutable_fpga_status() = GetStatus(); } uint64_t AcquisitionDevice::GetHead(uint8_t module_number) const { return counters.GetHead(module_number); } uint64_t AcquisitionDevice::GetSlowestHead() const { return counters.GetSlowestHead(); } bool AcquisitionDevice::IsDone() const { return counters.IsAcquisitionFinished(); } void AcquisitionDevice::ActionAbort() { HW_SetCancelDataCollectionBit(); } const int16_t *AcquisitionDevice::GetFrameBuffer(size_t frame_number, uint16_t module_number) const { auto handle = GetBufferHandle(frame_number, module_number); if (handle != HandleNotValid) return (int16_t *) buffer_device.at(handle); else return GetErrorFrameBuffer(); } const int16_t *AcquisitionDevice::GetErrorFrameBuffer() const { return buffer_err.data(); } int16_t *AcquisitionDevice::GetDeviceBuffer(size_t handle) { if (handle >= buffer_device.size()) throw JFJochException(JFJochExceptionCategory::ArrayOutOfBounds, "Handle outside of range"); else return (int16_t *) buffer_device.at(handle); } void AcquisitionDevice::InitializeCalibration(const DiffractionExperiment &experiment, const JFCalibration &calib) { auto offset = experiment.GetFirstModuleOfDataStream(data_stream); if (calib.GetModulesNum() != experiment.GetModulesNum()) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Mismatch regarding module count in calibration and experiment description"); if (calib.GetStorageCellNum() != experiment.GetStorageCellNumber()) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Mismatch regarding storage cell count in calibration and experiment description"); size_t modules = experiment.GetModulesNum(data_stream); if (1 + modules * (3 + 3 * experiment.GetStorageCellNumber()) > buffer_device.size()) throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Not enough host/FPGA buffers to load all calibration constants"); for (int m = 0; m < modules; m++) { calib.GainCalibration(m).ExportG0(buffer_device[1 + m]); calib.GainCalibration(m).ExportG1(buffer_device[1 + m + modules]); calib.GainCalibration(m).ExportG2(buffer_device[1 + m + modules * 2]); } for (int s = 0; s < experiment.GetStorageCellNumber(); s++) { auto mask = calib.CalculateMask(experiment, s); for (int m = 0; m < modules; m++) { auto pedestal_g0 = calib.Pedestal(offset + m, 0, s).GetPedestal(); auto pedestal_g1 = calib.Pedestal(offset + m, 1, s).GetPedestal(); auto pedestal_g2 = calib.Pedestal(offset + m, 2, s).GetPedestal(); for (int i = 0; i < RAW_MODULE_SIZE; i++) { if (experiment.GetApplyPixelMaskInFPGA() && (mask[(offset + m) * RAW_MODULE_SIZE + i] != 0)) { buffer_device[1 + m + (3 + 0 * 16 + s) * modules][i] = 16384; buffer_device[1 + m + (3 + 1 * 16 + s) * modules][i] = 16384; buffer_device[1 + m + (3 + 2 * 16 + s) * modules][i] = 16384; } else { buffer_device[1 + m + (3 + 0 * 16 + s) * modules][i] = pedestal_g0[i]; buffer_device[1 + m + (3 + 1 * 16 + s) * modules][i] = pedestal_g1[i]; buffer_device[1 + m + (3 + 2 * 16 + s) * modules][i] = pedestal_g2[i]; } } } } } void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, size_t h2c_buffer_count, int16_t numa_node) { try { for (int i = 0; i < std::max(c2h_buffer_count, h2c_buffer_count); i++) buffer_device.emplace_back((uint16_t *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node)); } catch (const JFJochException &e) { UnmapBuffers(); throw; } } void AcquisitionDevice::UnmapBuffers() { for (auto &i: buffer_device) if (i != nullptr) munmap(i, FPGA_BUFFER_LOCATION_SIZE); } void AcquisitionDevice::FrameBufferRelease(size_t frame_number, uint16_t module_number) { auto handle = counters.GetBufferHandleAndClear(frame_number, module_number); if (handle != AcquisitionOnlineCounters::HandleNotFound) SendWorkRequest(handle); } void AcquisitionDevice::EnableLogging(Logger *in_logger) { logger = in_logger; } inline JFJochProtoBuf::FPGAFIFOStatus FIFO_check(uint32_t fifo_register, uint16_t pos_empty, uint16_t pos_full) { if (std::bitset<32>(fifo_register).test(pos_empty)) return JFJochProtoBuf::FPGAFIFOStatus::EMPTY; if (std::bitset<32>(fifo_register).test(pos_full)) return JFJochProtoBuf::FPGAFIFOStatus::FULL; return JFJochProtoBuf::FPGAFIFOStatus::PARTIAL; } JFJochProtoBuf::FPGAStatus AcquisitionDevice::GetStatus() const { ActionStatus status{}; ActionEnvParams env{}; HW_GetStatus(&status); HW_GetEnvParams(&env); JFJochProtoBuf::FPGAStatus ret; auto full_status_register = status.ctrl_reg; ret.set_full_status_register(full_status_register); ret.set_stalls_hbm(status.pipeline_stalls_hbm); ret.set_stalls_host(status.pipeline_stalls_host); ret.set_max_modules(status.max_modules); ret.set_git_sha1(status.git_sha1); (*ret.mutable_fifo_status())["Conversion input (data)"] = FIFO_check(status.fifo_status, 0, 1); (*ret.mutable_fifo_status())["Conversion input (cmd)"] = FIFO_check(status.fifo_status, 2, 3); (*ret.mutable_fifo_status())["UDP"] = FIFO_check(status.fifo_status, 6, 7); (*ret.mutable_fifo_status())["Work Request"] = FIFO_check(status.fifo_status, 12, 13); (*ret.mutable_fifo_status())["Work Completion"] = FIFO_check(status.fifo_status, 14, 15); (*ret.mutable_fifo_status())["Host mem (data)"] = FIFO_check(status.fifo_status, 8, 9); (*ret.mutable_fifo_status())["Host mem (cmd)"] = FIFO_check(status.fifo_status, 10, 11); (*ret.mutable_fifo_status())["Data FIFO #8"] = FIFO_check(status.fifo_status, 16, 17); (*ret.mutable_fifo_status())["Addr FIFO #3"] = FIFO_check(status.fifo_status, 18, 19); ret.set_fpga_idle(HW_IsIdle()); ret.set_packets_ether(status.packets_eth); ret.set_packets_udp(status.packets_udp); ret.set_packets_icmp(status.packets_icmp); ret.set_packets_jfjoch(status.packets_processed); ret.set_packets_sls(status.packets_sls); ret.set_error_eth(status.udp_err_eth); ret.set_error_packet_len(status.udp_err_len); ret.set_host_writer_idle(full_status_register & (1<<4)); ret.set_frame_statistics_alignment_err(full_status_register & (1 << 24)); ret.set_frame_statistics_tlast_err(full_status_register & (1 << 25)); ret.set_frame_statistics_work_req_err(full_status_register & (1 << 26)); ret.set_mailbox_status_reg(env.mailbox_status_reg); ret.set_mailbox_err_reg(env.mailbox_err_reg); ret.set_fpga_temp_degc(env.fpga_temp_C); ret.set_current_edge_12v_a(static_cast(env.fpga_pcie_12V_I_mA) / 1000.0); ret.set_voltage_edge_12v_v(static_cast(env.fpga_pcie_12V_V_mV) / 1000.0); ret.set_current_edge_3p3v_a(static_cast(env.fpga_pcie_3p3V_I_mA) / 1000.0); ret.set_voltage_edge_3p3v_v(static_cast(env.fpga_pcie_3p3V_V_mV) / 1000.0); ret.set_pcie_c2h_beats(env.pcie_c2h_beats); ret.set_pcie_h2c_beats(env.pcie_h2c_beats); ret.set_pcie_c2h_descriptors(env.pcie_c2h_descriptors); ret.set_pcie_h2c_descriptors(env.pcie_h2c_descriptors); ret.set_pcie_c2h_status(env.pcie_c2h_status); ret.set_pcie_h2c_status(env.pcie_h2c_status); ret.set_ethernet_rx_aligned(env.ethernet_aligned); ret.set_hbm_temp_0_degc(env.hbm_0_temp_C); ret.set_hbm_temp_1_degc(env.hbm_1_temp_C); ret.set_slowest_head(GetSlowestHead()); return ret; } std::string AcquisitionDevice::GetMACAddress() const { return MacAddressToStr(HW_GetMACAddress()); } std::string AcquisitionDevice::GetIPv4Address() const { return IPv4AddressToStr(HW_GetIPv4Address()); } ActionConfig AcquisitionDevice::ReadActionRegister() { ActionConfig cfg{}; HW_ReadActionRegister(&cfg); return cfg; } int32_t AcquisitionDevice::GetNUMANode() const { return -1; } uint16_t AcquisitionDevice::GetUDPPort() const { return 1234; } uint64_t AcquisitionDevice::GetBunchID(size_t curr_frame, uint16_t module_number) const { return counters.GetBunchID(curr_frame, module_number); } uint32_t AcquisitionDevice::GetJFInfo(size_t curr_frame, uint16_t module_number) const { return counters.GetJFInfo(curr_frame, module_number); } uint32_t AcquisitionDevice::GetExptime(size_t curr_frame, uint16_t module_number) const { return counters.GetExptime(curr_frame, module_number); } uint64_t AcquisitionDevice::GetTimestamp(size_t curr_frame, uint16_t module_number) const { return counters.GetTimestamp(curr_frame, module_number); } void AcquisitionDevice::SetFPGANonBlockingMode(bool input) { fpga_non_blocking_mode = input; }