diff --git a/acquisition_device/AcquisitionDevice.cpp b/acquisition_device/AcquisitionDevice.cpp index 452338fa..fc31af1e 100644 --- a/acquisition_device/AcquisitionDevice.cpp +++ b/acquisition_device/AcquisitionDevice.cpp @@ -1,11 +1,6 @@ // SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-only -#ifdef JFJOCH_USE_NUMA -#include -#endif - -#include #include #include #include @@ -14,24 +9,6 @@ #include "AcquisitionDevice.h" #include "../common/NetworkAddressConvert.h" -void *mmap_acquisition_buffer(size_t size, int16_t numa_node) { - void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) { - throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer"); - } -#ifdef JFJOCH_USE_NUMA - if (numa_node >= 0) { - unsigned long nodemask = 1L << numa_node;; - if (numa_node > sizeof(nodemask)*8) - throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Mask too small for NUMA node"); - if (mbind(ret, size, MPOL_BIND, &nodemask, sizeof(nodemask)*8, MPOL_MF_STRICT) == -1) - throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Cannot apply NUMA policy"); - } -#endif - memset(ret, 0, size); - return ret; -} - AcquisitionDevice::AcquisitionDevice(uint16_t in_data_stream) { logger = nullptr; data_stream = in_data_stream; @@ -238,21 +215,6 @@ void AcquisitionDevice::InitializePixelMask(const DiffractionExperiment &experim } } -void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, int16_t numa_node) { - try { - for (int i = 0; i < c2h_buffer_count; i++) - buffer_device.emplace_back((DeviceOutput *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node)); - } catch (const JFJochException &e) { - UnmapBuffers(); - throw; - } -} - -void AcquisitionDevice::UnmapBuffers() { - for (auto &i: buffer_device) - if (i != nullptr) munmap(i, FPGA_BUFFER_LOCATION_SIZE); -} - void AcquisitionDevice::FrameBufferRelease(size_t 
frame_number, uint16_t module_number) { auto handle = counters.GetBufferHandleAndClear(frame_number, module_number); if (handle != AcquisitionCounters::HandleNotFound) diff --git a/acquisition_device/AcquisitionDevice.h b/acquisition_device/AcquisitionDevice.h index 71e60ca3..7b9d59e1 100644 --- a/acquisition_device/AcquisitionDevice.h +++ b/acquisition_device/AcquisitionDevice.h @@ -49,6 +49,9 @@ protected: ThreadSafeFIFO work_completion_queue; ThreadSafeFIFO work_request_queue; + // Non-owning view of the per-buffer addresses. Each device subclass owns the backing memory + // and its lifecycle: PCIExpressDevice mmap's/munmap's kernel DMA buffers, HLSSimulatedDevice + // points these at plain heap buffers it owns. std::vector buffer_device; Logger *logger; @@ -58,8 +61,6 @@ protected: uint32_t ipv4_addr; explicit AcquisitionDevice(uint16_t data_stream); - void UnmapBuffers(); - void MapBuffersStandard(size_t c2h_buffer_count, int16_t numa_node); const DeviceOutput *GetDeviceOutput(size_t handle) const; DeviceOutput *GetDeviceOutput(size_t handle); virtual void HW_RunInternalGenerator(const FrameGeneratorConfig& config) = 0; @@ -70,7 +71,7 @@ public: static constexpr const uint64_t HandleNotValid = UINT64_MAX; - virtual ~AcquisitionDevice() { UnmapBuffers(); }; + virtual ~AcquisitionDevice() = default; void StartAction(const DiffractionExperiment &experiment, uint32_t optional_flags = 0); void PrepareAction(const DiffractionExperiment &experiment); diff --git a/acquisition_device/HLSSimulatedDevice.cpp b/acquisition_device/HLSSimulatedDevice.cpp index 24fd222b..54e8c5b3 100644 --- a/acquisition_device/HLSSimulatedDevice.cpp +++ b/acquisition_device/HLSSimulatedDevice.cpp @@ -3,14 +3,18 @@ #include "HLSSimulatedDevice.h" -HLSSimulatedDevice::HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules, int16_t numa_node) +HLSSimulatedDevice::HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules) : 
FPGAAcquisitionDevice(data_stream) { mac_addr = 0xCCAA11223344; ipv4_addr = 0x0132010A; max_modules = MAX_MODULES_FPGA; - MapBuffersStandard(in_frame_buffer_size_modules, numa_node); + buffers.reserve(in_frame_buffer_size_modules); + for (size_t i = 0; i < in_frame_buffer_size_modules; i++) { + buffers.push_back(std::make_unique<FrameBuffer>()); // zero-initialised, page-aligned (4 KiB) + buffer_device.push_back(reinterpret_cast<DeviceOutput *>(buffers.back().get())); + } device = std::make_unique<HLSDevice>(buffer_device); } diff --git a/acquisition_device/HLSSimulatedDevice.h b/acquisition_device/HLSSimulatedDevice.h index 4fee45bc..83e40ef9 100644 --- a/acquisition_device/HLSSimulatedDevice.h +++ b/acquisition_device/HLSSimulatedDevice.h @@ -11,6 +11,13 @@ #include "FPGAAcquisitionDevice.h" class HLSSimulatedDevice : public FPGAAcquisitionDevice { + // Owns the simulated frame buffers. Plain heap (this path is not performance-critical, so no + // NUMA placement and no mmap), but page-aligned (4 KiB) to match the real device's kernel DMA + // buffers - more than enough for the data path's alignment needs (AXI datamover 64 B, + // FPGAIntegrationTest 128 B). Declared before `device` so the buffers outlive the HLSDevice + // action thread that writes into them; buffer_device points into these. 
+ struct alignas(4096) FrameBuffer { uint8_t data[FPGA_BUFFER_LOCATION_SIZE]; }; + std::vector<std::unique_ptr<FrameBuffer>> buffers; std::unique_ptr<HLSDevice> device; void HW_ReadActionRegister(DataCollectionConfig *job) override; @@ -25,7 +32,7 @@ class HLSSimulatedDevice : public FPGAAcquisitionDevice { void HW_SetSpotFinderParameters(const SpotFinderParameters &params) override; void HW_RunInternalGenerator(const FrameGeneratorConfig &config) override; public: - HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules, int16_t numa_node = -1); + HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules); ~HLSSimulatedDevice() override = default; void CreateJFPacket(const DiffractionExperiment& experiment, uint64_t frame_number, uint32_t eth_packet, uint32_t module_number, const uint16_t *data, int8_t adjust_axis = 0, uint8_t user = 0); diff --git a/acquisition_device/PCIExpressDevice.cpp b/acquisition_device/PCIExpressDevice.cpp index 1ddb6f9b..3fa146fd 100644 --- a/acquisition_device/PCIExpressDevice.cpp +++ b/acquisition_device/PCIExpressDevice.cpp @@ -33,6 +33,15 @@ PCIExpressDevice::PCIExpressDevice(uint16_t data_stream, const std::string &devi } } +PCIExpressDevice::~PCIExpressDevice() { + UnmapBuffers(); +} + +void PCIExpressDevice::UnmapBuffers() { + for (auto &buf: buffer_device) + if (buf != nullptr) dev.UnmapKernelBuffer(buf); +} + bool PCIExpressDevice::HW_ReadMailbox(uint32_t *values) { PCI_EXCEPT(return dev.ReadWorkCompletion(values);) } diff --git a/acquisition_device/PCIExpressDevice.h b/acquisition_device/PCIExpressDevice.h index 2362a1fe..ac1f1b3c 100644 --- a/acquisition_device/PCIExpressDevice.h +++ b/acquisition_device/PCIExpressDevice.h @@ -20,10 +20,12 @@ class PCIExpressDevice : public FPGAAcquisitionDevice { void FPGA_EndAction() override; uint32_t GetNumKernelBuffers() const; void HW_RunInternalGenerator(const FrameGeneratorConfig &config) override; + void UnmapBuffers(); public: explicit PCIExpressDevice(uint16_t data_stream); 
PCIExpressDevice(uint16_t data_stream, uint16_t pci_slot); PCIExpressDevice(uint16_t data_stream, const std::string &device_name); + ~PCIExpressDevice() override; void Cancel() override; int32_t GetNUMANode() const override;