acquisition_device: give each device sole ownership of its buffers

The base AcquisitionDevice no longer allocates or frees frame buffers;
buffer_device is now just a non-owning view of addresses. Each subclass
owns its backing memory and the matching lifecycle:

- PCIExpressDevice mmap's the kernel DMA buffers and munmap's them in its
  own destructor (and on ctor failure), symmetric with MapKernelBuffer.
- HLSSimulatedDevice owns plain zeroed heap buffers it points
  buffer_device into, declared before the HLSDevice so they outlive the
  action thread that writes them. The buffers are page-aligned to match
  the real device's kernel DMA buffers - the modelled AXI datamover and
  FPGAIntegrationTest require aligned output buffers.

This drops the NUMA/mmap dance from the simulated path (not
performance-critical) - removing libnuma from acquisition_device - and
replaces the base-class cleanup that had to guess the allocation
strategy with a single clear owner per device.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-17 17:09:28 +02:00
parent d373ba0490
commit 040cdeacf1
6 changed files with 29 additions and 44 deletions
-38
View File
@@ -1,11 +1,6 @@
// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#ifdef JFJOCH_USE_NUMA
#include <numaif.h>
#endif
#include <sys/mman.h>
#include <thread>
#include <fstream>
#include <cmath>
@@ -14,24 +9,6 @@
#include "AcquisitionDevice.h"
#include "../common/NetworkAddressConvert.h"
void *mmap_acquisition_buffer(size_t size, int16_t numa_node) {
void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ret == MAP_FAILED) {
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer");
}
#ifdef JFJOCH_USE_NUMA
if (numa_node >= 0) {
unsigned long nodemask = 1L << numa_node;;
if (numa_node > sizeof(nodemask)*8)
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Mask too small for NUMA node");
if (mbind(ret, size, MPOL_BIND, &nodemask, sizeof(nodemask)*8, MPOL_MF_STRICT) == -1)
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Cannot apply NUMA policy");
}
#endif
memset(ret, 0, size);
return ret;
}
AcquisitionDevice::AcquisitionDevice(uint16_t in_data_stream) {
logger = nullptr;
data_stream = in_data_stream;
@@ -238,21 +215,6 @@ void AcquisitionDevice::InitializePixelMask(const DiffractionExperiment &experim
}
}
void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, int16_t numa_node) {
try {
for (int i = 0; i < c2h_buffer_count; i++)
buffer_device.emplace_back((DeviceOutput *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node));
} catch (const JFJochException &e) {
UnmapBuffers();
throw;
}
}
void AcquisitionDevice::UnmapBuffers() {
for (auto &i: buffer_device)
if (i != nullptr) munmap(i, FPGA_BUFFER_LOCATION_SIZE);
}
void AcquisitionDevice::FrameBufferRelease(size_t frame_number, uint16_t module_number) {
auto handle = counters.GetBufferHandleAndClear(frame_number, module_number);
if (handle != AcquisitionCounters::HandleNotFound)
+4 -3
View File
@@ -49,6 +49,9 @@ protected:
ThreadSafeFIFO<Completion> work_completion_queue;
ThreadSafeFIFO<WorkRequest> work_request_queue;
// Non-owning view of the per-buffer addresses. Each device subclass owns the backing memory
// and its lifecycle: PCIExpressDevice mmap's/munmap's kernel DMA buffers, HLSSimulatedDevice
// points these at plain heap buffers it owns.
std::vector<DeviceOutput *> buffer_device;
Logger *logger;
@@ -58,8 +61,6 @@ protected:
uint32_t ipv4_addr;
explicit AcquisitionDevice(uint16_t data_stream);
void UnmapBuffers();
void MapBuffersStandard(size_t c2h_buffer_count, int16_t numa_node);
const DeviceOutput *GetDeviceOutput(size_t handle) const;
DeviceOutput *GetDeviceOutput(size_t handle);
virtual void HW_RunInternalGenerator(const FrameGeneratorConfig& config) = 0;
@@ -70,7 +71,7 @@ public:
static constexpr const uint64_t HandleNotValid = UINT64_MAX;
virtual ~AcquisitionDevice() { UnmapBuffers(); };
virtual ~AcquisitionDevice() = default;
void StartAction(const DiffractionExperiment &experiment, uint32_t optional_flags = 0);
void PrepareAction(const DiffractionExperiment &experiment);
+6 -2
View File
@@ -3,14 +3,18 @@
#include "HLSSimulatedDevice.h"
HLSSimulatedDevice::HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules, int16_t numa_node)
HLSSimulatedDevice::HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules)
: FPGAAcquisitionDevice(data_stream) {
mac_addr = 0xCCAA11223344;
ipv4_addr = 0x0132010A;
max_modules = MAX_MODULES_FPGA;
MapBuffersStandard(in_frame_buffer_size_modules, numa_node);
buffers.reserve(in_frame_buffer_size_modules);
for (size_t i = 0; i < in_frame_buffer_size_modules; i++) {
buffers.push_back(std::make_unique<FrameBuffer>()); // zero-initialised, 64-byte aligned
buffer_device.push_back(reinterpret_cast<DeviceOutput *>(buffers.back().get()));
}
device = std::make_unique<HLSDevice>(buffer_device);
}
+8 -1
View File
@@ -11,6 +11,13 @@
#include "FPGAAcquisitionDevice.h"
class HLSSimulatedDevice : public FPGAAcquisitionDevice {
// Owns the simulated frame buffers. Plain heap (this path is not performance-critical, so no
// NUMA placement and no mmap), but page-aligned (4 KiB) to match the real device's kernel DMA
// buffers - more than enough for the data path's alignment needs (AXI datamover 64 B,
// FPGAIntegrationTest 128 B). Declared before `device` so the buffers outlive the HLSDevice
// action thread that writes into them; buffer_device points into these.
struct alignas(4096) FrameBuffer { uint8_t data[FPGA_BUFFER_LOCATION_SIZE]; };
std::vector<std::unique_ptr<FrameBuffer>> buffers;
std::unique_ptr<HLSDevice> device;
void HW_ReadActionRegister(DataCollectionConfig *job) override;
@@ -25,7 +32,7 @@ class HLSSimulatedDevice : public FPGAAcquisitionDevice {
void HW_SetSpotFinderParameters(const SpotFinderParameters &params) override;
void HW_RunInternalGenerator(const FrameGeneratorConfig &config) override;
public:
HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules, int16_t numa_node = -1);
HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules);
~HLSSimulatedDevice() override = default;
void CreateJFPacket(const DiffractionExperiment& experiment, uint64_t frame_number, uint32_t eth_packet,
uint32_t module_number, const uint16_t *data, int8_t adjust_axis = 0, uint8_t user = 0);
+9
View File
@@ -33,6 +33,15 @@ PCIExpressDevice::PCIExpressDevice(uint16_t data_stream, const std::string &devi
}
}
PCIExpressDevice::~PCIExpressDevice() {
UnmapBuffers();
}
void PCIExpressDevice::UnmapBuffers() {
for (auto &buf: buffer_device)
if (buf != nullptr) dev.UnmapKernelBuffer(buf);
}
bool PCIExpressDevice::HW_ReadMailbox(uint32_t *values) {
PCI_EXCEPT(return dev.ReadWorkCompletion(values);)
}
+2
View File
@@ -20,10 +20,12 @@ class PCIExpressDevice : public FPGAAcquisitionDevice {
void FPGA_EndAction() override;
uint32_t GetNumKernelBuffers() const;
void HW_RunInternalGenerator(const FrameGeneratorConfig &config) override;
void UnmapBuffers();
public:
explicit PCIExpressDevice(uint16_t data_stream);
PCIExpressDevice(uint16_t data_stream, uint16_t pci_slot);
PCIExpressDevice(uint16_t data_stream, const std::string &device_name);
~PCIExpressDevice() override;
void Cancel() override;
int32_t GetNUMANode() const override;