acquisition_device: give each device sole ownership of its buffers
The base AcquisitionDevice no longer allocates or frees frame buffers; buffer_device is now just a non-owning view of addresses. Each subclass owns its backing memory and the matching lifecycle: - PCIExpressDevice mmap's the kernel DMA buffers and munmap's them in its own destructor (and on ctor failure), symmetric with MapKernelBuffer. - HLSSimulatedDevice owns plain zeroed heap buffers it points buffer_device into, declared before the HLSDevice so they outlive the action thread that writes them. The buffers are page-aligned to match the real device's kernel DMA buffers - the modelled AXI datamover and FPGAIntegrationTest require aligned output buffers. This drops the NUMA/mmap dance from the simulated path (not performance-critical) - removing libnuma from acquisition_device - and replaces the base-class cleanup that had to guess the allocation strategy with a single clear owner per device. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,11 +1,6 @@
|
||||
// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
||||
// SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
#ifdef JFJOCH_USE_NUMA
|
||||
#include <numaif.h>
|
||||
#endif
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <thread>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
@@ -14,24 +9,6 @@
|
||||
#include "AcquisitionDevice.h"
|
||||
#include "../common/NetworkAddressConvert.h"
|
||||
|
||||
void *mmap_acquisition_buffer(size_t size, int16_t numa_node) {
|
||||
void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (ret == MAP_FAILED) {
|
||||
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer");
|
||||
}
|
||||
#ifdef JFJOCH_USE_NUMA
|
||||
if (numa_node >= 0) {
|
||||
unsigned long nodemask = 1L << numa_node;;
|
||||
if (numa_node > sizeof(nodemask)*8)
|
||||
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Mask too small for NUMA node");
|
||||
if (mbind(ret, size, MPOL_BIND, &nodemask, sizeof(nodemask)*8, MPOL_MF_STRICT) == -1)
|
||||
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "Cannot apply NUMA policy");
|
||||
}
|
||||
#endif
|
||||
memset(ret, 0, size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
AcquisitionDevice::AcquisitionDevice(uint16_t in_data_stream) {
|
||||
logger = nullptr;
|
||||
data_stream = in_data_stream;
|
||||
@@ -238,21 +215,6 @@ void AcquisitionDevice::InitializePixelMask(const DiffractionExperiment &experim
|
||||
}
|
||||
}
|
||||
|
||||
void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, int16_t numa_node) {
|
||||
try {
|
||||
for (int i = 0; i < c2h_buffer_count; i++)
|
||||
buffer_device.emplace_back((DeviceOutput *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node));
|
||||
} catch (const JFJochException &e) {
|
||||
UnmapBuffers();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void AcquisitionDevice::UnmapBuffers() {
|
||||
for (auto &i: buffer_device)
|
||||
if (i != nullptr) munmap(i, FPGA_BUFFER_LOCATION_SIZE);
|
||||
}
|
||||
|
||||
void AcquisitionDevice::FrameBufferRelease(size_t frame_number, uint16_t module_number) {
|
||||
auto handle = counters.GetBufferHandleAndClear(frame_number, module_number);
|
||||
if (handle != AcquisitionCounters::HandleNotFound)
|
||||
|
||||
@@ -49,6 +49,9 @@ protected:
|
||||
ThreadSafeFIFO<Completion> work_completion_queue;
|
||||
ThreadSafeFIFO<WorkRequest> work_request_queue;
|
||||
|
||||
// Non-owning view of the per-buffer addresses. Each device subclass owns the backing memory
|
||||
// and its lifecycle: PCIExpressDevice mmap's/munmap's kernel DMA buffers, HLSSimulatedDevice
|
||||
// points these at plain heap buffers it owns.
|
||||
std::vector<DeviceOutput *> buffer_device;
|
||||
Logger *logger;
|
||||
|
||||
@@ -58,8 +61,6 @@ protected:
|
||||
uint32_t ipv4_addr;
|
||||
explicit AcquisitionDevice(uint16_t data_stream);
|
||||
|
||||
void UnmapBuffers();
|
||||
void MapBuffersStandard(size_t c2h_buffer_count, int16_t numa_node);
|
||||
const DeviceOutput *GetDeviceOutput(size_t handle) const;
|
||||
DeviceOutput *GetDeviceOutput(size_t handle);
|
||||
virtual void HW_RunInternalGenerator(const FrameGeneratorConfig& config) = 0;
|
||||
@@ -70,7 +71,7 @@ public:
|
||||
|
||||
static constexpr const uint64_t HandleNotValid = UINT64_MAX;
|
||||
|
||||
virtual ~AcquisitionDevice() { UnmapBuffers(); };
|
||||
virtual ~AcquisitionDevice() = default;
|
||||
|
||||
void StartAction(const DiffractionExperiment &experiment, uint32_t optional_flags = 0);
|
||||
void PrepareAction(const DiffractionExperiment &experiment);
|
||||
|
||||
@@ -3,14 +3,18 @@
|
||||
|
||||
#include "HLSSimulatedDevice.h"
|
||||
|
||||
HLSSimulatedDevice::HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules, int16_t numa_node)
|
||||
HLSSimulatedDevice::HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules)
|
||||
: FPGAAcquisitionDevice(data_stream) {
|
||||
mac_addr = 0xCCAA11223344;
|
||||
ipv4_addr = 0x0132010A;
|
||||
|
||||
max_modules = MAX_MODULES_FPGA;
|
||||
|
||||
MapBuffersStandard(in_frame_buffer_size_modules, numa_node);
|
||||
buffers.reserve(in_frame_buffer_size_modules);
|
||||
for (size_t i = 0; i < in_frame_buffer_size_modules; i++) {
|
||||
buffers.push_back(std::make_unique<FrameBuffer>()); // zero-initialised, 64-byte aligned
|
||||
buffer_device.push_back(reinterpret_cast<DeviceOutput *>(buffers.back().get()));
|
||||
}
|
||||
|
||||
device = std::make_unique<HLSDevice>(buffer_device);
|
||||
}
|
||||
|
||||
@@ -11,6 +11,13 @@
|
||||
#include "FPGAAcquisitionDevice.h"
|
||||
|
||||
class HLSSimulatedDevice : public FPGAAcquisitionDevice {
|
||||
// Owns the simulated frame buffers. Plain heap (this path is not performance-critical, so no
|
||||
// NUMA placement and no mmap), but page-aligned (4 KiB) to match the real device's kernel DMA
|
||||
// buffers - more than enough for the data path's alignment needs (AXI datamover 64 B,
|
||||
// FPGAIntegrationTest 128 B). Declared before `device` so the buffers outlive the HLSDevice
|
||||
// action thread that writes into them; buffer_device points into these.
|
||||
struct alignas(4096) FrameBuffer { uint8_t data[FPGA_BUFFER_LOCATION_SIZE]; };
|
||||
std::vector<std::unique_ptr<FrameBuffer>> buffers;
|
||||
std::unique_ptr<HLSDevice> device;
|
||||
|
||||
void HW_ReadActionRegister(DataCollectionConfig *job) override;
|
||||
@@ -25,7 +32,7 @@ class HLSSimulatedDevice : public FPGAAcquisitionDevice {
|
||||
void HW_SetSpotFinderParameters(const SpotFinderParameters ¶ms) override;
|
||||
void HW_RunInternalGenerator(const FrameGeneratorConfig &config) override;
|
||||
public:
|
||||
HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules, int16_t numa_node = -1);
|
||||
HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules);
|
||||
~HLSSimulatedDevice() override = default;
|
||||
void CreateJFPacket(const DiffractionExperiment& experiment, uint64_t frame_number, uint32_t eth_packet,
|
||||
uint32_t module_number, const uint16_t *data, int8_t adjust_axis = 0, uint8_t user = 0);
|
||||
|
||||
@@ -33,6 +33,15 @@ PCIExpressDevice::PCIExpressDevice(uint16_t data_stream, const std::string &devi
|
||||
}
|
||||
}
|
||||
|
||||
PCIExpressDevice::~PCIExpressDevice() {
|
||||
UnmapBuffers();
|
||||
}
|
||||
|
||||
void PCIExpressDevice::UnmapBuffers() {
|
||||
for (auto &buf: buffer_device)
|
||||
if (buf != nullptr) dev.UnmapKernelBuffer(buf);
|
||||
}
|
||||
|
||||
bool PCIExpressDevice::HW_ReadMailbox(uint32_t *values) {
|
||||
PCI_EXCEPT(return dev.ReadWorkCompletion(values);)
|
||||
}
|
||||
|
||||
@@ -20,10 +20,12 @@ class PCIExpressDevice : public FPGAAcquisitionDevice {
|
||||
void FPGA_EndAction() override;
|
||||
uint32_t GetNumKernelBuffers() const;
|
||||
void HW_RunInternalGenerator(const FrameGeneratorConfig &config) override;
|
||||
void UnmapBuffers();
|
||||
public:
|
||||
explicit PCIExpressDevice(uint16_t data_stream);
|
||||
PCIExpressDevice(uint16_t data_stream, uint16_t pci_slot);
|
||||
PCIExpressDevice(uint16_t data_stream, const std::string &device_name);
|
||||
~PCIExpressDevice() override;
|
||||
|
||||
void Cancel() override;
|
||||
int32_t GetNUMANode() const override;
|
||||
|
||||
Reference in New Issue
Block a user