Merge branch 'dev231212' into 'main'

Minor fixes for CI, dependencies, and documentation

See merge request jungfraujoch/nextgendcu!16
This commit is contained in:
2023-12-14 22:39:17 +01:00
32 changed files with 297 additions and 150 deletions
+12 -46
View File
@@ -16,45 +16,10 @@ build:x86:gcc:
script:
- mkdir build
- cd build
- source /opt/rh/gcc-toolset-12/enable
- cmake -DCMAKE_BUILD_TYPE=Release ..
- make -j48 jfjoch
build:x86:icpc:
stage: build
variables:
GIT_SUBMODULE_STRATEGY: recursive
CC: icx
CXX: icpx
tags:
- oneapi
- x86
needs: []
script:
- source /opt/grpc/grpc.sh
- source /opt/intel/oneapi/setvars.sh
- mkdir build
- cd build
- cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON ..
- make -j48 jfjoch
build:x86:aocc:
stage: build
variables:
GIT_SUBMODULE_STRATEGY: recursive
CC: clang
CXX: clang++
tags:
- aocc
- x86
needs: []
script:
- source /opt/grpc/grpc.sh
- source /opt/AMD/aocc-compiler-4.0.0/setenv_AOCC.sh
- mkdir build
- cd build
- cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON ..
- make -j48 jfjoch
build:x86:driver:
stage: build
variables:
@@ -89,7 +54,7 @@ build:x86:vitis_hls:
- source /opt/Xilinx/Vitis_HLS/2022.1/settings64.sh
- mkdir build
- cd build
- cmake ..
- /usr/bin/cmake ..
- make hls
build:x86:frontend:
@@ -125,6 +90,7 @@ test:x86:gcc:
- x86
- ib
script:
- source /opt/rh/gcc-toolset-12/enable
- mkdir -p build
- cd build
- cmake -DCMAKE_BUILD_TYPE=Release ..
@@ -154,10 +120,10 @@ test:x86:crystfel:
- x86
- crystfel
script:
- source /opt/grpc/grpc.sh
- source /opt/rh/gcc-toolset-12/enable
- mkdir -p build
- cd build
- cmake -DCMAKE_BUILD_TYPE=Release -DgRPC_FROM_SYSTEM=1 ..
- cmake -DCMAKE_BUILD_TYPE=Release ..
- make -j8 HDF5DatasetWriteTest
- cd ../tests/crystfel
- HDF5DATASET_WRITE_TEST_IMAGES_PER_FILE=0 ../../build/tools/HDF5DatasetWriteTest ../../tests/test_data/compression_benchmark.h5 10
@@ -178,10 +144,10 @@ test:x86:xds_durin:
- x86
- xds
script:
- source /opt/grpc/grpc.sh
- source /opt/rh/gcc-toolset-12/enable
- mkdir -p build
- cd build
- cmake -DCMAKE_BUILD_TYPE=Release -DgRPC_FROM_SYSTEM=1 ..
- cmake -DCMAKE_BUILD_TYPE=Release ..
- make -j8 HDF5DatasetWriteTest
- cd ../tests/xds_durin
- HDF5DATASET_WRITE_TEST_IMAGES_PER_FILE=0 ../../build/tools/HDF5DatasetWriteTest ../../tests/test_data/compression_benchmark.h5 100
@@ -202,15 +168,15 @@ test:x86:xia2.ssx:
- x86
- xds
script:
- source /opt/grpc/grpc.sh
- source /opt/rh/gcc-toolset-12/enable
- mkdir -p build
- mkdir -p dials_test
- cd build
- cmake -DCMAKE_BUILD_TYPE=Release -DgRPC_FROM_SYSTEM=1 ..
- cmake -DCMAKE_BUILD_TYPE=Release ..
- make -j8 HDF5DatasetWriteTest
- cd ../dials_test
- ../build/tools/HDF5DatasetWriteTest ../tests/test_data/compression_benchmark.h5 100
- source /usr/local/dials-v3-13-0/dials_env.sh
- source /usr/local/dials-v3-17-0/dials_env.sh
- xia2.ssx image=writing_test_master.h5 space_group=P43212 unit_cell=78.551,78.551,36.914,90.000,90.000,90.000
synthesis:vivado_pcie_100g:
@@ -235,11 +201,11 @@ synthesis:vivado_pcie_100g:
- "*.mcs"
expire_in: 1 week
script:
- source /opt/grpc/grpc.sh
- source /opt/rh/gcc-toolset-12/enable
- source /opt/Xilinx/Vivado/2022.1/settings64.sh
- mkdir -p build
- cd build
- cmake ..
- /usr/bin/cmake ..
- make action_pcie
- mv fpga/*.mcs ..
needs: ["build:x86:gcc", "build:x86:vitis_hls", "test:x86:gcc"]
-2
View File
@@ -8,8 +8,6 @@ SET(CMAKE_CXX_STANDARD_REQUIRED True)
SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native")
SET(CMAKE_C_FLAGS_RELEASE "-O3 -march=native -mtune=native")
SET(HDF5_USE_STATIC_LIBRARIES ON)
INCLUDE(CheckLanguage)
CHECK_LANGUAGE(CUDA)
+2 -9
View File
@@ -16,7 +16,7 @@
void *mmap_acquisition_buffer(size_t size, int16_t numa_node) {
void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ret == nullptr) {
if (ret == MAP_FAILED) {
throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer");
}
#ifdef JFJOCH_USE_NUMA
@@ -75,8 +75,6 @@ void AcquisitionDevice::StartAction(const DiffractionExperiment &experiment, uin
if (c.type != Completion::Type::Start)
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Mismatch in work completions");
work_completion_count = 0;
StartSendingWorkRequests();
start_time = std::chrono::system_clock::now();
@@ -86,10 +84,6 @@ void AcquisitionDevice::WaitForActionComplete(bool pedestal_mode) {
auto c = work_completion_queue.GetBlocking();
while (c.type != Completion::Type::End) {
work_completion_count++;
while (work_completion_count > GetCompletedDescriptors() / GetExpectedDescriptorsPerModule() )
std::this_thread::sleep_for(std::chrono::milliseconds(1));
auto output = GetDeviceOutput(c.handle);
c.module_number = output->module_statistics.module_number;
c.packet_count = output->module_statistics.packet_count;
@@ -127,7 +121,6 @@ void AcquisitionDevice::WaitForActionComplete(bool pedestal_mode) {
void AcquisitionDevice::SendWorkRequest(uint32_t handle) {
work_request_queue.Put(WorkRequest{
.ptr = buffer_device.at(handle),
.handle = handle
});
}
@@ -182,7 +175,7 @@ void AcquisitionDevice::InitializeIntegrationMap(const DiffractionExperiment &ex
void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, size_t h2c_buffer_count, int16_t numa_node) {
try {
for (int i = 0; i < std::max(c2h_buffer_count, h2c_buffer_count); i++)
buffer_device.emplace_back((uint16_t *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node));
buffer_device.emplace_back((DeviceOutput *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node));
} catch (const JFJochException &e) {
UnmapBuffers();
throw;
+2 -3
View File
@@ -18,7 +18,7 @@
#include "AcquisitionCounters.h"
#include "Completion.h"
#include "../common/DeviceOutput.h"
#include "../fpga/host_library/DeviceOutput.h"
struct AcquisitionDeviceStatistics {
uint64_t good_packets;
@@ -51,7 +51,7 @@ protected:
ThreadSafeFIFO<Completion> work_completion_queue;
ThreadSafeFIFO<WorkRequest> work_request_queue;
std::vector<uint16_t *> buffer_device;
std::vector<DeviceOutput *> buffer_device;
Logger *logger;
uint16_t data_stream;
@@ -107,7 +107,6 @@ public:
virtual std::string GetMACAddress() const;
virtual uint16_t GetUDPPort() const;
virtual int32_t GetNUMANode() const;
virtual uint32_t GetCompletedDescriptors() const = 0;
virtual std::vector<uint16_t> GetInternalGeneratorFrame() const {
return {};
}
-1
View File
@@ -6,7 +6,6 @@
#include <cstdint>
struct WorkRequest {
uint16_t *ptr;
uint32_t handle;
};
+11 -11
View File
@@ -94,7 +94,7 @@ void FPGAAcquisitionDevice::InitializeIntegrationMap(const DiffractionExperiment
for (int m = 0; m < modules; m++) {
for (int i = 0; i < RAW_MODULE_SIZE; i++) {
buffer_device[modules + m][i] = to_fixed(weights[(offset + m) * RAW_MODULE_SIZE + i], 15);
buffer_device[modules + m]->pixels[i] = to_fixed(weights[(offset + m) * RAW_MODULE_SIZE + i], 15);
}
}
HW_LoadIntegrationMap(modules);
@@ -165,9 +165,9 @@ void FPGAAcquisitionDevice::InitializeCalibration(const DiffractionExperiment &e
"Not enough host/FPGA buffers to load all calibration constants");
for (int m = 0; m < modules; m++) {
calib.GainCalibration(m).ExportG0(buffer_device[m]);
calib.GainCalibration(m).ExportG1(buffer_device[m + modules]);
calib.GainCalibration(m).ExportG2(buffer_device[m + modules * 2]);
calib.GainCalibration(m).ExportG0((uint16_t *) buffer_device[m]->pixels);
calib.GainCalibration(m).ExportG1((uint16_t *) buffer_device[m + modules]->pixels);
calib.GainCalibration(m).ExportG2((uint16_t *) buffer_device[m + modules * 2]->pixels);
}
for (int s = 0; s < storage_cells; s++) {
@@ -178,13 +178,13 @@ void FPGAAcquisitionDevice::InitializeCalibration(const DiffractionExperiment &e
auto pedestal_g2 = calib.Pedestal(offset + m, 2, s).GetPedestal();
for (int i = 0; i < RAW_MODULE_SIZE; i++) {
if (experiment.GetApplyPixelMaskInFPGA() && (mask[(offset + m) * RAW_MODULE_SIZE + i] != 0)) {
buffer_device[(3 + 0 * storage_cells + s) * modules + m][i] = 16384;
buffer_device[(3 + 1 * storage_cells + s) * modules + m][i] = 16384;
buffer_device[(3 + 2 * storage_cells + s) * modules + m][i] = 16384;
buffer_device[(3 + 0 * storage_cells + s) * modules + m]->pixels[i] = 16384;
buffer_device[(3 + 1 * storage_cells + s) * modules + m]->pixels[i] = 16384;
buffer_device[(3 + 2 * storage_cells + s) * modules + m]->pixels[i] = 16384;
} else {
buffer_device[(3 + 0 * storage_cells + s) * modules + m][i] = pedestal_g0[i];
buffer_device[(3 + 1 * storage_cells + s) * modules + m][i] = pedestal_g1[i];
buffer_device[(3 + 2 * storage_cells + s) * modules + m][i] = pedestal_g2[i];
((uint16_t *) buffer_device[(3 + 0 * storage_cells + s) * modules + m]->pixels)[i] = pedestal_g0[i];
((uint16_t *) buffer_device[(3 + 1 * storage_cells + s) * modules + m]->pixels)[i] = pedestal_g1[i];
((uint16_t *) buffer_device[(3 + 2 * storage_cells + s) * modules + m]->pixels)[i] = pedestal_g2[i];
}
}
@@ -216,7 +216,7 @@ void FPGAAcquisitionDevice::FillActionRegister(const DiffractionExperiment& x, D
job.mode = data_collection_id << 16;
job.nsummation = x.GetSummation() - 1;
expected_descriptors_per_module = 5;
expected_descriptors_per_module = DMA_DESCRIPTORS_PER_MODULE;
switch (x.GetDetectorMode()) {
case DetectorMode::Conversion:
+15 -7
View File
@@ -160,7 +160,8 @@ void HLSSimulatedDevice::FPGA_StartAction(const DiffractionExperiment &experimen
ipv4_addr,
INT_PKT_GEN_BUNCHID,
INT_PKT_GEN_EXPTTIME,
INT_PKT_GEN_DEBUG);
INT_PKT_GEN_DEBUG,
cancel_data_collection);
if (ret)
throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError,
"Error running internal packet generator");
@@ -181,9 +182,22 @@ HLSSimulatedDevice::~HLSSimulatedDevice() {
}
bool HLSSimulatedDevice::HW_ReadMailbox(uint32_t *values) {
std::unique_lock<std::mutex> ul(completion_mutex);
ap_uint<32> tmp;
bool ret = completion_stream.read_nb(tmp);
values[0] = tmp;
// equivalent to driver functionality
if (ret) {
uint32_t handle = values[0] & 0xFFFF;
if (handle == HANDLE_START)
completion_count = 0;
else if (handle != HANDLE_END) {
completion_count++;
while (completion_count * DMA_DESCRIPTORS_PER_MODULE > datamover_out.GetCompletedDescriptors())
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
return ret;
}
@@ -509,7 +523,6 @@ void HLSSimulatedDevice::HW_GetStatus(DataCollectionStatus *status) const {
memset(status, 0, sizeof(DataCollectionStatus));
status->ctrl_reg = ap_uint<1>(host_writer_idle) ? (1 << 4) : 0;
status->modules_internal_packet_generator = 1;
status->max_modules = max_modules;
status->hbm_size_bytes = hbm_if_size;
}
@@ -588,11 +601,6 @@ void HLSSimulatedDevice::HW_LoadInternalGeneratorFrame(uint32_t modules) {
throw std::runtime_error("Datamover queue is not empty");
}
uint32_t HLSSimulatedDevice::GetCompletedDescriptors() const {
return datamover_out.GetCompletedDescriptors();
}
void HLSSimulatedDevice::HW_SetSpotFinderParameters(const SpotFinderParameters &params) {
count_threshold = params.count_threshold;
snr_threshold = params.snr_threshold;
+3 -1
View File
@@ -33,6 +33,9 @@ class HLSSimulatedDevice : public FPGAAcquisitionDevice {
hls::stream<ap_uint<32> > work_request_stream;
hls::stream<ap_uint<32> > completion_stream;
std::mutex completion_mutex;
uint32_t completion_count;
std::thread action_thread;
Datamover<512> datamover_in;
@@ -77,7 +80,6 @@ public:
void CreateFinalPacket(const DiffractionExperiment& experiment);
AXI_STREAM &OutputStream();
void Cancel() override;
uint32_t GetCompletedDescriptors() const override;
};
-4
View File
@@ -115,10 +115,6 @@ void PCIExpressDevice::HW_LoadIntegrationMap(uint32_t in_modules) {
dev.LoadIntegrationMap(in_modules);
}
uint32_t PCIExpressDevice::GetCompletedDescriptors() const {
return dev.GetCompletedDescriptors();
}
void PCIExpressDevice::HW_LoadInternalGeneratorFrame(uint32_t in_modules) {
dev.LoadInternalGeneratorFrame(in_modules);
}
-1
View File
@@ -40,7 +40,6 @@ public:
std::string GetMACAddress() const override;
std::string GetIPv4Address() const override;
uint32_t GetCompletedDescriptors() const override;
DeviceStatus GetDeviceStatus() const override;
DataCollectionStatus GetDataCollectionStatus() const override;
};
+1 -1
View File
@@ -44,7 +44,7 @@ ADD_LIBRARY( CommonFunctions STATIC
ADUHistogram.cpp ADUHistogram.h
RawToConvertedGeometryCore.h
Plot.h
DeviceOutput.h
../fpga/host_library/DeviceOutput.h
ZMQWrappers.cpp ZMQWrappers.h)
TARGET_LINK_LIBRARIES(CommonFunctions Compression JFCalibration libzmq -lrt)
+4 -2
View File
@@ -64,8 +64,8 @@
#define LOAD_CALIBRATION_DEST_INTEGRATION 1
#define LOAD_CALIBRATION_DEST_FRAME_GEN 2
#define HANDLE_START (UINT16_MAX - 1)
#define HANDLE_END (UINT16_MAX )
#define HANDLE_START (65534)
#define HANDLE_END (65535)
#define INT_PKT_GEN_DEBUG 0x0
#define INT_PKT_GEN_BUNCHID 0xCACACACACA
@@ -79,4 +79,6 @@
#define ADU_HISTO_BIN_WIDTH 32
#define ADU_HISTO_BIN_COUNT (65536/ ADU_HISTO_BIN_WIDTH)
#define DMA_DESCRIPTORS_PER_MODULE 5
#endif //DEFINITIONS_H
+1 -1
View File
@@ -12,7 +12,7 @@ Current power estimation is about 30 W when idle and 40 W in operation.
CPU Part:
* `pcie_driver` Linux kernel driver for PCIe version of the FPGA board - see [instructions](pcie_driver/README.md)
* `host_library` Library that should be used to access the driver + some simple diagnostic tools
* `host_library` Library that should be used to access the driver + some simple diagnostic tools - see [workflow documentation](host_library/README.md)
FPGA part:
+8 -3
View File
@@ -85,7 +85,8 @@ int frame_generator(STREAM_512 &data_out,
ap_uint<32> dest_ipv4_addr,
ap_uint<64> bunchid,
ap_uint<32> exptime,
ap_uint<32> debug) {
ap_uint<32> debug,
volatile ap_uint<1> &in_cancel) {
#pragma HLS INTERFACE mode=s_axilite port=return
#pragma HLS INTERFACE mode=s_axilite port=frames
#pragma HLS INTERFACE mode=s_axilite port=modules
@@ -94,8 +95,9 @@ int frame_generator(STREAM_512 &data_out,
#pragma HLS INTERFACE mode=s_axilite port=debug
#pragma HLS INTERFACE mode=s_axilite port=dest_mac_addr
#pragma HLS INTERFACE mode=s_axilite port=dest_ipv4_addr
#pragma HLS INTERFACE mode=ap_none port=src_mac_addr
#pragma HLS INTERFACE mode=ap_none port=src_ipv4_addr
#pragma HLS INTERFACE mode=ap_none register port=src_mac_addr
#pragma HLS INTERFACE mode=ap_none register port=src_ipv4_addr
#pragma HLS INTERFACE mode=ap_none register port=in_cancel
#pragma HLS INTERFACE register ap_none port=hbm_size_bytes
#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
@@ -108,6 +110,9 @@ int frame_generator(STREAM_512 &data_out,
return 1;
for (uint32_t f = 0; f < frames; f++) {
ap_uint<1> local_cancel = in_cancel;
if (local_cancel == 1)
break;
for (uint32_t p = 0; p < 128; p++) {
for (uint32_t m = 0; m < modules; m++) {
generate_packet(data_out,
+2 -1
View File
@@ -360,7 +360,8 @@ int frame_generator(STREAM_512 &data_out,
ap_uint<32> dest_ipv4_addr,
ap_uint<64> bunchid,
ap_uint<32> exptime,
ap_uint<32> debug);
ap_uint<32> debug,
volatile ap_uint<1> &in_cancel);
int load_calibration(ap_uint<256> *d_hbm_p0,
+1 -1
View File
@@ -1,7 +1,7 @@
// Copyright (2019-2023) Paul Scherrer Institute
#include "hls_jfjoch.h"
#include "../../common/DeviceOutput.h"
#include "../host_library/DeviceOutput.h"
#define o(field) offsetof(ModuleStatistics, field)
#define sf(msg, field, s) msg(o(field)*8 + s - 1, o(field)*8)
@@ -5,6 +5,8 @@
#include <cstdint>
#include "../../common/Definitions.h"
#pragma pack(push)
#pragma pack(1)
+17 -12
View File
@@ -87,19 +87,12 @@ uint32_t JungfraujochDevice::GetNumaNode() const {
return tmp;
}
uint32_t JungfraujochDevice::GetCompletedDescriptors() const {
uint32_t ret = 0;
if (ioctl(fd, IOCTL_JFJOCH_C2H_DMA_DESC, &ret) != 0)
throw PCIeDeviceException("Failed geting C2H completed descriptor count");
return ret;
}
// Passes the data collection configuration to the driver via the
// IOCTL_JFJOCH_SET_CONFIG ioctl.
// Throws PCIeDeviceException if the ioctl returns a non-zero value.
void JungfraujochDevice::SetConfig(const DataCollectionConfig &config) {
    const int status = ioctl(fd, IOCTL_JFJOCH_SET_CONFIG, &config);
    if (status == 0)
        return;
    throw PCIeDeviceException("Failed writing config");
}
bool JungfraujochDevice::ReadWorkCompletion(uint32_t output[16]) {
bool JungfraujochDevice::ReadWorkCompletion(uint32_t *output) {
int tmp = ioctl(fd, IOCTL_JFJOCH_READ_WC_MBOX, output);
if (tmp != 0) {
if (errno == EAGAIN)
@@ -199,17 +192,21 @@ void JungfraujochDevice::LoadIntegrationMap(uint32_t modules) {
throw PCIeDeviceException("Failed uploading integration map");
}
uint16_t *JungfraujochDevice::MapKernelBuffer(uint32_t id) {
auto tmp = (uint16_t *) mmap(nullptr, FPGA_BUFFER_LOCATION_SIZE,
DeviceOutput *JungfraujochDevice::MapKernelBuffer(uint32_t id) {
auto tmp = (DeviceOutput *) mmap(nullptr, FPGA_BUFFER_LOCATION_SIZE,
PROT_READ | PROT_WRITE, MAP_SHARED,
fd, FPGA_BUFFER_LOCATION_SIZE * id);
if (tmp == nullptr)
if (tmp == MAP_FAILED)
throw PCIeDeviceException("Mmap of kernel buffer error");
return tmp;
}
// Unmaps a kernel buffer from user space.
// val is expected to be a pointer returned by MapKernelBuffer(); the call releases
// FPGA_BUFFER_LOCATION_SIZE bytes starting at that address.
// NOTE: the return value of munmap() is not checked, so a failed unmap goes unreported.
void JungfraujochDevice::UnmapKernelBuffer(DeviceOutput *val) {
munmap(val, FPGA_BUFFER_LOCATION_SIZE);
}
void JungfraujochDevice::SetSpotFinderParameters(const SpotFinderParameters& params) {
if (ioctl(fd, IOCTL_JFJOCH_SPOT_FINDER_PAR, &params) != 0)
throw PCIeDeviceException("Failed settings spot finder parameters");
@@ -228,4 +225,12 @@ void JungfraujochDevice::SetDataSource(uint32_t id) {
if (ioctl(fd, IOCTL_JFJOCH_SET_DATA_SOURCE, &id) != 0)
throw JFJochException(JFJochExceptionCategory::PCIeError, "Failed setting data source");
}
}
// User-friendly variant of ReadWorkCompletion(uint32_t *): reads one 32-bit
// completion word and unpacks it into a JungfraujochDeviceCompletion.
//   bits 0..15  -> completion.buffer_id
//   bits 16..31 -> completion.data_collection_id
//
// Returns the result of the raw read: true if a completion was read, false otherwise.
// When false is returned the fields of `completion` are still written (decoded from
// whatever the raw read left in the temporary, which starts at 0), but they carry no
// information and must not be interpreted.
bool JungfraujochDevice::ReadWorkCompletion(JungfraujochDeviceCompletion &completion) {
    uint32_t tmp = 0;
    const bool ret = ReadWorkCompletion(&tmp);

    completion.buffer_id = tmp & 0xFFFF;
    completion.data_collection_id = (tmp >> 16) & 0xFFFF;

    // Propagate the status of the raw read. This used to return false unconditionally,
    // which made every successfully read completion look like an empty mailbox.
    return ret;
}
+30 -9
View File
@@ -5,6 +5,12 @@
#include <string>
#include "../pcie_driver/ActionConfig.h"
#include "DeviceOutput.h"
// Decoded form of the 32-bit work completion word, as filled in by
// JungfraujochDevice::ReadWorkCompletion(JungfraujochDeviceCompletion &).
struct JungfraujochDeviceCompletion {
uint16_t data_collection_id; // bits 16..31 of the completion word
uint16_t buffer_id;          // bits 0..15 of the completion word: buffer location that has completed data
};
class JungfraujochDevice {
int fd;
@@ -37,17 +43,22 @@ public:
// Resets FPGA - not safe at the moment
void Reset();
// Returns current NUMA node
uint32_t GetNumaNode() const;
// Returns number of completed card-to-host descriptors = number of full modules transferred to the host memory
uint32_t GetCompletedDescriptors() const;
// Read work completion
// returns true if there was completion in the mailbox - output is then saved
// returns false if mailbox was empty - output is invalid
bool ReadWorkCompletion(uint32_t output[16]);
// More user-friendly function, as it returns data structure
bool ReadWorkCompletion(JungfraujochDeviceCompletion &completion);
// Read work completion
// returns true if there was completion in the mailbox - output is then saved
// returns false if mailbox was empty - output is invalid
// output is coded as single 32-bit integer:
// bits (0..15) = buffer location that has completed data
// bits (31..16) = data_collection_id
bool ReadWorkCompletion(uint32_t *output);
// Sends work request of buffer location id
// returns true if there was space in the mailbox
@@ -79,23 +90,33 @@ public:
uint32_t ReadRegister(uint32_t addr) const;
// Load calibration parameters
// Function is synchronous - it will return when loading is done
void LoadCalibration(uint32_t modules, uint32_t storage_cells);
// Load frames for internal generator
// Must be placed in first <modules> kernel buffer locations
// Function is synchronous - it will return when loading is done
void LoadInternalGeneratorFrame(uint32_t modules);
// Load map of radial integration
// Must be placed in first <modules> kernel buffer locations
// Function is synchronous - it will return when loading is done
void LoadIntegrationMap(uint32_t modules);
void SetSpotFinderParameters(const SpotFinderParameters &params);
// Get number of kernel buffers
uint32_t GetBufferCount() const;
// Allocate id kernel buffer ( id must be less than GetBufferCount() )
// buffer has to be unmapped using munmap
uint16_t *MapKernelBuffer(uint32_t id);
// Map kernel buffer ( id must be less than GetBufferCount() ) to user virtual memory space
DeviceOutput *MapKernelBuffer(uint32_t buffer_id);
// Select data source on the FPGA - which network connector is used
// Unmap kernel buffer, using the pointer returned by MapKernelBuffer()
void UnmapKernelBuffer(DeviceOutput* val);
// Select data source on the FPGA
// Allowed values:
// 0 - no data input
// 1 - 100G interface
// 2 - 4x10G interface
// 3 - frame generator within the FPGA
void SetDataSource(uint32_t id);
uint32_t GetDataSource();
};
+89
View File
@@ -0,0 +1,89 @@
# Jungfraujoch host library
The library is used as the low-level interface between C++ host application and Jungfraujoch FPGA card.
It provides a wrapper over the kernel driver ioctl calls. The preferred way to use Jungfraujoch is via the full `jfjoch_broker` application;
however, for more tailored solutions one can directly use the API described below.
## Configure the detector
To use the API, one needs to configure the detector via [slsDetectorPackage](https://github.com/slsdetectorgroup/slsDetectorPackage) for data acquisition. Besides the usual configuration used for JUNGFRAU, the following settings are necessary:
* Detector frame numbers must be restarted to start always from 1 using the following command-line interface command (or C++/Python equivalent):
```
sls_detector_put nextframenumber 1
```
* Detector has to be configured to measure 6 frames more than needed for data acquisition, for the acquisition to stop automatically
* Set destination MAC and IPv4 address to be consistent with ones for the card. At the time being, all the ethernet interfaces of the card (100G and 4x10G) share the same MAC/IPv4 address, though this might change in the future. UDP port is not used.
* Each module has to have its sequential number assigned through the column field in the detector header, which has to be set to `2 * module number`. This is currently done by writing `module number * 2 * 65536` to register `0x7C` of the module. IMPORTANT: Module numbering for each FPGA card is independent and has to start from 1. With 4 FPGA cards and 16 modules, there will be four modules with number 1, four with number 2, etc.
* The card supports both 1 and 2 network interfaces per JUNGFRAU module.
## Designing with the API
For each FPGA card, one needs to instantiate `JungfraujochDevice` object, using device name of format `/dev/jfjoch<number>`
and read/write parameter. Only one process can open the device with write access, while multiple processes might open the device for read in parallel.
### Configuring network
The first step for using the card is configuring the network. To use the network, one needs to select the data source with the `JungfraujochDevice::SetDataSource()` function - the options are no data source (default), 100G, 4x10G and internal generator. Next, addresses need to be configured for the network stack.
The card receives its MAC address automatically, based on the Xilinx-assigned number, but the IPv4 address has to be configured with the `JungfraujochDevice::SetIPv4Address()` function. The card is equipped with a simple network stack - if both MAC and IPv4 addresses are set and the 100G interface is used, the card will periodically send gratuitous ARP messages; it will also reply to ARP requests and to ICMP pings. Given that the 4x10G interface is designed for a direct Jungfraujoch-detector configuration, without a switch, diagnostics functionality is not offered there at the moment.
### Uploading calibration
Will be contributed later, as there is planned modification to the interface
### Preparing data collection
Before any operation one needs to check if card is idle (not running data collection) with `JungfraujochDevice::IsIdle()` function. Most configuration parameters cannot be changed, when card is in not-idle state.
The card can be then configured with `JungfraujochDevice::SetConfig()` function. Details of the configuration data structure are given in [ActionConfig.h](../pcie_driver/ActionConfig.h) header file.
Next, kernel buffers need to be mapped to the user space. These buffers are allocated in physically contiguous memory, simplifying operation of the card and the driver. The count of these buffers can be checked with the `JungfraujochDevice::GetBufferCount()` function. Buffers can be mapped with the `JungfraujochDevice::MapKernelBuffer()` function and unmapped with the `JungfraujochDevice::UnmapKernelBuffer()` function. The structure of the kernel buffer is described in the [DeviceOutput.h](DeviceOutput.h) header file.
### Data collection
Then one can start the card with `JungfraujochDevice::Start()` function. Final step is to wait for first completion (with value `HANDLE_START` defined in [Definitions.h](../../common/Definitions.h) as buffer number) using `JungfraujochDevice::ReadWorkCompletion()`.
Standard operation of the card requires exchange of buffer ownership between the host application and FPGA card. At the beginning all buffers are owned by host application and should be "given" to the card with `JungfraujochDevice::SendWorkRequest()` function. Then card will wait for the detector to send data. After full module is collected, data are written via Direct Memory Access to host memory and kernel driver is informed with an interrupt that data are ready. Host application can "learn" what was collected by the card by running `JungfraujochDevice::ReadWorkCompletion()` function. Buffer returned by the function is owned by the host application and is safe to process. After processing the buffer has to be given back to card via `JungfraujochDevice::SendWorkRequest()`. If the card doesn't receive enough work requests (open buffers) it won't be able to receive data, resulting in lost packets.
Some important points to mention:
* Both functions mentioned in the above paragraph may fail due to work request queue being full and completion queue being empty respectively. Please always check that return value is `true`.
* Both functions `JungfraujochDevice::SendWorkRequest()` and `JungfraujochDevice::ReadWorkCompletion()` are thread-safe and can be executed in a parallel context. All other functions in the library that change the configuration or state of the card are NOT thread-safe; running them in parallel would give nondeterministic results anyway.
* Reading work completion will wait up to 1 second before returning.
* `JungfraujochDevice::ReadWorkCompletion()` returns the data collection ID in the upper 16 bits (bits 16-31) of the completion word - this makes it possible to avoid mixing up completions from the previous and the current data collection.
* Work requests sent before `HANDLE_START` was received by host application will be likely discarded.
The card will end acquisition in two situations:
* Frame with number provided in the configuration is received
* `JungfraujochDevice::Cancel()` function is called by host application
The host application will know that the data collection finished by receiving completion with value of `HANDLE_END` as buffer number. After data collection is finished one should call `JungfraujochDevice::End()` to finalize.
### Internal generator
When detector is not installed, or one would like to check the Jungfraujoch card without running a detector,
it is possible to generate detector packets internally. Internal generator makes packets with all the headers (Ethernet, UDP/IP, SLS Detector) and is entering data stream in similar location to Ethernet Media Access Control (MAC) cores.
Before starting data collection, it is necessary to load the content of each module into the card FPGA memory. At the moment, different content can be provided for each module, but all frames for a particular module will be the same. This will hopefully change in the future.
To load the data, one needs to place content of each module (in 16-bit) into respective kernel buffer (allocated with `JungfraujochDevice::MapKernelBuffer()`) - first module to buffer 0, second module to buffer 1, etc. Then one needs to call `JungfraujochDevice::LoadInternalGeneratorFrame()` with specified number of modules.
One also needs to switch data source by executing `JungfraujochDevice::SetDataSource()` with respective value.
The next step is to do all the preparations to start data collection, up to `JungfraujochDevice::Start()` and completion handshake. Then one can run `JungfraujochDevice::RunFrameGenerator()` function, with parameters described in the [ActionConfig.h](../pcie_driver/ActionConfig.h) header file. The function is asynchronous, and will start generation, but doesn't wait for the end. Though one can assume that frame generator is done, when data collection is finished.
### Spot finding parameters
Spot finding parameters can be updated with function `JungfraujochDevice::SetSpotFinderParameters()`.
Contrary to other configuration functions, this one is safe to execute during data collection.
## Tools
In addition to the library, there are also small CLI tools available in this folder.
### jfjoch_pcie_status
Prints detailed status information about the card. Execute by adding device path, e.g.:
```
./jfjoch_pcie_status /dev/jfjoch0
```
The program is safe to execute during a running data collection.
### jfjoch_pcie_clear_net_counters
Network counters in the card give information about Ethernet, UDP and ICMP packets encountered by the network stack prior to Jungfraujoch logic.
These counters are running from the moment card is powered on. They can be reset by running the program with device name, e.g.:
```
./jfjoch_pcie_clear_net_counters /dev/jfjoch0
```
@@ -1,7 +1,5 @@
// Copyright (2019-2023) Paul Scherrer Institute
// Copyright (2019-2023) Paul Scherrer Institute
#include <iostream>
#include "../common/JFJochException.h"
@@ -1,7 +1,5 @@
// Copyright (2019-2023) Paul Scherrer Institute
// Copyright (2019-2023) Paul Scherrer Institute
#include "../common/Logger.h"
#include "../common/JFJochException.h"
#include "JungfraujochDevice.h"
+7 -1
View File
@@ -58,7 +58,13 @@ int main(int argc, char **argv) {
std::cout << "Git SHA1 " << std::hex << fpga_status.git_sha1 << std::endl;
std::cout << "Max modules " << std::dec << fpga_status.max_modules << std::endl;
std::cout << "NUMA node " << device.GetNumaNode() << std::endl;
std::cout << "Ethernet aligned " << (fpga_env_data.ethernet_aligned ? "Yes" : "No") << std::endl;
std::cout << "Ethernet 100G link " << (fpga_env_data.eth_100G_link ? "Yes" : "No") << std::endl;
std::cout << "Ethernet 10G link (*=link) "
<< (fpga_env_data.eth_10G_link[0] ? "*" : "-")
<< (fpga_env_data.eth_10G_link[1] ? "*" : "-")
<< (fpga_env_data.eth_10G_link[2] ? "*" : "-")
<< (fpga_env_data.eth_10G_link[3] ? "*" : "-")
<< std::endl;
std::cout << "FPGA 12V rail current [A] " << fpga_env_data.fpga_pcie_12V_I_mA / 1000.0 << std::endl;
std::cout << "FPGA 12V rail voltage [V] " << fpga_env_data.fpga_pcie_12V_V_mV / 1000.0 << std::endl;
+15 -14
View File
@@ -14,25 +14,25 @@ typedef __u64 uint64_t;
#pragma pack(4)
struct DataCollectionConfig {
uint32_t nmodules;
uint32_t mode;
uint32_t one_over_energy;
uint32_t nframes;
uint32_t nstorage_cells;
uint32_t nsummation;
uint32_t nmodules; // Number of modules for data collection minus one (0 = 1 module, 1 = 2 modules, ..., 31 = 32 modules)
uint32_t mode; // see MODE_* in common/Definitions.h; upper 16-bit of the mode are data_collection_id, that is returned with completion numbers
uint32_t one_over_energy; // One over energy in keV (this is bit-to-bit float value, use float+uint32_t union to assign the value)
uint32_t nframes; // Number of frames for data collection
uint32_t nstorage_cells; // Number of storage cells minus one (0 = 1SC, 1 = 2SC, ..., 15 = 16SC)
uint32_t nsummation; // Summation of frames minus one (0 = no summation, 1 = 2 frames, 2 = 3 frames, ..., 255 = 256 frames)
};
struct DataCollectionStatus {
uint32_t ctrl_reg;
uint32_t reserved_0;
uint32_t set_led;
uint32_t reserved_1;
uint32_t git_sha1;
uint32_t action_type;
uint32_t release_level;
uint32_t reserved_2;
uint32_t reserved_3;
uint32_t reserved_1;
uint32_t max_modules;
uint32_t modules_internal_packet_generator;
uint32_t reserved_4;
uint64_t pipeline_stalls_host;
uint64_t pipeline_stalls_hbm;
uint32_t fifo_status;
@@ -82,14 +82,15 @@ struct DeviceStatus {
uint32_t hbm_1_temp_C;
uint32_t work_compl_fifo_avail;
bool ethernet_aligned;
bool eth_100G_link;
bool eth_10G_link[4];
};
struct FrameGeneratorConfig {
uint32_t frames;
uint32_t modules;
uint64_t dest_mac_addr;
uint32_t dest_ipv4_addr;
uint32_t frames; // Number of frames
uint32_t modules; // Number of modules (1-32)
uint64_t dest_mac_addr; // Use the same as source address
uint32_t dest_ipv4_addr; // Use the same as source address
uint64_t bunchid;
uint32_t exptime;
uint32_t debug;
+1 -1
View File
@@ -13,7 +13,7 @@ EXECUTE_PROCESS(COMMAND uname -r
FIND_PATH(KERNELHEADERS_DIR
include/linux/user.h
PATHS /usr/src/linux-headers-${KERNEL_RELEASE}
PATHS /usr/src/linux-headers-${KERNEL_RELEASE} /usr/src/kernels/${KERNEL_RELEASE}
)
MESSAGE(STATUS "Kernel release: ${KERNEL_RELEASE}")
+1 -1
View File
@@ -78,4 +78,4 @@ Interfacing should be done through the JungfraujochDevice class in `fpga/host_li
## Sysfs access
Certain performance counters can be read through the sysfs mechanism in the kernel.
One needs to `cat` files in `/sys/class/misc/jfjoch<number of device>`.
One needs to `cat` files in the `/sys/class/misc/jfjoch<number of device>/` directory.
+19 -3
View File
@@ -51,6 +51,10 @@
#define PCIE_OFFSET (0x090000)
#define FRAME_GEN_OFFSET (0x080000)
#define ADDRESS_TABLE_OFFSET (0x200000)
#define MAC_10G_0_OFFSET (0x100000)
#define MAC_10G_1_OFFSET (0x110000)
#define MAC_10G_2_OFFSET (0x120000)
#define MAC_10G_3_OFFSET (0x130000)
// Action config
#define ADDR_CTRL_REGISTER 0x0000
@@ -162,13 +166,25 @@ struct jfjoch_drvdata {
u32 git_sha1;
spinlock_t file_write_open_count_spinlock;
int file_write_open_count;
int file_write_open_count; // ensure
// The AXI mailbox must be checked for being full before anything is written to it - this obviously must be atomic,
// assuming this is a quick operation
spinlock_t work_request_submit_spinlock;
DECLARE_KFIFO(work_compl, u32, MAX_FPGA_BUFFER);
// KFIFO has separate synchronization for read and write - i.e. with one writer and multiple readers,
// a mutex/spinlock is needed only for reading
//
// work_compl is not protected for the put operation => this only happens in the ISR;
// atomicity is guaranteed by the kernel (an interrupt is masked during its own ISR).
// However, getting entries from the queue happens in a system call and can be executed in parallel,
// therefore this part is protected by a mutex (it is assumed that waiting for interrupts can take seconds,
// so the process has to be able to sleep while holding the lock => a spinlock would not work).
// The mutex also protects the work_compl_count variable.
struct mutex work_compl_read_mutex;
wait_queue_head_t work_compl_wait_queue;
DECLARE_KFIFO(work_compl, u32, MAX_FPGA_BUFFER); // protected by work_compl_read_mutex
wait_queue_head_t work_compl_wait_queue; // used for read completion queue method to wait for interrupt, ISR has wake-up call
u32 work_compl_count; // protected by work_compl_read_mutex
};
int jfjoch_register_misc_dev(struct pci_dev *pdev);
+50 -7
View File
@@ -14,6 +14,19 @@ uint32_t jfjoch_read_register(struct jfjoch_drvdata *drvdata, uint32_t addr) {
}
void jfjoch_start(struct jfjoch_drvdata *drvdata) {
u32 tmp;
// Drain work completion queue
mutex_lock(&drvdata->work_compl_read_mutex);
while(!kfifo_is_empty(&drvdata->work_compl))
kfifo_get(&drvdata->work_compl, &tmp);
// Reset work completion queue
drvdata->work_compl_count = 0;
mutex_unlock(&drvdata->work_compl_read_mutex);
// Set PCIe beats counters
iowrite32((1 << 1), drvdata->bar0 + PCIE_OFFSET + (1<<12) + 0xC0);
iowrite32((1 << 2), drvdata->bar0 + PCIE_OFFSET + (1<<12) + 0xC0);
@@ -60,22 +73,45 @@ int jfjoch_send_wr(struct jfjoch_drvdata *drvdata, u32 handle) {
}
/*
 * Read one work-completion word from the work_compl FIFO into *output.
 *
 * The wait for a non-empty FIFO (timeout of HZ jiffies) and the dequeue are
 * done while holding work_compl_read_mutex. The low 16 bits of the dequeued
 * word are treated as the handle: HANDLE_START resets work_compl_count,
 * any handle other than HANDLE_END advances it.
 *
 * Return values visible in this function:
 *   negative value from wait_event_interruptible_timeout() - wait failed
 *   -EAGAIN - kfifo_get() returned 0, i.e. nothing was dequeued
 *   -EIO    - descriptor counter read from the device is still behind the
 *             software completion count after a 50 us delay
 *   0       - otherwise
 *
 * NOTE(review): this listing appears to interleave the removed and the added
 * lines of a change (duplicated "if (ret >= 0)", "else return 0;" ahead of
 * the descriptor check); the comments below describe the added lines.
 */
int jfjoch_read_wc(struct jfjoch_drvdata *drvdata, u32 *output) {
struct device *const dev = &drvdata->pdev->dev;
int ret, tmp;
u32 curr_compl_count, handle;
mutex_lock(&drvdata->work_compl_read_mutex);
// Sleeps with the mutex held until the FIFO is non-empty, the timeout expires or the wait is interrupted
ret = wait_event_interruptible_timeout(drvdata->work_compl_wait_queue, !kfifo_is_empty(&drvdata->work_compl), HZ);
if (ret >= 0)
if (ret >= 0) {
tmp = kfifo_get(&drvdata->work_compl, output);
// NOTE(review): when kfifo_get() returns 0 nothing was written to *output here, yet it is
// still read below and work_compl_count may be reset/incremented - confirm this is intended
handle = (*output) & 0xFFFF;
if (handle == HANDLE_START)
drvdata->work_compl_count = 0;
else if (handle != HANDLE_END)
// Post-increment: curr_compl_count holds the count before this completion is added
curr_compl_count = drvdata->work_compl_count++;
}
mutex_unlock(&drvdata->work_compl_read_mutex);
if (ret < 0)
return ret;
else if (tmp == 0)
return -EAGAIN;
else
return 0;
// Guarantee that data are in host memory
// (curr_compl_count is only assigned for handles other than HANDLE_START/HANDLE_END, matching this condition)
if ((handle != HANDLE_START) && (handle != HANDLE_END)) {
u32 descriptors = ioread32(drvdata->bar0 + PCIE_OFFSET + (1 << 12) + 0x48);
if (descriptors < curr_compl_count * DMA_DESCRIPTORS_PER_MODULE) {
dev_warn(dev, "Late completion count SW %u, HW %u HANDLE %x", curr_compl_count, descriptors, handle);
// Give the hardware counter a short time to catch up, then re-read it once
udelay(50);
descriptors = ioread32(drvdata->bar0 + PCIE_OFFSET + (1 << 12) + 0x48);
if (descriptors < DMA_DESCRIPTORS_PER_MODULE * curr_compl_count) {
dev_err(dev, "Late completion count SW %u, HW %u HANDLE %x", curr_compl_count, descriptors, handle);
return -EIO;
}
}
}
return 0;
}
void jfjoch_set_config(struct jfjoch_drvdata *drvdata, const struct DataCollectionConfig *config) {
memcpy_toio((drvdata->bar0) + ACTION_CONFIG_OFFSET + ADDR_NMODULES, config, sizeof(struct DataCollectionConfig));
}
@@ -243,6 +279,8 @@ void jfjoch_setup_network(struct jfjoch_drvdata *drvdata) {
}
void jfjoch_get_env_data(struct jfjoch_drvdata *drvdata, struct DeviceStatus *env_params) {
int i;
env_params->mailbox_status_reg = ioread32(drvdata->bar0 + MAILBOX_OFFSET + ADDR_MAILBOX_STATUS);
env_params->mailbox_err_reg = ioread32(drvdata->bar0 + MAILBOX_OFFSET + ADDR_MAILBOX_ERR);
env_params->mailbox_interrupt_status = ioread32(drvdata->bar0 + MAILBOX_OFFSET + ADDR_MAILBOX_IS);
@@ -272,10 +310,15 @@ void jfjoch_get_env_data(struct jfjoch_drvdata *drvdata, struct DeviceStatus *en
env_params->hbm_0_temp_C = ioread32(drvdata->bar0 + CMS_OFFSET + ADDR_CMS_HBM_TEMP1_INS_REG);
env_params->hbm_1_temp_C = ioread32(drvdata->bar0 + CMS_OFFSET + ADDR_CMS_HBM_TEMP2_INS_REG);
// Somehow it is better to ask twice
env_params->ethernet_aligned = ioread32(drvdata->bar0 + CMAC_OFFSET + 0x0204) & 0x2;
env_params->ethernet_aligned = ioread32(drvdata->bar0 + CMAC_OFFSET + 0x0204) & 0x2;
// These are latched-low registers, so they need to be read twice to get the current value
for (i = 0; i < 2; i++) {
env_params->eth_100G_link = ioread32(drvdata->bar0 + CMAC_OFFSET + 0x0204) & 0x1; // stat_rx_status
env_params->eth_10G_link[0] = ioread32(drvdata->bar0 + MAC_10G_0_OFFSET + 0x0404) & 0x1; // stat_rx_status
env_params->eth_10G_link[1] = ioread32(drvdata->bar0 + MAC_10G_1_OFFSET + 0x0404) & 0x1; // stat_rx_status
env_params->eth_10G_link[2] = ioread32(drvdata->bar0 + MAC_10G_2_OFFSET + 0x0404) & 0x1; // stat_rx_status
env_params->eth_10G_link[3] = ioread32(drvdata->bar0 + MAC_10G_3_OFFSET + 0x0404) & 0x1; // stat_rx_status
}
env_params->work_compl_fifo_avail = kfifo_avail(&drvdata->work_compl);
}
+2 -1
View File
@@ -472,7 +472,7 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } {
# Create port connections
connect_bd_net -net action_config_0_clear_counters [get_bd_pins action_config_0/clear_counters] [get_bd_pins network_stack/clear_counters]
connect_bd_net -net action_config_0_data_collection_cancel [get_bd_pins action_config_0/data_collection_cancel] [get_bd_pins data_collection_fsm_0/in_cancel]
connect_bd_net -net action_config_0_data_collection_cancel [get_bd_pins action_config_0/data_collection_cancel] [get_bd_pins data_collection_fsm_0/in_cancel] [get_bd_pins frame_generator_0/in_cancel]
connect_bd_net -net action_config_0_data_collection_fsm_start [get_bd_pins action_config_0/data_collection_start] [get_bd_pins data_collection_fsm_0/in_run] [get_bd_pins network_stack/data_collection_start]
connect_bd_net -net action_config_0_data_collection_mode [get_bd_pins action_config_0/data_collection_mode] [get_bd_pins data_collection_fsm_0/mode]
connect_bd_net -net action_config_0_data_source [get_bd_pins action_config_0/data_source] [get_bd_pins stream_merge_0/source]
@@ -550,3 +550,4 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } {
# Restore current instance
current_bd_instance $oldCurInst
}
+1 -1
View File
@@ -10,7 +10,7 @@
#include <memory>
#include "../common/Definitions.h"
#include "../common/DeviceOutput.h"
#include "../fpga/host_library/DeviceOutput.h"
class JFModulePedestal {
std::vector<uint16_t> pedestal;
-1
View File
@@ -35,7 +35,6 @@ TEST_CASE("HLS_C_Simulation_internal_packet_generator", "[FPGA][Full]") {
}
}
REQUIRE(test.GetExpectedDescriptorsPerModule() == 5);
REQUIRE(test.GetCompletedDescriptors() == 5 * (4 + DELAY_FRAMES_STOP_AND_QUIT - 1) * nmodules);
}
TEST_CASE("HLS_C_Simulation_internal_packet_generator_custom_frame", "[FPGA][Full]") {
+1 -1
View File
@@ -9,6 +9,6 @@ add_executable(HDF5DatasetWriteTest HDF5DatasetWriteTest.cpp)
target_link_libraries(HDF5DatasetWriteTest JFJochWriter CommonFunctions)
ADD_EXECUTABLE(jfjoch_writer_test jfjoch_writer_test.cpp)
TARGET_LINK_LIBRARIES(jfjoch_writer_test JFJochWriter CommonFunctions)
TARGET_LINK_LIBRARIES(jfjoch_writer_test JFJochWriter ImagePusher CommonFunctions)
INSTALL(TARGETS jfjoch_udp_simulator CompressionBenchmark HDF5DatasetWriteTest jfjoch_writer_test RUNTIME)