From d82bd13917cc890b6043303b7e9cff81d61e1cd1 Mon Sep 17 00:00:00 2001 From: leonarski_f Date: Thu, 14 Dec 2023 22:39:17 +0100 Subject: [PATCH] Minor fixes for CI and dependencies Improvements in documentation and readability of JungfraujochDevice function --- .gitlab-ci.yml | 58 +++--------- CMakeLists.txt | 2 - acquisition_device/AcquisitionDevice.cpp | 11 +-- acquisition_device/AcquisitionDevice.h | 5 +- acquisition_device/Completion.h | 1 - acquisition_device/FPGAAcquisitionDevice.cpp | 22 ++--- acquisition_device/HLSSimulatedDevice.cpp | 22 +++-- acquisition_device/HLSSimulatedDevice.h | 4 +- acquisition_device/PCIExpressDevice.cpp | 4 - acquisition_device/PCIExpressDevice.h | 1 - common/CMakeLists.txt | 2 +- common/Definitions.h | 6 +- fpga/README.md | 2 +- fpga/hls/frame_generator.cpp | 11 ++- fpga/hls/hls_jfjoch.h | 3 +- fpga/hls/host_writer.cpp | 2 +- {common => fpga/host_library}/DeviceOutput.h | 2 + fpga/host_library/JungfraujochDevice.cpp | 29 +++--- fpga/host_library/JungfraujochDevice.h | 39 ++++++-- fpga/host_library/README.md | 89 +++++++++++++++++++ .../jfjoch_pcie_cancel_data_collection.cpp | 2 - fpga/host_library/jfjoch_pcie_set_network.cpp | 2 - fpga/host_library/jfjoch_pcie_status.cpp | 8 +- fpga/pcie_driver/ActionConfig.h | 29 +++--- fpga/pcie_driver/CMakeLists.txt | 2 +- fpga/pcie_driver/README.md | 2 +- fpga/pcie_driver/jfjoch_drv.h | 22 ++++- fpga/pcie_driver/jfjoch_function.c | 57 ++++++++++-- fpga/scripts/jfjoch.tcl | 3 +- jungfrau/JFModulePedestal.h | 2 +- tests/FPGAIntegrationTest.cpp | 1 - tools/CMakeLists.txt | 2 +- 32 files changed, 297 insertions(+), 150 deletions(-) rename {common => fpga/host_library}/DeviceOutput.h (96%) create mode 100644 fpga/host_library/README.md diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 661b1797..5b262f05 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,45 +16,10 @@ build:x86:gcc: script: - mkdir build - cd build + - source /opt/rh/gcc-toolset-12/enable - cmake -DCMAKE_BUILD_TYPE=Release .. 
- make -j48 jfjoch -build:x86:icpc: - stage: build - variables: - GIT_SUBMODULE_STRATEGY: recursive - CC: icx - CXX: icpx - tags: - - oneapi - - x86 - needs: [] - script: - - source /opt/grpc/grpc.sh - - source /opt/intel/oneapi/setvars.sh - - mkdir build - - cd build - - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON .. - - make -j48 jfjoch - -build:x86:aocc: - stage: build - variables: - GIT_SUBMODULE_STRATEGY: recursive - CC: clang - CXX: clang++ - tags: - - aocc - - x86 - needs: [] - script: - - source /opt/grpc/grpc.sh - - source /opt/AMD/aocc-compiler-4.0.0/setenv_AOCC.sh - - mkdir build - - cd build - - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON .. - - make -j48 jfjoch - build:x86:driver: stage: build variables: @@ -89,7 +54,7 @@ build:x86:vitis_hls: - source /opt/Xilinx/Vitis_HLS/2022.1/settings64.sh - mkdir build - cd build - - cmake .. + - /usr/bin/cmake .. - make hls build:x86:frontend: @@ -125,6 +90,7 @@ test:x86:gcc: - x86 - ib script: + - source /opt/rh/gcc-toolset-12/enable - mkdir -p build - cd build - cmake -DCMAKE_BUILD_TYPE=Release .. @@ -154,10 +120,10 @@ test:x86:crystfel: - x86 - crystfel script: - - source /opt/grpc/grpc.sh + - source /opt/rh/gcc-toolset-12/enable - mkdir -p build - cd build - - cmake -DCMAKE_BUILD_TYPE=Release -DgRPC_FROM_SYSTEM=1 .. + - cmake -DCMAKE_BUILD_TYPE=Release .. - make -j8 HDF5DatasetWriteTest - cd ../tests/crystfel - HDF5DATASET_WRITE_TEST_IMAGES_PER_FILE=0 ../../build/tools/HDF5DatasetWriteTest ../../tests/test_data/compression_benchmark.h5 10 @@ -178,10 +144,10 @@ test:x86:xds_durin: - x86 - xds script: - - source /opt/grpc/grpc.sh + - source /opt/rh/gcc-toolset-12/enable - mkdir -p build - cd build - - cmake -DCMAKE_BUILD_TYPE=Release -DgRPC_FROM_SYSTEM=1 .. + - cmake -DCMAKE_BUILD_TYPE=Release .. 
- make -j8 HDF5DatasetWriteTest - cd ../tests/xds_durin - HDF5DATASET_WRITE_TEST_IMAGES_PER_FILE=0 ../../build/tools/HDF5DatasetWriteTest ../../tests/test_data/compression_benchmark.h5 100 @@ -202,15 +168,15 @@ test:x86:xia2.ssx: - x86 - xds script: - - source /opt/grpc/grpc.sh + - source /opt/rh/gcc-toolset-12/enable - mkdir -p build - mkdir -p dials_test - cd build - - cmake -DCMAKE_BUILD_TYPE=Release -DgRPC_FROM_SYSTEM=1 .. + - cmake -DCMAKE_BUILD_TYPE=Release .. - make -j8 HDF5DatasetWriteTest - cd ../dials_test - ../build/tools/HDF5DatasetWriteTest ../tests/test_data/compression_benchmark.h5 100 - - source /usr/local/dials-v3-13-0/dials_env.sh + - source /usr/local/dials-v3-17-0/dials_env.sh - xia2.ssx image=writing_test_master.h5 space_group=P43212 unit_cell=78.551,78.551,36.914,90.000,90.000,90.000 synthesis:vivado_pcie_100g: @@ -235,11 +201,11 @@ synthesis:vivado_pcie_100g: - "*.mcs" expire_in: 1 week script: - - source /opt/grpc/grpc.sh + - source /opt/rh/gcc-toolset-12/enable - source /opt/Xilinx/Vivado/2022.1/settings64.sh - mkdir -p build - cd build - - cmake .. + - /usr/bin/cmake .. - make action_pcie - mv fpga/*.mcs .. 
needs: ["build:x86:gcc", "build:x86:vitis_hls", "test:x86:gcc"] diff --git a/CMakeLists.txt b/CMakeLists.txt index 064d175c..c6702818 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,8 +8,6 @@ SET(CMAKE_CXX_STANDARD_REQUIRED True) SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native") SET(CMAKE_C_FLAGS_RELEASE "-O3 -march=native -mtune=native") -SET(HDF5_USE_STATIC_LIBRARIES ON) - INCLUDE(CheckLanguage) CHECK_LANGUAGE(CUDA) diff --git a/acquisition_device/AcquisitionDevice.cpp b/acquisition_device/AcquisitionDevice.cpp index 1130b92e..962aafd8 100644 --- a/acquisition_device/AcquisitionDevice.cpp +++ b/acquisition_device/AcquisitionDevice.cpp @@ -16,7 +16,7 @@ void *mmap_acquisition_buffer(size_t size, int16_t numa_node) { void *ret = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == nullptr) { + if (ret == MAP_FAILED) { throw JFJochException(JFJochExceptionCategory::MemAllocFailed, "frame_buffer"); } #ifdef JFJOCH_USE_NUMA @@ -75,8 +75,6 @@ void AcquisitionDevice::StartAction(const DiffractionExperiment &experiment, uin if (c.type != Completion::Type::Start) throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Mismatch in work completions"); - work_completion_count = 0; - StartSendingWorkRequests(); start_time = std::chrono::system_clock::now(); @@ -86,10 +84,6 @@ void AcquisitionDevice::WaitForActionComplete(bool pedestal_mode) { auto c = work_completion_queue.GetBlocking(); while (c.type != Completion::Type::End) { - work_completion_count++; - while (work_completion_count > GetCompletedDescriptors() / GetExpectedDescriptorsPerModule() ) - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - auto output = GetDeviceOutput(c.handle); c.module_number = output->module_statistics.module_number; c.packet_count = output->module_statistics.packet_count; @@ -127,7 +121,6 @@ void AcquisitionDevice::WaitForActionComplete(bool pedestal_mode) { void AcquisitionDevice::SendWorkRequest(uint32_t 
handle) { work_request_queue.Put(WorkRequest{ - .ptr = buffer_device.at(handle), .handle = handle }); } @@ -182,7 +175,7 @@ void AcquisitionDevice::InitializeIntegrationMap(const DiffractionExperiment &ex void AcquisitionDevice::MapBuffersStandard(size_t c2h_buffer_count, size_t h2c_buffer_count, int16_t numa_node) { try { for (int i = 0; i < std::max(c2h_buffer_count, h2c_buffer_count); i++) - buffer_device.emplace_back((uint16_t *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node)); + buffer_device.emplace_back((DeviceOutput *) mmap_acquisition_buffer(FPGA_BUFFER_LOCATION_SIZE, numa_node)); } catch (const JFJochException &e) { UnmapBuffers(); throw; diff --git a/acquisition_device/AcquisitionDevice.h b/acquisition_device/AcquisitionDevice.h index 5f533003..cce6ac49 100644 --- a/acquisition_device/AcquisitionDevice.h +++ b/acquisition_device/AcquisitionDevice.h @@ -18,7 +18,7 @@ #include "AcquisitionCounters.h" #include "Completion.h" -#include "../common/DeviceOutput.h" +#include "../fpga/host_library/DeviceOutput.h" struct AcquisitionDeviceStatistics { uint64_t good_packets; @@ -51,7 +51,7 @@ protected: ThreadSafeFIFO work_completion_queue; ThreadSafeFIFO work_request_queue; - std::vector buffer_device; + std::vector buffer_device; Logger *logger; uint16_t data_stream; @@ -107,7 +107,6 @@ public: virtual std::string GetMACAddress() const; virtual uint16_t GetUDPPort() const; virtual int32_t GetNUMANode() const; - virtual uint32_t GetCompletedDescriptors() const = 0; virtual std::vector GetInternalGeneratorFrame() const { return {}; } diff --git a/acquisition_device/Completion.h b/acquisition_device/Completion.h index ddcc894e..42d2cdf8 100644 --- a/acquisition_device/Completion.h +++ b/acquisition_device/Completion.h @@ -6,7 +6,6 @@ #include struct WorkRequest { - uint16_t *ptr; uint32_t handle; }; diff --git a/acquisition_device/FPGAAcquisitionDevice.cpp b/acquisition_device/FPGAAcquisitionDevice.cpp index 2317214b..84757505 100644 --- 
a/acquisition_device/FPGAAcquisitionDevice.cpp +++ b/acquisition_device/FPGAAcquisitionDevice.cpp @@ -94,7 +94,7 @@ void FPGAAcquisitionDevice::InitializeIntegrationMap(const DiffractionExperiment for (int m = 0; m < modules; m++) { for (int i = 0; i < RAW_MODULE_SIZE; i++) { - buffer_device[modules + m][i] = to_fixed(weights[(offset + m) * RAW_MODULE_SIZE + i], 15); + buffer_device[modules + m]->pixels[i] = to_fixed(weights[(offset + m) * RAW_MODULE_SIZE + i], 15); } } HW_LoadIntegrationMap(modules); @@ -165,9 +165,9 @@ void FPGAAcquisitionDevice::InitializeCalibration(const DiffractionExperiment &e "Not enough host/FPGA buffers to load all calibration constants"); for (int m = 0; m < modules; m++) { - calib.GainCalibration(m).ExportG0(buffer_device[m]); - calib.GainCalibration(m).ExportG1(buffer_device[m + modules]); - calib.GainCalibration(m).ExportG2(buffer_device[m + modules * 2]); + calib.GainCalibration(m).ExportG0((uint16_t *) buffer_device[m]->pixels); + calib.GainCalibration(m).ExportG1((uint16_t *) buffer_device[m + modules]->pixels); + calib.GainCalibration(m).ExportG2((uint16_t *) buffer_device[m + modules * 2]->pixels); } for (int s = 0; s < storage_cells; s++) { @@ -178,13 +178,13 @@ void FPGAAcquisitionDevice::InitializeCalibration(const DiffractionExperiment &e auto pedestal_g2 = calib.Pedestal(offset + m, 2, s).GetPedestal(); for (int i = 0; i < RAW_MODULE_SIZE; i++) { if (experiment.GetApplyPixelMaskInFPGA() && (mask[(offset + m) * RAW_MODULE_SIZE + i] != 0)) { - buffer_device[(3 + 0 * storage_cells + s) * modules + m][i] = 16384; - buffer_device[(3 + 1 * storage_cells + s) * modules + m][i] = 16384; - buffer_device[(3 + 2 * storage_cells + s) * modules + m][i] = 16384; + buffer_device[(3 + 0 * storage_cells + s) * modules + m]->pixels[i] = 16384; + buffer_device[(3 + 1 * storage_cells + s) * modules + m]->pixels[i] = 16384; + buffer_device[(3 + 2 * storage_cells + s) * modules + m]->pixels[i] = 16384; } else { - buffer_device[(3 + 0 * 
storage_cells + s) * modules + m][i] = pedestal_g0[i]; - buffer_device[(3 + 1 * storage_cells + s) * modules + m][i] = pedestal_g1[i]; - buffer_device[(3 + 2 * storage_cells + s) * modules + m][i] = pedestal_g2[i]; + ((uint16_t *) buffer_device[(3 + 0 * storage_cells + s) * modules + m]->pixels)[i] = pedestal_g0[i]; + ((uint16_t *) buffer_device[(3 + 1 * storage_cells + s) * modules + m]->pixels)[i] = pedestal_g1[i]; + ((uint16_t *) buffer_device[(3 + 2 * storage_cells + s) * modules + m]->pixels)[i] = pedestal_g2[i]; } } @@ -216,7 +216,7 @@ void FPGAAcquisitionDevice::FillActionRegister(const DiffractionExperiment& x, D job.mode = data_collection_id << 16; job.nsummation = x.GetSummation() - 1; - expected_descriptors_per_module = 5; + expected_descriptors_per_module = DMA_DESCRIPTORS_PER_MODULE; switch (x.GetDetectorMode()) { case DetectorMode::Conversion: diff --git a/acquisition_device/HLSSimulatedDevice.cpp b/acquisition_device/HLSSimulatedDevice.cpp index 348078c3..2bd44099 100644 --- a/acquisition_device/HLSSimulatedDevice.cpp +++ b/acquisition_device/HLSSimulatedDevice.cpp @@ -160,7 +160,8 @@ void HLSSimulatedDevice::FPGA_StartAction(const DiffractionExperiment &experimen ipv4_addr, INT_PKT_GEN_BUNCHID, INT_PKT_GEN_EXPTTIME, - INT_PKT_GEN_DEBUG); + INT_PKT_GEN_DEBUG, + cancel_data_collection); if (ret) throw JFJochException(JFJochExceptionCategory::AcquisitionDeviceError, "Error running internal packet generator"); @@ -181,9 +182,22 @@ HLSSimulatedDevice::~HLSSimulatedDevice() { } bool HLSSimulatedDevice::HW_ReadMailbox(uint32_t *values) { + std::unique_lock ul(completion_mutex); + ap_uint<32> tmp; bool ret = completion_stream.read_nb(tmp); values[0] = tmp; + // equivalent to driver functionality + if (ret) { + uint32_t handle = values[0] & 0xFFFF; + if (handle == HANDLE_START) + completion_count = 0; + else if (handle != HANDLE_END) { + completion_count++; + while (completion_count * DMA_DESCRIPTORS_PER_MODULE > datamover_out.GetCompletedDescriptors()) + 
std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } return ret; } @@ -509,7 +523,6 @@ void HLSSimulatedDevice::HW_GetStatus(DataCollectionStatus *status) const { memset(status, 0, sizeof(DataCollectionStatus)); status->ctrl_reg = ap_uint<1>(host_writer_idle) ? (1 << 4) : 0; - status->modules_internal_packet_generator = 1; status->max_modules = max_modules; status->hbm_size_bytes = hbm_if_size; } @@ -588,11 +601,6 @@ void HLSSimulatedDevice::HW_LoadInternalGeneratorFrame(uint32_t modules) { throw std::runtime_error("Datamover queue is not empty"); } - -uint32_t HLSSimulatedDevice::GetCompletedDescriptors() const { - return datamover_out.GetCompletedDescriptors(); -} - void HLSSimulatedDevice::HW_SetSpotFinderParameters(const SpotFinderParameters ¶ms) { count_threshold = params.count_threshold; snr_threshold = params.snr_threshold; diff --git a/acquisition_device/HLSSimulatedDevice.h b/acquisition_device/HLSSimulatedDevice.h index d6928eab..358457e1 100644 --- a/acquisition_device/HLSSimulatedDevice.h +++ b/acquisition_device/HLSSimulatedDevice.h @@ -33,6 +33,9 @@ class HLSSimulatedDevice : public FPGAAcquisitionDevice { hls::stream > work_request_stream; hls::stream > completion_stream; + std::mutex completion_mutex; + uint32_t completion_count; + std::thread action_thread; Datamover<512> datamover_in; @@ -77,7 +80,6 @@ public: void CreateFinalPacket(const DiffractionExperiment& experiment); AXI_STREAM &OutputStream(); void Cancel() override; - uint32_t GetCompletedDescriptors() const override; }; diff --git a/acquisition_device/PCIExpressDevice.cpp b/acquisition_device/PCIExpressDevice.cpp index 5a8475f6..e9e75dcc 100644 --- a/acquisition_device/PCIExpressDevice.cpp +++ b/acquisition_device/PCIExpressDevice.cpp @@ -115,10 +115,6 @@ void PCIExpressDevice::HW_LoadIntegrationMap(uint32_t in_modules) { dev.LoadIntegrationMap(in_modules); } -uint32_t PCIExpressDevice::GetCompletedDescriptors() const { - return dev.GetCompletedDescriptors(); -} - void 
PCIExpressDevice::HW_LoadInternalGeneratorFrame(uint32_t in_modules) { dev.LoadInternalGeneratorFrame(in_modules); } diff --git a/acquisition_device/PCIExpressDevice.h b/acquisition_device/PCIExpressDevice.h index d22abb39..ef6e0e23 100644 --- a/acquisition_device/PCIExpressDevice.h +++ b/acquisition_device/PCIExpressDevice.h @@ -40,7 +40,6 @@ public: std::string GetMACAddress() const override; std::string GetIPv4Address() const override; - uint32_t GetCompletedDescriptors() const override; DeviceStatus GetDeviceStatus() const override; DataCollectionStatus GetDataCollectionStatus() const override; }; diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 9ba72c34..78d61d91 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -44,7 +44,7 @@ ADD_LIBRARY( CommonFunctions STATIC ADUHistogram.cpp ADUHistogram.h RawToConvertedGeometryCore.h Plot.h - DeviceOutput.h + ../fpga/host_library/DeviceOutput.h ZMQWrappers.cpp ZMQWrappers.h) TARGET_LINK_LIBRARIES(CommonFunctions Compression JFCalibration libzmq -lrt) diff --git a/common/Definitions.h b/common/Definitions.h index 8f4a1344..d9b2671a 100644 --- a/common/Definitions.h +++ b/common/Definitions.h @@ -64,8 +64,8 @@ #define LOAD_CALIBRATION_DEST_INTEGRATION 1 #define LOAD_CALIBRATION_DEST_FRAME_GEN 2 -#define HANDLE_START (UINT16_MAX - 1) -#define HANDLE_END (UINT16_MAX ) +#define HANDLE_START (65534) +#define HANDLE_END (65535) #define INT_PKT_GEN_DEBUG 0x0 #define INT_PKT_GEN_BUNCHID 0xCACACACACA @@ -79,4 +79,6 @@ #define ADU_HISTO_BIN_WIDTH 32 #define ADU_HISTO_BIN_COUNT (65536/ ADU_HISTO_BIN_WIDTH) +#define DMA_DESCRIPTORS_PER_MODULE 5 + #endif //DEFINITIONS_H diff --git a/fpga/README.md b/fpga/README.md index a4c497b2..a174869f 100644 --- a/fpga/README.md +++ b/fpga/README.md @@ -12,7 +12,7 @@ Current power estimation is about 30 W when idle and 40 W in operation. 
CPU Part: * `pcie_driver` Linux kernel driver for PCIe version of the FPGA board - see [instructions](pcie_driver/README.md) -* `host_library` Library that should be used to access the driver + some simple diagnostic tools +* `host_library` Library that should be used to access the driver + some simple diagnostic tools - see [workflow documentation](host_library/README.md) FPGA part: diff --git a/fpga/hls/frame_generator.cpp b/fpga/hls/frame_generator.cpp index 3e08acc5..3e44c3f8 100644 --- a/fpga/hls/frame_generator.cpp +++ b/fpga/hls/frame_generator.cpp @@ -85,7 +85,8 @@ int frame_generator(STREAM_512 &data_out, ap_uint<32> dest_ipv4_addr, ap_uint<64> bunchid, ap_uint<32> exptime, - ap_uint<32> debug) { + ap_uint<32> debug, + volatile ap_uint<1> &in_cancel) { #pragma HLS INTERFACE mode=s_axilite port=return #pragma HLS INTERFACE mode=s_axilite port=frames #pragma HLS INTERFACE mode=s_axilite port=modules @@ -94,8 +95,9 @@ int frame_generator(STREAM_512 &data_out, #pragma HLS INTERFACE mode=s_axilite port=debug #pragma HLS INTERFACE mode=s_axilite port=dest_mac_addr #pragma HLS INTERFACE mode=s_axilite port=dest_ipv4_addr -#pragma HLS INTERFACE mode=ap_none port=src_mac_addr -#pragma HLS INTERFACE mode=ap_none port=src_ipv4_addr +#pragma HLS INTERFACE mode=ap_none register port=src_mac_addr +#pragma HLS INTERFACE mode=ap_none register port=src_ipv4_addr +#pragma HLS INTERFACE mode=ap_none register port=in_cancel #pragma HLS INTERFACE register ap_none port=hbm_size_bytes #pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \ @@ -108,6 +110,9 @@ int frame_generator(STREAM_512 &data_out, return 1; for (uint32_t f = 0; f < frames; f++) { + ap_uint<1> local_cancel = in_cancel; + if (local_cancel == 1) + break; for (uint32_t p = 0; p < 128; p++) { for (uint32_t m = 0; m < modules; m++) { generate_packet(data_out, diff --git a/fpga/hls/hls_jfjoch.h b/fpga/hls/hls_jfjoch.h index 0182cba7..fea8e8da 100644 --- a/fpga/hls/hls_jfjoch.h +++ 
b/fpga/hls/hls_jfjoch.h @@ -360,7 +360,8 @@ int frame_generator(STREAM_512 &data_out, ap_uint<32> dest_ipv4_addr, ap_uint<64> bunchid, ap_uint<32> exptime, - ap_uint<32> debug); + ap_uint<32> debug, + volatile ap_uint<1> &in_cancel); int load_calibration(ap_uint<256> *d_hbm_p0, diff --git a/fpga/hls/host_writer.cpp b/fpga/hls/host_writer.cpp index f983cf72..dcc84425 100644 --- a/fpga/hls/host_writer.cpp +++ b/fpga/hls/host_writer.cpp @@ -1,7 +1,7 @@ // Copyright (2019-2023) Paul Scherrer Institute #include "hls_jfjoch.h" -#include "../../common/DeviceOutput.h" +#include "../host_library/DeviceOutput.h" #define o(field) offsetof(ModuleStatistics, field) #define sf(msg, field, s) msg(o(field)*8 + s - 1, o(field)*8) diff --git a/common/DeviceOutput.h b/fpga/host_library/DeviceOutput.h similarity index 96% rename from common/DeviceOutput.h rename to fpga/host_library/DeviceOutput.h index 3d7d87c1..7c15328e 100644 --- a/common/DeviceOutput.h +++ b/fpga/host_library/DeviceOutput.h @@ -5,6 +5,8 @@ #include +#include "../../common/Definitions.h" + #pragma pack(push) #pragma pack(1) diff --git a/fpga/host_library/JungfraujochDevice.cpp b/fpga/host_library/JungfraujochDevice.cpp index aebdf522..a43400bb 100644 --- a/fpga/host_library/JungfraujochDevice.cpp +++ b/fpga/host_library/JungfraujochDevice.cpp @@ -87,19 +87,12 @@ uint32_t JungfraujochDevice::GetNumaNode() const { return tmp; } -uint32_t JungfraujochDevice::GetCompletedDescriptors() const { - uint32_t ret = 0; - if (ioctl(fd, IOCTL_JFJOCH_C2H_DMA_DESC, &ret) != 0) - throw PCIeDeviceException("Failed geting C2H completed descriptor count"); - return ret; -} - void JungfraujochDevice::SetConfig(const DataCollectionConfig &config) { if (ioctl(fd, IOCTL_JFJOCH_SET_CONFIG, &config) != 0) throw PCIeDeviceException("Failed writing config"); } -bool JungfraujochDevice::ReadWorkCompletion(uint32_t output[16]) { +bool JungfraujochDevice::ReadWorkCompletion(uint32_t *output) { int tmp = ioctl(fd, IOCTL_JFJOCH_READ_WC_MBOX, 
output); if (tmp != 0) { if (errno == EAGAIN) @@ -199,17 +192,21 @@ void JungfraujochDevice::LoadIntegrationMap(uint32_t modules) { throw PCIeDeviceException("Failed uploading integration map"); } -uint16_t *JungfraujochDevice::MapKernelBuffer(uint32_t id) { - auto tmp = (uint16_t *) mmap(nullptr, FPGA_BUFFER_LOCATION_SIZE, +DeviceOutput *JungfraujochDevice::MapKernelBuffer(uint32_t id) { + auto tmp = (DeviceOutput *) mmap(nullptr, FPGA_BUFFER_LOCATION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, FPGA_BUFFER_LOCATION_SIZE * id); - if (tmp == nullptr) + if (tmp == MAP_FAILED) throw PCIeDeviceException("Mmap of kernel buffer error"); return tmp; } +void JungfraujochDevice::UnmapKernelBuffer(DeviceOutput *val) { + munmap(val, FPGA_BUFFER_LOCATION_SIZE); +} + void JungfraujochDevice::SetSpotFinderParameters(const SpotFinderParameters& params) { if (ioctl(fd, IOCTL_JFJOCH_SPOT_FINDER_PAR, &params) != 0) throw PCIeDeviceException("Failed settings spot finder parameters"); @@ -228,4 +225,12 @@ void JungfraujochDevice::SetDataSource(uint32_t id) { if (ioctl(fd, IOCTL_JFJOCH_SET_DATA_SOURCE, &id) != 0) throw JFJochException(JFJochExceptionCategory::PCIeError, "Failed setting data source"); -} \ No newline at end of file +} + +bool JungfraujochDevice::ReadWorkCompletion(JungfraujochDeviceCompletion &completion) { + uint32_t tmp = 0; + bool ret = ReadWorkCompletion(&tmp); + completion.buffer_id = tmp & 0xFFFF; + completion.data_collection_id = (tmp >> 16) & 0xFFFF; + return ret; /* propagate the mailbox status; when false the mailbox was empty and completion holds no valid data */ +} diff --git a/fpga/host_library/JungfraujochDevice.h b/fpga/host_library/JungfraujochDevice.h index d9e9eebf..3afe76db 100644 --- a/fpga/host_library/JungfraujochDevice.h +++ b/fpga/host_library/JungfraujochDevice.h @@ -5,6 +5,12 @@ #include #include "../pcie_driver/ActionConfig.h" +#include "DeviceOutput.h" + +struct JungfraujochDeviceCompletion { + uint16_t data_collection_id; + uint16_t buffer_id; +}; class JungfraujochDevice { int fd; @@ -37,17 +43,22 @@ public: // Resets FPGA - not safe at the 
moment void Reset(); - // Returns current NUMA node uint32_t GetNumaNode() const; - // Returns number of completed card-to-host descriptors = number of full modules transferred to the host memory - uint32_t GetCompletedDescriptors() const; - // Read work completion // returns true if there was completion in the mailbox - output is then saved // returns false if mailbox was empty - output is invalid - bool ReadWorkCompletion(uint32_t output[16]); + // More user-friendly function, as it returns data structure + bool ReadWorkCompletion(JungfraujochDeviceCompletion &completion); + + // Read work completion + // returns true if there was completion in the mailbox - output is then saved + // returns false if mailbox was empty - output is invalid + // output is coded as single 32-bit integer: + // bits (0..15) = buffer location that has completed data + // bits (31..16) = data_collection_id + bool ReadWorkCompletion(uint32_t *output); // Sends work request of buffer location id // returns true if there was space in the mailbox @@ -79,23 +90,33 @@ public: uint32_t ReadRegister(uint32_t addr) const; // Load calibration parameters + // Function is synchronous - it will return when loading is done void LoadCalibration(uint32_t modules, uint32_t storage_cells); // Load frames for internal generator // Must be placed in first kernel buffer locations + // Function is synchronous - it will return when loading is done void LoadInternalGeneratorFrame(uint32_t modules); // Load map of radial integration // Must be placed in first kernel buffer locations + // Function is synchronous - it will return when loading is done void LoadIntegrationMap(uint32_t modules); void SetSpotFinderParameters(const SpotFinderParameters ¶ms); // Get number of kernel buffers uint32_t GetBufferCount() const; - // Allocate id kernel buffer ( id must be less than GetBufferCount() ) - // buffer has to be unmapped using munmap - uint16_t *MapKernelBuffer(uint32_t id); + // Map kernel buffer ( id must be less 
than GetBufferCount() ) to user virtual memory space + DeviceOutput *MapKernelBuffer(uint32_t buffer_id); - // Select data source on the FPGA - which network connector is used + // Unmap kernel buffer, using the pointer returned by MapKernelBuffer() + void UnmapKernelBuffer(DeviceOutput* val); + + // Select data source on the FPGA + // Allowed values: + // 0 - no data input + // 1 - 100G interface + // 2 - 4x10G interface + // 3 - frame generator within the FPGA void SetDataSource(uint32_t id); uint32_t GetDataSource(); }; diff --git a/fpga/host_library/README.md b/fpga/host_library/README.md new file mode 100644 index 00000000..16272987 --- /dev/null +++ b/fpga/host_library/README.md @@ -0,0 +1,89 @@ +# Jungfraujoch host library + +The library is used as the low-level interface between C++ host application and Jungfraujoch FPGA card. +It provides wrapper over kernel driver ioctl calls. Preferred way to use Jungfraujoch is via the full `jfjoch_broker` application, +however for more tailored solutions - one can use directly API described below. + +## Configure the detector + +To use the API, one needs to configure the detector via [slsDetectorPackage](https://github.com/slsdetectorgroup/slsDetectorPackage) for data acquisition. Besides the usual configuration used for JUNGFRAU, the following settings are necessary: + +* Detector frame numbers must be restarted to start always from 1 using the following command-line interface command (or C++/Python equivalent): +``` +sls_detector_put nextframenumber 1 +``` +* Detector has to be configured to measure 6 frames more than needed for data acquisition, for the acquisition to stop automatically +* Set destination MAC and IPv4 address to be consistent with ones for the card. At the time being, all the ethernet interfaces of the card (100G and 4x10G) share the same MAC/IPv4 address, though this might change in the future. UDP port is not used. 
+* Each module has to have its sequential number assigned through column field in the detector header, which has to be set to `2 * module number`. This is currently done by writing `module number * 2 * 65536` to register `0x7C` of the module. IMPORTANT: Module numbering for each FPGA card is independent and has to start from 1. With 4 FPGA cards and 16 modules, there will be four modules with number 1, four with number 2, etc. +* The card supports both 1 and 2 network interfaces per JUNGFRAU module. + +## Designing with the API + +For each FPGA card, one needs to instantiate `JungfraujochDevice` object, using device name of format `/dev/jfjoch<N>` (e.g. `/dev/jfjoch0`) +and read/write parameter. Only one process can open the device with write access, while multiple processes might open the device for read in parallel. + +### Configuring network +The first step for using the card is configuring network. To use network, one needs to select data source with `JungfraujochDevice::SetDataSource()` function - the options are no data source (default), 100G, 4x10G and internal generator. Next, addresses need to be configured for the network stack. + +The card will receive MAC address automatically based on Xilinx assigned number, but IPv4 address has to be configured with `JungfraujochDevice::SetIPv4Address()` function. The card is equipped with a simple network stack - if both MAC and IPv4 addresses are set and 100G interface is used, the card will periodically send gratuitous ARP messages, and it will also reply to ARP requests and to ICMP pings. Given 4x10G interface is designed for direct Jungfraujoch-detector configuration, without a switch, diagnostics functionality is not offered here at the moment. + +### Uploading calibration +Will be contributed later, as there is planned modification to the interface + +### Preparing data collection + +Before any operation one needs to check if card is idle (not running data collection) with `JungfraujochDevice::IsIdle()` function. 
Most configuration parameters cannot be changed while the card is not idle. + +The card can be then configured with `JungfraujochDevice::SetConfig()` function. Details of the configuration data structure are given in [ActionConfig.h](../pcie_driver/ActionConfig.h) header file. + +Next, kernel buffers need to be mapped to the user space. These buffers are allocated in physically contiguous memory, simplifying operation of the card and the driver. Count of these buffers can be checked with `JungfraujochDevice::GetBufferCount()` function. Buffers can be mapped with `JungfraujochDevice::MapKernelBuffer()` function and unmapped with `JungfraujochDevice::UnmapKernelBuffer()` function. Structure of the kernel buffer is described in [DeviceOutput.h](DeviceOutput.h) header file. + +### Data collection + +Then one can start the card with `JungfraujochDevice::Start()` function. Final step is to wait for first completion (with value `HANDLE_START` defined in [Definitions.h](../../common/Definitions.h) as buffer number) using `JungfraujochDevice::ReadWorkCompletion()`. + +Standard operation of the card requires exchange of buffer ownership between the host application and FPGA card. At the beginning all buffers are owned by host application and should be "given" to the card with `JungfraujochDevice::SendWorkRequest()` function. Then card will wait for the detector to send data. After full module is collected, data are written via Direct Memory Access to host memory and kernel driver is informed with an interrupt that data are ready. Host application can "learn" what was collected by the card by running `JungfraujochDevice::ReadWorkCompletion()` function. Buffer returned by the function is owned by the host application and is safe to process. After processing the buffer has to be given back to card via `JungfraujochDevice::SendWorkRequest()`. If the card doesn't receive enough work requests (open buffers) it won't be able to receive data, resulting in lost packets. 
+ +Some important points to mention: +* Both functions mentioned in the above paragraph may fail due to work request queue being full and completion queue being empty respectively. Please always check that return value is `true`. +* Both functions `JungfraujochDevice::SendWorkRequest()` and `JungfraujochDevice::ReadWorkCompletion()` are thread-safe and can be executed in parallel context. All other functions in the library that change configuration or state of the card are NOT thread-safe; running them in parallel would give nondeterministic results anyway. +* Reading work completion will wait up to 1 second before returning. +* `JungfraujochDevice::ReadWorkCompletion()` places the data collection ID in the upper 16 bits (bits 16-31) - this makes it possible to avoid mixing up completions from the previous and current data collection. +* Work requests sent before `HANDLE_START` was received by host application will likely be discarded. + +The card will end acquisition in two situations: +* Frame with number provided in the configuration is received +* `JungfraujochDevice::Cancel()` function is called by host application + +The host application will know that the data collection finished by receiving completion with value of `HANDLE_END` as buffer number. After data collection is finished one should call `JungfraujochDevice::End()` to finalize. + +### Internal generator +When detector is not installed, or one would like to check the Jungfraujoch card without running a detector, +it is possible to generate detector packets internally. Internal generator makes packets with all the headers (Ethernet, UDP/IP, SLS Detector) and enters the data stream at a location similar to the Ethernet Media Access Control (MAC) cores. + +Before starting data collection, it is necessary to load content of module to card FPGA memory. At this moment, for each module a different content can be provided, but all frames for a particular module will be the same. It will hopefully change in the future. 
+To load the data, one needs to place the content of each module (as 16-bit values) into the respective kernel buffer (mapped with `JungfraujochDevice::MapKernelBuffer()`) - first module to buffer 0, second module to buffer 1, etc. Then one needs to call `JungfraujochDevice::LoadInternalGeneratorFrame()` with the specified number of modules. + +One also needs to switch the data source by executing `JungfraujochDevice::SetDataSource()` with the respective value. + +The next step is to do all the preparations to start data collection, up to `JungfraujochDevice::Start()` and the completion handshake. Then one can run the `JungfraujochDevice::RunFrameGenerator()` function, with parameters described in the [ActionConfig.h](../pcie_driver/ActionConfig.h) header file. The function is asynchronous: it starts generation but does not wait for it to end. One can, however, assume that the frame generator is done when data collection is finished. + +### Spot finding parameters +Spot finding parameters can be updated with the function `JungfraujochDevice::SetSpotFinderParameters()`. +Contrary to other configuration functions, this one is safe to execute during data collection. + +## Tools +In addition to the library, there are also small CLI tools available in this folder. + +### jfjoch_pcie_status +Prints detailed status information about the card. Execute by adding the device path, e.g.: +``` +./jfjoch_pcie_status /dev/jfjoch0 +``` +The program is safe to execute during a running data collection. +### jfjoch_pcie_clear_net_counters +Network counters in the card give information about Ethernet, UDP and ICMP packets encountered by the network stack prior to the Jungfraujoch logic. +These counters run from the moment the card is powered on. 
They can be reset by running the program with device name, e.g.: +``` +./jfjoch_pcie_clear_net_counters /dev/jfjoch0 +``` diff --git a/fpga/host_library/jfjoch_pcie_cancel_data_collection.cpp b/fpga/host_library/jfjoch_pcie_cancel_data_collection.cpp index 6bf95372..ab5ffb8a 100644 --- a/fpga/host_library/jfjoch_pcie_cancel_data_collection.cpp +++ b/fpga/host_library/jfjoch_pcie_cancel_data_collection.cpp @@ -1,7 +1,5 @@ // Copyright (2019-2023) Paul Scherrer Institute -// Copyright (2019-2023) Paul Scherrer Institute - #include #include "../common/JFJochException.h" diff --git a/fpga/host_library/jfjoch_pcie_set_network.cpp b/fpga/host_library/jfjoch_pcie_set_network.cpp index abacff28..8ffe6857 100644 --- a/fpga/host_library/jfjoch_pcie_set_network.cpp +++ b/fpga/host_library/jfjoch_pcie_set_network.cpp @@ -1,7 +1,5 @@ // Copyright (2019-2023) Paul Scherrer Institute -// Copyright (2019-2023) Paul Scherrer Institute - #include "../common/Logger.h" #include "../common/JFJochException.h" #include "JungfraujochDevice.h" diff --git a/fpga/host_library/jfjoch_pcie_status.cpp b/fpga/host_library/jfjoch_pcie_status.cpp index a71ecdd1..472a2420 100644 --- a/fpga/host_library/jfjoch_pcie_status.cpp +++ b/fpga/host_library/jfjoch_pcie_status.cpp @@ -58,7 +58,13 @@ int main(int argc, char **argv) { std::cout << "Git SHA1 " << std::hex << fpga_status.git_sha1 << std::endl; std::cout << "Max modules " << std::dec << fpga_status.max_modules << std::endl; std::cout << "NUMA node " << device.GetNumaNode() << std::endl; - std::cout << "Ethernet aligned " << (fpga_env_data.ethernet_aligned ? "Yes" : "No") << std::endl; + std::cout << "Ethernet 100G link " << (fpga_env_data.eth_100G_link ? "Yes" : "No") << std::endl; + std::cout << "Ethernet 10G link (*=link) " + << (fpga_env_data.eth_10G_link[0] ? "*" : "-") + << (fpga_env_data.eth_10G_link[1] ? "*" : "-") + << (fpga_env_data.eth_10G_link[2] ? "*" : "-") + << (fpga_env_data.eth_10G_link[3] ? 
"*" : "-") + << std::endl; std::cout << "FPGA 12V rail current [A] " << fpga_env_data.fpga_pcie_12V_I_mA / 1000.0 << std::endl; std::cout << "FPGA 12V rail voltage [V] " << fpga_env_data.fpga_pcie_12V_V_mV / 1000.0 << std::endl; diff --git a/fpga/pcie_driver/ActionConfig.h b/fpga/pcie_driver/ActionConfig.h index 8fd6c87a..ecb66594 100644 --- a/fpga/pcie_driver/ActionConfig.h +++ b/fpga/pcie_driver/ActionConfig.h @@ -14,25 +14,25 @@ typedef __u64 uint64_t; #pragma pack(4) struct DataCollectionConfig { - uint32_t nmodules; - uint32_t mode; - uint32_t one_over_energy; - uint32_t nframes; - uint32_t nstorage_cells; - uint32_t nsummation; + uint32_t nmodules; // Number of modules for data collection minus one (0 = 1 module, 1 = 2 modules, ..., 31 = 32 modules) + uint32_t mode; // see MODE_* in common/Definitions.h; upper 16-bit of the mode are data_collection_id, that is returned with completion numbers + uint32_t one_over_energy; // One over energy in keV (this is bit-to-bit float value, use float+uint32_t union to assign the value) + uint32_t nframes; // Number of frames for data collection + uint32_t nstorage_cells; // Number of storage cells minus one (0 = 1SC, 1 = 2SC, ..., 15 = 16SC) + uint32_t nsummation; // Summation of frames minus one (0 = no summation, 1 = 2 frames, 2 = 3 frames, ..., 255 = 256 frames) }; struct DataCollectionStatus { uint32_t ctrl_reg; uint32_t reserved_0; - uint32_t set_led; + uint32_t reserved_1; uint32_t git_sha1; uint32_t action_type; uint32_t release_level; + uint32_t reserved_2; uint32_t reserved_3; - uint32_t reserved_1; uint32_t max_modules; - uint32_t modules_internal_packet_generator; + uint32_t reserved_4; uint64_t pipeline_stalls_host; uint64_t pipeline_stalls_hbm; uint32_t fifo_status; @@ -82,14 +82,15 @@ struct DeviceStatus { uint32_t hbm_1_temp_C; uint32_t work_compl_fifo_avail; - bool ethernet_aligned; + bool eth_100G_link; + bool eth_10G_link[4]; }; struct FrameGeneratorConfig { - uint32_t frames; - uint32_t modules; - 
uint64_t dest_mac_addr; - uint32_t dest_ipv4_addr; + uint32_t frames; // Number of frames + uint32_t modules; // Number of modules (1-32) + uint64_t dest_mac_addr; // Use the same as source address + uint32_t dest_ipv4_addr; // Use the same as source address uint64_t bunchid; uint32_t exptime; uint32_t debug; diff --git a/fpga/pcie_driver/CMakeLists.txt b/fpga/pcie_driver/CMakeLists.txt index d37abd2a..dc0c97d3 100644 --- a/fpga/pcie_driver/CMakeLists.txt +++ b/fpga/pcie_driver/CMakeLists.txt @@ -13,7 +13,7 @@ EXECUTE_PROCESS(COMMAND uname -r FIND_PATH(KERNELHEADERS_DIR include/linux/user.h - PATHS /usr/src/linux-headers-${KERNEL_RELEASE} + PATHS /usr/src/linux-headers-${KERNEL_RELEASE} /usr/src/kernels/${KERNEL_RELEASE} ) MESSAGE(STATUS "Kernel release: ${KERNEL_RELEASE}") diff --git a/fpga/pcie_driver/README.md b/fpga/pcie_driver/README.md index 14e70595..9133a305 100644 --- a/fpga/pcie_driver/README.md +++ b/fpga/pcie_driver/README.md @@ -78,4 +78,4 @@ Interfacing should be done through the JungfraujochDevice class in `fpga/host_li ## Sysfs access Certain performance counters can be read through sysfs mechanism in the kernel. -One needs to `cat` files in `/sys/class/misc/jfjoch`. \ No newline at end of file +One needs to `cat` files in `/sys/class/misc/jfjoch/` directory. 
\ No newline at end of file diff --git a/fpga/pcie_driver/jfjoch_drv.h b/fpga/pcie_driver/jfjoch_drv.h index b01a1591..c0c59780 100644 --- a/fpga/pcie_driver/jfjoch_drv.h +++ b/fpga/pcie_driver/jfjoch_drv.h @@ -51,6 +51,10 @@ #define PCIE_OFFSET (0x090000) #define FRAME_GEN_OFFSET (0x080000) #define ADDRESS_TABLE_OFFSET (0x200000) +#define MAC_10G_0_OFFSET (0x100000) +#define MAC_10G_1_OFFSET (0x110000) +#define MAC_10G_2_OFFSET (0x120000) +#define MAC_10G_3_OFFSET (0x130000) // Action config #define ADDR_CTRL_REGISTER 0x0000 @@ -162,13 +166,25 @@ struct jfjoch_drvdata { u32 git_sha1; spinlock_t file_write_open_count_spinlock; - int file_write_open_count; + int file_write_open_count; // ensure + // AXI mailbox requires to check if full before writing anything - this obviously must be atomic + // assuming this is quick operation spinlock_t work_request_submit_spinlock; - DECLARE_KFIFO(work_compl, u32, MAX_FPGA_BUFFER); + // KFIFO has separate synchronization for read and write - i.e. one writer and multiple readers need mutex/spinlock + // only for reading + // + // work_compl is not protected for put operation => this only happens in ISR + // atomicity is guaranteed by the kernel (interrupt is masked during its own ISR) + // however getting things from the queue happens in system call and can be executed in parallel + // therefore this part is protected by mutex (it is assumed that waiting for interrupts can take seconds, + // process has to be able to sleep while holding the lock => spinlock would not work) + // the mutex is also protecting work_compl_count variable struct mutex work_compl_read_mutex; - wait_queue_head_t work_compl_wait_queue; + DECLARE_KFIFO(work_compl, u32, MAX_FPGA_BUFFER); // protected by work_compl_read_mutex + wait_queue_head_t work_compl_wait_queue; // used for read completion queue method to wait for interrupt, ISR has wake-up call + u32 work_compl_count; // protected by work_compl_read_mutex }; int jfjoch_register_misc_dev(struct pci_dev 
*pdev); diff --git a/fpga/pcie_driver/jfjoch_function.c b/fpga/pcie_driver/jfjoch_function.c index aba8cf36..0322f340 100644 --- a/fpga/pcie_driver/jfjoch_function.c +++ b/fpga/pcie_driver/jfjoch_function.c @@ -14,6 +14,19 @@ uint32_t jfjoch_read_register(struct jfjoch_drvdata *drvdata, uint32_t addr) { } void jfjoch_start(struct jfjoch_drvdata *drvdata) { + u32 tmp; + + // Drain work completion queue + mutex_lock(&drvdata->work_compl_read_mutex); + while(!kfifo_is_empty(&drvdata->work_compl)) + kfifo_get(&drvdata->work_compl, &tmp); + + // Reset work completion queue + drvdata->work_compl_count = 0; + + mutex_unlock(&drvdata->work_compl_read_mutex); + + // Set PCIe beats counters iowrite32((1 << 1), drvdata->bar0 + PCIE_OFFSET + (1<<12) + 0xC0); iowrite32((1 << 2), drvdata->bar0 + PCIE_OFFSET + (1<<12) + 0xC0); @@ -60,22 +73,45 @@ int jfjoch_send_wr(struct jfjoch_drvdata *drvdata, u32 handle) { } int jfjoch_read_wc(struct jfjoch_drvdata *drvdata, u32 *output) { + struct device *const dev = &drvdata->pdev->dev; + int ret, tmp; + u32 curr_compl_count, handle; + mutex_lock(&drvdata->work_compl_read_mutex); ret = wait_event_interruptible_timeout(drvdata->work_compl_wait_queue, !kfifo_is_empty(&drvdata->work_compl), HZ); - if (ret >= 0) + if (ret >= 0) { tmp = kfifo_get(&drvdata->work_compl, output); - + handle = (*output) & 0xFFFF; + if (handle == HANDLE_START) + drvdata->work_compl_count = 0; + else if (handle != HANDLE_END) + curr_compl_count = drvdata->work_compl_count++; + } mutex_unlock(&drvdata->work_compl_read_mutex); if (ret < 0) return ret; else if (tmp == 0) return -EAGAIN; - else - return 0; + + // Guarantee that data are in host memory + if ((handle != HANDLE_START) && (handle != HANDLE_END)) { + u32 descriptors = ioread32(drvdata->bar0 + PCIE_OFFSET + (1 << 12) + 0x48); + if (descriptors < curr_compl_count * DMA_DESCRIPTORS_PER_MODULE) { + dev_warn(dev, "Late completion count SW %u, HW %u HANDLE %x", curr_compl_count, descriptors, handle); + udelay(50); + 
descriptors = ioread32(drvdata->bar0 + PCIE_OFFSET + (1 << 12) + 0x48); + if (descriptors < DMA_DESCRIPTORS_PER_MODULE * curr_compl_count) { + dev_err(dev, "Late completion count SW %u, HW %u HANDLE %x", curr_compl_count, descriptors, handle); + return -EIO; + } + } + } + return 0; } + void jfjoch_set_config(struct jfjoch_drvdata *drvdata, const struct DataCollectionConfig *config) { memcpy_toio((drvdata->bar0) + ACTION_CONFIG_OFFSET + ADDR_NMODULES, config, sizeof(struct DataCollectionConfig)); } @@ -243,6 +279,8 @@ void jfjoch_setup_network(struct jfjoch_drvdata *drvdata) { } void jfjoch_get_env_data(struct jfjoch_drvdata *drvdata, struct DeviceStatus *env_params) { + int i; + env_params->mailbox_status_reg = ioread32(drvdata->bar0 + MAILBOX_OFFSET + ADDR_MAILBOX_STATUS); env_params->mailbox_err_reg = ioread32(drvdata->bar0 + MAILBOX_OFFSET + ADDR_MAILBOX_ERR); env_params->mailbox_interrupt_status = ioread32(drvdata->bar0 + MAILBOX_OFFSET + ADDR_MAILBOX_IS); @@ -272,10 +310,15 @@ void jfjoch_get_env_data(struct jfjoch_drvdata *drvdata, struct DeviceStatus *en env_params->hbm_0_temp_C = ioread32(drvdata->bar0 + CMS_OFFSET + ADDR_CMS_HBM_TEMP1_INS_REG); env_params->hbm_1_temp_C = ioread32(drvdata->bar0 + CMS_OFFSET + ADDR_CMS_HBM_TEMP2_INS_REG); - // Somehow it is better to ask twice - env_params->ethernet_aligned = ioread32(drvdata->bar0 + CMAC_OFFSET + 0x0204) & 0x2; - env_params->ethernet_aligned = ioread32(drvdata->bar0 + CMAC_OFFSET + 0x0204) & 0x2; + // These are latched low register, so need to ask twice, to get current value + for (i = 0; i < 2; i++) { + env_params->eth_100G_link = ioread32(drvdata->bar0 + CMAC_OFFSET + 0x0204) & 0x1; // stat_rx_status + env_params->eth_10G_link[0] = ioread32(drvdata->bar0 + MAC_10G_0_OFFSET + 0x0404) & 0x1; // stat_rx_status + env_params->eth_10G_link[1] = ioread32(drvdata->bar0 + MAC_10G_1_OFFSET + 0x0404) & 0x1; // stat_rx_status + env_params->eth_10G_link[2] = ioread32(drvdata->bar0 + MAC_10G_2_OFFSET + 0x0404) & 0x1; 
// stat_rx_status + env_params->eth_10G_link[3] = ioread32(drvdata->bar0 + MAC_10G_3_OFFSET + 0x0404) & 0x1; // stat_rx_status + } env_params->work_compl_fifo_avail = kfifo_avail(&drvdata->work_compl); } diff --git a/fpga/scripts/jfjoch.tcl b/fpga/scripts/jfjoch.tcl index a49ecbf8..ea3586bb 100644 --- a/fpga/scripts/jfjoch.tcl +++ b/fpga/scripts/jfjoch.tcl @@ -472,7 +472,7 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } { # Create port connections connect_bd_net -net action_config_0_clear_counters [get_bd_pins action_config_0/clear_counters] [get_bd_pins network_stack/clear_counters] - connect_bd_net -net action_config_0_data_collection_cancel [get_bd_pins action_config_0/data_collection_cancel] [get_bd_pins data_collection_fsm_0/in_cancel] + connect_bd_net -net action_config_0_data_collection_cancel [get_bd_pins action_config_0/data_collection_cancel] [get_bd_pins data_collection_fsm_0/in_cancel] [get_bd_pins frame_generator_0/in_cancel] connect_bd_net -net action_config_0_data_collection_fsm_start [get_bd_pins action_config_0/data_collection_start] [get_bd_pins data_collection_fsm_0/in_run] [get_bd_pins network_stack/data_collection_start] connect_bd_net -net action_config_0_data_collection_mode [get_bd_pins action_config_0/data_collection_mode] [get_bd_pins data_collection_fsm_0/mode] connect_bd_net -net action_config_0_data_source [get_bd_pins action_config_0/data_source] [get_bd_pins stream_merge_0/source] @@ -550,3 +550,4 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } { # Restore current instance current_bd_instance $oldCurInst } + diff --git a/jungfrau/JFModulePedestal.h b/jungfrau/JFModulePedestal.h index f7677f8f..45805f63 100644 --- a/jungfrau/JFModulePedestal.h +++ b/jungfrau/JFModulePedestal.h @@ -10,7 +10,7 @@ #include #include "../common/Definitions.h" -#include "../common/DeviceOutput.h" +#include "../fpga/host_library/DeviceOutput.h" class JFModulePedestal { std::vector pedestal; diff --git 
a/tests/FPGAIntegrationTest.cpp b/tests/FPGAIntegrationTest.cpp index e13fa770..07f78657 100644 --- a/tests/FPGAIntegrationTest.cpp +++ b/tests/FPGAIntegrationTest.cpp @@ -35,7 +35,6 @@ TEST_CASE("HLS_C_Simulation_internal_packet_generator", "[FPGA][Full]") { } } REQUIRE(test.GetExpectedDescriptorsPerModule() == 5); - REQUIRE(test.GetCompletedDescriptors() == 5 * (4 + DELAY_FRAMES_STOP_AND_QUIT - 1) * nmodules); } TEST_CASE("HLS_C_Simulation_internal_packet_generator_custom_frame", "[FPGA][Full]") { diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index fd058227..21f20e06 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -9,6 +9,6 @@ add_executable(HDF5DatasetWriteTest HDF5DatasetWriteTest.cpp) target_link_libraries(HDF5DatasetWriteTest JFJochWriter CommonFunctions) ADD_EXECUTABLE(jfjoch_writer_test jfjoch_writer_test.cpp) -TARGET_LINK_LIBRARIES(jfjoch_writer_test JFJochWriter CommonFunctions) +TARGET_LINK_LIBRARIES(jfjoch_writer_test JFJochWriter ImagePusher CommonFunctions) INSTALL(TARGETS jfjoch_udp_simulator CompressionBenchmark HDF5DatasetWriteTest jfjoch_writer_test RUNTIME)