From c5ca10792efff849602d1c67954a5599663d4620 Mon Sep 17 00:00:00 2001 From: Filip Leonarski Date: Sun, 15 Oct 2023 22:43:55 +0200 Subject: [PATCH] FPGA: Clean-up of spot_finder core + update README.MD --- fpga/README.md | 114 +++++++++++---------- fpga/hdl/action_config.v | 9 +- fpga/hls/CMakeLists.txt | 2 - fpga/hls/hls_jfjoch.h | 6 -- fpga/hls/spot_finder.cpp | 161 ++++-------------------------- fpga/scripts/jfjoch.tcl | 2 +- receiver/HLSSimulatedDevice.cpp | 2 +- tests/FPGASpotFindingUnitTest.cpp | 2 +- 8 files changed, 90 insertions(+), 208 deletions(-) diff --git a/fpga/README.md b/fpga/README.md index 147a2dc6..9a8f27bc 100644 --- a/fpga/README.md +++ b/fpga/README.md @@ -52,61 +52,75 @@ make action_pcie_100g To test that FPGA board is working properly without access to a JUNGFRAU detector, you can use `jfjoch_action_test` tool. ## FPGA reference + +### Frame generator + +The Jungfraujoch card is equipped with a frame generator. It allows simulating a JUNGFRAU detector without having access to such a system. +It is placed in parallel to the Ethernet MAC, i.e. before the network stack and before any processing happening on the card. +In the future it will be possible to redirect the simulated stream through the 100G TX network link. +The frame generator is written in HLS and controlled with AXI-Lite. 
+ +### Register map FPGA setup can be done via 32-bit registers: -| Address | Bits | Meaning | Mode | Notes | -|---------------------|------|------------------------------------------------------------------------------------------------------|:-----|----------------------------------| -| 0x00000 - 0x0FFFF | | Reserved (in case using MicroBlaze in the future, this has to reserved for internal memory) | | | -| 0x010000 | 32 | Action Control Register | | | -| | | Bit 0 - Action start | R/W | | -| | | Bit 1 - Action idle | R | | -| | | Bit 2 - Action cancel | R/W | cleared on reset or action start | -| | | Bit 3 - Clear network counters | R/W | cleared on reset or action start | -| | | Bit 4 - Host writer idle | R | cleared on reset | -| | | Bit 7 - Design number | R | 0 = PCIe #0, 1 = PCIe #1 | -| | | Bit 16 - AXI Mailbox interrupt 0 | R | | -| | | Bit 17 - AXI Mailbox interrupt 1 | R | | -| | | Bits 24-27 - Various errors in host memory writer | R | cleared on reset or action start | -| 0x010004 | 32 | Reserved | - | | -| 0x01000C | 32 | Action GIT SHA1 | R | | -| 0x010010 | 32 | Action Type | R | | -| 0x010014 | 32 | Action Release Level | R | | -| 0x010020 | 32 | Max. 
number supported detector modules | R | constant | -| 0x010024 | 32 | Number of modules in internal packet generator memory | R | constant | -| 0x010028 | 64 | Pipeline stalls before writing to host memory | R | reset on action start | -| 0x010030 | 64 | Pipeline stalls before accessing HBM | R | reset on action start | -| 0x010038 | 32 | FIFO status (see action_config.v for details) | R | | -| 0x01003C | 32 | Size of single HBM channel in bytes (default value for the particular card) | R | | -| 0x010040 | 64 | Packets processed by the action | R | cleared on reset or action start | -| 0x010048 | 64 | Valid ethernet packets | R | cleared on reset | -| 0x010050 | 64 | Valid ICMP packets | R | cleared on reset | -| 0x010058 | 64 | Valid UDP packets | R | cleared on reset | -| 0x010060 | 64 | MAC address of FPGA card | R/W | network byte order | -| 0x010068 | 32 | IPv4 address of FPGA card | R/W | network byte order | -| 0x01006C | 32 | Number of detector modules | R/W | | -| 0x010070 | 32 | Data collection mode | R/W | | -| | | Bit 0 - Conversion to photons | | | -| | | Bit 1 - Use internal packet generator | | | -| | | Bit 2 - Nonblocking operation (host writer will ignore frames if there is no available work request) | | | -| | | Bit 16:31 - Data collection ID (carried with completions) | | | -| 0x010074 | 32 | One over energy in keV (in fixed-point:12 int. + 24 frac. 
bit format) | R/W | | -| 0x010078 | 32 | Number of frames to be generated by internal packet generator | R/W | | -| 0x01007C | 32 | Number of storage cells | R/W | | -| | | | | | -| 0x020000 - 0x02FFFF | | CMAC 100G | | See Xilinx PG203 for register map | -| 0x030000 - 0x03FFFF | | AXI Mailbox for Work Request / Work Completion | | See Xilinx PG114 for register map | -| 0x040000 - 0x04FFFF | | QuadSPI flash | | See Xilinx PG153 for register map | -| 0x060000 - 0x060FFF | 64 | Input calibration memory addresses block RAM | | | -| 0x070000 - 0x07FFFF | | AXI Firewall | | See Xilinx PG293 for register map | -| 0x090000 - 0x09FFFF | | PCIe DMA control | | See Xilinx PG195 for register map | -| 0x0A0000 - 0x0AFFFF | | Transfer between UltraRAM buffer <-> HBM (HLS registers) | | | -| 0x0C0000 - 0x0FFFFF | | Xilinx Card Management Solution Subsystem management subsystem | | See Xilinx PG348 for register map | -| 0x100000 - 0x1FFFFF | 16 | Internal packet generator frame | | | -| 0x200000 - 0x2FFFFF | | UltraRAM buffer for transfers to/from HBM | | | +| Address | Bits | Meaning | Mode | Notes | +|---------------------|------|---------------------------------------------------------------------------------------------|:-----|----------------------------------------------| +| 0x00000 - 0x0FFFF | | Reserved (in case using MicroBlaze in the future, this has to be reserved for internal memory) | | | +| 0x010000 | 32 | Action Control Register | | | +| | | Bit 0 - Action start | R/W | | +| | | Bit 1 - Action idle | R | | +| | | Bit 2 - Action cancel | R/W | cleared on reset or action start | +| | | Bit 3 - Clear network counters | R/W | cleared on reset or action start | +| | | Bit 4 - Host writer idle | R | cleared on reset | +| | | Bit 7 - Design number | R | 0 = PCIe #0, 1 = PCIe #1 | +| | | Bit 16 - AXI Mailbox interrupt 0 | R | | +| | | Bit 17 - AXI Mailbox interrupt 1 | R | | +| | | Bits 24-27 - Various errors in host memory writer | R | cleared on reset or action start | +| 
0x010004 | 32 | Reserved | - | | +| 0x010008 | 32 | Reserved | - | | +| 0x01000C | 32 | Action GIT SHA1 | R | | +| 0x010010 | 32 | Action Type | R | | +| 0x010014 | 32 | Action Release Level | R | | +| 0x010020 | 32 | Max. number supported detector modules | R | constant | +| 0x010024 | 32 | Reserved | - | | +| 0x010028 | 64 | Pipeline stalls before writing to host memory | R | reset on action start | +| 0x010030 | 64 | Pipeline stalls before accessing HBM | R | reset on action start | +| 0x010038 | 32 | FIFO status (see action_config.v for details) | R | | +| 0x01003C | 32 | Size of single HBM channel in bytes (default value for the particular card) | R/W | should not be altered for standard operation | +| 0x010040 | 64 | Packets processed by the action | R | cleared on reset or action start | +| 0x010048 | 64 | Valid ethernet packets | R | cleared on reset | +| 0x010050 | 64 | Valid ICMP packets | R | cleared on reset | +| 0x010058 | 64 | Valid UDP packets | R | cleared on reset | +| 0x010060 | 64 | Valid detector packets processed by the card | R | cleared on reset | +| 0x010068 | 64 | Packets flagged as errors by CMAC | R | cleared on reset | +| 0x010080 | 64 | MAC address of FPGA card | R/W | network byte order | +| 0x010088 | 32 | IPv4 address of FPGA card | R/W | network byte order | +| 0x01008C | 32 | Number of detector modules | R/W | | +| 0x010090 | 32 | Data collection mode | R/W | | +| | | Bit 0 - Conversion to photons | | | +| | | Bit 16:31 - Data collection ID (carried with completions) | | | +| 0x010094 | 32 | One over energy in keV (in fixed-point: 12 int. + 24 frac. bit format) | R/W | | +| 0x010098 | 32 | Number of frames expected in the data collection (defines termination condition) | R/W | | +| 0x01009C | 32 | Number of storage cells | R/W | | +| 0x010100 | 32 | Spot finder photon count threshold | R/W | | +| 0x010104 | 32 | Spot finder signal-to-noise ratio threshold (in fixed-point: 6 int. + 4 frac. 
bit format) | R/W | | +| 0x020000 - 0x02FFFF | | CMAC 100G | | See Xilinx PG203 for register map | +| 0x030000 - 0x03FFFF | | AXI Mailbox for Work Request / Work Completion | | See Xilinx PG114 for register map | +| 0x040000 - 0x04FFFF | | QuadSPI flash | | See Xilinx PG153 for register map | +| 0x060000 - 0x060FFF | 64 | Input calibration memory addresses block RAM | | | +| 0x070000 - 0x07FFFF | | AXI Firewall | | See Xilinx PG293 for register map | +| 0x080000 - 0x08FFFF | | Frame generator | | | +| 0x090000 - 0x09FFFF | | PCIe DMA control | | See Xilinx PG195 for register map | +| 0x0A0000 - 0x0AFFFF | | Transfer between UltraRAM buffer <-> HBM (HLS registers) | | | +| 0x0C0000 - 0x0FFFFF | | Xilinx Card Management Solution (CMS) Subsystem | | See Xilinx PG348 for register map | +| 0x100000 - 0x1FFFFF | 16 | Internal packet generator frame | | | +| 0x200000 - 0x2FFFFF | | UltraRAM buffer for transfers to/from HBM | | | ### AXI Mailbox -AXI mailbox is used to send work request from host to action, and receive work completions. Messages are always multiple of 128-bit. See Xilinx PG114 on how to operate AXI Mailbox. +The AXI mailbox is used to send work requests from host to action, and to receive work completions. +Messages are exchanged through the AXI Mailbox IP from Xilinx (see Xilinx PG114). +Messages are always a multiple of 128 bits. 
Work request has the following structure: diff --git a/fpga/hdl/action_config.v b/fpga/hdl/action_config.v index 1acc6999..64d0848d 100644 --- a/fpga/hdl/action_config.v +++ b/fpga/hdl/action_config.v @@ -10,13 +10,11 @@ `define HBM_SIZE_BYTES 32'h20000000 `define ADDR_AP_CTRL 16'h0000 -`define ADDR_SET_LED 16'h0008 `define ADDR_GIT_SHA1 16'h000C `define ADDR_ACTION_TYPE 16'h0010 `define ADDR_RELEASE_LEVEL 16'h0014 `define ADDR_MAX_MODULES_FPGA 16'h0020 -`define ADDR_MODS_INT_PKT_GEN 16'h0024 `define ADDR_STALLS_HOST_LO 16'h0028 `define ADDR_STALLS_HOST_HI 16'h002C @@ -50,8 +48,8 @@ `define ADDR_NFRAMES 16'h0098 `define ADDR_NSTORAGE_CELLS 16'h009C -`define ADDR_SPOT_FINDER_THRESHOLD 16'h00A0 -`define ADDR_SPOT_FINDER_SNR 16'h00A4 +`define ADDR_SPOT_FINDER_THRESHOLD 16'h0100 +`define ADDR_SPOT_FINDER_SNR 16'h0104 module action_config @@ -326,9 +324,6 @@ always @(posedge clk) begin `ADDR_MAX_MODULES_FPGA: begin rdata <= MAX_MODULES_FPGA_PARAM; end - `ADDR_MODS_INT_PKT_GEN: begin - rdata <= 32'd1; - end `ADDR_STALLS_HBM_HI: begin rdata <= reg_stalls_hbm[63:32]; end diff --git a/fpga/hls/CMakeLists.txt b/fpga/hls/CMakeLists.txt index f8ed1445..2bbf389b 100644 --- a/fpga/hls/CMakeLists.txt +++ b/fpga/hls/CMakeLists.txt @@ -61,7 +61,6 @@ MAKE_HLS_MODULE(save_to_hbm.cpp save_to_hbm) MAKE_HLS_MODULE(mask_missing.cpp mask_missing) MAKE_HLS_MODULE(integration.cpp integration) MAKE_HLS_MODULE(spot_finder.cpp spot_finder) -MAKE_HLS_MODULE(spot_finder.cpp spot_finder_2) MAKE_HLS_MODULE(axis_broadcast.cpp axis_broadcast) MAKE_HLS_MODULE(axis_256_to_512.cpp axis_256_to_512) MAKE_HLS_MODULE(axis_256_to_512.cpp axis_32_to_512) @@ -83,7 +82,6 @@ SET (HLS_IPS psi_ch_hls_data_collection_fsm_1_0.zip psi_ch_hls_save_to_hbm_1_0.zip psi_ch_hls_mask_missing_1_0.zip psi_ch_hls_frame_generator_1_0.zip - psi_ch_hls_spot_finder_2_1_0.zip psi_ch_hls_spot_finder_1_0.zip psi_ch_hls_integration_1_0.zip psi_ch_hls_axis_broadcast_1_0.zip diff --git a/fpga/hls/hls_jfjoch.h b/fpga/hls/hls_jfjoch.h 
index e56f4699..5332e2a8 100644 --- a/fpga/hls/hls_jfjoch.h +++ b/fpga/hls/hls_jfjoch.h @@ -202,12 +202,6 @@ void spot_finder(STREAM_512 &data_in, volatile ap_int<16> &in_photon_count_threshold, volatile strong_pixel_threshold_t &in_strong_pixel_threshold); -void spot_finder_2(STREAM_512 &data_in, - STREAM_512 &data_out, - hls::stream> &strong_pixel_out, - volatile ap_int<16> &in_photon_count_threshold, - volatile strong_pixel_threshold_t &in_strong_pixel_threshold); - void adu_histo(STREAM_512 &data_in, STREAM_512 &data_out, hls::stream> &result_out, diff --git a/fpga/hls/spot_finder.cpp b/fpga/hls/spot_finder.cpp index 848c4255..511ed5ef 100644 --- a/fpga/hls/spot_finder.cpp +++ b/fpga/hls/spot_finder.cpp @@ -95,7 +95,7 @@ ap_uint<32> check_threshold(const ap_uint<512> data_packed, const ap_uint valid_packed, strong_pixel_threshold_t strong_pixel_threshold, ap_int<16> photon_count_threshold) { -#pragma HLS PIPELINE +#pragma HLS PIPELINE II=1 ap_int<16> data[32]; ap_int sum[32]; ap_int sum2[32]; @@ -325,26 +325,32 @@ void spot_finder_check_threshold(hls::stream &data_in, data_in >> packet_in; while (!packet_in.user) { - data_out << packet_512_t{.data=packet_in.data, .user=0, .last=0}; - sum_in >> line_sum; - sum2_in >> line_sum2; - valid_in >> line_valid; - ap_axiu<32,1,1,1> strong_pixel{.user = 0}; + ap_int<16> photon_count_threshold = in_photon_count_threshold; + strong_pixel_threshold_t strong_pixel_threshold = in_strong_pixel_threshold; + strong_pixel_threshold_t strong_pixel_threshold_sq = strong_pixel_threshold * strong_pixel_threshold; + for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) { +#pragma HLS PIPELINE II=1 + data_out << packet_512_t{.data=packet_in.data, .user=0, .last=0}; + sum_in >> line_sum; + sum2_in >> line_sum2; + valid_in >> line_valid; + ap_axiu<32, 1, 1, 1> strong_pixel{.user = 0}; - strong_pixel.data = check_threshold(packet_in.data, line_sum, line_sum2, line_valid, - in_strong_pixel_threshold, in_photon_count_threshold); 
- strong_pixel_out << strong_pixel; - data_in >> packet_in; + strong_pixel.data = check_threshold(packet_in.data, line_sum, line_sum2, line_valid, + strong_pixel_threshold_sq, photon_count_threshold); + strong_pixel_out << strong_pixel; + data_in >> packet_in; + } } strong_pixel_out << ap_axiu<32,1,1,1>{.data = 0, .user = 1}; data_out << packet_512_t{.data=0, .user=1, .last=1}; } -void spot_finder_2(STREAM_512 &data_in, - STREAM_512 &data_out, - hls::stream> &strong_pixel_out, - volatile ap_int<16> &in_photon_count_threshold, - volatile strong_pixel_threshold_t &in_strong_pixel_threshold) { +void spot_finder(STREAM_512 &data_in, + STREAM_512 &data_out, + hls::stream> &strong_pixel_out, + volatile ap_int<16> &in_photon_count_threshold, + volatile strong_pixel_threshold_t &in_strong_pixel_threshold) { #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS DATAFLOW @@ -409,128 +415,3 @@ void spot_finder_2(STREAM_512 &data_in, i.join(); #endif } - -void spot_finder(STREAM_512 &data_in, - STREAM_512 &data_out, - hls::stream> &strong_pixel_out, - volatile ap_int<16> &in_photon_count_threshold, - volatile strong_pixel_threshold_t &in_strong_pixel_threshold) { -#pragma HLS INTERFACE ap_ctrl_none port=return - -#pragma HLS INTERFACE axis port=data_in -#pragma HLS INTERFACE axis port=data_out -#pragma HLS INTERFACE axis port=strong_pixel_out -#pragma HLS INTERFACE ap_none register port=in_photon_count_threshold -#pragma HLS INTERFACE ap_none register port=in_strong_pixel_threshold - - ap_uint<512> array_mid_line[(FPGA_NBX) * 32]; - ap_uint<512> array_top_line[(FPGA_NBX + 1) * 32]; - ap_uint sum_array[32]; - ap_uint sum2_array[32]; -#pragma HLS RESOURCE variable=array_mid_line core=RAM_2P_BRAM latency=3 -#pragma HLS RESOURCE variable=array_top_line core=RAM_2P_BRAM latency=3 -#pragma HLS RESOURCE variable=sum_array core=RAM_2P_BRAM latency=3 -#pragma HLS RESOURCE variable=sum2_array core=RAM_2P_BRAM latency=3 - - ap_uint<16 * FPGA_NBX> mid_line_save = 0; - - ap_uint 
column_sum_val_save = 0; - ap_uint column_sum2_val_save = 0; - - packet_512_t packet_in; - data_in >> packet_in; - data_out << packet_in; - - data_in >> packet_in; - - while (!packet_in.user) { - for (int i = 0; i < 32; i++) { -#pragma HLS pipeline II=1 - sum_array[i] = 0; - sum2_array[i] = 0; - } - - for (int i = 0; i < (FPGA_NBX) * 32; i++) - array_mid_line[i] = 0; - - for (int i = 0; i < (FPGA_NBX + 1) * 32; i++) - array_top_line[i] = 0; - - strong_pixel_threshold_t strong_pixel_threshold = in_strong_pixel_threshold; - ap_int<16> photon_count_threshold = in_photon_count_threshold; - - for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) { -#pragma HLS PIPELINE II = 1 - data_out << packet_in; - ap_axiu<32, 1, 1, 1> strong_pxl; - - if (strong_pixel_threshold > 0) { - uint16_t cell_number_mid = - (i / 32) % (FPGA_NBX) * 32 + (i % 32); // (axis_packet_line % (NBX)) * 32 + axis_packet_in_line; - uint16_t cell_number_top = (i / 32) % (FPGA_NBX + 1) * 32 + - (i % 32); // (axis_packet_line % (NBX + 1)) * 32 + axis_packet_in_line; - - uint16_t axis_packet_in_line = i % 32; - - ap_uint<512> mid_line_shifted; - ap_uint<512> mid_line = array_mid_line[cell_number_mid]; - mid_line_shifted(16 * FPGA_NBX - 1, 0) = mid_line_save; - mid_line_shifted(511, 16 * FPGA_NBX) = mid_line(16 * (32 - FPGA_NBX) - 1, 0); - mid_line_save = mid_line(511, 16 * (32 - FPGA_NBX)); - - ap_uint<512> top_line = array_top_line[cell_number_top]; - - ap_uint diff_sum; - ap_uint diff_sum2; - - ap_uint column_sum = sum_array[axis_packet_in_line]; - ap_uint column_sum2 = sum2_array[axis_packet_in_line]; - - calc_sum(diff_sum, top_line, packet_in.data); - calc_sum2(diff_sum2, top_line, packet_in.data); - - update_sum(column_sum, diff_sum); - update_sum(column_sum2, diff_sum2); - - ap_uint box_sum = prefix_sum(column_sum, column_sum_val_save); - ap_uint box_sum2 = prefix_sum(column_sum2, column_sum2_val_save); - - array_mid_line[cell_number_mid] = packet_in.data; - 
array_top_line[cell_number_top] = mid_line; - - sum_array[axis_packet_in_line] = column_sum; - sum2_array[axis_packet_in_line] = column_sum2; - - column_sum_val_save = column_sum(SUM_BITWIDTH * 32 - 1, SUM_BITWIDTH * (32 - 2 * FPGA_NBX)); - column_sum2_val_save = column_sum2(SUM2_BITWIDTH * 32 - 1, SUM2_BITWIDTH * (32 - 2 * FPGA_NBX)); - - // Check threshold - strong_pxl.data = check_threshold(mid_line_shifted, - box_sum, - box_sum2, - (2 * FPGA_NBX + 1) * (2 * FPGA_NBX + 1), - strong_pixel_threshold, - photon_count_threshold); - - } else if (photon_count_threshold > 0) { - ap_int<16> val[32]; - unpack32(packet_in.data, val); - - ap_uint<32> output; - for (int j = 0; j < 32; j++) - output[j] = ((val[j] > photon_count_threshold) ? 1 : 0); - - strong_pxl.data = output; - } else { - strong_pxl.data = 0; - } - strong_pxl.user = 0; - strong_pixel_out << strong_pxl; - data_in >> packet_in; - } - } - - strong_pixel_out << ap_axiu<32,1,1,1>{.data = 0, .user = 1}; - - data_out << packet_in; -} diff --git a/fpga/scripts/jfjoch.tcl b/fpga/scripts/jfjoch.tcl index 7e2bfd99..ed2b97d8 100644 --- a/fpga/scripts/jfjoch.tcl +++ b/fpga/scripts/jfjoch.tcl @@ -530,7 +530,7 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } { set smartconnect_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_2 ] # Create instance: spot_finder_0, and set properties - set spot_finder_0 [ create_bd_cell -type ip -vlnv psi.ch:hls:spot_finder_2:1.0 spot_finder_0 ] + set spot_finder_0 [ create_bd_cell -type ip -vlnv psi.ch:hls:spot_finder:1.0 spot_finder_0 ] # Create instance: stream_merge_0, and set properties set stream_merge_0 [ create_bd_cell -type ip -vlnv psi.ch:hls:stream_merge:1.0 stream_merge_0 ] diff --git a/receiver/HLSSimulatedDevice.cpp b/receiver/HLSSimulatedDevice.cpp index 2e81a85d..2ab3ed02 100644 --- a/receiver/HLSSimulatedDevice.cpp +++ b/receiver/HLSSimulatedDevice.cpp @@ -352,7 +352,7 @@ void HLSSimulatedDevice::HLSMainThread() { 
strong_pixel_threshold_t strong_pixel_threshold = 7; // 6. Spot finding - hls_cores.emplace_back([&] { spot_finder_2(converted_5, converted_6, + hls_cores.emplace_back([&] { spot_finder(converted_5, converted_6, spot_finder_result_0, photon_count_threshold, strong_pixel_threshold);}); diff --git a/tests/FPGASpotFindingUnitTest.cpp b/tests/FPGASpotFindingUnitTest.cpp index 56d3bccc..43989454 100644 --- a/tests/FPGASpotFindingUnitTest.cpp +++ b/tests/FPGASpotFindingUnitTest.cpp @@ -159,7 +159,7 @@ bool Isigma_cpu(double val, double sum, double sum2, float threshold) { bool Isigma_fpga(ap_int<16> val, ap_int sum, ap_uint sum2, float threshold) { return check_threshold(val, sum, sum2, (FPGA_NBX *2 + 1) * (FPGA_NBX *2 + 1), - fpga_strong_pixel_threshold(threshold), -1); + threshold * threshold, -1); } TEST_CASE("FPGA_spot_check_threshold","[FPGA][SpotFinder]") {