FPGA: Add save to HBM (work in progress)
This commit is contained in:
@@ -12,7 +12,8 @@ ADD_LIBRARY( HLSSimulation STATIC
|
||||
icmp.cpp arp.cpp
|
||||
ip_header_checksum.h
|
||||
udp.cpp
|
||||
sls_detector.cpp)
|
||||
sls_detector.cpp
|
||||
save_to_hbm.cpp)
|
||||
|
||||
TARGET_INCLUDE_DIRECTORIES(HLSSimulation PUBLIC ../include)
|
||||
TARGET_LINK_LIBRARIES(HLSSimulation CommonFunctions)
|
||||
@@ -45,6 +46,7 @@ MAKE_HLS_MODULE(ethernet.cpp ethernet)
|
||||
MAKE_HLS_MODULE(arp.cpp arp)
|
||||
MAKE_HLS_MODULE(udp.cpp udp)
|
||||
MAKE_HLS_MODULE(sls_detector.cpp sls_detector)
|
||||
MAKE_HLS_MODULE(save_to_hbm.cpp save_to_hbm)
|
||||
|
||||
SET (HLS_IPS psi_ch_hls_data_collection_fsm_1_0.zip
|
||||
psi_ch_hls_timer_host_1_0.zip
|
||||
@@ -58,7 +60,8 @@ SET (HLS_IPS psi_ch_hls_data_collection_fsm_1_0.zip
|
||||
psi_ch_hls_udp_1_0.zip
|
||||
psi_ch_hls_sls_detector_1_0.zip
|
||||
psi_ch_hls_icmp_1_0.zip
|
||||
psi_ch_hls_host_writer_1_0.zip)
|
||||
psi_ch_hls_host_writer_1_0.zip
|
||||
psi_ch_hls_save_to_hbm_1_0.zip)
|
||||
|
||||
SET (HLS_IPS ${HLS_IPS} PARENT_SCOPE)
|
||||
ADD_CUSTOM_TARGET(hls DEPENDS ${HLS_IPS})
|
||||
|
||||
@@ -118,6 +118,18 @@ void internal_packet_generator(STREAM_512 &data_in, STREAM_512 &data_out,
|
||||
ap_uint<512> module_cache[RAW_MODULE_SIZE * sizeof(uint16_t) / 512 * 8],
|
||||
volatile ap_uint<1> &in_cancel);
|
||||
|
||||
void save_to_hbm(STREAM_512 &data_in,
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_in,
|
||||
STREAM_512 &data_out,
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_out,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p0, hls::burst_maxi<hbm256_t> d_hbm_p1,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p2, hls::burst_maxi<hbm256_t> d_hbm_p3,
|
||||
STREAM_512 &completion_out,
|
||||
volatile uint64_t &packets_processed,
|
||||
volatile ap_uint<1> &idle,
|
||||
ap_uint<8> &err_reg,
|
||||
uint32_t hbm_size);
|
||||
|
||||
template<int N> ap_uint<N*32> pack32(ap_int<N> in[32]) {
|
||||
#pragma HLS INLINE
|
||||
ap_uint<N*32> out;
|
||||
|
||||
@@ -0,0 +1,241 @@
|
||||
// Copyright (2019-2022) Paul Scherrer Institute
|
||||
// SPDX-License-Identifier: CERN-OHL-S-2.0 or GPL-3.0-or-later
|
||||
|
||||
#include "hls_jfjoch.h"
|
||||
|
||||
#ifndef __SYNTHESIS__
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
#define PACKET_SIZE 8192
|
||||
#define HBM_BURST_SIZE 64
|
||||
|
||||
/**
 * Build the status byte for one finished (module, frame) slot and, once the
 * emission code below is re-enabled, push the completion record downstream.
 *
 * Status byte layout as computed here:
 *   bit 0 - every expected packet of the frame was received
 *   bit 1 - at least one packet of the frame was received
 *   bit 2 - completion was produced while flushing at end of collection
 *   bit 7 - parity bit over (handle, packet_count, status, module_number, frame_num)
 *
 * @param m_axis_completion  completion stream (currently not written, see below)
 * @param handle             buffer handle assigned to the frame
 * @param module_number      detector module the frame belongs to
 * @param frame_num          frame number
 * @param packet_mask        one bit per received packet; only bits 127..0 are
 *                           used, matching the 128-bit masks kept by the caller
 * @param packet_count       number of packets counted for the frame
 * @param debug, timestamp, bunchid, exptime, data_collection_id
 *                           metadata forwarded verbatim into the completion
 * @param flushing           1 if the completion is generated by the final flush
 */
inline void write_completion(STREAM_512 &m_axis_completion,
                             const ap_uint<32> &handle,
                             const ap_uint<8> &module_number,
                             const ap_uint<64> &frame_num,
                             const ap_uint<256> &packet_mask,
                             const ap_uint<16> &packet_count,
                             const ap_uint<32> &debug,
                             const ap_uint<64> &timestamp,
                             const ap_uint<64> &bunchid,
                             const ap_uint<32> &exptime,
                             const ap_uint<32> &data_collection_id,
                             const ap_uint<1> &flushing) {
#pragma HLS INLINE

    // The caller tracks 128 packets per frame (128-bit masks, 7-bit packet index),
    // so the upper half of the 256-bit parameter is always zero. Reducing over all
    // 256 bits would make "all packets ok" permanently false; look at the low
    // 128 bits only.
    // NOTE(review): widen this again if the per-frame packet count grows to 256.
    const ap_uint<128> received_packets = packet_mask(127, 0);

    ap_uint<1> all_packets_ok = received_packets.and_reduce();
    ap_uint<1> any_packets_received = packet_mask.or_reduce();
    ap_uint<8> status = 0;
    status[0] = all_packets_ok;
    status[1] = any_packets_received;
    status[2] = flushing;
    ap_uint<128> tmp = (handle, packet_count, status, module_number, frame_num);
    status[7] = tmp.xor_reduce(); // ensure completion has even parity

    // Emission is disabled for now: the statements below push 32-bit words,
    // while m_axis_completion is declared as STREAM_512 here.
    /*
    if (handle != HANDLE_SKIP_FRAME) {
        m_axis_completion << handle;
        m_axis_completion << (packet_count, status, module_number);
        m_axis_completion << frame_num(63, 32);
        m_axis_completion << frame_num(31, 0);

        m_axis_completion << timestamp(63,32);
        m_axis_completion << timestamp(31,0);
        m_axis_completion << bunchid(63,32);
        m_axis_completion << bunchid(31,0);

        m_axis_completion << exptime;
        m_axis_completion << debug;
        m_axis_completion << 0;
        m_axis_completion << data_collection_id;

        m_axis_completion << packet_mask(127,96);
        m_axis_completion << packet_mask( 95,64);
        m_axis_completion << packet_mask( 63,32);
        m_axis_completion << packet_mask( 31, 0);
    } */

}
|
||||
|
||||
void save_to_hbm(STREAM_512 &data_in,
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_in,
|
||||
STREAM_512 &data_out,
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_out,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p0, hls::burst_maxi<hbm256_t> d_hbm_p1,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p2, hls::burst_maxi<hbm256_t> d_hbm_p3,
|
||||
STREAM_512 &completion_out,
|
||||
volatile uint64_t &packets_processed,
|
||||
volatile ap_uint<1> &idle,
|
||||
ap_uint<8> &err_reg,
|
||||
uint32_t hbm_size) {
|
||||
#pragma HLS INTERFACE ap_ctrl_none port=return
|
||||
#pragma HLS INTERFACE register both axis port=data_in
|
||||
#pragma HLS INTERFACE register both axis port=addr_in
|
||||
#pragma HLS INTERFACE register both axis port=data_out
|
||||
#pragma HLS INTERFACE register both axis port=addr_out
|
||||
#pragma HLS INTERFACE register both axis port=completion_out
|
||||
#pragma HLS INTERFACE register ap_vld port=packets_processed
|
||||
#pragma HLS INTERFACE register ap_vld port=err_reg
|
||||
#pragma HLS INTERFACE register ap_none port=idle
|
||||
#pragma HLS INTERFACE register ap_stable port=hbm_size
|
||||
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p2 bundle=d_hbm_p2 depth=512 offset=off \
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p3 bundle=d_hbm_p3 depth=512 offset=off \
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
|
||||
ap_uint<128> packet_mask[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=packet_mask core=RAM_1P
|
||||
ap_uint<16> packet_count[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=packet_count core=RAM_1P
|
||||
ap_uint<32> handle[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=handle core=RAM_1P
|
||||
ap_uint<64> curr_frame[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=curr_frame core=RAM_1P
|
||||
ap_uint<32> debug[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=debug core=RAM_1P
|
||||
ap_uint<64> timestamp[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=timestamp core=RAM_1P
|
||||
ap_uint<32> exptime[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=exptime core=RAM_1P
|
||||
ap_uint<64> jf_bunchid[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=jf_bunchid core=RAM_1P
|
||||
|
||||
idle = 1;
|
||||
|
||||
for (int i = 0; i < MAX_MODULES_FPGA*2; i++) {
|
||||
#pragma HLS UNROLL
|
||||
curr_frame[i] = UINT64_MAX;
|
||||
handle[i] = 0;
|
||||
packet_mask[i] = 0;
|
||||
packet_count[i] = 0;
|
||||
debug[i] = 0;
|
||||
timestamp[i] = 0;
|
||||
exptime[i] = 0;
|
||||
jf_bunchid[i] = 0;
|
||||
}
|
||||
|
||||
uint32_t handle_val = 0;
|
||||
|
||||
ap_uint<ADDR_STREAM_WIDTH> addr;
|
||||
addr_in >> addr;
|
||||
addr_out << addr;
|
||||
|
||||
packet_512_t packet_in;
|
||||
data_in >> packet_in;
|
||||
data_out << packet_in;
|
||||
|
||||
ap_uint<5> nmodules = ACT_REG_NMODULES(packet_in.data);
|
||||
ap_uint<32> data_collection_mode = ACT_REG_MODE(packet_in.data);
|
||||
ap_uint<32> data_collection_id = data_collection_mode(31, 16); // upper 16-bit of mode
|
||||
|
||||
ap_uint<1> mode_nonblocking = (data_collection_mode & MODE_NONBLOCKING_ON_WR) ? 1 : 0;
|
||||
|
||||
ap_uint<8> internal_err_reg = 0;
|
||||
err_reg = internal_err_reg;
|
||||
|
||||
idle = 0;
|
||||
uint64_t total_counter = 0;
|
||||
packets_processed = 0;
|
||||
addr_in >> addr;
|
||||
addr_out << addr;
|
||||
|
||||
Loop_good_packet:
|
||||
while (!addr_last_flag(addr)) {
|
||||
// Process one UDP packet per iteration
|
||||
#pragma HLS PIPELINE II=128
|
||||
ap_uint<64> frame_number = addr_frame_number(addr);
|
||||
ap_uint<4> module_number = addr_module(addr);
|
||||
ap_uint<7> eth_packet = addr_eth_packet(addr);
|
||||
ap_uint<5> id = module_number * 2 + (frame_number % 2);
|
||||
|
||||
if (curr_frame[id] != frame_number) {
|
||||
if (packet_mask[id] != 0) {
|
||||
ap_uint<32> comp_handle = handle[id];
|
||||
ap_uint<64> comp_frame = curr_frame[id];
|
||||
ap_uint<256> comp_packet_mask = packet_mask[id];
|
||||
ap_uint<16> comp_packet_count = packet_count[id];
|
||||
ap_uint<32> comp_debug = debug[id];
|
||||
ap_uint<64> comp_timestamp = timestamp[id];
|
||||
ap_uint<64> comp_bunchid = jf_bunchid[id];
|
||||
ap_uint<32> comp_exptime = exptime[id];
|
||||
|
||||
write_completion(completion_out, comp_handle, module_number,
|
||||
comp_frame, comp_packet_mask, comp_packet_count,
|
||||
comp_debug, comp_timestamp, comp_bunchid,
|
||||
comp_exptime, data_collection_id, 0);
|
||||
}
|
||||
|
||||
handle[id] = handle_val;
|
||||
curr_frame[id] = frame_number;
|
||||
|
||||
debug[id] = addr_jf_debug(addr);
|
||||
timestamp[id] = addr_timestamp(addr);
|
||||
jf_bunchid[id] = addr_bunch_id(addr);
|
||||
exptime[id] = addr_exptime(addr);
|
||||
|
||||
packet_mask[id] = ap_uint<128>(1) << eth_packet;
|
||||
packet_count[id] = 1;
|
||||
|
||||
handle_val = (handle_val + 1) % hbm_size;
|
||||
} else {
|
||||
packet_count[id]++;
|
||||
packet_mask[id] |= ap_uint<128>(1) << eth_packet;
|
||||
}
|
||||
|
||||
size_t out_frame_addr = (handle[id] * 128 + eth_packet) * 64;
|
||||
|
||||
for (int i = 0; i < 64; i++) {
|
||||
if (i % 16 == 0) {
|
||||
d_hbm_p0.write_request(out_frame_addr + i, 16);
|
||||
d_hbm_p1.write_request(out_frame_addr + i, 16);
|
||||
d_hbm_p2.write_request(out_frame_addr + i, 16);
|
||||
d_hbm_p3.write_request(out_frame_addr + i, 16);
|
||||
}
|
||||
|
||||
data_in >> packet_in;
|
||||
data_out << packet_in;
|
||||
d_hbm_p0.write(packet_in.data(255, 0));
|
||||
d_hbm_p1.write(packet_in.data(511, 256));
|
||||
|
||||
data_in >> packet_in;
|
||||
data_out << packet_in;
|
||||
d_hbm_p2.write(packet_in.data(255, 0));
|
||||
d_hbm_p3.write(packet_in.data(511, 256));
|
||||
|
||||
if (i % 16 == 15) {
|
||||
d_hbm_p0.write_response();
|
||||
d_hbm_p1.write_response();
|
||||
d_hbm_p2.write_response();
|
||||
d_hbm_p3.write_response();
|
||||
}
|
||||
}
|
||||
if (packet_in.last != 1)
|
||||
internal_err_reg[1] = 1;
|
||||
|
||||
|
||||
total_counter++;
|
||||
packets_processed = total_counter;
|
||||
addr_in >> addr;
|
||||
addr_out << addr;
|
||||
err_reg = internal_err_reg;
|
||||
}
|
||||
|
||||
for (ap_uint<8> m = 0; m < nmodules * 2; m++) {
|
||||
#pragma HLS PIPELINE II=16
|
||||
if (packet_mask[m] != 0)
|
||||
write_completion(completion_out, handle[m], m / 2, curr_frame[m],
|
||||
packet_mask[m], packet_count[m],
|
||||
debug[m], timestamp[m], jf_bunchid[m],
|
||||
exptime[m], data_collection_id, 1);
|
||||
}
|
||||
|
||||
data_in >> packet_in;
|
||||
data_out << packet_in;
|
||||
|
||||
idle = 1;
|
||||
}
|
||||
@@ -390,6 +390,10 @@ proc create_root_design { parentCell } {
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axi_d_hbm_p9 [get_bd_intf_pins hbm_infrastructure/s_axi_hbm_9] [get_bd_intf_pins jungfraujoch_0/m_axi_d_hbm_p9]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axi_d_hbm_p10 [get_bd_intf_pins hbm_infrastructure/s_axi_hbm_10] [get_bd_intf_pins jungfraujoch_0/m_axi_d_hbm_p10]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axi_d_hbm_p11 [get_bd_intf_pins hbm_infrastructure/s_axi_hbm_11] [get_bd_intf_pins jungfraujoch_0/m_axi_d_hbm_p11]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axi_d_hbm_p12 [get_bd_intf_pins hbm_infrastructure/s_axi_hbm_12] [get_bd_intf_pins jungfraujoch_0/m_axi_d_hbm_p12]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axi_d_hbm_p13 [get_bd_intf_pins hbm_infrastructure/s_axi_hbm_13] [get_bd_intf_pins jungfraujoch_0/m_axi_d_hbm_p13]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axi_d_hbm_p14 [get_bd_intf_pins hbm_infrastructure/s_axi_hbm_14] [get_bd_intf_pins jungfraujoch_0/m_axi_d_hbm_p14]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axi_d_hbm_p15 [get_bd_intf_pins hbm_infrastructure/s_axi_hbm_15] [get_bd_intf_pins jungfraujoch_0/m_axi_d_hbm_p15]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axis_c2h_data [get_bd_intf_pins jungfraujoch_0/m_axis_c2h_data] [get_bd_intf_pins pcie_dma_0/s_axis_c2h_data]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axis_c2h_datamover_cmd [get_bd_intf_pins jungfraujoch_0/m_axis_c2h_datamover_cmd] [get_bd_intf_pins pcie_dma_0/s_axis_c2h_cmd]
|
||||
connect_bd_intf_net -intf_net jungfraujoch_0_m_axis_h2c_datamover_cmd [get_bd_intf_pins jungfraujoch_0/m_axis_h2c_datamover_cmd] [get_bd_intf_pins pcie_dma_0/s_axis_h2c_cmd]
|
||||
|
||||
@@ -64,6 +64,14 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } {
|
||||
|
||||
create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 m_axi_d_hbm_p11
|
||||
|
||||
create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 m_axi_d_hbm_p12
|
||||
|
||||
create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 m_axi_d_hbm_p13
|
||||
|
||||
create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 m_axi_d_hbm_p14
|
||||
|
||||
create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 m_axi_d_hbm_p15
|
||||
|
||||
create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_c2h_data
|
||||
|
||||
create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_c2h_datamover_cmd
|
||||
|
||||
@@ -216,11 +216,13 @@ void HLSSimulatedDevice::HLSMainThread() {
|
||||
|
||||
STREAM_512 converted_1;
|
||||
STREAM_512 converted_2;
|
||||
STREAM_512 converted_3;
|
||||
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > addr0;
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > addr1;
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > addr2;
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > addr3;
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > addr4;
|
||||
|
||||
hls::stream<ap_uint<UDP_METADATA_STREAM_WIDTH> > udp_metadata;
|
||||
ap_uint<1> idle_data_collection;
|
||||
@@ -306,10 +308,25 @@ void HLSSimulatedDevice::HLSMainThread() {
|
||||
// Timer procedure - count how many times write_data is not accepting input (to help track down latency issues)
|
||||
hls_cores.emplace_back([&] { timer_host(converted_1, converted_2, counter_host); });
|
||||
|
||||
STREAM_512 save_to_hbm_completion;
|
||||
ap_uint<8> save_to_hbm_err_reg;
|
||||
uint64_t save_to_hbm_packets_processed;
|
||||
ap_uint<1> save_to_hbm_idle;
|
||||
|
||||
hls_cores.emplace_back([&] { save_to_hbm(converted_2, addr3, converted_3, addr4,
|
||||
(hbm256_t *) (hbm_memory[12].data()),
|
||||
(hbm256_t *) (hbm_memory[13].data()),
|
||||
(hbm256_t *) (hbm_memory[14].data()),
|
||||
(hbm256_t *) (hbm_memory[15].data()),
|
||||
save_to_hbm_completion,
|
||||
save_to_hbm_packets_processed,
|
||||
save_to_hbm_idle,
|
||||
save_to_hbm_err_reg,
|
||||
16); });
|
||||
|
||||
// 3. Prepare data to write to host memory
|
||||
hls_cores.emplace_back([&] {
|
||||
host_writer(converted_2, addr3, datamover_out.GetDataStream(),
|
||||
host_writer(converted_3, addr4, datamover_out.GetDataStream(),
|
||||
datamover_out.GetCtrlStream(), work_request_stream, completion_stream,
|
||||
packets_processed, host_writer_idle, err_reg); });
|
||||
|
||||
@@ -328,6 +345,9 @@ void HLSSimulatedDevice::HLSMainThread() {
|
||||
if (!addr3.empty())
|
||||
throw std::runtime_error("Addr3 queue not empty");
|
||||
|
||||
if (!addr4.empty())
|
||||
throw std::runtime_error("Addr4 queue not empty");
|
||||
|
||||
if (!raw1.empty())
|
||||
throw std::runtime_error("Raw1 queue not empty");
|
||||
|
||||
@@ -349,6 +369,9 @@ void HLSSimulatedDevice::HLSMainThread() {
|
||||
if (!converted_2.empty())
|
||||
throw std::runtime_error("Converted_2 queue not empty");
|
||||
|
||||
if (!converted_3.empty())
|
||||
throw std::runtime_error("Converted_3 queue not empty");
|
||||
|
||||
if (!datamover_in.GetDataStream().empty())
|
||||
throw std::runtime_error("Datamover queue is not empty");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user