FPGA: Save to HBM uses only 2 channels

This commit is contained in:
2023-09-10 09:54:32 +02:00
parent 929f6c6544
commit 175aefc4b8
6 changed files with 6 additions and 67 deletions

View File

@@ -131,7 +131,6 @@ void save_to_hbm(STREAM_512 &data_in,
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_in,
hls::stream<ap_uint<32> > &completion_out,
hls::burst_maxi<hbm256_t> d_hbm_p0, hls::burst_maxi<hbm256_t> d_hbm_p1,
hls::burst_maxi<hbm256_t> d_hbm_p2, hls::burst_maxi<hbm256_t> d_hbm_p3,
volatile uint64_t &packets_processed,
volatile ap_uint<1> &idle,
ap_uint<8> &err_reg);

View File

@@ -59,7 +59,6 @@ void save_to_hbm(STREAM_512 &data_in,
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_in,
hls::stream<ap_uint<32> > &completion_out,
hls::burst_maxi<hbm256_t> d_hbm_p0, hls::burst_maxi<hbm256_t> d_hbm_p1,
hls::burst_maxi<hbm256_t> d_hbm_p2, hls::burst_maxi<hbm256_t> d_hbm_p3,
volatile uint64_t &packets_processed,
volatile ap_uint<1> &idle,
ap_uint<8> &err_reg) {
@@ -75,10 +74,6 @@ void save_to_hbm(STREAM_512 &data_in,
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p2 bundle=d_hbm_p2 depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p3 bundle=d_hbm_p3 depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
ap_uint<128> packet_mask[MAX_MODULES_FPGA*2];
#pragma HLS RESOURCE variable=packet_mask core=RAM_1P
@@ -125,9 +120,7 @@ void save_to_hbm(STREAM_512 &data_in,
ap_uint<32> hbm_size_256b = ACT_REG_HBM_SIZE_256b(packet_in.data);
ap_uint<32> offset_hbm_0 = 12 * hbm_size_256b;
ap_uint<32> offset_hbm_1 = 13 * hbm_size_256b;
ap_uint<32> offset_hbm_2 = 14 * hbm_size_256b;
ap_uint<32> offset_hbm_3 = 15 * hbm_size_256b;
ap_uint<32> hbm_size = hbm_size_256b * 32 * 4 / (RAW_MODULE_SIZE * 2);
ap_uint<32> hbm_size = hbm_size_256b * 32 * 2 / (RAW_MODULE_SIZE * 2);
ap_uint<1> mode_nonblocking = (data_collection_mode & MODE_NONBLOCKING_ON_WR) ? 1 : 0;
@@ -182,35 +175,26 @@ void save_to_hbm(STREAM_512 &data_in,
packet_mask[id] |= ap_uint<128>(1) << eth_packet;
}
size_t out_frame_addr = (handle[id] * 128 + eth_packet) * 64;
size_t out_frame_addr = (handle[id] * 128 + eth_packet) * 128;
for (int i = 0; i < 64; i++) {
for (int i = 0; i < 128; i++) {
if (i % 16 == 0) {
d_hbm_p0.write_request(offset_hbm_0 + out_frame_addr + i, 16);
d_hbm_p1.write_request(offset_hbm_1 + out_frame_addr + i, 16);
d_hbm_p2.write_request(offset_hbm_2 + out_frame_addr + i, 16);
d_hbm_p3.write_request(offset_hbm_3 + out_frame_addr + i, 16);
}
data_in >> packet_in;
d_hbm_p0.write(packet_in.data(255, 0));
d_hbm_p1.write(packet_in.data(511, 256));
data_in >> packet_in;
d_hbm_p2.write(packet_in.data(255, 0));
d_hbm_p3.write(packet_in.data(511, 256));
if (i % 16 == 15) {
d_hbm_p0.write_response();
d_hbm_p1.write_response();
d_hbm_p2.write_response();
d_hbm_p3.write_response();
}
}
if (packet_in.last != 1)
internal_err_reg[1] = 1;
total_counter++;
packets_processed = total_counter;
addr_in >> addr;

View File

@@ -19,7 +19,7 @@ void transfer_hbm(ap_uint<256> *d_hbm_p0,
#pragma HLS interface s_axilite port=uram_offset
#pragma HLS INTERFACE s_axilite port=return
if ((stride != 1) && (stride != 2) && (stride != 4))
if ((stride != 1) && (stride != 2))
return;
if (uram_offset >= stride)
@@ -37,18 +37,6 @@ void transfer_hbm(ap_uint<256> *d_hbm_p0,
d_hbm_p0[hbm_offset + i] = uram[2 * i + uram_offset];
}
}
} else if (stride == 4) {
if (hbm_to_uram) {
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 32 / 4; i++) {
#pragma HLS PIPELINE II=1
uram[4 * i + uram_offset] = d_hbm_p0[hbm_offset + i];
}
} else {
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 32 / 4; i++) {
#pragma HLS PIPELINE II=1
d_hbm_p0[hbm_offset + i] = uram[4 * i + uram_offset];
}
}
} else {
if (hbm_to_uram) {
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 32; i++) {

View File

@@ -535,8 +535,6 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } {
connect_bd_intf_net -intf_net s_axi_1 [get_bd_intf_pins s_axi] [get_bd_intf_pins smartconnect_0/S00_AXI]
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p0 [get_bd_intf_pins m_axi_d_hbm_p12] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p0]
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p1 [get_bd_intf_pins m_axi_d_hbm_p13] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p1]
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p2 [get_bd_intf_pins m_axi_d_hbm_p14] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p2]
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p3 [get_bd_intf_pins m_axi_d_hbm_p15] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p3]
connect_bd_intf_net -intf_net smartconnect_0_M00_AXI [get_bd_intf_pins action_config_0/s_axi] [get_bd_intf_pins smartconnect_0/M00_AXI]
connect_bd_intf_net -intf_net smartconnect_0_M01_AXI [get_bd_intf_pins mailbox_0/S0_AXI] [get_bd_intf_pins smartconnect_0/M01_AXI]
connect_bd_intf_net -intf_net smartconnect_0_M02_AXI [get_bd_intf_pins axi_bram_ctrl_calibration_addr/S_AXI] [get_bd_intf_pins smartconnect_0/M02_AXI]

View File

@@ -326,8 +326,6 @@ void HLSSimulatedDevice::HLSMainThread() {
// 3. Write images to HBM
hls_cores.emplace_back([&] { save_to_hbm(converted_3, addr4, save_to_hbm_completion,
hbm.data(),
hbm.data(),
hbm.data(),
hbm.data(),
save_to_hbm_packets_processed,

View File

@@ -1083,32 +1083,6 @@ TEST_CASE("HLS_C_transfer_hbm_2_interfaces", "[FPGA][Full]") {
REQUIRE(test_frame_1 == test_frame_2);
}
// Round-trip test of HBMTransfer with stride 4: a randomly filled frame is
// handed to four transfers (second argument 12..15, last argument 0..3), then
// four matching transfers fill a second buffer, which must equal the first.
TEST_CASE("HLS_C_transfer_hbm_4_interfaces", "[FPGA][Full]") {
// Source frame (filled with random values below).
std::vector<uint16_t> test_frame_1(RAW_MODULE_SIZE);
// Destination frame; value-initialized, so it starts as all zeros.
std::vector<uint16_t> test_frame_2(RAW_MODULE_SIZE);
// Fixed seed, so the generated frame content is the same on every run.
std::mt19937 g1(1389);
// Covers the full uint16_t value range.
std::uniform_int_distribution<uint16_t> dist(0, 65535);
for (auto &i: test_frame_1)
i = dist(g1);
// NOTE(review): meaning of the constructor arguments (0, 64) is not visible here — confirm against HLSSimulatedDevice.
HLSSimulatedDevice test(0, 64);
// Sanity check: the two buffers must differ before any transfer, otherwise
// the final equality check would prove nothing.
REQUIRE(test_frame_1 != test_frame_2);
// Transfers that take the source frame as input (4th argument false).
// NOTE(review): presumably arguments are (buffer, HBM interface number, HBM offset,
// read-back flag, stride, offset within stride) — confirm against HBMTransfer.
test.HBMTransfer(test_frame_1.data(), 12, 16*1024*1024, false, 4, 0);
test.HBMTransfer(test_frame_1.data(), 13, 16*1024*1024, false, 4, 1);
test.HBMTransfer(test_frame_1.data(), 14, 16*1024*1024, false, 4, 2);
test.HBMTransfer(test_frame_1.data(), 15, 16*1024*1024, false, 4, 3);
// Same four transfers with the 4th argument true, targeting the destination frame.
test.HBMTransfer(test_frame_2.data(), 12, 16*1024*1024, true, 4, 0);
test.HBMTransfer(test_frame_2.data(), 13, 16*1024*1024, true, 4, 1);
test.HBMTransfer(test_frame_2.data(), 14, 16*1024*1024, true, 4, 2);
test.HBMTransfer(test_frame_2.data(), 15, 16*1024*1024, true, 4, 3);
// After the round trip the destination must match the source exactly.
REQUIRE(test_frame_1 == test_frame_2);
}
TEST_CASE("HLS_C_Simulation_internal_packet_generator_write_to_hbm", "[FPGA][Full]") {
const uint16_t nmodules = 1;
@@ -1141,10 +1115,8 @@ TEST_CASE("HLS_C_Simulation_internal_packet_generator_write_to_hbm", "[FPGA][Ful
REQUIRE(test_frame_1 != test_frame_2);
test.HBMTransfer(test_frame_2.data(), 12, 2*256*1024, true, 4, 0);
test.HBMTransfer(test_frame_2.data(), 13, 2*256*1024, true, 4, 1);
test.HBMTransfer(test_frame_2.data(), 14, 2*256*1024, true, 4, 2);
test.HBMTransfer(test_frame_2.data(), 15, 2*256*1024, true, 4, 3);
test.HBMTransfer(test_frame_2.data(), 12, 2*512*1024, true, 2, 0);
test.HBMTransfer(test_frame_2.data(), 13, 2*512*1024, true, 2, 1);
REQUIRE(test_frame_1 == test_frame_2);
}