FPGA: Save to HBM uses only 2 channels
This commit is contained in:
@@ -131,7 +131,6 @@ void save_to_hbm(STREAM_512 &data_in,
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_in,
|
||||
hls::stream<ap_uint<32> > &completion_out,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p0, hls::burst_maxi<hbm256_t> d_hbm_p1,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p2, hls::burst_maxi<hbm256_t> d_hbm_p3,
|
||||
volatile uint64_t &packets_processed,
|
||||
volatile ap_uint<1> &idle,
|
||||
ap_uint<8> &err_reg);
|
||||
|
||||
@@ -59,7 +59,6 @@ void save_to_hbm(STREAM_512 &data_in,
|
||||
hls::stream<ap_uint<ADDR_STREAM_WIDTH> > &addr_in,
|
||||
hls::stream<ap_uint<32> > &completion_out,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p0, hls::burst_maxi<hbm256_t> d_hbm_p1,
|
||||
hls::burst_maxi<hbm256_t> d_hbm_p2, hls::burst_maxi<hbm256_t> d_hbm_p3,
|
||||
volatile uint64_t &packets_processed,
|
||||
volatile ap_uint<1> &idle,
|
||||
ap_uint<8> &err_reg) {
|
||||
@@ -75,10 +74,6 @@ void save_to_hbm(STREAM_512 &data_in,
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p2 bundle=d_hbm_p2 depth=512 offset=off \
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p3 bundle=d_hbm_p3 depth=512 offset=off \
|
||||
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
|
||||
|
||||
ap_uint<128> packet_mask[MAX_MODULES_FPGA*2];
|
||||
#pragma HLS RESOURCE variable=packet_mask core=RAM_1P
|
||||
@@ -125,9 +120,7 @@ void save_to_hbm(STREAM_512 &data_in,
|
||||
ap_uint<32> hbm_size_256b = ACT_REG_HBM_SIZE_256b(packet_in.data);
|
||||
ap_uint<32> offset_hbm_0 = 12 * hbm_size_256b;
|
||||
ap_uint<32> offset_hbm_1 = 13 * hbm_size_256b;
|
||||
ap_uint<32> offset_hbm_2 = 14 * hbm_size_256b;
|
||||
ap_uint<32> offset_hbm_3 = 15 * hbm_size_256b;
|
||||
ap_uint<32> hbm_size = hbm_size_256b * 32 * 4 / (RAW_MODULE_SIZE * 2);
|
||||
ap_uint<32> hbm_size = hbm_size_256b * 32 * 2 / (RAW_MODULE_SIZE * 2);
|
||||
|
||||
ap_uint<1> mode_nonblocking = (data_collection_mode & MODE_NONBLOCKING_ON_WR) ? 1 : 0;
|
||||
|
||||
@@ -182,35 +175,26 @@ void save_to_hbm(STREAM_512 &data_in,
|
||||
packet_mask[id] |= ap_uint<128>(1) << eth_packet;
|
||||
}
|
||||
|
||||
size_t out_frame_addr = (handle[id] * 128 + eth_packet) * 64;
|
||||
size_t out_frame_addr = (handle[id] * 128 + eth_packet) * 128;
|
||||
|
||||
for (int i = 0; i < 64; i++) {
|
||||
for (int i = 0; i < 128; i++) {
|
||||
if (i % 16 == 0) {
|
||||
d_hbm_p0.write_request(offset_hbm_0 + out_frame_addr + i, 16);
|
||||
d_hbm_p1.write_request(offset_hbm_1 + out_frame_addr + i, 16);
|
||||
d_hbm_p2.write_request(offset_hbm_2 + out_frame_addr + i, 16);
|
||||
d_hbm_p3.write_request(offset_hbm_3 + out_frame_addr + i, 16);
|
||||
}
|
||||
|
||||
data_in >> packet_in;
|
||||
d_hbm_p0.write(packet_in.data(255, 0));
|
||||
d_hbm_p1.write(packet_in.data(511, 256));
|
||||
|
||||
data_in >> packet_in;
|
||||
d_hbm_p2.write(packet_in.data(255, 0));
|
||||
d_hbm_p3.write(packet_in.data(511, 256));
|
||||
|
||||
if (i % 16 == 15) {
|
||||
d_hbm_p0.write_response();
|
||||
d_hbm_p1.write_response();
|
||||
d_hbm_p2.write_response();
|
||||
d_hbm_p3.write_response();
|
||||
}
|
||||
}
|
||||
if (packet_in.last != 1)
|
||||
internal_err_reg[1] = 1;
|
||||
|
||||
|
||||
total_counter++;
|
||||
packets_processed = total_counter;
|
||||
addr_in >> addr;
|
||||
|
||||
@@ -19,7 +19,7 @@ void transfer_hbm(ap_uint<256> *d_hbm_p0,
|
||||
#pragma HLS interface s_axilite port=uram_offset
|
||||
#pragma HLS INTERFACE s_axilite port=return
|
||||
|
||||
if ((stride != 1) && (stride != 2) && (stride != 4))
|
||||
if ((stride != 1) && (stride != 2))
|
||||
return;
|
||||
|
||||
if (uram_offset >= stride)
|
||||
@@ -37,18 +37,6 @@ void transfer_hbm(ap_uint<256> *d_hbm_p0,
|
||||
d_hbm_p0[hbm_offset + i] = uram[2 * i + uram_offset];
|
||||
}
|
||||
}
|
||||
} else if (stride == 4) {
|
||||
if (hbm_to_uram) {
|
||||
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 32 / 4; i++) {
|
||||
#pragma HLS PIPELINE II=1
|
||||
uram[4 * i + uram_offset] = d_hbm_p0[hbm_offset + i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 32 / 4; i++) {
|
||||
#pragma HLS PIPELINE II=1
|
||||
d_hbm_p0[hbm_offset + i] = uram[4 * i + uram_offset];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (hbm_to_uram) {
|
||||
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 32; i++) {
|
||||
|
||||
@@ -535,8 +535,6 @@ proc create_hier_cell_jungfraujoch { parentCell nameHier } {
|
||||
connect_bd_intf_net -intf_net s_axi_1 [get_bd_intf_pins s_axi] [get_bd_intf_pins smartconnect_0/S00_AXI]
|
||||
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p0 [get_bd_intf_pins m_axi_d_hbm_p12] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p0]
|
||||
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p1 [get_bd_intf_pins m_axi_d_hbm_p13] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p1]
|
||||
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p2 [get_bd_intf_pins m_axi_d_hbm_p14] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p2]
|
||||
connect_bd_intf_net -intf_net save_to_hbm_0_m_axi_d_hbm_p3 [get_bd_intf_pins m_axi_d_hbm_p15] [get_bd_intf_pins save_to_hbm_0/m_axi_d_hbm_p3]
|
||||
connect_bd_intf_net -intf_net smartconnect_0_M00_AXI [get_bd_intf_pins action_config_0/s_axi] [get_bd_intf_pins smartconnect_0/M00_AXI]
|
||||
connect_bd_intf_net -intf_net smartconnect_0_M01_AXI [get_bd_intf_pins mailbox_0/S0_AXI] [get_bd_intf_pins smartconnect_0/M01_AXI]
|
||||
connect_bd_intf_net -intf_net smartconnect_0_M02_AXI [get_bd_intf_pins axi_bram_ctrl_calibration_addr/S_AXI] [get_bd_intf_pins smartconnect_0/M02_AXI]
|
||||
|
||||
@@ -326,8 +326,6 @@ void HLSSimulatedDevice::HLSMainThread() {
|
||||
|
||||
// 3. Write images to HBM
|
||||
hls_cores.emplace_back([&] { save_to_hbm(converted_3, addr4, save_to_hbm_completion,
|
||||
hbm.data(),
|
||||
hbm.data(),
|
||||
hbm.data(),
|
||||
hbm.data(),
|
||||
save_to_hbm_packets_processed,
|
||||
|
||||
@@ -1083,32 +1083,6 @@ TEST_CASE("HLS_C_transfer_hbm_2_interfaces", "[FPGA][Full]") {
|
||||
REQUIRE(test_frame_1 == test_frame_2);
|
||||
}
|
||||
|
||||
TEST_CASE("HLS_C_transfer_hbm_4_interfaces", "[FPGA][Full]") {
|
||||
std::vector<uint16_t> test_frame_1(RAW_MODULE_SIZE);
|
||||
std::vector<uint16_t> test_frame_2(RAW_MODULE_SIZE);
|
||||
|
||||
std::mt19937 g1(1389);
|
||||
std::uniform_int_distribution<uint16_t> dist(0, 65535);
|
||||
|
||||
for (auto &i: test_frame_1)
|
||||
i = dist(g1);
|
||||
|
||||
HLSSimulatedDevice test(0, 64);
|
||||
|
||||
REQUIRE(test_frame_1 != test_frame_2);
|
||||
|
||||
test.HBMTransfer(test_frame_1.data(), 12, 16*1024*1024, false, 4, 0);
|
||||
test.HBMTransfer(test_frame_1.data(), 13, 16*1024*1024, false, 4, 1);
|
||||
test.HBMTransfer(test_frame_1.data(), 14, 16*1024*1024, false, 4, 2);
|
||||
test.HBMTransfer(test_frame_1.data(), 15, 16*1024*1024, false, 4, 3);
|
||||
test.HBMTransfer(test_frame_2.data(), 12, 16*1024*1024, true, 4, 0);
|
||||
test.HBMTransfer(test_frame_2.data(), 13, 16*1024*1024, true, 4, 1);
|
||||
test.HBMTransfer(test_frame_2.data(), 14, 16*1024*1024, true, 4, 2);
|
||||
test.HBMTransfer(test_frame_2.data(), 15, 16*1024*1024, true, 4, 3);
|
||||
|
||||
REQUIRE(test_frame_1 == test_frame_2);
|
||||
}
|
||||
|
||||
TEST_CASE("HLS_C_Simulation_internal_packet_generator_write_to_hbm", "[FPGA][Full]") {
|
||||
const uint16_t nmodules = 1;
|
||||
|
||||
@@ -1141,10 +1115,8 @@ TEST_CASE("HLS_C_Simulation_internal_packet_generator_write_to_hbm", "[FPGA][Ful
|
||||
|
||||
REQUIRE(test_frame_1 != test_frame_2);
|
||||
|
||||
test.HBMTransfer(test_frame_2.data(), 12, 2*256*1024, true, 4, 0);
|
||||
test.HBMTransfer(test_frame_2.data(), 13, 2*256*1024, true, 4, 1);
|
||||
test.HBMTransfer(test_frame_2.data(), 14, 2*256*1024, true, 4, 2);
|
||||
test.HBMTransfer(test_frame_2.data(), 15, 2*256*1024, true, 4, 3);
|
||||
test.HBMTransfer(test_frame_2.data(), 12, 2*512*1024, true, 2, 0);
|
||||
test.HBMTransfer(test_frame_2.data(), 13, 2*512*1024, true, 2, 1);
|
||||
|
||||
REQUIRE(test_frame_1 == test_frame_2);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user