Files
Jungfraujoch/fpga/hls/load_from_hbm.cpp

68 lines
2.7 KiB
C++

// Copyright (2019-2023) Paul Scherrer Institute
// SPDX-License-Identifier: GPL-3.0-or-later
#include "hls_jfjoch.h"
/// @brief Streams module data back out of two HBM ports, driven by completion records.
///
/// Sequence visible in this function:
///  1. One packet is read from data_in and forwarded unchanged to data_out.
///  2. An initial list of handles (0 .. N-1) is written to m_axis_free_handles.
///  3. For every completion read from s_axis_completion whose `last` flag is not set:
///     the completion is forwarded to m_axis_completion, RAW_MODULE_SIZE * sizeof(uint16_t)
///     bytes are read from HBM (half from each port) and emitted on data_out as 512-bit
///     packets, and the completion's handle is written back to m_axis_free_handles.
///  4. The completion with `last` set is forwarded, UINT16_MAX is written to
///     m_axis_free_handles, and one more packet is forwarded from data_in to data_out.
///
/// NOTE(review): the pass-through packets in steps 1 and 4 are presumably start/end-of-run
/// control packets, and UINT16_MAX presumably an end-of-list marker for the consumer of
/// free handles -- confirm against the producer/consumer blocks.
///
/// @param data_in              Input 512-bit stream; exactly two packets are consumed here.
/// @param data_out             Output 512-bit stream (pass-through packets plus HBM data).
/// @param s_axis_completion    Incoming completion records; a record with `last` set ends the loop.
/// @param m_axis_completion    Every received completion record is forwarded here.
/// @param m_axis_free_handles  Receives the initial handle list, each handle after its data
///                             has been read out, and a final UINT16_MAX.
/// @param d_hbm_p0             HBM port providing bits 255..0 of each output word.
/// @param d_hbm_p1             HBM port providing bits 511..256 of each output word.
/// @param hbm_size_bytes       Size in bytes used to derive base offsets, the odd-handle
///                             offset and the number of handles (presumably the size of one
///                             HBM region/pseudo-channel -- TODO confirm).
void load_from_hbm(STREAM_512 &data_in,
STREAM_512 &data_out,
hls::stream<axis_completion > &s_axis_completion,
hls::stream<axis_completion > &m_axis_completion,
hls::stream<ap_uint<16> > &m_axis_free_handles,
ap_uint<256> *d_hbm_p0,
ap_uint<256> *d_hbm_p1,
ap_uint<32> hbm_size_bytes) {
// Interface configuration: no block-level control port (ap_ctrl_none), all streams are
// registered AXI-Stream ports, hbm_size_bytes is a plain ap_none input, and each HBM
// pointer is its own m_axi bundle with identical read/write burst settings.
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE register both axis port=data_in
#pragma HLS INTERFACE register both axis port=data_out
#pragma HLS INTERFACE register both axis port=m_axis_completion
#pragma HLS INTERFACE register both axis port=s_axis_completion
#pragma HLS INTERFACE register both axis port=m_axis_free_handles
#pragma HLS INTERFACE mode=ap_none port=hbm_size_bytes
#pragma HLS INTERFACE mode=m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
#pragma HLS INTERFACE mode=m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
// Base indices into the two HBM ports, expressed in 256-bit (32-byte) words because the
// pointers are ap_uint<256>*. They correspond to byte offsets of 12x and 14x hbm_size_bytes.
// NOTE(review): the factors 12 and 14 presumably select specific HBM regions -- confirm.
ap_uint<32> offset_hbm_0 = 12 * hbm_size_bytes / 32;
ap_uint<32> offset_hbm_1 = 14 * hbm_size_bytes / 32;
// Forward the first incoming packet unchanged before doing anything else.
packet_512_t packet;
data_in >> packet;
data_out << packet;
// Publish the initial set of handles: 0 .. hbm_size_bytes / (bytes per handle) - 1.
// The divisor evaluates to RAW_MODULE_SIZE * 2 bytes, i.e. the same amount that is read
// back per handle below (RAW_MODULE_SIZE * sizeof(uint16_t)).
// NOTE(review): the counter is 16 bits wide; this assumes the handle count stays below
// 65536, otherwise the counter would wrap -- confirm for the supported hbm_size_bytes.
for (ap_uint<16> i = 0; i < hbm_size_bytes / (RAW_MODULE_SIZE * sizeof(uint32_t) / 2); i++)
m_axis_free_handles << i;
axis_completion cmpl;
s_axis_completion >> cmpl;
// Process completions until one arrives with `last` set.
while (!cmpl.last) {
// The completion is forwarded before the corresponding data is read out.
m_axis_completion << cmpl;
// Word offset (32-byte units per port) of this handle's data: handles 2k and 2k+1 share
// the base k * (module bytes / 64); each loop iteration below consumes 64 bytes in total
// (32 from each port), hence the division by 64.
size_t offset = ((cmpl.handle / 2) * RAW_MODULE_SIZE * sizeof(uint16_t)) / 64;
// Odd handles are placed hbm_size_bytes bytes (hbm_size_bytes / 32 words) further on.
if (cmpl.handle % 2 == 1)
offset += hbm_size_bytes / 32;
// Emit the module as RAW_MODULE_SIZE * sizeof(uint16_t) / 64 output words of 512 bits.
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
// Pipelined with a requested initiation interval of 1.
#pragma HLS PIPELINE II=1
packet_512_t packet_out;
// Lower half of the word comes from port 0, upper half from port 1, at the same index.
packet_out.data(255, 0) = d_hbm_p0[offset_hbm_0 + offset + i];
packet_out.data(511, 256) = d_hbm_p1[offset_hbm_1 + offset + i];
// `last` is asserted only on the final word of the module.
packet_out.last = (i == RAW_MODULE_SIZE * sizeof(uint16_t) / 64 - 1);
packet_out.id = 0;
packet_out.dest = 0;
// All 64 byte lanes are marked valid.
packet_out.keep = UINT64_MAX;
packet_out.strb = UINT64_MAX;
packet_out.user = 0;
data_out << packet_out;
}
// The data has been read out, so the handle is handed back, then the next completion is fetched.
m_axis_free_handles << cmpl.handle;
s_axis_completion >> cmpl;
}
// The terminating completion (last == 1) is forwarded as well; no data is read for it.
m_axis_completion << cmpl;
// NOTE(review): UINT16_MAX is presumably an end marker for the free-handle consumer -- confirm.
m_axis_free_handles << UINT16_MAX;
// Forward one final packet from input to output.
data_in >> packet;
data_out << packet;
}