// Copyright (2019-2023) Paul Scherrer Institute
|
|
|
|
#define AP_INT_MAX_W 2048
|
|
|
|
#include "hls_jfjoch.h"
|
|
|
|
struct bshuf_packet {
|
|
ap_uint<512> data;
|
|
ap_uint<3> dest;
|
|
ap_uint<1> id;
|
|
ap_uint<1> last;
|
|
ap_uint<1> user;
|
|
};
|
|
|
|
typedef hls::stream<bshuf_packet> BSHUF_STREAM;
|
|
|
|
// Bit-level transpose of N packed 16-bit words.
// Bit `bit` (0..15) of word `word` (0..N-1), i.e. in[16*word + bit], is
// written to out[N*bit + word]; the result is therefore 16 consecutive
// N-bit groups, one per bit position.
template<int N> ap_uint<16*N> bshuf16(const ap_uint<16*N> in) {
#pragma HLS INLINE
    ap_uint<16*N> out;
    for (int bit = 0; bit < 16; bit++) {
        for (int word = 0; word < N; word++) {
            out[bit * N + word] = in[word * 16 + bit];
        }
    }
    return out;
}
|
|
|
|
// First stage: converts packet_512_t words from data_in into bshuf_packet
// words on data_out.
//
// * The first word is forwarded unchanged (dest = 0); its data is also tested
//   with ACT_REG_MODE(...) & MODE_BITSHUFFLE_FPGA to decide whether shuffling
//   is enabled for the words that follow.
// * With shuffling enabled, words are consumed four at a time. Each 2048-bit
//   group goes through bshuf16<128> and is emitted as four words whose dest
//   carries a 2-bit group counter. The user flag is only examined on the
//   leading word of a group.
// * Otherwise words are forwarded one by one with dest = 0.
// * The word that has user set is forwarded last, with dest = 0.
void bshuf256(STREAM_512 &data_in, BSHUF_STREAM &data_out) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE axis port=data_in
#pragma HLS INTERFACE axis port=data_out
    packet_512_t group[4];  // group[0] always holds the next not-yet-forwarded input word
    bshuf_packet word;      // scratch output word

    // First word: pass through and extract the mode flag.
    data_in >> group[0];
    ap_uint<1> shuffle_enabled = (ACT_REG_MODE(group[0].data) & MODE_BITSHUFFLE_FPGA) ? 1 : 0;
    word.dest = 0;
    word.data = group[0].data;
    word.id   = group[0].id;
    word.last = group[0].last;
    word.user = group[0].user;
    data_out << word;

    data_in >> group[0];

    ap_uint<2> group_tag = 0;  // wraps 0..3; becomes dest of the shuffled words

    if (shuffle_enabled) {
        bitshuffle_loop:
        while (!group[0].user) {
#pragma HLS PIPELINE II=4

            // Complete the group of four words (group[0] was read already).
            for (int k = 1; k < 4; k++)
                data_in >> group[k];

            // Concatenate with group[0] in the least significant 512 bits.
            ap_uint<2048> wide_in = (group[3].data, group[2].data, group[1].data, group[0].data);
            ap_uint<2048> wide_out = bshuf16<128>(wide_in);

            for (int k = 0; k < 4; k++) {
                word.data = wide_out(512 * k + 511, 512 * k);
                word.dest = group_tag;
                word.id   = group[k].id;
                word.last = group[k].last;
                word.user = group[k].user;
                data_out << word;
            }
            group_tag++;
            data_in >> group[0];
        }
    }

    // Runs only when shuffling is disabled: otherwise group[0].user is
    // already set when the loop above exits.
    forward_frames:
    while (!group[0].user) {
#pragma HLS PIPELINE II=1
        word.dest = 0;
        word.data = group[0].data;
        word.id   = group[0].id;
        word.last = group[0].last;
        word.user = group[0].user;
        data_out << word;
        data_in >> group[0];
    }

    // Final word (user set).
    word.dest = 0;
    word.data = group[0].data;
    word.id   = group[0].id;
    word.last = group[0].last;
    word.user = group[0].user;
    data_out << word;
}
|
|
|
|
// Distributes the words of `in` over four output streams according to dest.
//
// * The first word is always written to out_0.
// * Every following word with user == 0 goes to out_1 / out_2 / out_3 when
//   dest is 1 / 2 / 3, and to out_0 for any other dest value.
// * The first word with user set ends the loop and is written to out_0.
//
// The mode flag in the first word is not needed here (routing depends on dest
// only), so it is no longer extracted into an unused local.
void bshuf1k_axis_split(BSHUF_STREAM &in, BSHUF_STREAM &out_0, BSHUF_STREAM &out_1, BSHUF_STREAM &out_2, BSHUF_STREAM &out_3) {
    bshuf_packet pkt;

    // First word.
    in >> pkt;
    out_0 << pkt;

    in >> pkt;

    forward_frames:
    while (!pkt.user) {
#pragma HLS PIPELINE II=1
        if (pkt.dest == 1)
            out_1 << pkt;
        else if (pkt.dest == 2)
            out_2 << pkt;
        else if (pkt.dest == 3)
            out_3 << pkt;
        else
            out_0 << pkt;
        in >> pkt;
    }

    // Final word (user set).
    out_0 << pkt;
}
|
|
|
|
// Merges four bshuf_packet streams back into one.
//
// * The first word of in_0 is forwarded and its data is tested with
//   ACT_REG_MODE(...) & MODE_BITSHUFFLE_FPGA.
// * With shuffling enabled, one word is taken from in_0, in_1, in_2, in_3 in
//   turn and written to `out` in that order, until a word with user set is
//   read from in_0.
// * With shuffling disabled, only in_0 is read and forwarded.
// * The word with user set (always read from in_0) is written last.
void bshuf1k_axis_combine(BSHUF_STREAM &in_0, BSHUF_STREAM &in_1, BSHUF_STREAM &in_2, BSHUF_STREAM &in_3,
                          BSHUF_STREAM &out) {
    bshuf_packet pkt;

    // First word: forward and extract the mode flag.
    in_0 >> pkt;
    ap_uint<1> shuffle_enabled = (ACT_REG_MODE(pkt.data) & MODE_BITSHUFFLE_FPGA) ? 1 : 0;
    out << pkt;

    in_0 >> pkt;

    if (shuffle_enabled) {
        bitshuffle_loop:
        while (!pkt.user) {
#pragma HLS PIPELINE II=4
            // pkt currently holds the word from in_0.
            out << pkt;

            in_1 >> pkt;
            out << pkt;

            in_2 >> pkt;
            out << pkt;

            in_3 >> pkt;
            out << pkt;

            in_0 >> pkt;
        }
    }

    // Runs only when shuffling is disabled: otherwise pkt.user is already set.
    forward_frames:
    while (!pkt.user) {
#pragma HLS PIPELINE II=1
        out << pkt;
        in_0 >> pkt;
    }

    // Final word (user set).
    out << pkt;
}
|
|
|
|
// Second shuffle stage, operating on 128-bit lanes.
//
// * The first word is forwarded unchanged and its data is tested with
//   ACT_REG_MODE(...) & MODE_BITSHUFFLE_FPGA.
// * With shuffling enabled, words are consumed four at a time (the user flag
//   is only examined on the leading word). The four 512-bit words are treated
//   as a 4x4 matrix of 128-bit lanes and transposed: lane j of output word i
//   is lane i of input word j. The output words keep id/last/user of the
//   input word in the same position. dest is set to (counter / 4) % 4, so it
//   stays constant for four consecutive groups before advancing.
// * With shuffling disabled, words are forwarded with dest = 0.
// * The word with user set is forwarded last, with dest = 0.
//
// The previously built but never read 2048-bit concatenation of the input
// words has been removed.
void bshuf1k_shuffle(BSHUF_STREAM &in, BSHUF_STREAM &out) {
    bshuf_packet group[4];  // group[0] always holds the next not-yet-forwarded word

    // First word: forward and extract the mode flag.
    in >> group[0];
    ap_uint<1> shuffle_enabled = (ACT_REG_MODE(group[0].data) & MODE_BITSHUFFLE_FPGA) ? 1 : 0;
    out << group[0];

    in >> group[0];

    ap_uint<5> counter = 0;  // counts groups; wraps at 32, a multiple of the 16-group dest period

    if (shuffle_enabled) {
        bitshuffle_loop:
        while (!group[0].user) {
#pragma HLS PIPELINE II=4

            // Complete the group of four words (group[0] was read already).
            for (int k = 1; k < 4; k++)
                in >> group[k];

            // Transpose into a temporary so that no input lane is overwritten
            // before it has been read.
            ap_uint<2048> transposed;
            for (int row = 0; row < 4; row++) {
                for (int col = 0; col < 4; col++) {
                    transposed(512 * row + 128 * col + 127, 512 * row + 128 * col) =
                            group[col].data(128 * row + 127, 128 * row);
                }
            }

            for (int k = 0; k < 4; k++) {
                group[k].data = transposed(512 * k + 511, 512 * k);
                group[k].dest = (counter / 4) % 4;
                out << group[k];
            }
            counter += 1;

            in >> group[0];
        }
    }

    // Runs only when shuffling is disabled: otherwise group[0].user is
    // already set when the loop above exits.
    forward_frames:
    while (!group[0].user) {
#pragma HLS PIPELINE II=1
        group[0].dest = 0;
        out << group[0];
        in >> group[0];
    }

    // Final word (user set).
    group[0].dest = 0;
    out << group[0];
}
|
|
|
|
// Distributes the words of `in` over four output streams according to dest.
//
// * The first word is always written to out_0.
// * Every following word with user == 0 goes to out_1 / out_2 / out_3 when
//   dest is 1 / 2 / 3, and to out_0 for any other dest value.
// * The first word with user set ends the loop and is written to out_0.
//
// The mode flag in the first word is not needed here (routing depends on dest
// only), so it is no longer extracted into an unused local.
void bshuf4k_axis_split(BSHUF_STREAM &in,
                        BSHUF_STREAM &out_0,
                        BSHUF_STREAM &out_1,
                        BSHUF_STREAM &out_2,
                        BSHUF_STREAM &out_3) {
    bshuf_packet pkt;

    // First word.
    in >> pkt;
    out_0 << pkt;

    in >> pkt;

    forward_frames:
    while (!pkt.user) {
#pragma HLS PIPELINE II=1
        if (pkt.dest == 1)
            out_1 << pkt;
        else if (pkt.dest == 2)
            out_2 << pkt;
        else if (pkt.dest == 3)
            out_3 << pkt;
        else
            out_0 << pkt;
        in >> pkt;
    }

    // Final word (user set).
    out_0 << pkt;
}
|
|
|
|
// Last stage: merges four bshuf_packet streams into the packet_512_t output
// stream. Only data, last, user and id are copied into the output word; dest
// is dropped.
//
// * The first word of in_0 is forwarded and its data is tested with
//   ACT_REG_MODE(...) & MODE_BITSHUFFLE_FPGA.
// * With shuffling enabled, one word is taken from in_0, in_1, in_2, in_3 in
//   turn and written to `out` in that order, until a word with user set is
//   read from in_0.
// * With shuffling disabled, only in_0 is read and forwarded.
// * The word with user set (always read from in_0) is written last.
void bshuf4k_axis_combine(BSHUF_STREAM &in_0, BSHUF_STREAM &in_1, BSHUF_STREAM &in_2, BSHUF_STREAM &in_3,
                          STREAM_512 &out) {
    bshuf_packet src;
    packet_512_t dst;  // reused for every output word; only the four fields below are ever written

    // First word: forward and extract the mode flag.
    in_0 >> src;
    ap_uint<1> shuffle_enabled = (ACT_REG_MODE(src.data) & MODE_BITSHUFFLE_FPGA) ? 1 : 0;
    dst.id   = src.id;
    dst.user = src.user;
    dst.last = src.last;
    dst.data = src.data;
    out << dst;

    in_0 >> src;

    if (shuffle_enabled) {
        bitshuffle_loop:
        while (!src.user) {
#pragma HLS PIPELINE II=4
            // Word from in_0 (already read).
            dst.id   = src.id;
            dst.user = src.user;
            dst.last = src.last;
            dst.data = src.data;
            out << dst;

            in_1 >> src;
            dst.id   = src.id;
            dst.user = src.user;
            dst.last = src.last;
            dst.data = src.data;
            out << dst;

            in_2 >> src;
            dst.id   = src.id;
            dst.user = src.user;
            dst.last = src.last;
            dst.data = src.data;
            out << dst;

            in_3 >> src;
            dst.id   = src.id;
            dst.user = src.user;
            dst.last = src.last;
            dst.data = src.data;
            out << dst;

            in_0 >> src;
        }
    }

    // Runs only when shuffling is disabled: otherwise src.user is already set.
    forward_frames:
    while (!src.user) {
#pragma HLS PIPELINE II=1
        dst.id   = src.id;
        dst.user = src.user;
        dst.last = src.last;
        dst.data = src.data;
        out << dst;
        in_0 >> src;
    }

    // Final word (user set).
    dst.id   = src.id;
    dst.user = src.user;
    dst.last = src.last;
    dst.data = src.data;
    out << dst;
}
|
|
|
|
// Top level of the bitshuffle pipeline. Chains six stages through internal
// FIFOs:
//
//   data_in -> bshuf256 -> bshuf1k_axis_split -> bshuf1k_axis_combine
//           -> bshuf1k_shuffle -> bshuf4k_axis_split -> bshuf4k_axis_combine
//           -> data_out
//
// For synthesis the stages are called in sequence inside a DATAFLOW region.
// When JFJOCH_HLS_NOSYNTH is defined, each stage runs in its own std::thread
// and all threads are joined before returning (presumably because the stages
// block on each other's streams and cannot be run one after another in plain
// C++ - confirm against the stream implementation used for that build).
void bitshuffle(STREAM_512 &data_in, STREAM_512 &data_out) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE axis register both port=data_in
#pragma HLS INTERFACE axis register both port=data_out
#pragma HLS DATAFLOW

    // bshuf256 -> bshuf1k_axis_split
    BSHUF_STREAM fifo_0;
#pragma HLS STREAM variable=fifo_0 depth=3
#pragma HLS RESOURCE variable=fifo_0 core=FIFO_SRL

    // bshuf1k_axis_split -> bshuf1k_axis_combine (one FIFO per dest value)
    BSHUF_STREAM fifo_1_0;
#pragma HLS STREAM variable=fifo_1_0 depth=8
#pragma HLS RESOURCE variable=fifo_1_0 core=FIFO_SRL
    BSHUF_STREAM fifo_1_1;
#pragma HLS STREAM variable=fifo_1_1 depth=8
#pragma HLS RESOURCE variable=fifo_1_1 core=FIFO_SRL
    BSHUF_STREAM fifo_1_2;
#pragma HLS STREAM variable=fifo_1_2 depth=8
#pragma HLS RESOURCE variable=fifo_1_2 core=FIFO_SRL
    BSHUF_STREAM fifo_1_3;
#pragma HLS STREAM variable=fifo_1_3 depth=8
#pragma HLS RESOURCE variable=fifo_1_3 core=FIFO_SRL

    // bshuf1k_axis_combine -> bshuf1k_shuffle
    BSHUF_STREAM fifo_2;
#pragma HLS STREAM variable=fifo_2 depth=3
#pragma HLS RESOURCE variable=fifo_2 core=FIFO_SRL

    // bshuf1k_shuffle -> bshuf4k_axis_split
    BSHUF_STREAM fifo_3;
#pragma HLS STREAM variable=fifo_3 depth=3
#pragma HLS RESOURCE variable=fifo_3 core=FIFO_SRL

    // bshuf4k_axis_split -> bshuf4k_axis_combine (one FIFO per dest value)
    BSHUF_STREAM fifo_4_0;
#pragma HLS STREAM variable=fifo_4_0 depth=32
    BSHUF_STREAM fifo_4_1;
#pragma HLS STREAM variable=fifo_4_1 depth=32
    BSHUF_STREAM fifo_4_2;
#pragma HLS STREAM variable=fifo_4_2 depth=32
    BSHUF_STREAM fifo_4_3;
#pragma HLS STREAM variable=fifo_4_3 depth=32

#ifndef JFJOCH_HLS_NOSYNTH
    bshuf256(data_in, fifo_0);
    bshuf1k_axis_split(fifo_0, fifo_1_0, fifo_1_1, fifo_1_2, fifo_1_3);
    bshuf1k_axis_combine(fifo_1_0, fifo_1_1, fifo_1_2, fifo_1_3, fifo_2);
    bshuf1k_shuffle(fifo_2, fifo_3);
    bshuf4k_axis_split(fifo_3, fifo_4_0, fifo_4_1, fifo_4_2, fifo_4_3);
    bshuf4k_axis_combine(fifo_4_0, fifo_4_1, fifo_4_2, fifo_4_3, data_out);
#else
    // One thread per stage; the lambdas capture the streams by reference,
    // which is safe because every thread is joined before this function returns.
    std::vector<std::thread> stage_threads;

    stage_threads.emplace_back([&] { bshuf256(data_in, fifo_0); });
    stage_threads.emplace_back([&] { bshuf1k_axis_split(fifo_0, fifo_1_0, fifo_1_1, fifo_1_2, fifo_1_3); });
    stage_threads.emplace_back([&] { bshuf1k_axis_combine(fifo_1_0, fifo_1_1, fifo_1_2, fifo_1_3, fifo_2); });
    stage_threads.emplace_back([&] { bshuf1k_shuffle(fifo_2, fifo_3); });
    stage_threads.emplace_back([&] { bshuf4k_axis_split(fifo_3, fifo_4_0, fifo_4_1, fifo_4_2, fifo_4_3); });
    stage_threads.emplace_back([&] { bshuf4k_axis_combine(fifo_4_0, fifo_4_1, fifo_4_2, fifo_4_3, data_out); });

    for (auto &stage : stage_threads)
        stage.join();
#endif
}
|