156 lines
5.8 KiB
C++
156 lines
5.8 KiB
C++
// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: CERN-OHL-S-2.0
|
|
|
|
#include "hls_jfjoch.h"
|
|
|
|
/// Streaming region-of-interest (ROI) statistics stage.
///
/// Data flow, as implemented below:
///  1. One packet is read from `data_in` and forwarded unchanged to `data_out`
///     before any completion is handled (presumably a header/configuration
///     packet - confirm against the upstream stage).
///  2. Completions are read from `s_axis_completion` until one with `last` set
///     is seen. Every completion (including the final one) is forwarded to
///     `m_axis_completion`.
///  3. For every non-last completion, RAW_MODULE_SIZE / 32 packets of 32 pixels
///     are read from `data_in`, forwarded unchanged to `data_out`, and folded
///     into per-ROI accumulators. ROI membership of each pixel is a 16-bit mask
///     fetched from HBM (bit r set => pixel belongs to ROI r).
///  4. After the module, one 512-bit word per ROI is written to `roi_out`:
///       bits  63:0    sum of pixel values
///       bits 127:64   sum of squared pixel values
///       bits 191:128  sum of value * x coordinate
///       bits 255:192  sum of value * y coordinate
///       bits 319:256  number of pixels accumulated
///       bits 383:320  maximum pixel value
///       bits 511:384  zero
///  5. After the last completion one more packet is forwarded from `data_in`
///     to `data_out`.
///
/// @param data_in            input pixel stream (32 pixels per packet)
/// @param data_out           pass-through copy of `data_in`
/// @param roi_out            per-ROI result words, FPGA_ROI_COUNT per module
/// @param s_axis_completion  incoming completion stream
/// @param m_axis_completion  outgoing completion stream (pass-through)
/// @param d_hbm_p0           HBM port holding one half of the ROI mask words
/// @param d_hbm_p1           HBM port holding the other half of the ROI mask words
/// @param hbm_size_bytes     used to derive the ROI mask base offsets
///                           (18/32 and 19/32 of this value, in 256-bit word units
///                           as indexed through the pointers)
void roi_calc(STREAM_768 &data_in,
              STREAM_768 &data_out,
              hls::stream<ap_uint<512>> &roi_out,
              hls::stream<axis_completion > &s_axis_completion,
              hls::stream<axis_completion > &m_axis_completion,
              ap_uint<256> *d_hbm_p0,
              ap_uint<256> *d_hbm_p1,
              ap_uint<32> hbm_size_bytes) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE register both axis port=data_in
#pragma HLS INTERFACE register both axis port=data_out
#pragma HLS INTERFACE register both axis port=roi_out
#pragma HLS INTERFACE register both axis port=m_axis_completion
#pragma HLS INTERFACE register both axis port=s_axis_completion

#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8

#pragma HLS INTERFACE register ap_none port=hbm_size_bytes

    // Per-module accumulators, one entry per ROI; fully partitioned so that
    // all ROIs can be updated in the same cycle.
    ap_int<64> roi_sum[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum type=complete
    ap_uint<64> roi_sum2[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum2 type=complete
    ap_uint<32> roi_good_pixels[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_good_pixels type=complete
    ap_int<24> roi_max_value[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_max_value type=complete
    ap_int<64> roi_x_weighted_sum[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_x_weighted_sum type=complete dim=1
    ap_int<64> roi_y_weighted_sum[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_y_weighted_sum type=complete dim=1

    // Forward the very first packet untouched, before any completion is read.
    packet_768_t packet;
    {
#pragma HLS PROTOCOL fixed
        data_in >> packet;
        ap_wait();
        data_out << packet;
        ap_wait();
    }

    // Base offsets of the ROI mask tables on the two HBM ports.
    ap_uint<32> offset_hbm_0 = 18 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_1 = 19 * hbm_size_bytes / 32;

    axis_completion cmpl;
    s_axis_completion >> cmpl;
    while (!cmpl.last) {
        m_axis_completion << cmpl;

        // Reset accumulators for this module.
        for (int r = 0; r < FPGA_ROI_COUNT; r++) {
#pragma HLS PIPELINE II=1
            roi_sum[r] = 0;
            roi_sum2[r] = 0;
            roi_x_weighted_sum[r] = 0;
            roi_y_weighted_sum[r] = 0;
            roi_good_pixels[r] = 0;
            roi_max_value[r] = INT24_MIN;
        }

        for (int i = 0; i < RAW_MODULE_SIZE / 32; i++) {
#pragma HLS PIPELINE II=1
            data_in >> packet;

            // 32 packets of 32 pixels make one line; `col` is the column of
            // the first pixel in this packet.
            // `line` is 10 bits wide: after the multipixel shift below the
            // row index of a 512-line module reaches 513, which would wrap
            // to 1 in a 9-bit variable (assumes RAW_MODULE_SIZE is 512 x 1024
            // pixels - confirm).
            ap_uint<10> line = i / 32;
            ap_uint<10> col = i % 32 * 32;

            // account for multipixel: shift the coordinate by 2 for every
            // full block of 256 rows/columns that precedes it
            line += (line / 256) * 2;
            col += (col / 256) * 2;

            // ROI masks of the 32 pixels in this packet, read from both HBM ports.
            ap_uint<16> roi[32];
            ap_uint<256> roi_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i];
            ap_uint<256> roi_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i];
            unpack_2xhbm_to_32x16bit(roi_0, roi_1, roi);

            ap_int<24> in_val[32];
            unpack32(packet.data, in_val);

            // Partial results for this packet only; merged into the module
            // accumulators once per iteration.
            ap_int<32> sum_tmp[FPGA_ROI_COUNT];
            ap_uint<64> sum2_tmp[FPGA_ROI_COUNT];
            ap_uint<32> good_pixels_tmp[FPGA_ROI_COUNT];
            ap_int<24> max_value[FPGA_ROI_COUNT];
            ap_int<64> weigh_x[FPGA_ROI_COUNT];
            ap_int<64> weigh_y[FPGA_ROI_COUNT];

            for (int r = 0; r < FPGA_ROI_COUNT; r++) {
                sum_tmp[r] = 0;
                sum2_tmp[r] = 0;
                weigh_x[r] = 0;
                weigh_y[r] = 0;
                good_pixels_tmp[r] = 0;
                max_value[r] = INT24_MIN;
            }

            for (int pxl = 0; pxl < 32; pxl++) {
                ap_int<48> val_sq = in_val[pxl] * in_val[pxl];
                // 35 bits: the column index reaches 998 + 31 = 1029, so a
                // 24-bit signed value times the column can exceed the range
                // of a 34-bit signed integer.
                ap_int<35> x = in_val[pxl] * (col + pxl);
                ap_int<34> y = in_val[pxl] * line;

                for (int r = 0; r < FPGA_ROI_COUNT; r++) {
                    // Pixel contributes only if it is in ROI r, is not INT24_MIN
                    // and its strobe bit is set.
                    if (roi[pxl][r] && (in_val[pxl] != INT24_MIN) && packet.strb[pxl]) {
                        // INT24_MAX pixels are kept out of the sums and the
                        // pixel count, but still take part in the maximum
                        // (presumably the saturation marker - confirm).
                        if (in_val[pxl] != INT24_MAX) {
                            sum_tmp[r] += in_val[pxl];
                            sum2_tmp[r] += val_sq;
                            good_pixels_tmp[r] += 1;
                            weigh_x[r] += x;
                            weigh_y[r] += y;
                        }
                        if (max_value[r] < in_val[pxl])
                            max_value[r] = in_val[pxl];
                    }
                }
            }

            // Merge this packet's partial results into the module accumulators.
            for (int r = 0; r < FPGA_ROI_COUNT; r++) {
                roi_sum[r] += sum_tmp[r];
                roi_sum2[r] += sum2_tmp[r];
                roi_good_pixels[r] += good_pixels_tmp[r];
                roi_x_weighted_sum[r] += weigh_x[r];
                roi_y_weighted_sum[r] += weigh_y[r];
                if (max_value[r] > roi_max_value[r])
                    roi_max_value[r] = max_value[r];
            }
            data_out << packet;
        }

        // Emit one result word per ROI; six 64-bit fields, upper 128 bits zero.
        for (int i = 0; i < FPGA_ROI_COUNT; i++) {
#pragma HLS PIPELINE II=1
            ap_uint<512> packet_out = 0;

            packet_out( 63,   0) = roi_sum[i];
            packet_out(127,  64) = roi_sum2[i];
            packet_out(191, 128) = roi_x_weighted_sum[i];
            packet_out(255, 192) = roi_y_weighted_sum[i];
            packet_out(256+63, 256) = roi_good_pixels[i];
            packet_out(256+127, 256+64) = roi_max_value[i];

            roi_out << packet_out;
        }

        s_axis_completion >> cmpl;
    }
    // Forward the terminating completion as well.
    m_axis_completion << cmpl;

    // Forward one trailing packet after the last module.
    data_in >> packet;
    data_out << packet;
}
|