Files
Jungfraujoch/fpga/hls/roi_calc.cpp
2025-04-14 11:52:06 +02:00

156 lines
5.8 KiB
C++

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: CERN-OHL-S-2.0
#include "hls_jfjoch.h"
// Streaming kernel that forwards pixel packets unchanged from data_in to data_out while
// accumulating per-ROI statistics for every module announced on s_axis_completion.
//
// For each completion that is not marked `last`, the kernel:
//   1. forwards the completion to m_axis_completion,
//   2. clears the per-ROI accumulators,
//   3. consumes RAW_MODULE_SIZE / 32 packets (32 pixel values each), reading the matching
//      ROI membership masks from the two HBM ports,
//   4. emits FPGA_ROI_COUNT 512-bit result words on roi_out.
//
// Parameters:
//   data_in / data_out  - pixel packet stream; every packet read is written out unmodified.
//   roi_out             - one 512-bit word per ROI per module, laid out as:
//                           [ 63:  0] sum of pixel values
//                           [127: 64] sum of squared pixel values
//                           [191:128] sum of value * x
//                           [255:192] sum of value * y
//                           [319:256] number of pixels included in the sums
//                           [383:320] maximum pixel value
//                           [511:384] zero
//   s_axis_completion   - incoming completions; one with `last` set ends processing.
//   m_axis_completion   - every incoming completion (including the last one) is forwarded here.
//   d_hbm_p0 / d_hbm_p1 - HBM ports holding the ROI masks, addressed in 256-bit words.
//   hbm_size_bytes      - presumably the size in bytes of one HBM region; it is divided by 32
//                         (bytes per 256-bit word) to form the word offsets used below
//                         (TODO confirm against the memory map).
void roi_calc(STREAM_768 &data_in,
              STREAM_768 &data_out,
              hls::stream<ap_uint<512>> &roi_out,
              hls::stream<axis_completion > &s_axis_completion,
              hls::stream<axis_completion > &m_axis_completion,
              ap_uint<256> *d_hbm_p0,
              ap_uint<256> *d_hbm_p1,
              ap_uint<32> hbm_size_bytes) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE register both axis port=data_in
#pragma HLS INTERFACE register both axis port=data_out
#pragma HLS INTERFACE register both axis port=roi_out
#pragma HLS INTERFACE register both axis port=m_axis_completion
#pragma HLS INTERFACE register both axis port=s_axis_completion
#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
#pragma HLS INTERFACE register ap_none port=hbm_size_bytes

    // Per-module accumulators, one entry per ROI. Fully partitioned so that all ROIs
    // can be updated within the same loop iteration.
    ap_int<64> roi_sum[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum type=complete
    ap_uint<64> roi_sum2[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum2 type=complete
    ap_uint<32> roi_good_pixels[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_good_pixels type=complete
    ap_int<24> roi_max_value[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_max_value type=complete
    ap_int<64> roi_x_weighted_sum[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_x_weighted_sum type=complete dim=1
    ap_int<64> roi_y_weighted_sum[FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_y_weighted_sum type=complete dim=1

    packet_768_t packet;

    // The first packet on the stream is passed through before any completion is read;
    // it is not included in the statistics.
    {
#pragma HLS PROTOCOL fixed
        data_in >> packet;
        ap_wait();
        data_out << packet;
        ap_wait();
    }

    // Word offsets (256-bit words) of the ROI mask tables on each HBM port.
    ap_uint<32> offset_hbm_0 = 18 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_1 = 19 * hbm_size_bytes / 32;

    axis_completion cmpl;
    s_axis_completion >> cmpl;
    while (!cmpl.last) {
        m_axis_completion << cmpl;

        // Reset accumulators for the new module.
        for (int r = 0; r < FPGA_ROI_COUNT; r++) {
#pragma HLS PIPELINE II=1
            roi_sum[r] = 0;
            roi_sum2[r] = 0;
            roi_x_weighted_sum[r] = 0;
            roi_y_weighted_sum[r] = 0;
            roi_good_pixels[r] = 0;
            roi_max_value[r] = INT24_MIN;
        }

        // One iteration per packet of 32 pixels; 32 packets make up one line.
        for (int i = 0; i < RAW_MODULE_SIZE / 32; i++) {
#pragma HLS PIPELINE II=1
            data_in >> packet;

            // 10 bits are required for `line`: after the multipixel correction below the
            // value can exceed 511 (e.g. raw line 511 becomes 513), which would wrap
            // around in a 9-bit variable and corrupt the y-weighted sum.
            ap_uint<10> line = i / 32;
            ap_uint<10> col = i % 32 * 32;

            // Account for multipixels: shift coordinates by 2 for every 256-pixel
            // boundary that has been crossed.
            line += (line / 256) * 2;
            col += (col / 256) * 2;

            // ROI masks for the 32 pixels of this packet; bit r of roi[pxl] selects ROI r.
            // The same word index is read on both HBM ports and the two 256-bit words
            // are combined into 32 x 16-bit masks by unpack_2xhbm_to_32x16bit.
            ap_uint<16> roi[32];
            ap_uint<256> roi_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i];
            ap_uint<256> roi_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i ];
            unpack_2xhbm_to_32x16bit(roi_0, roi_1, roi);

            ap_int<24> in_val[32];
            unpack32(packet.data, in_val);

            // Partial results for this packet only; merged into the module
            // accumulators after all 32 pixels have been examined.
            ap_int<32> sum_tmp[FPGA_ROI_COUNT];
            ap_uint<64> sum2_tmp[FPGA_ROI_COUNT];
            ap_uint<32> good_pixels_tmp[FPGA_ROI_COUNT];
            ap_int<24> max_value[FPGA_ROI_COUNT];
            ap_int<64> weigh_x[FPGA_ROI_COUNT];
            ap_int<64> weigh_y[FPGA_ROI_COUNT];

            for (int r = 0; r < FPGA_ROI_COUNT; r++) {
                sum_tmp[r] = 0;
                sum2_tmp[r] = 0;
                weigh_x[r] = 0;
                weigh_y[r] = 0;
                good_pixels_tmp[r] = 0;
                max_value[r] = INT24_MIN;
            }

            for (int pxl = 0; pxl < 32; pxl++) {
                ap_int<48> val_sq = in_val[pxl] * in_val[pxl];
                // 35 bits: a 24-bit signed value multiplied by a column index of up to
                // 1029 can reach a magnitude above 2^33, which does not fit in 34 bits.
                ap_int<35> x = in_val[pxl] * (col + pxl);
                ap_int<34> y = in_val[pxl] * line;

                for (int r = 0; r < FPGA_ROI_COUNT; r++) {
                    // A pixel contributes only if it belongs to ROI r, is not INT24_MIN
                    // and its strobe bit is set.
                    if (roi[pxl][r] && (in_val[pxl] != INT24_MIN) && packet.strb[pxl]) {
                        // INT24_MAX pixels are left out of the sums and the pixel count,
                        // but still take part in the maximum search below.
                        if (in_val[pxl] != INT24_MAX) {
                            sum_tmp[r] += in_val[pxl];
                            sum2_tmp[r] += val_sq;
                            good_pixels_tmp[r] += 1;
                            weigh_x[r] += x;
                            weigh_y[r] += y;
                        }
                        if (max_value[r] < in_val[pxl])
                            max_value[r] = in_val[pxl];
                    }
                }
            }

            // Merge the packet-level partial results into the module accumulators.
            for (int r = 0; r < FPGA_ROI_COUNT; r++) {
                roi_sum[r] += sum_tmp[r];
                roi_sum2[r] += sum2_tmp[r] ;
                roi_good_pixels[r] += good_pixels_tmp[r];
                roi_x_weighted_sum[r] += weigh_x[r];
                roi_y_weighted_sum[r] += weigh_y[r];
                if (max_value[r] > roi_max_value[r])
                    roi_max_value[r] = max_value[r];
            }

            data_out << packet;
        }

        // Emit one result word per ROI (see the layout in the function header).
        for (int i = 0; i < FPGA_ROI_COUNT; i++) {
#pragma HLS PIPELINE II=1
            ap_uint<512> packet_out = 0;
            packet_out( 63,   0) = roi_sum[i];
            packet_out(127,  64) = roi_sum2[i];
            packet_out(191, 128) = roi_x_weighted_sum[i];
            packet_out(255, 192) = roi_y_weighted_sum[i];
            // Non-overlapping 64-bit fields: good pixel count in [319:256],
            // maximum value in [383:320].
            packet_out(256+63, 256) = roi_good_pixels[i];
            packet_out(256+127, 256+64) = roi_max_value[i];
            roi_out << packet_out;
        }

        s_axis_completion >> cmpl;
    }

    // Forward the terminating completion and pass through one final packet.
    m_axis_completion << cmpl;
    data_in >> packet;
    data_out << packet;
}