// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute // SPDX-License-Identifier: CERN-OHL-S-2.0 #include "hls_jfjoch.h" void roi_calc(STREAM_768 &data_in, STREAM_768 &data_out, hls::stream> &roi_out, hls::stream &s_axis_completion, hls::stream &m_axis_completion, ap_uint<256> *d_hbm_p0, ap_uint<256> *d_hbm_p1, ap_uint<32> hbm_size_bytes) { #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS INTERFACE register both axis port=data_in #pragma HLS INTERFACE register both axis port=data_out #pragma HLS INTERFACE register both axis port=roi_out #pragma HLS INTERFACE register both axis port=m_axis_completion #pragma HLS INTERFACE register both axis port=s_axis_completion #pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=16384 offset=off \ max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8 #pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=16384 offset=off \ max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8 #pragma HLS INTERFACE register ap_none port=hbm_size_bytes ap_int<64> roi_sum[FPGA_ROI_COUNT]; #pragma HLS ARRAY_PARTITION variable=roi_sum type=complete ap_uint<64> roi_sum2[FPGA_ROI_COUNT]; #pragma HLS ARRAY_PARTITION variable=roi_sum2 type=complete ap_uint<32> roi_good_pixels[FPGA_ROI_COUNT]; #pragma HLS ARRAY_PARTITION variable=roi_good_pixels type=complete ap_int<24> roi_max_value[FPGA_ROI_COUNT]; #pragma HLS ARRAY_PARTITION variable=roi_max_value type=complete ap_int<64> roi_x_weighted_sum[FPGA_ROI_COUNT]; #pragma HLS ARRAY_PARTITION variable=roi_x_weighted_sum type=complete dim=1 ap_int<64> roi_y_weighted_sum[FPGA_ROI_COUNT]; #pragma HLS ARRAY_PARTITION variable=roi_y_weighted_sum type=complete dim=1 packet_768_t packet; { #pragma HLS PROTOCOL fixed data_in >> packet; ap_wait(); data_out << packet; ap_wait(); } ap_uint<32> offset_hbm_0 = 18 * hbm_size_bytes / 32; ap_uint<32> offset_hbm_1 = 19 * hbm_size_bytes / 32; axis_completion cmpl; s_axis_completion >> cmpl; while (!cmpl.last) { m_axis_completion << cmpl; for (int r = 0; r < FPGA_ROI_COUNT; r++) { #pragma HLS PIPELINE II=1 roi_sum[r] = 0; roi_sum2[r] = 0; roi_x_weighted_sum[r] = 0; roi_y_weighted_sum[r] = 0; roi_good_pixels[r] = 0; roi_max_value[r] = INT24_MIN; } for (int i = 0; i < RAW_MODULE_SIZE / 32; i++) { #pragma HLS PIPELINE II=1 data_in >> packet; ap_uint<9> line = i / 32; ap_uint<10> col = i % 32 * 32; // account for multipixel line += (line / 256) * 2; col += (col / 256) * 2; ap_uint<16> roi[32]; ap_uint<256> roi_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i]; ap_uint<256> roi_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i ]; unpack_2xhbm_to_32x16bit(roi_0, roi_1, roi); ap_int<24> in_val[32]; unpack32(packet.data, in_val); ap_int<32> sum_tmp[FPGA_ROI_COUNT]; ap_uint<64> sum2_tmp[FPGA_ROI_COUNT]; ap_uint<32> good_pixels_tmp[FPGA_ROI_COUNT]; ap_int<24> max_value[FPGA_ROI_COUNT]; ap_int<64> weigh_x[FPGA_ROI_COUNT]; ap_int<64> weigh_y[FPGA_ROI_COUNT]; for (int r = 0; r < FPGA_ROI_COUNT; r++) { sum_tmp[r] = 0; sum2_tmp[r] = 0; weigh_x[r] = 0; weigh_y[r] = 0; good_pixels_tmp[r] = 0; max_value[r] = INT24_MIN; } for (int pxl = 0; pxl < 32; pxl++) { ap_int<48> val_sq = in_val[pxl] * in_val[pxl]; ap_int<34> x = in_val[pxl] * (col + pxl); ap_int<34> y = in_val[pxl] * line; for (int r = 0; r < FPGA_ROI_COUNT; r++) { if (roi[pxl][r] && (in_val[pxl] != INT24_MIN) && packet.strb[pxl]) { if (in_val[pxl] != INT24_MAX) { sum_tmp[r] += in_val[pxl]; sum2_tmp[r] += val_sq; good_pixels_tmp[r] += 1; weigh_x[r] += x; weigh_y[r] += y; } if (max_value[r] < in_val[pxl]) max_value[r] = in_val[pxl]; } } } for (int r = 0; r < FPGA_ROI_COUNT; r++) { roi_sum[r] += sum_tmp[r]; roi_sum2[r] += sum2_tmp[r] ; roi_good_pixels[r] += good_pixels_tmp[r]; roi_x_weighted_sum[r] += weigh_x[r]; roi_y_weighted_sum[r] += weigh_y[r]; if (max_value[r] > roi_max_value[r]) roi_max_value[r] = max_value[r]; } data_out << packet; } for (int i = 0; i < FPGA_ROI_COUNT; i++) { #pragma HLS PIPELINE II=1 ap_uint<512> packet_out = 0; packet_out( 63, 0) = roi_sum[i]; packet_out(127, 64) = roi_sum2[i]; packet_out(191, 128) = roi_x_weighted_sum[i]; packet_out(255, 192) = roi_y_weighted_sum[i]; packet_out(265+63, 256) = roi_good_pixels[i]; packet_out(256+127, 256+64) = roi_max_value[i]; roi_out << packet_out; } s_axis_completion >> cmpl; } m_axis_completion << cmpl; data_in >> packet; data_out << packet; }