Files
Jungfraujoch/fpga/hls/roi_calc.cpp
leonarski_f d315506633 * Enhancements for XFEL
* Enhancements for EIGER
* Writer is more flexible and capable of handling DECTRIS data
2024-03-05 20:41:47 +01:00

155 lines
5.8 KiB
C++

// Copyright (2019-2024) Paul Scherrer Institute
#include "hls_jfjoch.h"
// Converts `input` to single precision with input.to_float(), stores that value
// in the `f` member of a float_uint32 and returns what is then read back from
// its `u` member as a 32-bit word.
// NOTE(review): float_uint32 is declared outside this file; presumably it is a
// union overlaying a float and a 32-bit unsigned integer, which would make the
// result the IEEE-754 bit pattern of the value - confirm against its declaration.
template <class T>
inline ap_uint<32> float2int(T &input) {
    const auto single_precision = input.to_float();
    float_uint32 pun;
    pun.f = single_precision;
    return pun.u;
}
// Streams detector packets from data_in to data_out unchanged and, for every
// module announced on s_axis_completion, emits per-ROI statistics on roi_out.
//
// Sequence implemented below:
//   1. The first packet on data_in is forwarded to data_out before anything else.
//   2. For each completion whose `last` flag is not set:
//        - the completion is forwarded to m_axis_completion,
//        - all accumulators are reset,
//        - RAW_MODULE_SIZE / 32 packets (32 pixels each) are read, accumulated
//          and forwarded to data_out,
//        - FPGA_ROI_COUNT 256-bit words are written to roi_out (one per ROI).
//   3. The completion with `last` set is forwarded, followed by one more packet
//      copied from data_in to data_out.
//
// Layout of each 256-bit word on roi_out:
//   bits  63:0    sum of pixel values (signed)
//   bits 127:64   sum of squared pixel values (unsigned)
//   bits 159:128  sum of value * x coordinate, as 32-bit float bit pattern
//   bits 191:160  sum of value * y coordinate, as 32-bit float bit pattern
//   bits 223:192  number of pixels that entered the sums
//   bits 255:224  maximum pixel value (INT24_MIN/INT24_MAX widened to INT32_MIN/INT32_MAX)
//
// Parameters:
//   data_in / data_out   - pixel packet stream in / pass-through out
//   roi_out              - per-ROI result words
//   s_axis_completion    - incoming completions (cmpl.module selects the ROI map slice)
//   m_axis_completion    - completions forwarded downstream
//   d_hbm_p0 / d_hbm_p1  - memory ports the per-pixel ROI indices are read from
//   hbm_size_bytes       - used to derive the base offsets (30/32 and 31/32 of this value)
//
// NOTE(review): INT24_MIN / INT24_MAX are treated as special pixel values
// (presumably "bad pixel" and "saturated") - confirm against their definitions.
void roi_calc(STREAM_768 &data_in,
              STREAM_768 &data_out,
              hls::stream<ap_uint<256>> &roi_out,
              hls::stream<axis_completion > &s_axis_completion,
              hls::stream<axis_completion > &m_axis_completion,
              ap_uint<256> *d_hbm_p0,
              ap_uint<256> *d_hbm_p1,
              ap_uint<32> hbm_size_bytes) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE register both axis port=data_in
#pragma HLS INTERFACE register both axis port=data_out
#pragma HLS INTERFACE register both axis port=roi_out
#pragma HLS INTERFACE register both axis port=m_axis_completion
#pragma HLS INTERFACE register both axis port=s_axis_completion
#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
#pragma HLS INTERFACE register ap_none port=hbm_size_bytes

    // Partial accumulators: first dimension is one slot per (packet-in-iteration,
    // pixel-in-packet) pair, i.e. 2 * 32 = 64 slots, fully partitioned; the second
    // dimension is the ROI index. The slots are reduced into one value per ROI
    // when the module has been processed.
    ap_int<24+14> roi_sum[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum type=complete dim=1
    ap_uint<24+24+14> roi_sum2[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum2 type=complete dim=1
    ap_int<24+14+11> roi_x_weighted_sum[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_x_weighted_sum type=complete dim=1
    ap_int<24+14+11> roi_y_weighted_sum[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_y_weighted_sum type=complete dim=1
    ap_uint<14> roi_good_pixels[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_good_pixels type=complete dim=1
    ap_int<24> roi_max_value[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_max_value type=complete dim=1

    packet_768_t packet;
    {
        // Forward the very first packet with a fixed read -> write ordering.
#pragma HLS PROTOCOL fixed
        data_in >> packet;
        ap_wait();
        data_out << packet;
        ap_wait();
    }

    // Base offsets of the ROI maps, in units of 256-bit words of the respective port.
    ap_uint<32> offset_hbm_0 = 30 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_1 = 31 * hbm_size_bytes / 32;

    axis_completion cmpl;
    s_axis_completion >> cmpl;
    while (!cmpl.last) {
        m_axis_completion << cmpl;

        // Reset all partial accumulators for the new module.
        for (int i = 0; i < FPGA_ROI_COUNT; i++) {
#pragma HLS PIPELINE II=1
            for (int j = 0; j < 64; j++) {
                roi_sum[j][i] = 0;
                roi_sum2[j][i] = 0;
                roi_x_weighted_sum[j][i] = 0;
                roi_y_weighted_sum[j][i] = 0;
                roi_good_pixels[j][i] = 0;
                roi_max_value[j][i] = INT24_MIN;
            }
        }

        // Two packets are handled per pipelined iteration (k = 0, 1); each uses
        // its own set of 32 accumulator slots.
        for (int i = 0; i < RAW_MODULE_SIZE / 32 / 2; i++) {
#pragma HLS PIPELINE II=2
            for (int k = 0; k < 2; k++) {
                data_in >> packet;

                // 32 packets of 32 pixels make up one line.
                // `line` is 10 bits wide on purpose: after the multipixel gap
                // correction below, lines 510 and 511 become 512 and 513, which
                // would wrap to 0 and 1 in a 9-bit variable and corrupt the
                // y-weighted sum.
                ap_uint<10> line = (2 * i + k) / 32;
                ap_uint<10> col = (2 * i + k) % 32 * 32;

                // account for multipixel
                line += (line / 256) * 2;
                col += (col / 256) * 2;

                // ROI index of each of the 32 pixels, assembled from both memory ports.
                ap_uint<16> roi[32];
                ap_uint<256> roi_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + 2 * i + k];
                ap_uint<256> roi_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + 2 * i + k];
                unpack_2xhbm_to_32x16bit(roi_0, roi_1, roi);

                ap_int<24> in_val[32];
                unpack32(packet.data, in_val);

                for (int pxl = 0; pxl < 32; pxl++) {
                    // Pixels with an out-of-range ROI index or the INT24_MIN
                    // value are ignored entirely.
                    if ((roi[pxl] < FPGA_ROI_COUNT) && (in_val[pxl] != INT24_MIN)) {
                        // INT24_MAX pixels are kept out of the sums and the
                        // pixel count, but still take part in the maximum.
                        if (in_val[pxl] != INT24_MAX) {
                            roi_sum[32 * k + pxl][roi[pxl]] += in_val[pxl];
                            roi_sum2[32 * k + pxl][roi[pxl]] += in_val[pxl] * in_val[pxl];
                            roi_x_weighted_sum[32 * k + pxl][roi[pxl]] += in_val[pxl] * (col + pxl);
                            roi_y_weighted_sum[32 * k + pxl][roi[pxl]] += in_val[pxl] * line;
                            roi_good_pixels[32 * k + pxl][roi[pxl]] += 1;
                        }
                        if (roi_max_value[32 * k + pxl][roi[pxl]] < in_val[pxl])
                            roi_max_value[32 * k + pxl][roi[pxl]] = in_val[pxl];
                    }
                }
                data_out << packet;
            }
        }

        // Reduce the 64 partial accumulators of every ROI and emit one result word.
        for (int i = 0; i < FPGA_ROI_COUNT; i++) {
#pragma HLS PIPELINE II=1
            ap_uint<256> packet_out = 0;
            ap_int<64> sum_tmp = 0;
            ap_uint<64> sum2_tmp = 0;
            ap_int<64> sum_x_tmp = 0;
            ap_int<64> sum_y_tmp = 0;
            ap_int<32> good_pixels_tmp = 0;
            ap_int<32> max_value = INT32_MIN;

            for (int j = 0; j < 64; j++) {
                sum_tmp += roi_sum[j][i];
                sum2_tmp += roi_sum2[j][i];
                sum_x_tmp += roi_x_weighted_sum[j][i];
                sum_y_tmp += roi_y_weighted_sum[j][i];
                good_pixels_tmp += roi_good_pixels[j][i];
                if (max_value < roi_max_value[j][i])
                    max_value = roi_max_value[j][i];
            }

            // Widen the 24-bit special values to their 32-bit counterparts.
            if (max_value == INT24_MIN)
                max_value = INT32_MIN;
            else if (max_value == INT24_MAX)
                max_value = INT32_MAX;

            packet_out( 63,   0) = sum_tmp;
            packet_out(127,  64) = sum2_tmp;
            packet_out(159, 128) = float2int(sum_x_tmp);
            packet_out(191, 160) = float2int(sum_y_tmp);
            packet_out(223, 192) = good_pixels_tmp;
            packet_out(255, 224) = max_value;
            roi_out << packet_out;
        }

        s_axis_completion >> cmpl;
    }

    // Forward the terminating completion and the packet that follows it.
    m_axis_completion << cmpl;
    data_in >> packet;
    data_out << packet;
}