* Enhancements for EIGER * Writer is more flexible and capable of handling DECTRIS data
155 lines
5.8 KiB
C++
155 lines
5.8 KiB
C++
// Copyright (2019-2024) Paul Scherrer Institute
|
|
|
|
#include "hls_jfjoch.h"
|
|
|
|
template <class T>
|
|
inline ap_uint<32> float2int(T &input) {
|
|
float_uint32 conv;
|
|
conv.f = input.to_float();
|
|
return conv.u;
|
|
}
|
|
|
|
/// Pass-through streaming kernel that additionally computes per-ROI statistics.
///
/// Every packet read from `data_in` is forwarded unchanged to `data_out`, and every
/// completion read from `s_axis_completion` is forwarded to `m_axis_completion`.
/// For each completion whose `last` flag is not set, RAW_MODULE_SIZE / 32 packets
/// (32 pixels of 24 bits each, see unpack32) are consumed. The ROI index of each pixel
/// is read from `d_hbm_p0` / `d_hbm_p1` (16-bit entries, see unpack_2xhbm_to_32x16bit).
/// After the module has been processed, one 256-bit word per ROI is written to `roi_out`:
///
///   bits  63..0    sum of pixel values (signed)
///   bits 127..64   sum of squared pixel values
///   bits 159..128  float bits of the sum of value * x, where x = col + pixel index
///   bits 191..160  float bits of the sum of value * y, where y = line
///   bits 223..192  number of pixels included in the sums
///   bits 255..224  maximum pixel value (INT24_MIN / INT24_MAX widened to INT32_MIN / INT32_MAX)
///
/// Pixels equal to INT24_MIN are ignored entirely. Pixels equal to INT24_MAX are left
/// out of the sums and the pixel count but still take part in the maximum.
/// NOTE(review): presumably these are the "masked" and "saturated" markers - confirm.
/// Pixels whose ROI entry is >= FPGA_ROI_COUNT are skipped.
///
/// @param data_in            input pixel stream (one leading packet, module data, one trailing packet)
/// @param data_out           copy of `data_in`
/// @param roi_out            per-ROI result words, FPGA_ROI_COUNT words per processed module
/// @param s_axis_completion  incoming completions; the one with `last` set terminates the loop
/// @param m_axis_completion  copy of `s_axis_completion`
/// @param d_hbm_p0           memory port holding the first half of the ROI map words
/// @param d_hbm_p1           memory port holding the second half of the ROI map words
/// @param hbm_size_bytes     used to derive the ROI map base offsets (30/32 and 31/32 of this value)
void roi_calc(STREAM_768 &data_in,
              STREAM_768 &data_out,
              hls::stream<ap_uint<256>> &roi_out,
              hls::stream<axis_completion > &s_axis_completion,
              hls::stream<axis_completion > &m_axis_completion,
              ap_uint<256> *d_hbm_p0,
              ap_uint<256> *d_hbm_p1,
              ap_uint<32> hbm_size_bytes) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE register both axis port=data_in
#pragma HLS INTERFACE register both axis port=data_out
#pragma HLS INTERFACE register both axis port=roi_out
#pragma HLS INTERFACE register both axis port=m_axis_completion
#pragma HLS INTERFACE register both axis port=s_axis_completion

#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=16384 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8

#pragma HLS INTERFACE register ap_none port=hbm_size_bytes

    // Partial accumulators: the first dimension is one of 64 lanes (2 packets x 32 pixels
    // handled per pipeline iteration), fully partitioned so that all lanes update in
    // parallel. The second dimension is the ROI index.
    ap_int<24+14> roi_sum[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum type=complete dim=1
    ap_uint<24+24+14> roi_sum2[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_sum2 type=complete dim=1
    ap_int<24+14+11> roi_x_weighted_sum[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_x_weighted_sum type=complete dim=1
    ap_int<24+14+11> roi_y_weighted_sum[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_y_weighted_sum type=complete dim=1
    ap_uint<14> roi_good_pixels[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_good_pixels type=complete dim=1
    ap_int<24> roi_max_value[32*2][FPGA_ROI_COUNT];
#pragma HLS ARRAY_PARTITION variable=roi_max_value type=complete dim=1

    // Forward the leading packet before any module data is handled.
    packet_768_t packet;
    {
#pragma HLS PROTOCOL fixed
        data_in >> packet;
        ap_wait();
        data_out << packet;
        ap_wait();
    }

    // Base offsets of the ROI map, expressed in 256-bit words (hbm_size_bytes / 32).
    ap_uint<32> offset_hbm_0 = 30 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_1 = 31 * hbm_size_bytes / 32;

    axis_completion cmpl;
    s_axis_completion >> cmpl;
    while (!cmpl.last) {
        m_axis_completion << cmpl;

        // Reset all lane accumulators for the new module.
        for (int i = 0; i < FPGA_ROI_COUNT; i++) {
#pragma HLS PIPELINE II=1
            for (int j = 0; j < 64; j++) {
                roi_sum[j][i] = 0;
                roi_sum2[j][i] = 0;
                roi_x_weighted_sum[j][i] = 0;
                roi_y_weighted_sum[j][i] = 0;
                roi_good_pixels[j][i] = 0;
                roi_max_value[j][i] = INT24_MIN;
            }
        }

        // Two packets per iteration; packet k uses lanes 32*k .. 32*k+31.
        for (int i = 0; i < RAW_MODULE_SIZE / 32 / 2; i++) {
#pragma HLS PIPELINE II=2
            for (int k = 0; k < 2; k++) {
                data_in >> packet;

                // 32 packets of 32 pixels form one detector line.
                // `line` must be 10 bits wide: the multipixel correction below adds 2 to
                // every line >= 256, so raw lines 510 and 511 become 512 and 513. With the
                // previous ap_uint<9> these wrapped around to 0 and 1 and corrupted the
                // y-weighted sum of the last two lines.
                ap_uint<10> line = (2 * i + k) / 32;
                ap_uint<10> col = (2 * i + k) % 32 * 32;

                // account for multipixel
                line += (line / 256) * 2;
                col += (col / 256) * 2;

                // ROI index of each of the 32 pixels in this packet.
                ap_uint<16> roi[32];
                ap_uint<256> roi_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + 2 * i + k];
                ap_uint<256> roi_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + 2 * i + k];
                unpack_2xhbm_to_32x16bit(roi_0, roi_1, roi);

                ap_int<24> in_val[32];
                unpack32(packet.data, in_val);

                for (int pxl = 0; pxl < 32; pxl++) {
                    // Skip pixels outside any ROI and pixels carrying INT24_MIN.
                    if ((roi[pxl] < FPGA_ROI_COUNT) && (in_val[pxl] != INT24_MIN)) {
                        // INT24_MAX pixels are excluded from sums and count ...
                        if (in_val[pxl] != INT24_MAX) {
                            roi_sum[32 * k + pxl][roi[pxl]] += in_val[pxl];
                            roi_sum2[32 * k + pxl][roi[pxl]] += in_val[pxl] * in_val[pxl];
                            roi_x_weighted_sum[32 * k + pxl][roi[pxl]] += in_val[pxl] * (col + pxl);
                            roi_y_weighted_sum[32 * k + pxl][roi[pxl]] += in_val[pxl] * line;
                            roi_good_pixels[32 * k + pxl][roi[pxl]] += 1;
                        }
                        // ... but still contribute to the maximum.
                        if (roi_max_value[32 * k + pxl][roi[pxl]] < in_val[pxl])
                            roi_max_value[32 * k + pxl][roi[pxl]] = in_val[pxl];
                    }
                }
                data_out << packet;
            }
        }

        // Reduce the 64 lanes of every ROI and emit one result word per ROI.
        for (int i = 0; i < FPGA_ROI_COUNT; i++) {
#pragma HLS PIPELINE II=1
            ap_uint<256> packet_out = 0;

            ap_int<64> sum_tmp = 0;
            ap_uint<64> sum2_tmp = 0;
            ap_int<64> sum_x_tmp = 0;
            ap_int<64> sum_y_tmp = 0;
            ap_int<32> good_pixels_tmp = 0;
            ap_int<32> max_value = INT32_MIN;

            for (int j = 0; j < 64; j++) {
                sum_tmp += roi_sum[j][i];
                sum2_tmp += roi_sum2[j][i];
                sum_x_tmp += roi_x_weighted_sum[j][i];
                sum_y_tmp += roi_y_weighted_sum[j][i];
                good_pixels_tmp += roi_good_pixels[j][i];
                if (max_value < roi_max_value[j][i])
                    max_value = roi_max_value[j][i];
            }

            // Widen the 24-bit special values to their 32-bit counterparts.
            if (max_value == INT24_MIN)
                max_value = INT32_MIN;
            else if (max_value == INT24_MAX)
                max_value = INT32_MAX;

            packet_out( 63,   0) = sum_tmp;
            packet_out(127,  64) = sum2_tmp;
            packet_out(159, 128) = float2int(sum_x_tmp);
            packet_out(191, 160) = float2int(sum_y_tmp);
            packet_out(223, 192) = good_pixels_tmp;
            packet_out(255, 224) = max_value;

            roi_out << packet_out;
        }

        s_axis_completion >> cmpl;
    }
    // Forward the terminating completion (the one with `last` set).
    m_axis_completion << cmpl;

    // Forward the trailing packet.
    data_in >> packet;
    data_out << packet;
}
|