Files
Jungfraujoch/fpga/hls/pedestal.cpp
leonarski_f d315506633 * Enhancements for XFEL
* Enhancements for EIGER
* Writer is more flexible and capable of handling DECTRIS data
2024-03-05 20:41:47 +01:00

264 lines
12 KiB
C++

// Copyright (2019-2023) Paul Scherrer Institute
#include "hls_jfjoch.h"
// Total width, in bits, of one stored pedestal value. pedestal_g0_t below reserves 14 of
// these bits for the integer part, which leaves PEDESTAL_G0_PRECISION - 14 fractional bits.
// NOTE(review): the breakdown in the trailing comment adds up to 22 bits, not 24 - confirm
// whether the two extra fractional bits are intentional and update the comment accordingly.
#define PEDESTAL_G0_PRECISION 24 // 14-bit + 1-bit (fractional) + 7-bit (for 128 pixel window)
// Unsigned fixed-point pedestal value: 14 integer bits, AP_RND_CONV quantization mode.
typedef ap_ufixed<PEDESTAL_G0_PRECISION,14, AP_RND_CONV> pedestal_g0_t;
// 32 pedestal values stored back to back in one wide word (32 * PEDESTAL_G0_PRECISION bits).
typedef ap_uint<PEDESTAL_G0_PRECISION * 32> packed_pedestal_g0_t;
// Serialises 32 fixed-point pedestal values into a single packed word.
// Value k occupies bits [k * PEDESTAL_G0_PRECISION, (k + 1) * PEDESTAL_G0_PRECISION) of `out`,
// with bit 0 of the value stored at the lowest position of its slot.
void pack(packed_pedestal_g0_t& out, pedestal_g0_t in[32]) {
#pragma HLS INLINE
    for (int value = 0; value < 32; value++) {
        // Lowest bit of the slot that belongs to this value inside the packed word
        const int slot_lsb = value * PEDESTAL_G0_PRECISION;
        for (int bit = 0; bit < PEDESTAL_G0_PRECISION; bit++)
            out[slot_lsb + bit] = in[value][bit];
    }
}
// Inverse of pack(): splits a packed word into its 32 fixed-point pedestal values.
// Slot k of `in` (PEDESTAL_G0_PRECISION bits wide, starting at bit k * PEDESTAL_G0_PRECISION)
// is copied bit for bit into out[k].
inline void unpack(packed_pedestal_g0_t in, pedestal_g0_t out[32]) {
#pragma HLS INLINE
    for (int value = 0; value < 32; value++) {
        // Lowest bit of the slot holding this value inside the packed word
        const int slot_lsb = value * PEDESTAL_G0_PRECISION;
        for (int bit = 0; bit < PEDESTAL_G0_PRECISION; bit++)
            out[value][bit] = in[slot_lsb + bit];
    }
}
// Converts a packed word of 32 fixed-point pedestal values into 32 unsigned 16-bit
// integers laid out in a 512-bit word (value k in bits [16k, 16k + 15]).
// Each value is rounded by adding 0.5 and keeping the integer part; any stored value
// above 16383 is reported as 16384 instead (update_pedestal() in this file stores 16383.5
// for pixels it rejects, so those end up as 16384 here).
ap_uint<512> pack_and_reduce(const packed_pedestal_g0_t &in) {
#pragma HLS INLINE
    ap_uint<512> reduced;

    pedestal_g0_t values[32];
    unpack(in, values);

    for (int px = 0; px < 32; px++) {
        // Conversion to an integer type keeps only the integer part of (value + 0.5)
        ap_uint<16> rounded = values[px] + pedestal_g0_t(0.5);
        if (values[px] > 16383)
            rounded = 16384;

        const int lane_lsb = px * 16;
        reduced(lane_lsb + 15, lane_lsb) = rounded;
    }
    return reduced;
}
// Updates 32 pedestal values with one 512-bit packet of detector data.
//
// data_in            - 32 lanes of 16 bits; in each lane the top two bits are the gain code
//                      and the low 14 bits are the ADU value
// packed_pedestal_in - current pedestal values for the same 32 pixels (see pack()/unpack())
// accumulate         - non-zero: add adu / PEDESTAL_WINDOW_SIZE to the stored value;
//                      zero: move the stored value towards adu by
//                      (adu - pedestal) / PEDESTAL_WINDOW_SIZE
// mode               - bit mask tested against MODE_PEDESTAL_G0 / _G1 / _G2
//
// A pixel is updated only if its gain code matches an enabled mode (code 0 with G0,
// code 1 with G1, code 3 with G2) and its stored value is still below 16383.25.
// Otherwise the stored value is set to 16383.5; since that is not below 16383.25 the
// pixel stays at 16383.5 for every later call.
//
// Returns the updated values in packed form.
packed_pedestal_g0_t update_pedestal(ap_uint<512> data_in,
                                     const packed_pedestal_g0_t packed_pedestal_in,
                                     ap_uint<1> accumulate, ap_uint<8> mode) {
#pragma HLS INLINE
    pedestal_g0_t pedestal[32];
    unpack(packed_pedestal_in, pedestal);

    for (int px = 0; px < 32; px++) {
        const int lane_lsb = 16 * px;
        ap_uint<2> gain = data_in(lane_lsb + 15, lane_lsb + 14);
        ap_uint<14> adu = data_in(lane_lsb + 13, lane_lsb);

        // Does the measured gain code belong to a pedestal mode that is switched on?
        const bool g0_selected = (gain == 0x0) && ((mode & MODE_PEDESTAL_G0) != 0);
        const bool g1_selected = (gain == 0x1) && ((mode & MODE_PEDESTAL_G1) != 0);
        const bool g2_selected = (gain == 0x3) && ((mode & MODE_PEDESTAL_G2) != 0);
        const bool gain_accepted = g0_selected || g1_selected || g2_selected;

        // Pixels already holding the 16383.5 marker must not be updated again
        const bool not_marked = pedestal[px] < pedestal_g0_t(16383.25);

        if (!(gain_accepted && not_marked)) {
            pedestal[px] = pedestal_g0_t(16383.5);
        } else if (accumulate) {
            pedestal[px] += pedestal_g0_t(adu) / PEDESTAL_WINDOW_SIZE;
        } else {
            // Difference can be negative, hence the signed intermediate type
            pedestal[px] += ap_fixed<PEDESTAL_G0_PRECISION + 2, 16, AP_RND_CONV>(adu - pedestal[px]) / PEDESTAL_WINDOW_SIZE;
        }
    }

    packed_pedestal_g0_t packed_pedestal_out;
    pack(packed_pedestal_out, pedestal);
    return packed_pedestal_out;
}
// Top-level HLS kernel. Forwards the 512-bit data stream and the completion stream
// unchanged and, when at least one pedestal mode bit is set in the first packet, keeps
// per-pixel pedestal estimates in HBM and appends them to the output once the input
// completion stream signals its end.
//
// data_in / data_out                    - 512-bit packet stream; every packet read is forwarded as is
// s_axis_completion / m_axis_completion - completion records; every record read is forwarded as is
// d_hbm_p0 .. d_hbm_p5                  - used here only for reading 256-bit pedestal words
// d_hbm_p0_w .. d_hbm_p5_w              - used here only for writing 256-bit pedestal words
// hbm_size_bytes                        - used to derive the six base offsets (24/32 .. 29/32 of it)
//
// Storage layout visible in the code: one packed pedestal word (768 bits, 32 pixels) is split
// into three 256-bit parts. Even-numbered packets of a module use ports p0/p1/p2, odd-numbered
// packets use p3/p4/p5, both at the same word index.
void pedestal(STREAM_512 &data_in, STREAM_512 &data_out,
hls::stream<axis_completion > &s_axis_completion,
hls::stream<axis_completion > &m_axis_completion,
ap_uint<256> *d_hbm_p0,
ap_uint<256> *d_hbm_p0_w,
ap_uint<256> *d_hbm_p1,
ap_uint<256> *d_hbm_p1_w,
ap_uint<256> *d_hbm_p2,
ap_uint<256> *d_hbm_p2_w,
ap_uint<256> *d_hbm_p3,
ap_uint<256> *d_hbm_p3_w,
ap_uint<256> *d_hbm_p4,
ap_uint<256> *d_hbm_p4_w,
ap_uint<256> *d_hbm_p5,
ap_uint<256> *d_hbm_p5_w,
ap_uint<32> hbm_size_bytes) {
// Stream and control interfaces
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE axis register both port=data_in
#pragma HLS INTERFACE axis register both port=data_out
#pragma HLS INTERFACE axis register both port=s_axis_completion
#pragma HLS INTERFACE axis register both port=m_axis_completion
#pragma HLS INTERFACE register ap_none port=hbm_size_bytes
// Read-side memory ports: long read bursts, minimal write capability
#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p2 bundle=d_hbm_p2 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p3 bundle=d_hbm_p3 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p4 bundle=d_hbm_p4 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p5 bundle=d_hbm_p5 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
// Write-side memory ports: long write bursts, minimal read capability
#pragma HLS INTERFACE m_axi port=d_hbm_p0_w bundle=d_hbm_p0_w depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p1_w bundle=d_hbm_p1_w depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p2_w bundle=d_hbm_p2_w depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p3_w bundle=d_hbm_p3_w depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p4_w bundle=d_hbm_p4_w depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p5_w bundle=d_hbm_p5_w depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
// Number of frames folded into the pedestal so far, indexed by nmodules * storage_cell + module.
// NOTE(review): the array size assumes at most 16 storage cells and at most MAX_MODULES_FPGA
// modules - confirm the configuration registers cannot exceed this.
uint64_t frame_count[MAX_MODULES_FPGA*16];
for (int i = 0; i < MAX_MODULES_FPGA*16; i++)
frame_count[i] = 0;
packet_512_t packet;
// The first packet is forwarded as is; the run configuration is decoded from its payload below.
{
#pragma HLS PROTOCOL fixed
data_in >> packet;
ap_wait();
data_out << packet;
ap_wait();
}
ap_uint<8> conversion_mode = ACT_REG_MODE(packet.data);
ap_uint<8> nmodules = ACT_REG_NMODULES(packet.data) + 1;
// NOTE(review): unlike nmodules, no +1 is applied here. If this register can be 0, the
// clean_hbm loop and the final save loop run zero times while the per-frame branch below
// still updates storage cell 0 - confirm the register is never 0 in pedestal mode.
ap_uint<5> nstoragecells = ACT_REG_NSTORAGE_CELLS(packet.data);
// Pedestal processing is active if any of the three pedestal mode bits is set
ap_uint<1> pedestal_mode = ((conversion_mode & MODE_PEDESTAL_G0) != 0)
|| ((conversion_mode & MODE_PEDESTAL_G1) != 0)
|| ((conversion_mode & MODE_PEDESTAL_G2) != 0);
// Base index (in 256-bit words, i.e. bytes / 32) of the region used on each of the six ports.
// Presumably these are the starts of six consecutive 1/32 slices of the HBM address space - confirm.
ap_uint<32> offset_hbm_0 = 24 * hbm_size_bytes / 32;
ap_uint<32> offset_hbm_1 = 25 * hbm_size_bytes / 32;
ap_uint<32> offset_hbm_2 = 26 * hbm_size_bytes / 32;
ap_uint<32> offset_hbm_3 = 27 * hbm_size_bytes / 32;
ap_uint<32> offset_hbm_4 = 28 * hbm_size_bytes / 32;
ap_uint<32> offset_hbm_5 = 29 * hbm_size_bytes / 32;
// Zero the pedestal storage for every (storage cell, module) pair before any data arrives
if (pedestal_mode) {
clean_hbm:
for (int i = 0; i < nmodules * nstoragecells * RAW_MODULE_SIZE * sizeof(uint16_t) / 128; i++) {
#pragma HLS PIPELINE II=1
d_hbm_p0_w[offset_hbm_0 + i] = 0;
d_hbm_p1_w[offset_hbm_1 + i] = 0;
d_hbm_p2_w[offset_hbm_2 + i] = 0;
d_hbm_p3_w[offset_hbm_3 + i] = 0;
d_hbm_p4_w[offset_hbm_4 + i] = 0;
d_hbm_p5_w[offset_hbm_5 + i] = 0;
}
}
// Main loop: one completion record per module image, until a record with `last` set arrives
axis_completion cmpl;
s_axis_completion >> cmpl;
while (!cmpl.last) {
m_axis_completion << cmpl;
// Only images with packet_count == 256 contribute to the pedestal
if ((cmpl.packet_count == 256) && pedestal_mode) {
// NOTE(review): ap_int<5> is signed and holds at most 15, while nstoragecells (ap_uint<5>)
// can be as large as 31; a remainder above 15 would become negative here - confirm the
// number of storage cells is limited to 16 or switch to an unsigned type.
ap_int<5> storage_cell = ((nstoragecells > 1) ? ap_int<5>((cmpl.frame_number % nstoragecells)) : ap_int<5>(0));
// The first PEDESTAL_WINDOW_SIZE images of each (storage cell, module) are accumulated;
// later ones use the incremental update (see update_pedestal)
ap_uint<1> accumulate = (frame_count[nmodules * storage_cell + cmpl.module] < PEDESTAL_WINDOW_SIZE);
frame_count[nmodules * storage_cell + cmpl.module]++;
// Word index of this (storage cell, module) inside each port's region
size_t offset_local = (storage_cell * nmodules + cmpl.module) * (RAW_MODULE_SIZE * sizeof(uint16_t) / 128);
process_data:
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 128; i++) {
#pragma HLS PIPELINE II=2
// Two packets per iteration: the first one updates the words held on ports p0/p1/p2 ...
data_in >> packet;
data_out << packet;
packed_pedestal_g0_t packed_pedestal_in_0, packed_pedestal_out_0;
packed_pedestal_in_0(255, 0) = d_hbm_p0[offset_hbm_0 + offset_local + i];
packed_pedestal_in_0(511, 256) = d_hbm_p1[offset_hbm_1 + offset_local + i];
packed_pedestal_in_0(767, 512) = d_hbm_p2[offset_hbm_2 + offset_local + i];
packed_pedestal_out_0 = update_pedestal(packet.data, packed_pedestal_in_0, accumulate, conversion_mode);
d_hbm_p0_w[offset_hbm_0 + offset_local + i] = packed_pedestal_out_0(255, 0);
d_hbm_p1_w[offset_hbm_1 + offset_local + i] = packed_pedestal_out_0(511, 256);
d_hbm_p2_w[offset_hbm_2 + offset_local + i] = packed_pedestal_out_0(767, 512);
// ... and the second one updates the words held on ports p3/p4/p5
data_in >> packet;
data_out << packet;
packed_pedestal_in_0(255, 0) = d_hbm_p3[offset_hbm_3 + offset_local + i];
packed_pedestal_in_0(511, 256) = d_hbm_p4[offset_hbm_4 + offset_local + i];
packed_pedestal_in_0(767, 512) = d_hbm_p5[offset_hbm_5 + offset_local + i];
packed_pedestal_out_0 = update_pedestal(packet.data, packed_pedestal_in_0, accumulate, conversion_mode);
d_hbm_p3_w[offset_hbm_3 + offset_local + i] = packed_pedestal_out_0(255, 0);
d_hbm_p4_w[offset_hbm_4 + offset_local + i] = packed_pedestal_out_0(511, 256);
d_hbm_p5_w[offset_hbm_5 + offset_local + i] = packed_pedestal_out_0(767, 512);
}
} else {
// No pedestal update for this image: just forward the same number of packets
// (twice the trip count of process_data, which handles two packets per iteration)
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
#pragma HLS PIPELINE II=1
data_in >> packet;
data_out << packet;
}
}
s_axis_completion >> cmpl;
}
// End of input: emit the collected pedestal for every (storage cell, module) that received data
if (pedestal_mode) {
for (int s = 0; s < nstoragecells; s++) {
for (int m = 0; m < nmodules; m++) {
if (frame_count[s * nmodules + m] > 0) {
// Completion record announcing the pedestal image: frame_number carries the storage cell,
// packet_count carries the number of frames used, the packet mask is all ones.
// NOTE(review): only the fields assigned below are set; any other members of
// axis_completion keep whatever value an uninitialised object has - confirm this is intended.
axis_completion cmpl_pedestal;
cmpl_pedestal.last = 0;
cmpl_pedestal.frame_number = s;
cmpl_pedestal.module = m;
cmpl_pedestal.packet_mask(63, 0) = UINT64_MAX;
cmpl_pedestal.packet_mask(127, 64) = UINT64_MAX;
cmpl_pedestal.packet_mask(191, 128) = UINT64_MAX;
cmpl_pedestal.packet_mask(255, 192) = UINT64_MAX;
cmpl_pedestal.packet_count = frame_count[s * nmodules + m];
cmpl_pedestal.pedestal = 1;
m_axis_completion << cmpl_pedestal;
save_frames:
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 128; i++) {
#pragma HLS PIPELINE II=2
// Two output packets per iteration, mirroring the layout used by process_data
packet_512_t packet_out;
packed_pedestal_g0_t packed_pedestal_in_0;
size_t offset_local = (s * nmodules + m) * (RAW_MODULE_SIZE * sizeof(uint16_t) / 128) + i;
packed_pedestal_in_0(255, 0) = d_hbm_p0[offset_hbm_0 + offset_local];
packed_pedestal_in_0(511, 256) = d_hbm_p1[offset_hbm_1 + offset_local];
packed_pedestal_in_0(767, 512) = d_hbm_p2[offset_hbm_2 + offset_local];
packet_out.data = pack_and_reduce(packed_pedestal_in_0);
packet_out.user = 0;
packet_out.last = 0;
data_out << packet_out;
packed_pedestal_in_0(255, 0) = d_hbm_p3[offset_hbm_3 + offset_local];
packed_pedestal_in_0(511, 256) = d_hbm_p4[offset_hbm_4 + offset_local];
packed_pedestal_in_0(767, 512) = d_hbm_p5[offset_hbm_5 + offset_local];
packet_out.data = pack_and_reduce(packed_pedestal_in_0);
packet_out.user = 0;
// `last` is raised only on the final packet of this pedestal image
packet_out.last = (i == RAW_MODULE_SIZE * sizeof(uint16_t) / 128 - 1);
data_out << packet_out;
}
}
}
}
}
// Forward the terminating completion record (the one with `last` set) and one trailing data packet
m_axis_completion << cmpl;
data_in >> packet;
data_out << packet;
}