* Enhancements for EIGER * Writer is more flexible and capable of handling DECTRIS data
264 lines
12 KiB
C++
264 lines
12 KiB
C++
// Copyright (2019-2023) Paul Scherrer Institute
|
|
|
|
#include "hls_jfjoch.h"
|
|
|
|
// Total width, in bits, of one stored pedestal value.
// NOTE(review): the breakdown in the trailing comment adds up to 22 bits, not 24, while the
// typedef below leaves 24 - 14 = 10 fractional bits — confirm the intended split.
#define PEDESTAL_G0_PRECISION 24 // 14-bit + 1-bit (fractional) + 7-bit (for 128 pixel window)
|
|
|
|
// Unsigned fixed-point pedestal value: PEDESTAL_G0_PRECISION bits in total, 14 of which are
// integer bits; quantization uses the AP_RND_CONV mode.
typedef ap_ufixed<PEDESTAL_G0_PRECISION,14, AP_RND_CONV> pedestal_g0_t;
|
|
// Wide word large enough to hold 32 pedestal values back to back (see pack()/unpack()).
typedef ap_uint<PEDESTAL_G0_PRECISION * 32> packed_pedestal_g0_t;
|
|
|
|
// Serializes 32 fixed-point pedestal values into a single wide word.
//
// Value number `slot` occupies bits
//   [slot * PEDESTAL_G0_PRECISION, (slot + 1) * PEDESTAL_G0_PRECISION)
// of `out`, with bit 0 of the value placed at the lowest bit of its slot.
// The raw bit pattern is copied; no numeric conversion takes place.
//
//   out - destination word; every bit of it is overwritten
//   in  - 32 pedestal values to store
void pack(packed_pedestal_g0_t& out, pedestal_g0_t in[32]) {
#pragma HLS INLINE
    for (int slot = 0; slot < 32; ++slot) {
        const int first_bit = slot * PEDESTAL_G0_PRECISION;
        for (int bit = 0; bit < PEDESTAL_G0_PRECISION; ++bit)
            out[first_bit + bit] = in[slot][bit];
    }
}
|
|
|
|
// Splits a wide packed word back into 32 fixed-point pedestal values.
//
// Inverse of pack(): bits
//   [slot * PEDESTAL_G0_PRECISION, (slot + 1) * PEDESTAL_G0_PRECISION)
// of `in` are copied, bit for bit, into out[slot].
//
//   in  - packed word (taken by value)
//   out - receives the 32 pedestal values; every bit of every entry is written
inline void unpack(packed_pedestal_g0_t in, pedestal_g0_t out[32]) {
#pragma HLS INLINE
    for (int slot = 0; slot < 32; ++slot) {
        const int first_bit = slot * PEDESTAL_G0_PRECISION;
        for (int bit = 0; bit < PEDESTAL_G0_PRECISION; ++bit)
            out[slot][bit] = in[first_bit + bit];
    }
}
|
|
|
|
// Converts 32 packed fixed-point pedestal values into 32 unsigned 16-bit integers,
// returned as one 512-bit word (value k in bits [16 * k + 15, 16 * k]).
//
// Each value is rounded by adding 0.5 and keeping only the integer part.
// Any stored value strictly greater than 16383 is reported as 16384 instead;
// this includes the 16383.5 marker that update_pedestal() writes for pixels
// it does not update.
ap_uint<512> pack_and_reduce(const packed_pedestal_g0_t &in) {
#pragma HLS INLINE
    pedestal_g0_t values[32];
    unpack(in, values);

    ap_uint<512> out;
    for (int px = 0; px < 32; px++) {
        ap_uint<16> rounded;
        if (values[px] > 16383)
            rounded = 16384;
        else
            rounded = values[px] + pedestal_g0_t(0.5); // assignment keeps only the integer part

        const int first_bit = px * 16;
        for (int bit = 0; bit < 16; bit++)
            out[first_bit + bit] = rounded[bit];
    }
    return out;
}
|
|
|
|
|
|
// Updates 32 running pedestal values with the 32 pixels of one 512-bit data word.
//
//   data_in            - 32 pixels, 16 bits each: bits [15:14] are the gain code,
//                        bits [13:0] the ADU value
//   packed_pedestal_in - current pedestal values for the same 32 pixels (see pack())
//   accumulate         - non-zero: add adu / PEDESTAL_WINDOW_SIZE to the stored value;
//                        zero: move the stored value towards adu by
//                        (adu - stored) / PEDESTAL_WINDOW_SIZE
//   mode               - bit mask; MODE_PEDESTAL_G0 / _G1 / _G2 select which gain code
//                        (0x0 / 0x1 / 0x3 respectively) is accepted
//
// A pixel is updated only when its gain code is accepted by `mode` AND its stored
// value is still below 16383.25. Otherwise the stored value is set to 16383.5; since
// that is not below 16383.25, the pixel keeps that marker on all later calls.
//
// Returns the updated pedestal values, packed.
packed_pedestal_g0_t update_pedestal(ap_uint<512> data_in,
                                     const packed_pedestal_g0_t packed_pedestal_in,
                                     ap_uint<1> accumulate, ap_uint<8> mode) {
#pragma HLS INLINE
    // Load current pedestal
    pedestal_g0_t values[32];
    unpack(packed_pedestal_in, values);

    for (int px = 0; px < 32; px++) {
        const int lsb = 16 * px;
        ap_uint<14> adu = data_in(lsb + 13, lsb);
        ap_uint<2> gain = data_in(lsb + 15, lsb + 14);

        // Does the pixel's gain code match one of the gains enabled in `mode`?
        const bool gain_selected =
                ((gain == 0x0) && ((mode & MODE_PEDESTAL_G0) != 0)) ||
                ((gain == 0x1) && ((mode & MODE_PEDESTAL_G1) != 0)) ||
                ((gain == 0x3) && ((mode & MODE_PEDESTAL_G2) != 0));
        // Pixels already carrying the 16383.5 marker fail this test.
        const bool below_marker = values[px] < pedestal_g0_t(16383.25);

        if (!(gain_selected && below_marker)) {
            values[px] = pedestal_g0_t(16383.5);
        } else if (accumulate) {
            values[px] += pedestal_g0_t(adu) / PEDESTAL_WINDOW_SIZE;
        } else {
            // Signed intermediate: adu may be smaller than the stored value.
            values[px] += ap_fixed<PEDESTAL_G0_PRECISION + 2, 16, AP_RND_CONV>(adu - values[px]) / PEDESTAL_WINDOW_SIZE;
        }
    }

    // Save pedestal
    packed_pedestal_g0_t packed_pedestal_out;
    pack(packed_pedestal_out, values);
    return packed_pedestal_out;
}
|
|
|
|
// Pedestal-collection stage of the streaming pipeline.
//
// All packets arriving on data_in are forwarded unchanged to data_out, and all
// completions from s_axis_completion are forwarded to m_axis_completion.
// When any of the MODE_PEDESTAL_G0/G1/G2 bits is set in the mode read from the
// first packet, the stage additionally:
//   1. zeroes the pedestal area in HBM for nmodules * nstoragecells modules,
//   2. for every completion with packet_count == 256, folds the module's pixels into
//      the per-pixel pedestal kept in HBM (see update_pedestal()),
//   3. after the completion marked `last` is received, emits for every
//      (storage cell, module) pair that saw at least one frame an extra completion
//      (pedestal = 1) followed by the pedestal values rounded to 16 bits
//      (see pack_and_reduce()),
// and only then forwards the final completion and one trailing packet.
//
// HBM layout: each 512-bit packet holds 32 pixels, i.e. 32 * 24 = 768 bits of pedestal
// state, stored as three 256-bit words. The first packet of every pair uses
// d_hbm_p0..p2, the second d_hbm_p3..p5. Reads go through d_hbm_pN and writes through
// d_hbm_pN_w at the same word index.
//
//   data_in / data_out                    - 512-bit packet streams
//   s_axis_completion / m_axis_completion - completion streams (in / out)
//   d_hbm_p0..p5, d_hbm_p0_w..p5_w        - read / write ports to the pedestal storage
//   hbm_size_bytes                        - used to derive the word offsets k * hbm_size_bytes / 32
//                                           for k = 24..29 (presumably the size of one HBM
//                                           region in bytes — confirm against the block design)
void pedestal(STREAM_512 &data_in, STREAM_512 &data_out,
              hls::stream<axis_completion > &s_axis_completion,
              hls::stream<axis_completion > &m_axis_completion,
              ap_uint<256> *d_hbm_p0,
              ap_uint<256> *d_hbm_p0_w,
              ap_uint<256> *d_hbm_p1,
              ap_uint<256> *d_hbm_p1_w,
              ap_uint<256> *d_hbm_p2,
              ap_uint<256> *d_hbm_p2_w,
              ap_uint<256> *d_hbm_p3,
              ap_uint<256> *d_hbm_p3_w,
              ap_uint<256> *d_hbm_p4,
              ap_uint<256> *d_hbm_p4_w,
              ap_uint<256> *d_hbm_p5,
              ap_uint<256> *d_hbm_p5_w,
              ap_uint<32> hbm_size_bytes) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE axis register both port=data_in
#pragma HLS INTERFACE axis register both port=data_out
#pragma HLS INTERFACE axis register both port=s_axis_completion
#pragma HLS INTERFACE axis register both port=m_axis_completion
#pragma HLS INTERFACE register ap_none port=hbm_size_bytes

#pragma HLS INTERFACE m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p2 bundle=d_hbm_p2 depth=512 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p3 bundle=d_hbm_p3 depth=512 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p4 bundle=d_hbm_p4 depth=512 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16
#pragma HLS INTERFACE m_axi port=d_hbm_p5 bundle=d_hbm_p5 depth=512 offset=off \
        max_read_burst_length=16 max_write_burst_length=2 latency=130 num_write_outstanding=2 num_read_outstanding=16

#pragma HLS INTERFACE m_axi port=d_hbm_p0_w bundle=d_hbm_p0_w depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p1_w bundle=d_hbm_p1_w depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p2_w bundle=d_hbm_p2_w depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p3_w bundle=d_hbm_p3_w depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p4_w bundle=d_hbm_p4_w depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE m_axi port=d_hbm_p5_w bundle=d_hbm_p5_w depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=130 num_write_outstanding=8 num_read_outstanding=2

    // Loop iterations needed to cover one module when each iteration handles two
    // 512-bit packets (128 bytes of 16-bit pixels).
    const size_t pairs_per_module = RAW_MODULE_SIZE * sizeof(uint16_t) / 128;

    // Number of frames folded into the pedestal, indexed by storage_cell * nmodules + module.
    uint64_t frame_count[MAX_MODULES_FPGA*16];
    for (int idx = 0; idx < MAX_MODULES_FPGA*16; idx++)
        frame_count[idx] = 0;

    // The first packet carries the run configuration; it is forwarded as-is.
    packet_512_t packet;
    {
#pragma HLS PROTOCOL fixed
        data_in >> packet;
        ap_wait();
        data_out << packet;
        ap_wait();
    }

    ap_uint<8> conversion_mode = ACT_REG_MODE(packet.data);
    ap_uint<8> nmodules = ACT_REG_NMODULES(packet.data) + 1;
    ap_uint<5> nstoragecells = ACT_REG_NSTORAGE_CELLS(packet.data);

    // Pedestal collection is active when at least one of the three gain bits is set.
    ap_uint<1> pedestal_mode =
            (conversion_mode & (MODE_PEDESTAL_G0 | MODE_PEDESTAL_G1 | MODE_PEDESTAL_G2)) != 0;

    // Base word index of the pedestal area on each of the six HBM ports.
    ap_uint<32> offset_hbm_0 = 24 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_1 = 25 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_2 = 26 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_3 = 27 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_4 = 28 * hbm_size_bytes / 32;
    ap_uint<32> offset_hbm_5 = 29 * hbm_size_bytes / 32;

    if (pedestal_mode) {
        // Start from an all-zero pedestal for every (storage cell, module) pair.
        clean_hbm:
        for (int i = 0; i < nmodules * nstoragecells * RAW_MODULE_SIZE * sizeof(uint16_t) / 128; i++) {
#pragma HLS PIPELINE II=1
            d_hbm_p0_w[offset_hbm_0 + i] = 0;
            d_hbm_p1_w[offset_hbm_1 + i] = 0;
            d_hbm_p2_w[offset_hbm_2 + i] = 0;
            d_hbm_p3_w[offset_hbm_3 + i] = 0;
            d_hbm_p4_w[offset_hbm_4 + i] = 0;
            d_hbm_p5_w[offset_hbm_5 + i] = 0;
        }
    }

    axis_completion cmpl;
    s_axis_completion >> cmpl;
    while (!cmpl.last) {
        m_axis_completion << cmpl;

        if ((cmpl.packet_count == 256) && pedestal_mode) {
            // With more than one storage cell, the cell is the frame number modulo the cell count.
            ap_int<5> storage_cell = 0;
            if (nstoragecells > 1)
                storage_cell = ap_int<5>((cmpl.frame_number % nstoragecells));

            // Plain accumulation for the first PEDESTAL_WINDOW_SIZE frames, moving average afterwards.
            ap_uint<1> accumulate = (frame_count[nmodules * storage_cell + cmpl.module] < PEDESTAL_WINDOW_SIZE);
            frame_count[nmodules * storage_cell + cmpl.module]++;

            size_t offset_local = (storage_cell * nmodules + cmpl.module) * pairs_per_module;
            process_data:
            for (int i = 0; i < pairs_per_module; i++) {
#pragma HLS PIPELINE II=2
                const size_t word = offset_local + i;
                packed_pedestal_g0_t stored, updated;

                // First packet of the pair: state kept on ports 0..2.
                data_in >> packet;
                data_out << packet;

                stored(255, 0) = d_hbm_p0[offset_hbm_0 + word];
                stored(511, 256) = d_hbm_p1[offset_hbm_1 + word];
                stored(767, 512) = d_hbm_p2[offset_hbm_2 + word];

                updated = update_pedestal(packet.data, stored, accumulate, conversion_mode);

                d_hbm_p0_w[offset_hbm_0 + word] = updated(255, 0);
                d_hbm_p1_w[offset_hbm_1 + word] = updated(511, 256);
                d_hbm_p2_w[offset_hbm_2 + word] = updated(767, 512);

                // Second packet of the pair: state kept on ports 3..5.
                data_in >> packet;
                data_out << packet;

                stored(255, 0) = d_hbm_p3[offset_hbm_3 + word];
                stored(511, 256) = d_hbm_p4[offset_hbm_4 + word];
                stored(767, 512) = d_hbm_p5[offset_hbm_5 + word];

                updated = update_pedestal(packet.data, stored, accumulate, conversion_mode);

                d_hbm_p3_w[offset_hbm_3 + word] = updated(255, 0);
                d_hbm_p4_w[offset_hbm_4 + word] = updated(511, 256);
                d_hbm_p5_w[offset_hbm_5 + word] = updated(767, 512);
            }
        } else {
            // Not used for the pedestal: just pass the module's packets through.
            for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
#pragma HLS PIPELINE II=1
                data_in >> packet;
                data_out << packet;
            }
        }
        s_axis_completion >> cmpl;
    }

    if (pedestal_mode) {
        // Emit the collected pedestal for every (storage cell, module) that saw data.
        for (int s = 0; s < nstoragecells; s++) {
            for (int m = 0; m < nmodules; m++) {
                if (frame_count[s * nmodules + m] > 0) {
                    axis_completion cmpl_pedestal;
                    cmpl_pedestal.last = 0;
                    cmpl_pedestal.frame_number = s;
                    cmpl_pedestal.module = m;
                    cmpl_pedestal.packet_mask(63, 0) = UINT64_MAX;
                    cmpl_pedestal.packet_mask(127, 64) = UINT64_MAX;
                    cmpl_pedestal.packet_mask(191, 128) = UINT64_MAX;
                    cmpl_pedestal.packet_mask(255, 192) = UINT64_MAX;
                    cmpl_pedestal.packet_count = frame_count[s * nmodules + m];
                    cmpl_pedestal.pedestal = 1;

                    m_axis_completion << cmpl_pedestal;

                    save_frames:
                    for (int i = 0; i < pairs_per_module; i++) {
#pragma HLS PIPELINE II=2
                        packet_512_t packet_out;
                        packed_pedestal_g0_t stored;
                        size_t offset_local = (s * nmodules + m) * pairs_per_module + i;

                        // First packet of the pair, from ports 0..2.
                        stored(255, 0) = d_hbm_p0[offset_hbm_0 + offset_local];
                        stored(511, 256) = d_hbm_p1[offset_hbm_1 + offset_local];
                        stored(767, 512) = d_hbm_p2[offset_hbm_2 + offset_local];
                        packet_out.data = pack_and_reduce(stored);
                        packet_out.user = 0;
                        packet_out.last = 0;
                        data_out << packet_out;

                        // Second packet of the pair, from ports 3..5; `last` is raised
                        // on the final packet of the module.
                        stored(255, 0) = d_hbm_p3[offset_hbm_3 + offset_local];
                        stored(511, 256) = d_hbm_p4[offset_hbm_4 + offset_local];
                        stored(767, 512) = d_hbm_p5[offset_hbm_5 + offset_local];
                        packet_out.data = pack_and_reduce(stored);
                        packet_out.user = 0;
                        packet_out.last = (i == pairs_per_module - 1);
                        data_out << packet_out;
                    }
                }
            }
        }
    }

    // Forward the completion marked `last`, then the trailing packet.
    m_axis_completion << cmpl;

    data_in >> packet;
    data_out << packet;
}
|