208 lines
10 KiB
C++
208 lines
10 KiB
C++
// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: CERN-OHL-S-2.0
|
|
|
|
#include "hls_jfjoch.h"
|
|
|
|
// Shorthand: byte offset of member `field` inside ModuleStatistics.
#define o(field) offsetof(ModuleStatistics, field)
|
|
// "Select field": expands to msg(hi, lo), where lo is the bit offset of ModuleStatistics::field
// (byte offset * 8) and hi = lo + s - 1, i.e. an s-bit wide slice starting at that field.
// `msg` must be callable as msg(hi, lo); `s` is pasted unparenthesized, so pass a simple expression.
#define sf(msg, field, s) msg(o(field)*8 + s - 1, o(field)*8)
|
|
|
|
// Pull a single 512-bit word from the host stream and decode the load request it carries.
// The word is sliced using the ModuleStatistics field offsets (see the sf() macro):
//   destination <- 32 bits at field `load_calibration_destination`
//   module_id   <- 32 bits at field `module_number`; only the low 16 bits fit the output
//                  (the value is narrowed on assignment to ap_uint<16>).
void read_config(hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
                 ap_uint<32> &destination,
                 ap_uint<16> &module_id) {
#pragma HLS INLINE OFF
    // Exactly one stream word is consumed by this function.
    const ap_axiu<512, 1, 1, 1> header = host_memory_in.read();
    const ap_uint<512> header_bits = header.data;

    destination = sf(header_bits, load_calibration_destination, 32);
    module_id = sf(header_bits, module_number, 32);
}
|
|
|
|
// Copy one module of 16-bit data from the host stream into the two HBM ports without conversion.
// The stream delivers 512-bit (64-byte) words; RAW_MODULE_SIZE * sizeof(int16_t) / 64 of them are read.
// For the w-th word, bits 255..0 go to d_hbm_p0[offset_hbm_0 + w] and bits 511..256 go to
// d_hbm_p1[offset_hbm_1 + w]. Offsets are expressed in 256-bit elements of the respective pointer.
void read_module(ap_uint<256> *d_hbm_p0,
                 ap_uint<256> *d_hbm_p1,
                 hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
                 size_t offset_hbm_0,
                 size_t offset_hbm_1) {
#pragma HLS INLINE OFF
    // Number of 64-byte stream words that make up one module of 16-bit values.
    const size_t word_count = RAW_MODULE_SIZE * sizeof(int16_t) / 64;

    for (int w = 0; w < word_count; w++) {
#pragma HLS PIPELINE II=1
        const ap_axiu<512, 1, 1, 1> beat = host_memory_in.read();
        const ap_uint<512> payload = beat.data;

        // Lower half to port 0, upper half to port 1.
        d_hbm_p0[offset_hbm_0 + w] = payload.range(255, 0);
        d_hbm_p1[offset_hbm_1 + w] = payload.range(511, 256);
    }
}
|
|
|
|
// Convert sixteen 32-bit floats packed in a 512-bit word into sixteen 16-bit values of type T.
//
// For each lane j (0..15):
//   - bits [32*j+31 : 32*j] of `in` are reinterpreted as a float through float_uint32
//     (declared elsewhere; used here via its `.u` / `.f` members),
//   - the float is multiplied by the compile-time factor MULT,
//   - non-finite or negative results become 0, everything else is static_cast to T,
//   - bits 0..15 of the T value are copied to bits [16*j+15 : 16*j] of the result.
// T must therefore support assignment from 0, construction from float and bit indexing for bits 0..15.
template <class T, int MULT> ap_uint<256> convert(const ap_uint<512> &in) {
//#pragma HLS INLINE
    ap_uint<256> packed;  // every one of the 256 bits is written below
    for (int lane = 0; lane < 16; lane++) {
        float_uint32 bits{};
        bits.u = in.range(lane * 32 + 31, lane * 32);
        const float scaled = bits.f * MULT;

        T fixed;
        // Accept only finite, non-negative values; NaN/Inf/negative collapse to zero.
        if (hls::isfinite(scaled) && !(scaled < 0.0))
            fixed = static_cast<T>(scaled);
        else
            fixed = 0;

        // Bit-wise copy of the raw 16-bit representation into the output lane.
        for (int b = 0; b < 16; b++)
            packed[lane * 16 + b] = fixed[b];
    }
    return packed;
}
|
|
|
|
// Collapse thirty-two 32-bit values into a 32-bit flag word.
// `in` is split into 32 values by unpack32() (defined elsewhere; the slice-to-index mapping is
// whatever that helper implements). Bit i of the result is 1 when value i is non-zero, else 0.
ap_uint<32> pxl_mask_reduce(const ap_uint<1024> &in) {
#pragma HLS INLINE
    ap_uint<32> words[32];
    unpack32(in, words);

    ap_uint<32> flags = 0;
    for (int bit = 0; bit < 32; bit++) {
        // Bits start cleared, so only non-zero entries need to be marked.
        if (words[bit] != 0)
            flags[bit] = 1;
    }
    return flags;
}
|
|
|
|
template <class T, int MULT = 1>
|
|
void read_module_float(ap_uint<256> *d_hbm_p0,
|
|
ap_uint<256> *d_hbm_p1,
|
|
hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
|
|
size_t offset_hbm_0,
|
|
size_t offset_hbm_1) {
|
|
#pragma HLS INLINE OFF
|
|
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(int16_t) / 64; i++) {
|
|
#pragma HLS PIPELINE II=2
|
|
ap_axiu<512, 1, 1, 1> data_packet;
|
|
|
|
ap_uint<256> tmp;
|
|
host_memory_in >> data_packet;
|
|
d_hbm_p0[offset_hbm_0 + i] = convert<T, MULT>(data_packet.data);
|
|
|
|
host_memory_in >> data_packet;
|
|
d_hbm_p1[offset_hbm_1 + i] = convert<T, MULT>(data_packet.data);
|
|
}
|
|
}
|
|
|
|
// Top-level HLS function: load one module worth of calibration-type data from host memory.
//
// Sequence:
//   1. Look up the host buffer address in dma_address_table[config.handle]; an address of 0
//      is rejected with LOAD_CALIBRATION_ERR_HOST_ADDR.
//   2. Request 64 bytes at mem_addr + offsetof(DeviceOutput, module_statistics) through
//      setup_datamover() (defined elsewhere) and decode module id and destination from the
//      resulting stream word (read_config()).
//   3. Validate the module id against the number of modules that fit in one HBM region
//      (LOAD_CALIBRATION_ERR_MODULE otherwise).
//   4. Depending on the destination, request the payload starting at mem_addr and either
//      copy it (read_module), convert it from float (read_module_float), or reduce it to a
//      1-bit-per-pixel mask written into pixel_mask.
//
// HBM addressing (in 256-bit elements): destination region k starts at k * hbm_size_bytes / 32;
// even regions are written through d_hbm_p0, odd ones through d_hbm_p1. Within a region a module
// occupies RAW_MODULE_SIZE * sizeof(int16_t) / 64 elements.
//
// Returns LOAD_CALIBRATION_OK on success, LOAD_CALIBRATION_ERR_DEST for an unknown destination,
// or one of the error codes above. On every error path no payload datamover command is issued.
int load_calibration(ap_uint<256> *d_hbm_p0,
                     ap_uint<256> *d_hbm_p1,
                     uint32_t pixel_mask[MAX_MODULES_FPGA*RAW_MODULE_SIZE/32],
                     const LoadCalibrationConfig &config,
                     ap_uint<32> hbm_size_bytes,
                     hls::stream<axis_datamover_ctrl> &datamover_in_cmd,
                     hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
                     const uint64_t *dma_address_table) {
#pragma HLS INTERFACE mode=s_axilite port=return
#pragma HLS INTERFACE register both axis port=datamover_in_cmd
#pragma HLS INTERFACE register both axis port=host_memory_in

#pragma HLS INTERFACE mode=s_axilite port=config
#pragma HLS INTERFACE mode=ap_none port=hbm_size_bytes

#pragma HLS INTERFACE mode=m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE mode=m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE mode=m_axi port=dma_address_table bundle=dma_address_table depth=65536 offset=off \
        max_read_burst_length=2 max_write_burst_length=2 latency=10 num_write_outstanding=1 num_read_outstanding=1
#pragma HLS INTERFACE mode=m_axi port=pixel_mask bundle=pixel_mask depth=65536 offset=off \
        max_read_burst_length=2 max_write_burst_length=4 latency=4 num_write_outstanding=2 num_read_outstanding=1

    uint64_t mem_addr = dma_address_table[config.handle];
    if (mem_addr == 0)
        return LOAD_CALIBRATION_ERR_HOST_ADDR;

    // First transfer: the 64-byte request header stored at the module_statistics offset.
    setup_datamover(datamover_in_cmd, mem_addr + offsetof(DeviceOutput, module_statistics), 64);
    ap_uint<16> module_id;
    ap_uint<32> destination;
    read_config(host_memory_in, destination, module_id);

    // One region of hbm_size_bytes/32 elements per port holds
    // 2 * hbm_size_bytes / (RAW_MODULE_SIZE * sizeof(int16_t)) modules, so valid ids are
    // 0 .. capacity-1. An id equal to the capacity would land on the first element of the
    // next region, hence ">=" and not ">".
    if (module_id >= 2 * hbm_size_bytes / (RAW_MODULE_SIZE * sizeof(int16_t)))
        return LOAD_CALIBRATION_ERR_MODULE;

    // Position of this module inside any region, in 256-bit elements (identical for all destinations).
    const size_t module_offset = module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;

    size_t offset_hbm_0;
    size_t offset_hbm_1;

    switch (destination) {
        case LOAD_CALIBRATION_DEST_GAIN_G0:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 0 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 1 * hbm_size_bytes / 32 + module_offset;
            read_module_float<gainG0_t, GAIN_G0_MULTIPLIER>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_GAIN_G1:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 2 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 3 * hbm_size_bytes / 32 + module_offset;
            read_module_float<gainG1_t, GAIN_G1_MULTIPLIER>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_GAIN_G2:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 4 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 5 * hbm_size_bytes / 32 + module_offset;
            read_module_float<gainG2_t, GAIN_G2_MULTIPLIER>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PEDESTAL_G0:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 6 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 7 * hbm_size_bytes / 32 + module_offset;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PEDESTAL_G1:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 8 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 9 * hbm_size_bytes / 32 + module_offset;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PEDESTAL_G2:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 10 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 11 * hbm_size_bytes / 32 + module_offset;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_INTEGRATION_MAP:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 12 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 13 * hbm_size_bytes / 32 + module_offset;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_INTEGRATION_WEIGHTS:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 14 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 15 * hbm_size_bytes / 32 + module_offset;
            read_module_float<integration_factor_t>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_SPOT_FINDER_RES_MAP:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 16 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 17 * hbm_size_bytes / 32 + module_offset;
            read_module_float<xray_d_t>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_ROI_CALC:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(uint16_t));
            offset_hbm_0 = 18 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 19 * hbm_size_bytes / 32 + module_offset;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_FRAME_GEN:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 20 * hbm_size_bytes / 32 + module_offset;
            offset_hbm_1 = 21 * hbm_size_bytes / 32 + module_offset;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PXL_MASK: {
            // pixel_mask is declared with room for MAX_MODULES_FPGA modules only; the HBM capacity
            // check above does not protect it, so reject ids that would index past its end.
            // This happens before the payload datamover command, like the other error paths.
            if (module_id >= MAX_MODULES_FPGA)
                return LOAD_CALIBRATION_ERR_MODULE;

            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int32_t));

            // First mask word of this module (one uint32_t covers 32 pixels).
            const size_t mask_base = module_id * RAW_MODULE_SIZE / 32;
            for (int i = 0; i < RAW_MODULE_SIZE * sizeof(int16_t) / 64; i++) {
                // Two 512-bit words (32 input values of 32 bits) are consumed per output word.
#pragma HLS PIPELINE II=2
                ap_axiu<512, 1, 1, 1> data_packet_0;
                ap_axiu<512, 1, 1, 1> data_packet_1;
                host_memory_in >> data_packet_0;
                host_memory_in >> data_packet_1;
                // ap_uint concatenation: second word forms the upper 512 bits, first word the lower.
                ap_uint<1024> tmp = (data_packet_1.data, data_packet_0.data);
                pixel_mask[mask_base + i] = pxl_mask_reduce(tmp);
            }
            break;
        }
        default:
            return LOAD_CALIBRATION_ERR_DEST;
    }
    return LOAD_CALIBRATION_OK;
}
|