Files
Jungfraujoch/fpga/hls/load_calibration.cpp
2024-11-22 21:25:20 +01:00

208 lines
10 KiB
C++

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: CERN-OHL-S-2.0
#include "hls_jfjoch.h"
// o(field): byte offset of `field` inside ModuleStatistics.
#define o(field) offsetof(ModuleStatistics, field)
// sf(msg, field, s): applies msg(hi, lo) to the `s`-bit wide range that starts at the
// bit position of `field` (byte offset * 8), i.e. hi = offset*8 + s - 1, lo = offset*8.
// `msg` and `s` are parenthesized so that compound expressions expand as intended.
#define sf(msg, field, s) (msg)(o(field)*8 + (s) - 1, o(field)*8)
// Pops a single 512-bit beat from `host_memory_in` and extracts two fields from it,
// using the ModuleStatistics field offsets to locate them:
//   destination <- 32 bits at the offset of `load_calibration_destination`
//   module_id   <- 32 bits at the offset of `module_number`
// NOTE: `module_id` is an ap_uint<16>, so the 32-bit slice is narrowed on assignment.
void read_config(hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
                 ap_uint<32> &destination,
                 ap_uint<16> &module_id) {
#pragma HLS INLINE OFF
    ap_axiu<512, 1, 1, 1> header;
    host_memory_in >> header;

    destination = sf(header.data, load_calibration_destination, 32);
    module_id = sf(header.data, module_number, 32);
}
// Copies one module worth of 16-bit data from the host stream into the two HBM ports
// without any conversion.
//
// RAW_MODULE_SIZE * sizeof(int16_t) / 64 beats of 512 bits are consumed. For beat i the
// lower 256 bits go to d_hbm_p0[offset_hbm_0 + i] and the upper 256 bits go to
// d_hbm_p1[offset_hbm_1 + i]. Offsets are expressed in 256-bit words.
void read_module(ap_uint<256> *d_hbm_p0,
                 ap_uint<256> *d_hbm_p1,
                 hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
                 size_t offset_hbm_0,
                 size_t offset_hbm_1) {
#pragma HLS INLINE OFF
    const size_t beat_count = RAW_MODULE_SIZE * sizeof(int16_t) / 64;
    for (size_t beat = 0; beat < beat_count; beat++) {
#pragma HLS PIPELINE II=1
        ap_axiu<512, 1, 1, 1> incoming;
        host_memory_in >> incoming;

        const ap_uint<256> lower_half = incoming.data(255, 0);
        const ap_uint<256> upper_half = incoming.data(511, 256);
        d_hbm_p0[offset_hbm_0 + beat] = lower_half;
        d_hbm_p1[offset_hbm_1 + beat] = upper_half;
    }
}
// Converts sixteen packed 32-bit floats into sixteen packed 16-bit values of type T.
//
// Each 32-bit lane of `in` is reinterpreted as a float, multiplied by MULT and cast to T.
// Non-finite or negative products are stored as 0. Bits 0..15 of the resulting T are
// copied into the matching 16-bit lane of the output.
// NOTE(review): only 16 bits of T are copied, so T is presumably a 16-bit wide type.
template <class T, int MULT> ap_uint<256> convert(const ap_uint<512> &in) {
//#pragma HLS INLINE
    ap_uint<256> packed;
    for (int lane = 0; lane < 16; lane++) {
        float_uint32 raw{};
        raw.u = in(lane * 32 + 31, lane * 32);
        const float scaled = raw.f * MULT;

        T quantized;
        if (hls::isfinite(scaled) && !(scaled < 0.0))
            quantized = static_cast<T>(scaled);
        else
            quantized = 0;

        // Bit-by-bit copy into the 16-bit output lane.
        for (int bit = 0; bit < 16; bit++)
            packed[lane * 16 + bit] = quantized[bit];
    }
    return packed;
}
// Collapses thirty-two 32-bit words into a 32-bit flag vector: after splitting `in`
// with unpack32, bit w of the result is 1 when word w is non-zero and 0 otherwise.
ap_uint<32> pxl_mask_reduce(const ap_uint<1024> &in) {
#pragma HLS INLINE
    ap_uint<32> words[32];
    unpack32(in, words);

    ap_uint<32> flags = 0;
    for (int w = 0; w < 32; w++) {
        // Bits start cleared, so only non-zero words need to set their flag.
        if (words[w] != 0)
            flags[w] = 1;
    }
    return flags;
}
// Streams one module of 32-bit float data from the host, converts it to 16-bit values
// of type T (scaled by MULT, see convert()) and stores it through the two HBM ports.
//
// Every iteration consumes two 512-bit beats (16 floats each): the first beat is
// converted and written to d_hbm_p0[offset_hbm_0 + i], the second one to
// d_hbm_p1[offset_hbm_1 + i]. RAW_MODULE_SIZE * sizeof(int16_t) / 64 iterations are
// executed; offsets are expressed in 256-bit words.
template <class T, int MULT = 1>
void read_module_float(ap_uint<256> *d_hbm_p0,
                       ap_uint<256> *d_hbm_p1,
                       hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
                       size_t offset_hbm_0,
                       size_t offset_hbm_1) {
#pragma HLS INLINE OFF
    // Unsigned trip count avoids comparing a signed index against a size_t bound.
    const size_t word_count = RAW_MODULE_SIZE * sizeof(int16_t) / 64;
    for (size_t i = 0; i < word_count; i++) {
#pragma HLS PIPELINE II=2
        ap_axiu<512, 1, 1, 1> data_packet;

        host_memory_in >> data_packet;
        d_hbm_p0[offset_hbm_0 + i] = convert<T, MULT>(data_packet.data);

        host_memory_in >> data_packet;
        d_hbm_p1[offset_hbm_1 + i] = convert<T, MULT>(data_packet.data);
    }
}
// Top-level kernel: loads one module worth of calibration data from host memory into
// HBM (through d_hbm_p0 / d_hbm_p1) or into the pixel mask memory.
//
// Sequence:
//   1. The host buffer address is looked up in dma_address_table[config.handle].
//   2. One 512-bit block located at offsetof(DeviceOutput, module_statistics) inside that
//      buffer is fetched; it provides the module number and the destination selector.
//   3. Depending on the destination, the payload at the start of the host buffer is
//      streamed in and stored. Destinations transferred with sizeof(float) per pixel are
//      converted to 16-bit values by read_module_float(); the 16-bit ones are copied
//      as-is by read_module(); the pixel mask is reduced to one bit per 32-bit input word.
//
// HBM layout: the destination selects a pair of regions, region k starting at
// k * hbm_size_bytes / 32 (in 256-bit words); inside a region each module occupies
// RAW_MODULE_SIZE * sizeof(int16_t) / 64 words.
//
// Returns:
//   LOAD_CALIBRATION_OK            - data stored
//   LOAD_CALIBRATION_ERR_HOST_ADDR - DMA address table entry is zero
//   LOAD_CALIBRATION_ERR_MODULE    - module number does not fit the target memory
//   LOAD_CALIBRATION_ERR_DEST      - unknown destination selector
int load_calibration(ap_uint<256> *d_hbm_p0,
                     ap_uint<256> *d_hbm_p1,
                     uint32_t pixel_mask[MAX_MODULES_FPGA*RAW_MODULE_SIZE/32],
                     const LoadCalibrationConfig &config,
                     ap_uint<32> hbm_size_bytes,
                     hls::stream<axis_datamover_ctrl> &datamover_in_cmd,
                     hls::stream<ap_axiu<512,1,1,1> > &host_memory_in,
                     const uint64_t *dma_address_table) {
#pragma HLS INTERFACE mode=s_axilite port=return
#pragma HLS INTERFACE register both axis port=datamover_in_cmd
#pragma HLS INTERFACE register both axis port=host_memory_in
#pragma HLS INTERFACE mode=s_axilite port=config
#pragma HLS INTERFACE mode=ap_none port=hbm_size_bytes
#pragma HLS INTERFACE mode=m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE mode=m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE mode=m_axi port=dma_address_table bundle=dma_address_table depth=65536 offset=off \
max_read_burst_length=2 max_write_burst_length=2 latency=10 num_write_outstanding=1 num_read_outstanding=1
#pragma HLS INTERFACE mode=m_axi port=pixel_mask bundle=pixel_mask depth=65536 offset=off \
max_read_burst_length=2 max_write_burst_length=4 latency=4 num_write_outstanding=2 num_read_outstanding=1
    uint64_t mem_addr = dma_address_table[config.handle];
    if (mem_addr == 0)
        return LOAD_CALIBRATION_ERR_HOST_ADDR;

    // Fetch the block holding module number and destination (read_config consumes
    // exactly one 512-bit beat from the stream).
    setup_datamover(datamover_in_cmd, mem_addr + offsetof(DeviceOutput, module_statistics), 64);

    ap_uint<16> module_id;
    ap_uint<32> destination;
    read_config(host_memory_in, destination, module_id);

    // A region of hbm_size_bytes holds 2 * hbm_size_bytes / (RAW_MODULE_SIZE * sizeof(int16_t))
    // modules, so valid module numbers are strictly below that count. Accepting
    // module_id == count would place the data at the start of the following region.
    if (module_id >= 2 * hbm_size_bytes / (RAW_MODULE_SIZE * sizeof(int16_t)))
        return LOAD_CALIBRATION_ERR_MODULE;

    // Offset of this module inside any HBM region, in 256-bit words.
    const size_t module_words = module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;

    size_t offset_hbm_0;
    size_t offset_hbm_1;

    switch (destination) {
        case LOAD_CALIBRATION_DEST_GAIN_G0:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 0 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 1 * hbm_size_bytes / 32 + module_words;
            read_module_float<gainG0_t, GAIN_G0_MULTIPLIER>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_GAIN_G1:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 2 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 3 * hbm_size_bytes / 32 + module_words;
            read_module_float<gainG1_t, GAIN_G1_MULTIPLIER>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_GAIN_G2:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 4 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 5 * hbm_size_bytes / 32 + module_words;
            read_module_float<gainG2_t, GAIN_G2_MULTIPLIER>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PEDESTAL_G0:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 6 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 7 * hbm_size_bytes / 32 + module_words;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PEDESTAL_G1:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 8 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 9 * hbm_size_bytes / 32 + module_words;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PEDESTAL_G2:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 10 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 11 * hbm_size_bytes / 32 + module_words;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_INTEGRATION_MAP:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 12 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 13 * hbm_size_bytes / 32 + module_words;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_INTEGRATION_WEIGHTS:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 14 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 15 * hbm_size_bytes / 32 + module_words;
            read_module_float<integration_factor_t>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_SPOT_FINDER_RES_MAP:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 16 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 17 * hbm_size_bytes / 32 + module_words;
            read_module_float<xray_d_t>(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_ROI_CALC:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(uint16_t));
            offset_hbm_0 = 18 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 19 * hbm_size_bytes / 32 + module_words;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_FRAME_GEN:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 20 * hbm_size_bytes / 32 + module_words;
            offset_hbm_1 = 21 * hbm_size_bytes / 32 + module_words;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        case LOAD_CALIBRATION_DEST_PXL_MASK:
            // pixel_mask only has room for MAX_MODULES_FPGA modules. Reject larger module
            // numbers before requesting the payload, so nothing is left in the stream.
            if (module_id >= MAX_MODULES_FPGA)
                return LOAD_CALIBRATION_ERR_MODULE;
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int32_t));
            // Two 512-bit beats (32 input words of 32 bits) yield one 32-bit mask word.
            for (int i = 0; i < RAW_MODULE_SIZE * sizeof(int16_t) / 64; i++) {
#pragma HLS PIPELINE II=2
                ap_axiu<512, 1, 1, 1> data_packet_0;
                ap_axiu<512, 1, 1, 1> data_packet_1;
                host_memory_in >> data_packet_0;
                host_memory_in >> data_packet_1;
                // First beat ends up in the lower 512 bits of the concatenation.
                ap_uint<1024> tmp = (data_packet_1.data, data_packet_0.data);
                pixel_mask[module_id * RAW_MODULE_SIZE / 32 + i] = pxl_mask_reduce(tmp);
            }
            break;
        default:
            return LOAD_CALIBRATION_ERR_DEST;
    }
    return LOAD_CALIBRATION_OK;
}