// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute
// SPDX-License-Identifier: CERN-OHL-S-2.0

// NOTE(review): in this copy of the file several angle-bracket template
// argument lists look like they were lost (e.g. `hls::stream > &`,
// `template ap_uint<256> convert`, `static_cast(x)`, and the arguments of
// `convert(...)` / `read_module_float(...)`). The code below is reproduced
// unchanged; confirm against the repository before building.

#include "hls_jfjoch.h"

// o(field): byte offset of `field` inside ModuleStatistics.
#define o(field) offsetof(ModuleStatistics, field)
// sf(msg, field, s): bit range of `msg` that is `s` bits wide and starts at the
// bit position corresponding to the byte offset of `field` in ModuleStatistics.
#define sf(msg, field, s) msg(o(field)*8 + s - 1, o(field)*8)

// Reads a single 512-bit packet from host_memory_in and extracts two 32-bit
// fields from it, located at the ModuleStatistics offsets of `module_number`
// and `load_calibration_destination`.
//   host_memory_in - input stream; exactly one packet is consumed
//   destination    - out: 32-bit value of load_calibration_destination
//   module_id      - out: value of module_number
// NOTE(review): the module_number slice is 32 bits wide while module_id is
// ap_uint<16>; the upper 16 bits are presumably dropped on assignment - confirm
// this is intended.
void read_config(hls::stream > &host_memory_in, ap_uint<32> &destination, ap_uint<16> &module_id) {
#pragma HLS INLINE OFF
    ap_axiu<512, 1, 1, 1> data_packet;
    host_memory_in >> data_packet;
    module_id = sf(data_packet.data, module_number, 32);
    destination = sf(data_packet.data, load_calibration_destination, 32);
}

// Copies one module worth of data from the stream into two HBM ports without
// modification.
// The loop runs RAW_MODULE_SIZE * sizeof(int16_t) / 64 times, reading one
// 512-bit packet per iteration; bits 255..0 are stored through d_hbm_p0 and
// bits 511..256 through d_hbm_p1.
//   d_hbm_p0, d_hbm_p1         - destination arrays of 256-bit words
//   host_memory_in             - input stream of 512-bit packets
//   offset_hbm_0, offset_hbm_1 - index of the first 256-bit word written
//                                through d_hbm_p0 / d_hbm_p1 respectively
void read_module(ap_uint<256> *d_hbm_p0, ap_uint<256> *d_hbm_p1, hls::stream > &host_memory_in, size_t offset_hbm_0, size_t offset_hbm_1) {
#pragma HLS INLINE OFF
    for (int i = 0; i < RAW_MODULE_SIZE * sizeof(int16_t) / 64; i++) {
#pragma HLS PIPELINE II=1
        ap_axiu<512, 1, 1, 1> data_packet;
        host_memory_in >> data_packet;
        // Lower half of the packet goes to port 0, upper half to port 1.
        d_hbm_p0[offset_hbm_0 + i] = data_packet.data(255, 0);
        d_hbm_p1[offset_hbm_1 + i] = data_packet.data(511, 256);
    }
}

// Converts the sixteen 32-bit lanes of `in` into sixteen 16-bit lanes.
// Each lane is reinterpreted as a float through float_uint32 (members `u` and
// `f`; presumably a union declared in the project header), multiplied by MULT
// and converted to T. Non-finite or negative products yield 0. The low 16 bits
// of the T value are copied into lane j of the result.
// T and MULT are presumably template parameters whose declaration is missing
// from this copy (see note at the top of the file). T must support bit
// indexing for bits 0..15.
template ap_uint<256> convert(const ap_uint<512> &in) {
    //#pragma HLS INLINE
    ap_uint<256> tmp;
    for (int j = 0; j < 16; j++) {
        float_uint32 conv{};
        conv.u = in(j * 32 + 31, j * 32);
        float x = conv.f * MULT;
        T g;
        // NaN, infinities and negative values are all replaced by zero.
        if (!hls::isfinite(x) || (x < 0.0))
            g = 0;
        else
            g = static_cast(x);
        // Bit-by-bit copy of the 16-bit result into lane j; over all j this
        // assigns every bit of tmp.
        for (int k = 0; k < 16; k++)
            tmp[j * 16 + k] = g[k];
    }
    return tmp;
}

// Reduces a 1024-bit word to a 32-bit flag word: `in` is split into 32 values
// by unpack32 (helper not visible here; presumably consecutive 32-bit words)
// and bit i of the result is 1 when value i is non-zero, otherwise 0.
ap_uint<32> pxl_mask_reduce(const ap_uint<1024> &in) {
#pragma HLS INLINE
    ap_uint<32> in_val[32];
    unpack32(in, in_val);
    ap_uint<32> out = 0;
    for (int i = 0; i < 32; i++)
        out[i] = (in_val[i] != 0) ? 1 : 0;
    return out;
}

// Float variant of read_module: same iteration count, but every iteration
// consumes two 512-bit packets. The first packet is passed through convert()
// and stored via d_hbm_p0, the second is converted and stored via d_hbm_p1,
// so twice as many packets are read as in read_module.
// Parameters have the same meaning as in read_module.
// NOTE(review): the local `tmp` is declared but never used.
template void read_module_float(ap_uint<256> *d_hbm_p0, ap_uint<256> *d_hbm_p1, hls::stream > &host_memory_in, size_t offset_hbm_0, size_t offset_hbm_1) {
#pragma HLS INLINE OFF
    for (int i = 0; i < RAW_MODULE_SIZE * sizeof(int16_t) / 64; i++) {
#pragma HLS PIPELINE II=2
        ap_axiu<512, 1, 1, 1> data_packet;
        ap_uint<256> tmp;
        host_memory_in >> data_packet;
        d_hbm_p0[offset_hbm_0 + i] = convert(data_packet.data);
        host_memory_in >> data_packet;
        d_hbm_p1[offset_hbm_1 + i] = convert(data_packet.data);
    }
}

// Top-level kernel: loads one module of calibration data from host memory into
// HBM or into the pixel mask array.
//
// Sequence:
//   1. Look up the host address in dma_address_table[config.handle].
//   2. Issue a 64-byte datamover command at
//      mem_addr + offsetof(DeviceOutput, module_statistics) and read the module
//      number and destination code from the resulting packet (read_config).
//   3. Depending on the destination, issue a second datamover command at
//      mem_addr for the payload and stream it to its target.
//
// HBM addressing: word indices are in units of 256-bit words. Destination k
// (in the order of the switch below) uses base index (2k) * hbm_size_bytes / 32
// for d_hbm_p0 and (2k + 1) * hbm_size_bytes / 32 for d_hbm_p1; within that,
// module_id selects a slot of RAW_MODULE_SIZE * sizeof(int16_t) / 64 words.
//
// Returns:
//   LOAD_CALIBRATION_ERR_HOST_ADDR - address table entry is 0
//   LOAD_CALIBRATION_ERR_MODULE    - module number exceeds the computed limit
//   LOAD_CALIBRATION_ERR_DEST      - destination code matches no case
//   LOAD_CALIBRATION_OK            - otherwise
//
// setup_datamover is defined elsewhere; it is presumably given the stream, a
// source address and a byte count - confirm against its declaration.
int load_calibration(ap_uint<256> *d_hbm_p0,
                     ap_uint<256> *d_hbm_p1,
                     uint32_t pixel_mask[MAX_MODULES_FPGA*RAW_MODULE_SIZE/32],
                     const LoadCalibrationConfig &config,
                     ap_uint<32> hbm_size_bytes,
                     hls::stream &datamover_in_cmd,
                     hls::stream > &host_memory_in,
                     const uint64_t *dma_address_table) {
#pragma HLS INTERFACE mode=s_axilite port=return
#pragma HLS INTERFACE register both axis port=datamover_in_cmd
#pragma HLS INTERFACE register both axis port=host_memory_in
#pragma HLS INTERFACE mode=s_axilite port=config
#pragma HLS INTERFACE mode=ap_none port=hbm_size_bytes
#pragma HLS INTERFACE mode=m_axi port=d_hbm_p0 bundle=d_hbm_p0 depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE mode=m_axi port=d_hbm_p1 bundle=d_hbm_p1 depth=512 offset=off \
        max_read_burst_length=2 max_write_burst_length=16 latency=120 num_write_outstanding=8 num_read_outstanding=2
#pragma HLS INTERFACE mode=m_axi port=dma_address_table bundle=dma_address_table depth=65536 offset=off \
        max_read_burst_length=2 max_write_burst_length=2 latency=10 num_write_outstanding=1 num_read_outstanding=1
#pragma HLS INTERFACE mode=m_axi port=pixel_mask bundle=pixel_mask depth=65536 offset=off \
        max_read_burst_length=2 max_write_burst_length=4 latency=4 num_write_outstanding=2 num_read_outstanding=1

    // A zero entry in the address table is treated as "no buffer for this handle".
    uint64_t mem_addr = dma_address_table[config.handle];
    if (mem_addr == 0)
        return LOAD_CALIBRATION_ERR_HOST_ADDR;

    // First transfer: 64 bytes starting at the module_statistics member of
    // DeviceOutput, which carries the module number and destination code.
    setup_datamover(datamover_in_cmd, mem_addr + offsetof(DeviceOutput, module_statistics), 64);

    ap_uint<16> module_id;
    ap_uint<32> destination;

    read_config(host_memory_in, destination, module_id);

    // NOTE(review): with the offsets computed below, a module_id equal to the
    // right-hand side gives a module offset of hbm_size_bytes / 32 words, i.e.
    // exactly the base of the next region; `>=` may be what is intended here.
    // NOTE(review): module_id is not compared with MAX_MODULES_FPGA, although
    // the declared extent of pixel_mask is expressed in terms of it - confirm
    // that this check is sufficient for the pixel mask destination.
    if (module_id > 2 * hbm_size_bytes / (RAW_MODULE_SIZE * sizeof(int16_t)))
        return LOAD_CALIBRATION_ERR_MODULE;

    size_t offset_hbm_0;
    size_t offset_hbm_1;

    // Every HBM case follows the same pattern: request the payload from
    // mem_addr (float-sized or 16-bit-sized per element), compute the two word
    // offsets, then stream with read_module_float (float input) or read_module
    // (16-bit input copied as is).
    switch (destination) {
        // Bases 0 and 1, float input.
        case LOAD_CALIBRATION_DEST_GAIN_G0:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 0 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 1 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module_float(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 2 and 3, float input.
        case LOAD_CALIBRATION_DEST_GAIN_G1:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 2 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 3 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module_float(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 4 and 5, float input.
        case LOAD_CALIBRATION_DEST_GAIN_G2:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 4 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 5 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module_float(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 6 and 7, 16-bit input.
        case LOAD_CALIBRATION_DEST_PEDESTAL_G0:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 6 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 7 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 8 and 9, 16-bit input.
        case LOAD_CALIBRATION_DEST_PEDESTAL_G1:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 8 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 9 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 10 and 11, 16-bit input.
        case LOAD_CALIBRATION_DEST_PEDESTAL_G2:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 10 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 11 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 12 and 13, 16-bit input.
        case LOAD_CALIBRATION_DEST_INTEGRATION_MAP:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 12 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 13 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 14 and 15, float input.
        case LOAD_CALIBRATION_DEST_INTEGRATION_WEIGHTS:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 14 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 15 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module_float(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 16 and 17, float input.
        case LOAD_CALIBRATION_DEST_SPOT_FINDER_RES_MAP:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(float));
            offset_hbm_0 = 16 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 17 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module_float(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 18 and 19, 16-bit input.
        case LOAD_CALIBRATION_DEST_ROI_CALC:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(uint16_t));
            offset_hbm_0 = 18 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 19 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Bases 20 and 21, 16-bit input.
        case LOAD_CALIBRATION_DEST_FRAME_GEN:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int16_t));
            offset_hbm_0 = 20 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            offset_hbm_1 = 21 * hbm_size_bytes / 32 + module_id * RAW_MODULE_SIZE * sizeof(int16_t) / 64;
            read_module(d_hbm_p0, d_hbm_p1, host_memory_in, offset_hbm_0, offset_hbm_1);
            break;
        // Pixel mask: not written to HBM. The payload is requested as
        // RAW_MODULE_SIZE 32-bit values; every iteration consumes two 512-bit
        // packets (32 values) and stores one 32-bit word of flags in pixel_mask.
        case LOAD_CALIBRATION_DEST_PXL_MASK:
            setup_datamover(datamover_in_cmd, mem_addr, RAW_MODULE_SIZE * sizeof(int32_t));
            for (int i = 0; i < RAW_MODULE_SIZE * sizeof(int16_t) / 64; i++) {
#pragma HLS PIPELINE II=2
                ap_axiu<512, 1, 1, 1> data_packet_0;
                ap_axiu<512, 1, 1, 1> data_packet_1;
                host_memory_in >> data_packet_0;
                host_memory_in >> data_packet_1;
                // Concatenation: the second packet forms the upper 512 bits.
                ap_uint<1024> tmp = (data_packet_1.data, data_packet_0.data);
                pixel_mask[module_id * RAW_MODULE_SIZE / 32 + i] = pxl_mask_reduce(tmp);
            }
            break;
        // Unknown destination: nothing beyond the 64-byte config is requested.
        default:
            return LOAD_CALIBRATION_ERR_DEST;
    }
    return LOAD_CALIBRATION_OK;
}