// Jungfraujoch/fpga/hls/save_to_hbm.cpp

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: CERN-OHL-S-2.0

#include "hls_jfjoch.h"

// Turns a stream of per-packet descriptors into data mover commands and per-frame
// completion records.
//
// For every descriptor read from addr_in (until one with `last` set is received) the function:
//   1. picks one of two completion slots per module, indexed by module number and
//      frame-number parity;
//   2. if the slot currently tracks a different frame number: sends the slot's old content to
//      m_axis_completion (only when its packet mask is non-zero), re-initialises the slot from
//      the descriptor and takes a fresh handle from s_axis_free_handles;
//      otherwise: ORs the packet into the slot's packet mask and increases its packet count;
//   3. calls setup_datamover() once for each of the three command streams.
// After the terminating descriptor, every slot with a non-zero packet mask is sent out,
// followed by one completion with `last` set; then s_axis_free_handles is read until
// the value UINT16_MAX is seen.
//
// Parameters:
//   addr_in              - input descriptors; fields read here: frame_number, module, eth_packet,
//                          packet_length, timestamp, exptime, debug, bunchid, detector_type, last.
//   m_axis_completion    - output completion records.
//   s_axis_free_handles  - input stream of 16-bit handles; one is consumed whenever a slot
//                          starts tracking a new frame. UINT16_MAX ends the final drain loop.
//   datamover_{0,1,2}_cmd - command streams handed to setup_datamover().
//   idle                 - output flag: written 1 on entry, 0 once the slots are initialised,
//                          and 1 again just before returning.
//   hbm_size_bytes       - multiplier for the three base addresses (22x, 24x, 26x) and the extra
//                          offset applied for odd handles.
//                          NOTE(review): presumably the size of one HBM region - confirm.
void save_to_hbm(hls::stream<axis_addr> &addr_in,
                 hls::stream<axis_completion > &m_axis_completion,
                 hls::stream<ap_uint<16>> &s_axis_free_handles,
                 hls::stream<axis_datamover_ctrl> &datamover_0_cmd,
                 hls::stream<axis_datamover_ctrl> &datamover_1_cmd,
                 hls::stream<axis_datamover_ctrl> &datamover_2_cmd,
                 volatile ap_uint<1> &idle,
                 ap_uint<32> hbm_size_bytes) {
// Interface: no block-level control port; all streams are registered AXI-Stream ports;
// hbm_size_bytes and idle are plain wires (ap_none), idle being registered.
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE register both axis port=addr_in
#pragma HLS INTERFACE register both axis port=m_axis_completion
#pragma HLS INTERFACE register both axis port=s_axis_free_handles
#pragma HLS INTERFACE register both axis port=datamover_0_cmd
#pragma HLS INTERFACE register both axis port=datamover_1_cmd
#pragma HLS INTERFACE register both axis port=datamover_2_cmd
#pragma HLS INTERFACE mode=ap_none port=hbm_size_bytes
#pragma HLS INTERFACE register ap_none port=idle

    idle = 1;

    // Two completion slots per module (even/odd frame number).
    axis_completion cmpl[MAX_MODULES_FPGA*2];

    // Mark every slot as empty: a frame number that will not match the first real frame
    // and a zero packet mask, so nothing is emitted when the slot is first replaced.
    for (int i = 0; i < MAX_MODULES_FPGA*2; i++) {
#pragma HLS UNROLL
        cmpl[i].frame_number = UINT64_MAX;
        cmpl[i].packet_mask = 0;
        cmpl[i].last = 0;
    }

    idle = 0;

    // Base addresses used for the three data movers: 22x, 24x and 26x hbm_size_bytes.
    size_t offset_hbm_0 = 22 * hbm_size_bytes;
    size_t offset_hbm_1 = 24 * hbm_size_bytes;
    size_t offset_hbm_2 = 26 * hbm_size_bytes;

    // Prime the loop with the first descriptor; the loop reads the next one at its end.
    axis_addr addr;
    addr_in >> addr;

Loop_good_packet:
    while (!addr.last) {
        // Process one UDP packet per iteration
#pragma HLS PIPELINE II=4
        ap_uint<64> frame_number = addr.frame_number;
        ap_uint<5> module_number = addr.module;
        ap_uint<9> eth_packet = addr.eth_packet;
        // Slot index: two consecutive slots per module, selected by frame-number parity.
        // NOTE(review): addr.module is not range-checked against MAX_MODULES_FPGA here;
        // confirm the upstream stage guarantees it, otherwise cmpl[] is indexed out of range.
        ap_uint<6> id = module_number * 2 + (frame_number % 2);
        ap_uint<16> curr_handle;
        ap_uint<8> packet_length = addr.packet_length;

        if (cmpl[id].frame_number != frame_number) {
            // Slot is tracking another frame: flush it (if it recorded any packet)
            // and start over for the new frame.
            if (cmpl[id].packet_mask != 0)
                m_axis_completion << cmpl[id];
            cmpl[id].module = addr.module;
            cmpl[id].frame_number = addr.frame_number;
            cmpl[id].timestamp = addr.timestamp;
            cmpl[id].exptime = addr.exptime;
            cmpl[id].debug = addr.debug;
            cmpl[id].bunchid = addr.bunchid;
            cmpl[id].detector_type = addr.detector_type;
            cmpl[id].last = 0;
            cmpl[id].ignore = 0;
            // A packet of length 128 / 64 / 32 occupies 4 / 2 / 1 consecutive mask bits,
            // starting at bit (4 / 2 / 1) * eth_packet.
            // NOTE(review): for any other packet_length, packet_mask and packet_count are not
            // assigned and keep the values left by the frame previously held in this slot.
            if (packet_length == 128) {
                cmpl[id].packet_mask = (ap_uint<512>(1) << (4 * eth_packet))
                                       | (ap_uint<512>(1) << (4 * eth_packet + 1))
                                       | (ap_uint<512>(1) << (4 * eth_packet + 2))
                                       | (ap_uint<512>(1) << (4 * eth_packet + 3));
                cmpl[id].packet_count = 4;
            } else if (packet_length == 64) {
                cmpl[id].packet_mask = (ap_uint<512>(1) << (2 * eth_packet))
                                       | (ap_uint<512>(1) << (2 * eth_packet + 1));
                cmpl[id].packet_count = 2;
            } else if (packet_length == 32) {
                cmpl[id].packet_mask = (ap_uint<512>(1) << eth_packet);
                cmpl[id].packet_count = 1;
            }
            cmpl[id].pedestal = 0;
            // A new frame in this slot consumes one handle from the free-handle stream.
            curr_handle = s_axis_free_handles.read();
            cmpl[id].handle = curr_handle;
        } else {
            // Same frame as already tracked: accumulate this packet into mask and count
            // (same bit layout as in the branch above).
            if (addr.packet_length == 128) {
                cmpl[id].packet_mask |= (ap_uint<512>(1) << (4 * eth_packet))
                                        | (ap_uint<512>(1) << (4 * eth_packet + 1))
                                        | (ap_uint<512>(1) << (4 * eth_packet + 2))
                                        | (ap_uint<512>(1) << (4 * eth_packet + 3));
                cmpl[id].packet_count += 4;
            } else if (packet_length == 64) {
                cmpl[id].packet_mask |= (ap_uint<512>(1) << (2 * eth_packet))
                                        | (ap_uint<512>(1) << (2 * eth_packet + 1));
                cmpl[id].packet_count += 2;
            } else if (packet_length == 32) {
                cmpl[id].packet_mask |= (ap_uint<512>(1) << eth_packet);
                cmpl[id].packet_count += 1;
            }
        }

        // Handles 2k and 2k+1 share the same offset k * (RAW_MODULE_SIZE * sizeof(uint16_t) / 2);
        // the odd one is additionally shifted by hbm_size_bytes.
        size_t offset = (cmpl[id].handle / 2) * (RAW_MODULE_SIZE * sizeof(uint16_t) / 2);
        if (cmpl[id].handle % 2 == 1)
            offset += hbm_size_bytes;

        // One command per data mover; all three use the same handle offset and the same
        // position within it (eth_packet * packet_length * 32), only the base differs.
        // NOTE(review): the arguments are presumably (command stream, byte address, byte count) -
        // confirm against the definition of setup_datamover().
        setup_datamover(datamover_0_cmd, offset_hbm_0 + offset + eth_packet * packet_length * 32,
                        packet_length * 32);
        setup_datamover(datamover_1_cmd, offset_hbm_1 + offset + eth_packet * packet_length * 32,
                        packet_length * 32);
        setup_datamover(datamover_2_cmd, offset_hbm_2 + offset + eth_packet * packet_length * 32,
                        packet_length * 32);

        addr_in >> addr;
    }

    // Flush whatever is still held in the slots. For each module the slot with the lower
    // frame number is sent first; slots with an empty packet mask are skipped.
    for (ap_uint<8> m = 0; m < MAX_MODULES_FPGA; m++) {
#pragma HLS PIPELINE II=32
        if (cmpl[2 * m].frame_number < cmpl[2 * m + 1].frame_number) {
            if (cmpl[2 * m].packet_mask != 0)
                m_axis_completion << cmpl[2 * m];
            if (cmpl[2 * m + 1].packet_mask != 0)
                m_axis_completion << cmpl[2 * m + 1];
        } else {
            if (cmpl[2 * m + 1].packet_mask != 0)
                m_axis_completion << cmpl[2 * m + 1];
            if (cmpl[2 * m].packet_mask != 0)
                m_axis_completion << cmpl[2 * m];
        }
    }

    // Terminating completion: only `last` is set explicitly.
    m_axis_completion << axis_completion{.last = 1};

    // Drain the free-handle stream until the UINT16_MAX marker is read.
    ap_uint<16> tmp = s_axis_free_handles.read();
    while (tmp != UINT16_MAX)
        tmp = s_axis_free_handles.read();

    idle = 1;
}