// Copyright (2019-2022) Paul Scherrer Institute // SPDX-License-Identifier: CERN-OHL-S-2.0 or GPL-3.0-or-later #include "hls_jfjoch.h" #ifndef __SYNTHESIS__ #include #endif #define PACKET_SIZE 8192 inline void write_completion(hls::stream > &m_axis_completion, const ap_uint<32> &handle, const ap_uint<8> &module_number, const ap_uint<64> &frame_num, const ap_uint<256> &packet_mask, const ap_uint<16> &packet_count, const ap_uint<32> &debug, const ap_uint<64> ×tamp, const ap_uint<64> &bunchid, const ap_uint<32> &exptime) { #pragma HLS INLINE ap_uint<1> all_packets_ok = packet_mask.and_reduce(); ap_uint<1> any_packets_received = packet_mask.or_reduce(); ap_uint<8> status = 0; status[0] = all_packets_ok; status[1] = any_packets_received; ap_uint<128> tmp = (handle, packet_count, status, module_number, frame_num); status[7] = tmp.xor_reduce(); // ensure completion has even parity if (handle != HANDLE_SKIP_FRAME) { m_axis_completion << handle; m_axis_completion << (packet_count, status, module_number); m_axis_completion << frame_num(63, 32); m_axis_completion << frame_num(31, 0); m_axis_completion << timestamp(63,32); m_axis_completion << timestamp(31,0); m_axis_completion << bunchid(63,32); m_axis_completion << bunchid(31,0); m_axis_completion << exptime; m_axis_completion << debug; m_axis_completion << 0; m_axis_completion << 0; m_axis_completion << packet_mask(127,96); m_axis_completion << packet_mask( 95,64); m_axis_completion << packet_mask( 63,32); m_axis_completion << packet_mask( 31, 0); } } inline ap_uint<1> read_request(hls::stream > &s_axis_work_request, ap_uint<32> &handle, ap_uint<64> &address) { #pragma HLS INLINE ap_uint<32> tmp1, tmp2, tmp3, tmp4; s_axis_work_request >> tmp1; s_axis_work_request >> tmp2; s_axis_work_request >> tmp3; s_axis_work_request >> tmp4; handle = tmp1; address = (tmp2, tmp3); ap_uint<128> tmp_all = (tmp1, tmp2, tmp3, tmp4); if (tmp_all.xor_reduce() != 0) return 1; else return 0; } void host_writer(STREAM_512 &data_in, hls::stream > &addr_in, hls::stream > &host_memory_out, hls::stream &datamover_out_cmd, hls::stream > &s_axis_work_request, hls::stream > &m_axis_completion, volatile uint64_t &packets_processed, volatile ap_uint<1> &idle, ap_uint<8> &err_reg) { #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS INTERFACE register both axis port=data_in #pragma HLS INTERFACE register both axis port=host_memory_out #pragma HLS INTERFACE register both axis port=addr_in #pragma HLS INTERFACE register both axis port=datamover_out_cmd #pragma HLS INTERFACE register both axis port=m_axis_completion #pragma HLS INTERFACE register both axis port=s_axis_work_request #pragma HLS INTERFACE register ap_vld port=packets_processed #pragma HLS INTERFACE register ap_vld port=err_reg #pragma HLS INTERFACE register ap_none port=idle ap_uint<128> packet_mask[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=packet_mask core=RAM_1P ap_uint<16> packet_count[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=packet_count core=RAM_1P ap_uint<32> handle[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=handle core=RAM_1P ap_uint<64> curr_frame[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=curr_frame core=RAM_1P ap_uint<32> debug[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=debug core=RAM_1P ap_uint<64> timestamp[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=timestamp core=RAM_1P ap_uint<32> exptime[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=exptime core=RAM_1P ap_uint<64> jf_bunchid[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=jf_bunchid core=RAM_1P ap_uint<64> curr_offset[MAX_MODULES_FPGA*2]; #pragma HLS RESOURCE variable=curr_offset core=RAM_1P idle = 1; for (int i = 0; i < MAX_MODULES_FPGA*2; i++) { #pragma HLS UNROLL curr_frame[i] = UINT64_MAX; handle[i] = 0; packet_mask[i] = 0; packet_count[i] = 0; curr_offset[i] = 0; debug[i] = 0; timestamp[i] = 0; exptime[i] = 0; jf_bunchid[i] = 0; } ap_uint<32> req_handle; ap_uint<64> req_host_offset; while (data_in.empty()) { #pragma HLS PIPELINE II=4 if (!s_axis_work_request.empty()) read_request(s_axis_work_request, req_handle, req_host_offset); } ap_uint addr; addr_in >> addr; packet_512_t packet_in; data_in >> packet_in; ap_uint<32> data_collection_mode = ACT_REG_MODE(packet_in.data); ap_uint<1> mode_nonblocking = (data_collection_mode & MODE_NONBLOCKING_ON_WR) ? 1 : 0; ap_uint<8> internal_err_reg = 0; err_reg = internal_err_reg; write_completion(m_axis_completion, HANDLE_START, 0, 0, 0, 0, 0, 0, 0, 0); idle = 0; uint64_t total_counter = 0; packets_processed = 0; addr_in >> addr; ap_axiu<512,1,1,1> packet_out; packet_out.keep = UINT64_MAX; packet_out.strb = UINT64_MAX; packet_out.dest = 0; packet_out.id = 0; packet_out.user = 0; Loop_good_packet: while (!addr_last_flag(addr)) { // Process one UDP packet per iteration #pragma HLS PIPELINE II=128 ap_uint<64> frame_number = addr_frame_number(addr); ap_uint<4> module = addr_module(addr); ap_uint<7> eth_packet = addr_eth_packet(addr); ap_uint<5> id = module * 2 + (frame_number % 2); if (curr_frame[id] != frame_number) { if (packet_mask[id] != 0) { ap_uint<32> comp_handle = handle[id]; ap_uint<64> comp_frame = curr_frame[id]; ap_uint<256> comp_packet_mask = packet_mask[id]; ap_uint<16> comp_packet_count = packet_count[id]; ap_uint<32> comp_debug = debug[id]; ap_uint<64> comp_timestamp = timestamp[id]; ap_uint<64> comp_bunchid = jf_bunchid[id]; ap_uint<32> comp_exptime = exptime[id]; write_completion(m_axis_completion, comp_handle, module, comp_frame, comp_packet_mask, comp_packet_count, comp_debug, comp_timestamp, comp_bunchid, comp_exptime); } if (s_axis_work_request.empty() && mode_nonblocking) { req_handle = HANDLE_SKIP_FRAME; req_host_offset = 0; } else { if (read_request(s_axis_work_request, req_handle, req_host_offset)) internal_err_reg[2] = 1; } if (req_handle >= HANDLE_START) internal_err_reg[4] = 1; handle[id] = req_handle; curr_frame[id] = frame_number; curr_offset[id] = req_host_offset; debug[id] = addr_jf_debug(addr); timestamp[id] = addr_timestamp(addr); jf_bunchid[id] = addr_bunch_id(addr); exptime[id] = addr_exptime(addr); packet_mask[id] = ap_uint<128>(1) << eth_packet; packet_count[id] = 1; } else { packet_count[id]++; packet_mask[id] |= ap_uint<128>(1) << eth_packet; } if (handle[id] != HANDLE_SKIP_FRAME) { for (int i = 0; i < 128; i++) { data_in >> packet_in; packet_out.data = packet_in.data; packet_out.last = packet_in.last; host_memory_out << packet_out; } if (packet_in.last != 1) internal_err_reg[1] = 1; size_t out_frame_addr = curr_offset[id] + eth_packet * PACKET_SIZE; if (out_frame_addr % 128 != 0) internal_err_reg[0] = 1; if (curr_offset[id] == 0) internal_err_reg[3] = 1; packets_processed = ++total_counter; setup_datamover(datamover_out_cmd, out_frame_addr, PACKET_SIZE); } else { for (int i = 0; i < 128; i++) data_in >> packet_in; if (packet_in.last != 1) internal_err_reg[1] = 1; } addr_in >> addr; err_reg = internal_err_reg; } #ifndef __SYNTHESIS__ while (!host_memory_out.empty()) std::this_thread::sleep_for(std::chrono::milliseconds(100)); #endif for (ap_uint<8> m = 0; m < MAX_MODULES_FPGA * 2; m++) { #pragma HLS PIPELINE II=16 if (packet_mask[m] > 0) write_completion(m_axis_completion, handle[m], m / 2, curr_frame[m], packet_mask[m], packet_count[m], debug[m], timestamp[m], jf_bunchid[m], exptime[m]); } data_in >> packet_in; write_completion(m_axis_completion, HANDLE_END, 0, total_counter, 0, 0, 0, 0, 0, 0); idle = 1; }