// Copyright (2019-2022) Paul Scherrer Institute // SPDX-License-Identifier: GPL-3.0-or-later #include "HLSSimulatedDevice.h" #include #include #include "../fpga/hls/datamover_model.h" #include "../fpga/hls/hls_jfjoch.h" uint16_t checksum(const uint16_t *addr, size_t count) { /* Compute Internet Checksum for "count" bytes * beginning at location "addr". */ long sum = 0; for (int i = 0; i < count / 2; i++) sum += addr[i]; /* Add left-over byte, if any */ if (count % 2 == 1) sum += ((uint8_t *) addr)[count / 2]; /* Fold 32-bit sum to 16 bits */ while (sum>>16) sum = (sum & 0xffff) + (sum >> 16); return ~sum; } HLSSimulatedDevice::HLSSimulatedDevice(uint16_t data_stream, size_t in_frame_buffer_size_modules, int16_t numa_node) : FPGAAcquisitionDevice(data_stream), datamover_in(Direction::Input), datamover_out(Direction::Output, nullptr, 256), idle(true), hbm(hbm_if_size / 32 * hbm_if_count) { mac_addr = 0xCCAA11223344; ipv4_addr = 0x0132010A; max_modules = MAX_MODULES_FPGA; MapBuffersStandard(in_frame_buffer_size_modules, (3 + 3 * 16) * max_modules + 2, numa_node); } void HLSSimulatedDevice::CreateFinalPacket(const DiffractionExperiment& experiment) { CreatePacketJF(experiment, UINT64_MAX, 0, 0, nullptr, false); } void HLSSimulatedDevice::SendPacket(char *buffer, int len, uint8_t user) { auto obuff = (ap_uint<512> *)buffer; for (int i = 0; i < (len + 63) / 64; i++) { packet_512_t packet_in; if (i == (len + 63) / 64 - 1) packet_in.last = 1; else packet_in.last = 0; packet_in.keep = 0xFFFFFFFFFFFFFFFF; packet_in.user = user; packet_in.data = obuff[i]; din_eth.write(packet_in); } } void HLSSimulatedDevice::CreatePacketJF(const DiffractionExperiment& experiment, uint64_t frame_number, uint32_t eth_packet, uint32_t module_number, const uint16_t *data, bool trigger, int8_t adjust_axis, uint8_t user) { char buff[256*64]; memset(buff, 0, 256*64); auto packet = (jf_raw_packet *)buff; packet->ether_type = htons(0x0800); packet->sour_mac[0] = 0x00; // module 0 uint64_t tmp_mac = mac_addr; for (int i = 0; i < 6; i++) packet->dest_mac[i] = (tmp_mac >> (8*i)) % 256; uint32_t half_module = 2 * module_number | ((eth_packet >= 64) ? 1 : 0); packet->ipv4_header_h = htons(0x4500); // Big endian in IP header! packet->ipv4_header_total_length = htons(8268); // Big endian in IP header! packet->ipv4_header_dest_ip = ipv4_addr; packet->ipv4_header_sour_ip = experiment.GetSrcIPv4Address(data_stream, half_module); packet->ipv4_header_ttl_protocol = htons(0x0011); packet->ipv4_header_checksum = checksum( (uint16_t *) &packet->ipv4_header_h, 20); // checksum is already in network order packet->udp_dest_port = htons(GetUDPPort()); // module number packet->udp_sour_port = htons(0xDFAC); packet->udp_length = htons(8248); // JF headers are little endian packet->jf.timestamp = 0xABCDEF0000FEDCBAL; packet->jf.bunchid = 0x1234567898765431L; packet->jf.xCoord = half_module; packet->jf.framenum = frame_number; packet->jf.packetnum = eth_packet % 64; if (trigger) packet->jf.debug = 1<<31; if (data != nullptr) { for (int i = 0; i < 4096; i++) packet->jf.data[i] = data[i]; } packet->udp_checksum = htons(checksum( (uint16_t *) (buff+42), 8192+48)); SendPacket(buff, (130+adjust_axis)*64, user); } void HLSSimulatedDevice::CreatePackets(const DiffractionExperiment& experiment, uint64_t frame_number_0, uint64_t frames, uint32_t module_number, const uint16_t *data, bool trigger, int8_t adjust_axis, uint8_t user) { for (uint64_t i = 0; i < frames; i++) { for (int j = 0; j < 128; j++) CreatePacketJF(experiment, frame_number_0 + i, j, module_number, data + (i * 128 + j) * 4096, trigger, adjust_axis, user); } } AXI_STREAM & HLSSimulatedDevice::OutputStream() { return dout_eth; } void HLSSimulatedDevice::HW_ReadActionRegister(ActionConfig *job) { memcpy(job, &cfg, sizeof(ActionConfig)); } void HLSSimulatedDevice::HW_WriteActionRegister(const ActionConfig *job) { memcpy(&cfg, job, sizeof(ActionConfig)); } void HLSSimulatedDevice::FPGA_StartAction(const DiffractionExperiment &experiment) { if (action_thread.joinable()) action_thread.join(); run_data_collection = 1; cancel_data_collection = 0; idle = false; if (experiment.IsUsingInternalPacketGen()) { frame_generator(din_eth, reinterpret_cast *>(internal_pkt_gen_frame.data()), experiment.GetFrameNum() + DELAY_FRAMES_STOP_AND_QUIT + 1, experiment.GetModulesNum(data_stream), mac_addr, mac_addr, ipv4_addr, ipv4_addr, INT_PKT_GEN_BUNCHID, INT_PKT_GEN_EXPTTIME, INT_PKT_GEN_DEBUG); } action_thread = std::thread(&HLSSimulatedDevice::HLSMainThread, this ); } void HLSSimulatedDevice::FPGA_EndAction() { if (action_thread.joinable()) action_thread.join(); } HLSSimulatedDevice::~HLSSimulatedDevice() { if (action_thread.joinable()) action_thread.join(); } bool HLSSimulatedDevice::HW_ReadMailbox(uint32_t values[16]) { if (completion_stream.size() < 16) return false; for (int i = 0; i < 16; i++) values[i] = completion_stream.read(); return true; } void HLSSimulatedDevice::Cancel() { cancel_data_collection = 1; } bool HLSSimulatedDevice::HW_IsIdle() const { return idle && datamover_out.IsIdle(); } bool HLSSimulatedDevice::HW_SendWorkRequest(uint32_t handle) { uint64_t address = (handle == UINT32_MAX) ? 0 : (uint64_t) buffer_device.at(handle); uint32_t parity = (std::bitset<32>(handle).count() + std::bitset<64>(address).count()) % 2; work_request_stream.write(handle); work_request_stream.write(address >> 32); work_request_stream.write(address & UINT32_MAX); work_request_stream.write(parity); return true; } void HLSSimulatedDevice::HLSMainThread() { uint64_t counter_hbm; uint64_t counter_host; uint64_t eth_packets; uint64_t icmp_packets; uint64_t udp_packets; uint64_t sls_packets; uint32_t udp_len_err; uint32_t udp_eth_err; ap_uint<1> clear_counters = 0; uint64_t packets_processed; std::vector hls_cores; STREAM_512 ip1, udp1, udp2, icmp1, arp1; STREAM_512 raw0; STREAM_512 raw1; STREAM_512 raw2; STREAM_512 raw3; STREAM_512 converted_1; STREAM_512 converted_2; hls::stream addr0; hls::stream addr1; hls::stream addr2; hls::stream addr3; hls::stream > udp_metadata; ap_uint<1> idle_data_collection; ap_uint<8> err_reg; while(!din_eth.empty()) ethernet(din_eth, ip1, arp1, mac_addr, eth_packets, clear_counters); while(!ip1.empty()) ipv4(ip1, udp1, icmp1, ipv4_addr); arp(arp1, dout_eth, mac_addr, ipv4_addr, 1, run_data_collection); while (!arp1.empty()) { arp(arp1, dout_eth, mac_addr, ipv4_addr, 1, run_data_collection); } // reset static counter arp(arp1, dout_eth, mac_addr, ipv4_addr, 0, run_data_collection); while(!icmp1.empty()) icmp(icmp1, dout_eth, icmp_packets, clear_counters); while (!udp1.empty()) udp(udp1, udp2, udp_metadata, udp_packets, clear_counters); while (!udp2.empty()) sls_detector(udp2, udp_metadata, raw0, addr0, sls_packets, udp_eth_err, udp_len_err, clear_counters); // 1. Parse incoming UDP packets idle_data_collection = 0; hls_cores.emplace_back([&] { while ((idle_data_collection == 0) || (!raw0.empty())) { data_collection_fsm(raw0, raw1, addr0, addr1, run_data_collection, cancel_data_collection, idle_data_collection, cfg.mode, cfg.one_over_energy, cfg.nframes, cfg.nmodules, cfg.nstorage_cells, hbm_if_size); run_data_collection = 0; } }); // Timer procedure - count how many times pedestal_corr/gain_corr is not accepting input (to help track down latency issues) hls_cores.emplace_back([&] { timer_host(raw1, raw2, counter_hbm); }); // 2. Apply pedestal & gain corrections hls_cores.emplace_back([&] { jf_conversion(raw2, converted_1, addr1, addr2, hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data(), hbm.data()); }); // Timer procedure - count how many times write_data is not accepting input (to help track down latency issues) hls_cores.emplace_back([&] { timer_host(converted_1, converted_2, counter_host); }); // 3. Prepare data to write to host memory hls_cores.emplace_back([&] { host_writer(converted_2, addr2, datamover_out.GetDataStream(), datamover_out.GetCtrlStream(), work_request_stream, completion_stream, packets_processed, host_writer_idle, err_reg); }); for (auto &i : hls_cores) i.join(); if (!din_eth.empty()) throw std::runtime_error("din_eth queue not empty"); if (!addr1.empty()) throw std::runtime_error("Addr1 queue not empty"); if (!addr2.empty()) throw std::runtime_error("Addr2 queue not empty"); if (!addr3.empty()) throw std::runtime_error("Addr3 queue not empty"); if (!raw1.empty()) throw std::runtime_error("Raw1 queue not empty"); if (!raw2.empty()) throw std::runtime_error("Raw2 queue not empty"); if (!raw3.empty()) throw std::runtime_error("Raw3 queue not empty"); if (!converted_1.empty()) throw std::runtime_error("Converted_1 queue not empty"); if (!converted_2.empty()) throw std::runtime_error("Converted_2 queue not empty"); if (!datamover_in.GetDataStream().empty()) throw std::runtime_error("Datamover queue is not empty"); if (err_reg != 0) throw std::runtime_error("Error reg for frame_statistics not zero, val=" + std::to_string(err_reg)); while (!datamover_out.IsIdle()) std::this_thread::sleep_for(std::chrono::milliseconds(100)); if (logger) logger->Info("Packets Eth {} UDP {} SLS {} Proc {}", eth_packets, udp_packets, sls_packets, packets_processed); idle = true; } void HLSSimulatedDevice::HW_GetStatus(ActionStatus *status) const { memset(status, 0, sizeof(ActionStatus)); status->ctrl_reg = ap_uint<1>(host_writer_idle) ? (1 << 4) : 0; status->modules_internal_packet_generator = 1; status->max_modules = max_modules; status->hbm_size_bytes = hbm_if_size; } void HLSSimulatedDevice::HW_LoadCalibration(uint32_t modules, uint32_t storage_cells) { if (logger) logger->Info("Load calibration start"); auto in_mem_location32 = (uint32_t *) calibration_addr_bram; for (int i = 0; i < modules * (3 + 3 * storage_cells); i++) { in_mem_location32[2 * i ] = ((uint64_t) buffer_device[i]) & UINT32_MAX; in_mem_location32[2 * i + 1] = ((uint64_t) buffer_device[i]) >> 32; } load_calibration(hbm.data(), hbm.data(), modules, storage_cells, hbm_if_size, datamover_in.GetCtrlStream(), datamover_in.GetDataStream(), calibration_addr_bram); if (logger) logger->Info("Load calibration done"); if (!datamover_in.GetDataStream().empty()) throw std::runtime_error("Datamover queue is not empty"); }