Files
Jungfraujoch/receiver/jfjoch_action_test.cpp

226 lines
8.5 KiB
C++

// Copyright (2019-2023) Paul Scherrer Institute
#include <atomic>
#include <iostream>
#include "../acquisition_device/PCIExpressDevice.h"
#include "../acquisition_device/HLSSimulatedDevice.h"
#include "JFJochReceiverTest.h"
#include "../tests/FPGAUnitTest.h"
/// Print the command-line synopsis of the test program through @p logger.
///
/// The list mirrors the option string handed to getopt() in main(); the
/// -E and -H switches are accepted there and are therefore listed here too.
///
/// @param logger Logger that receives one Info line per usage line.
void print_usage(Logger &logger) {
    logger.Info("Usage ./jfjoch_action_test {<options>} <path to repository>");
    logger.Info("Options:");
    logger.Info(" -R raw");
    logger.Info(" -v verbose");
    logger.Info(" -S<num> number of summed frames");
    logger.Info(" -I use 32-bit integer");
    logger.Info(" -s<num> number of data streams (acquisition devices)");
    logger.Info(" -m<num> number of modules");
    logger.Info(" -i<num> number of images");
    logger.Info(" -N<num> number of image processing threads");
    logger.Info(" -P<txt> NUMA Policy (none|n2g2|n8g4|n8g4_hbm), none is default");
    logger.Info(" -D<path> use resonet deep learning model for resolution estimation - path to TorchScript");
    logger.Info(" -E use EIGER detector (default is JUNGFRAU)");
    logger.Info(" -H use HLS simulated device instead of PCIe device (single data stream only)");
    logger.Info(" -B<num> size of send buffer in MiB (default 2048)");
}
/// Entry point of the receiver action test.
///
/// Parses the command line, configures a DiffractionExperiment, creates the
/// acquisition devices (PCIe devices or a single HLS simulated device), runs
/// JFJochReceiverTest() on a worker thread while logging device status once
/// per second, and finally logs summary statistics.
///
/// @param argc Argument count; exactly one positional argument (path to the
///             repository) must follow the options.
/// @param argv Argument vector.
/// @return Does not return normally: the process exits with EXIT_SUCCESS when
///         JFJochReceiverTest() returned true, EXIT_FAILURE otherwise
///         (including usage errors).
int main(int argc, char **argv) {
    Logger logger("ActionTest");
    logger.Verbose(true);

    constexpr uint64_t clock_MHz = 200;

    // Defaults, overridable from the command line.
    uint16_t nstreams = 1;
    uint16_t nmodules = 1;
    uint16_t nsummation = 1;
    size_t nimages = 2;
    uint16_t nthreads = 64;
    bool verbose = false;
    std::string numa_policy_name;
    bool raw_data = false;
    bool force_32bit = false;
    std::string resonet_path;
    DetectorType detector_type = DetectorType::JUNGFRAU;
    bool hls_simulation = false;
    size_t send_buffer_size_MiB = 2048;

    if (argc == 1) {
        print_usage(logger);
        exit(EXIT_FAILURE);
    }

    int opt;
    while ((opt = getopt(argc, argv, "s:i:m:N:P:vRIS:D:EHB:")) != -1) {
        switch (opt) {
            case 'i':
                nimages = atol(optarg);
                break;
            case 'S':
                nsummation = atol(optarg);
                break;
            case 'm':
                nmodules = atol(optarg);
                break;
            case 's':
                nstreams = atol(optarg);
                break;
            case 'N':
                nthreads = atol(optarg);
                break;
            case 'v':
                verbose = true;
                break;
            case 'P':
                numa_policy_name = std::string(optarg);
                break;
            case 'R':
                raw_data = true;
                break;
            case 'I':
                force_32bit = true;
                break;
            case 'D':
                resonet_path = std::string(optarg);
                break;
            case 'E':
                detector_type = DetectorType::EIGER;
                break;
            case 'H':
                hls_simulation = true;
                break;
            case 'B':
                send_buffer_size_MiB = atol(optarg);
                break;
            default: /* '?' */
                print_usage(logger);
                exit(EXIT_FAILURE);
        }
    }

    // Exactly one positional argument (repository path) is expected.
    if (optind != argc - 1) {
        print_usage(logger);
        exit(EXIT_FAILURE);
    }

    DiffractionExperiment x(DetectorSetup(DetectorGeometry(nmodules, 2, 8, 36, true), detector_type));

    if (raw_data)
        x.Mode(DetectorMode::Raw);
    else
        x.Mode(DetectorMode::Conversion);

    x.ImagesPerTrigger(nimages).Summation(nsummation).PedestalG0Frames(0).UseInternalPacketGenerator(true).PhotonEnergy_keV(12.4).NumTriggers(1);
    x.MaskModuleEdges(false).MaskChipEdges(false).BeamX_pxl(x.GetXPixelsNum()/ 2.0).BeamY_pxl(x.GetYPixelsNum()/ 2.0).DetectorDistance_mm(100);
    x.Compression(CompressionAlgorithm::BSHUF_LZ4).DataStreams(nstreams);
    x.SetUnitCell(UnitCell{.a = 79, .b = 79, .c = 37, .alpha = 90.0, .beta = 90.0, .gamma = 90.0});

    if (force_32bit)
        x.FPGAOutputMode(FPGAPixelOutput::Int32);

    if (!resonet_path.empty())
        x.NeuralNetModelPath(resonet_path);

    logger.Info("Data streams {} Total modules {} Total images {} Threads {}", nstreams, nmodules, nimages, nthreads);

    // Device nodes in the order in which data streams are assigned to them.
    std::vector<std::string> dev_name = {
        "/dev/jfjoch0",
        "/dev/jfjoch2",
        "/dev/jfjoch1",
        "/dev/jfjoch3"
    };

    logger.Verbose(verbose);

    AcquisitionDeviceGroup aq_devices;

    // Load one raw module image from the repository and replicate it for every module.
    std::string image_path = std::string(argv[optind]) + "/tests/test_data/mod5_raw0.bin";
    std::vector<uint16_t> input(RAW_MODULE_SIZE * x.GetModulesNum(), 0);
    std::vector<uint16_t> module_image(RAW_MODULE_SIZE);
    LoadBinaryFile(image_path, module_image.data(), RAW_MODULE_SIZE);
    for (int m = 0; m < x.GetModulesNum(); m++)
        memcpy(input.data() + RAW_MODULE_SIZE * m, module_image.data(), RAW_MODULE_SIZE * sizeof(uint16_t));

    if (hls_simulation) {
        if (nstreams != 1) {
            logger.Error("HLS simulation can work with only one device");
            exit(EXIT_FAILURE);
        }
        auto device = std::make_unique<HLSSimulatedDevice>(0, 128);
        device->EnableLogging(&logger);
        device->SetIPv4Address(0x010a0a0a);
        aq_devices.Add(std::move(device));
    } else {
        if (nstreams > dev_name.size()) {
            logger.Error("Only {} data streams allowed on this platform", dev_name.size());
            exit(EXIT_FAILURE);
        }
        for (int i = 0; i < nstreams; i++) {
            auto device = std::make_unique<PCIExpressDevice>(i, dev_name[i]);
            device->EnableLogging(&logger);
            device->SetDefaultMAC();
            // Stream index goes into the top byte of the address value.
            device->SetIPv4Address((i << 24) + 0x010a0a0a);
            aq_devices.Add(std::move(device));
        }
    }

    // Completion flag shared between the worker thread and the monitoring loop.
    // std::atomic (not volatile) is required for well-defined cross-thread access.
    std::atomic<bool> done{false};
    JFJochReceiverOutput output;
    bool ret = false;

    std::thread run_thread([&] {
        try {
            ret = JFJochReceiverTest(output, logger, aq_devices, x, input, nthreads, numa_policy_name, send_buffer_size_MiB);
        } catch (const std::exception &e) {
            // Pass the message as an argument so braces in it are not parsed as format fields.
            logger.Error("{}", e.what());
            ret = false;
        }
        done = true;
    });

    // Log device status once per second until the worker reports completion.
    while (!done) {
        for (int i = 0; i < nstreams; i++) {
            auto coll_status = aq_devices[i].GetDataCollectionStatus();
            auto dev_status = aq_devices[i].GetDeviceStatus();
            // mA * mV / 1e6 -> W
            double power_3p3v = (dev_status.fpga_pcie_3p3V_I_mA * dev_status.fpga_pcie_3p3V_V_mV) / (1000.0 * 1000.0);
            double power_12v = (dev_status.fpga_pcie_12V_I_mA * dev_status.fpga_pcie_12V_V_mV) / (1000.0 * 1000.0);
            logger.Info("Device {}: Slowest packet: {:8d} Power: {:5.1f}+{:5.1f} W FPGA Temp: {:d} degC HBM Temp: {:d}/{:d} degC Stalls: {:15d}/{:15d}/{:15d}",
                        i, aq_devices[i].Counters().GetSlowestFrameNumber(), power_12v, power_3p3v,
                        dev_status.fpga_temp_C, dev_status.hbm_0_temp_C, dev_status.hbm_1_temp_C,
                        coll_status.pipeline_stalls_hbm, coll_status.pipeline_stalls_proc, coll_status.pipeline_stalls_host);
        }
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }

    // join() also makes the worker's writes to `ret` and `output` visible here.
    run_thread.join();

    double receiving_time = static_cast<double>(output.end_time_ms - output.start_time_ms)/1000.0;

    logger.Info("Efficiency: {:.2f}%", output.efficiency * 100.f);
    logger.Info("Max delay: {}",output.status.max_receive_delay);
    logger.Info("Compression factor: {}x", output.status.compressed_ratio);
    logger.Info("Receiving time: {} s", receiving_time);
    logger.Info("Frame rate: {} Hz", static_cast<double>(nimages)/receiving_time);
    logger.Info("Total throughput: {:.2f} GB/s",
                static_cast<double>(nsummation * nimages*x.GetModulesNum()*RAW_MODULE_SIZE*sizeof(uint16_t)) / (receiving_time * 1e9));
    logger.Info("");

    for (int i = 0; i < nstreams; i++) {
        auto coll_status = aq_devices[i].GetDataCollectionStatus();
        auto stalls_hbm = coll_status.pipeline_stalls_hbm;
        auto stalls_host = coll_status.pipeline_stalls_host;

        // Bytes transferred scaled by the clock, divided by a cycle count
        // (presumably 128*128 cycles per module frame plus HBM stalls - TODO confirm).
        const auto scaled_bytes = nimages * nsummation * x.GetModulesNum(i) * RAW_MODULE_SIZE * sizeof(uint16_t) * clock_MHz;
        const auto total_cycles = nimages * nsummation * x.GetModulesNum(i) * 128 * 128 + stalls_hbm;

        // Guard the integer division: zero images and zero stalls would otherwise divide by zero.
        uint64_t throughput_MBs = (total_cycles != 0) ? (scaled_bytes / total_cycles) : 0;
        double performance = static_cast<double>(throughput_MBs) / 1000;

        logger.Info("Device {}: stalls HBM: {} stalls host: {} est. performance: {:.2f} GB/s", i, stalls_hbm,
                    stalls_host, performance);
    }

    if (ret) {
        logger.Info("");
        logger.Info("Test properly executed! (check stall values manually)");
        exit(EXIT_SUCCESS);
    } else {
        logger.Info("Test finished with errors! (check stall values manually)");
        exit(EXIT_FAILURE);
    }
}