// Copyright (2019-2023) Paul Scherrer Institute
|
|
|
|
#include <atomic>
#include <iostream>

#include "../acquisition_device/PCIExpressDevice.h"
#include "../acquisition_device/MockAcquisitionDevice.h"
#include "JFJochReceiverTest.h"
#include "../tests/FPGAUnitTest.h"
|
|
|
|
/// Writes the command-line help text to the given logger, one Info() call per line.
///
/// @param logger  Logger that receives the usage lines.
void print_usage(Logger &logger) {
    // Help text kept as a table so that adding an option is a one-line change.
    static constexpr const char *usage_lines[] = {
        "Usage ./jfjoch_action_test {<options>} <path to repository>",
        "Options:",
        " -M use mock device",
        " -R raw",
        " -v verbose",
        " -H mock aq. dev. with HBM (DL380 with Intel MAX only)",
        " -D mock aq. dev. with DDR (2 NUMA node machines only)",
        " -s<num> number of data streams (acquisition devices)",
        " -m<num> number of modules",
        " -i<num> number of images",
        " -p<num> data processing period",
        " -N<num> number of image processing threads",
        " -P<txt> NUMA Policy (none|n2g2|n8g4|n8g4_hbm), none is default",
    };

    for (const char *text : usage_lines)
        logger.Info(text);
}
|
|
|
|
int main(int argc, char **argv) {
|
|
Logger logger("ActionTest");
|
|
logger.Verbose(true);
|
|
|
|
constexpr uint64_t clock_MHz = 200;
|
|
uint16_t nstreams = 1;
|
|
uint16_t nmodules = 1;
|
|
size_t nimages = 2;
|
|
uint64_t processing_period = 20;
|
|
uint16_t nthreads = 64;
|
|
bool use_mock_device = false;
|
|
bool verbose = false;
|
|
std::string numa_policy_name;
|
|
bool use_hbm_for_aq_dev = false;
|
|
bool use_ddr_for_aq_dev = false;
|
|
bool raw_data = false;
|
|
|
|
if (argc == 1) {
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
int opt;
|
|
while ((opt = getopt(argc, argv, "s:i:m:p:N:P:MvHDR")) != -1) {
|
|
switch (opt) {
|
|
case 'M':
|
|
use_mock_device = true;
|
|
break;
|
|
case 'i':
|
|
nimages = atol(optarg);
|
|
break;
|
|
case 'm':
|
|
nmodules = atol(optarg);
|
|
break;
|
|
case 's':
|
|
nstreams = atol(optarg);
|
|
break;
|
|
case 'p':
|
|
processing_period = atol(optarg);
|
|
break;
|
|
case 'N':
|
|
nthreads = atol(optarg);
|
|
break;
|
|
case 'v':
|
|
verbose = true;
|
|
break;
|
|
case 'P':
|
|
numa_policy_name = std::string(optarg);
|
|
break;
|
|
case 'H':
|
|
use_hbm_for_aq_dev = true;
|
|
break;
|
|
case 'D':
|
|
use_ddr_for_aq_dev = true;
|
|
break;
|
|
case 'R':
|
|
raw_data = true;
|
|
break;
|
|
default: /* '?' */
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
if (optind != argc - 1) {
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
DiffractionExperiment x(DetectorGeometry(nmodules, 2, 8, 36, true));
|
|
|
|
if (raw_data)
|
|
x.Mode(DetectorMode::Raw);
|
|
else
|
|
x.Mode(DetectorMode::Conversion);
|
|
|
|
x.ImagesPerTrigger(nimages).PedestalG0Frames(0).UseInternalPacketGenerator(true).PhotonEnergy_keV(12.4).NumTriggers(1);
|
|
x.MaskModuleEdges(false).MaskChipEdges(false);
|
|
x.Compression(JFJochProtoBuf::BSHUF_LZ4).DataStreams(nstreams);
|
|
|
|
logger.Info("Data streams {} Total modules {} Total images {} Threads {}", nstreams, nmodules, nimages, nthreads);
|
|
|
|
std::vector<std::string> dev_name = {
|
|
"/dev/jfjoch0",
|
|
"/dev/jfjoch2",
|
|
"/dev/jfjoch1",
|
|
"/dev/jfjoch3"
|
|
};
|
|
|
|
logger.Verbose(verbose);
|
|
|
|
AcquisitionDeviceGroup aq_devices;
|
|
|
|
std::string image_path = std::string(argv[optind]) + "/tests/test_data/mod5_raw0.bin";
|
|
std::vector<uint16_t> input(RAW_MODULE_SIZE, 0);
|
|
LoadBinaryFile(image_path, input.data(), RAW_MODULE_SIZE);
|
|
|
|
if (use_mock_device) {
|
|
if (nmodules > 1) {
|
|
logger.Warning("Conversion results might be wrong with more than 1 module per stream");
|
|
}
|
|
|
|
for (int i = 0; i < nstreams; i++) {
|
|
int16_t numa_node = -1;
|
|
|
|
if (use_hbm_for_aq_dev)
|
|
numa_node = 2 + (i % 2);
|
|
else if (use_ddr_for_aq_dev)
|
|
numa_node = i % 2;
|
|
|
|
if (numa_node != -1)
|
|
logger.Info("Pinning stream {} to NUMA node {}", i, numa_node);
|
|
|
|
auto tmp = std::make_unique<MockAcquisitionDevice>(i, 1024, numa_node);
|
|
tmp->SetCustomInternalGeneratorFrame(input);
|
|
tmp->EnableLogging(&logger);
|
|
aq_devices.Add(std::move(tmp));
|
|
}
|
|
|
|
} else {
|
|
if (nstreams > dev_name.size()) {
|
|
logger.Error("Only {} data streams allowed on this platform", dev_name.size());
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
for (int i = 0; i < nstreams; i++) {
|
|
auto tmp = std::make_unique<PCIExpressDevice>(i, dev_name[i]);
|
|
tmp->SetInternalGeneratorFrameForAllModules(input);
|
|
tmp->EnableLogging(&logger);
|
|
tmp->SetDefaultMAC();
|
|
tmp->SetIPv4Address((i << 24) + 0x010a0a0a);
|
|
aq_devices.Add(std::move(tmp));
|
|
}
|
|
}
|
|
|
|
volatile bool done = false;
|
|
JFJochReceiverOutput output;
|
|
bool ret;
|
|
std::thread run_thread([&] {
|
|
try {
|
|
ret = JFJochReceiverTest(output, logger, aq_devices, x, nthreads, false, nullptr, numa_policy_name);
|
|
} catch (std::exception &e) {
|
|
logger.Error(e.what());
|
|
ret = false;
|
|
}
|
|
done = true;
|
|
});
|
|
|
|
if (!use_mock_device) {
|
|
while (!done) {
|
|
for (int i = 0; i < nstreams; i++) {
|
|
auto coll_status = aq_devices[i].GetDataCollectionStatus();
|
|
auto dev_status = aq_devices[i].GetDeviceStatus();
|
|
double power = (dev_status.fpga_pcie_12V_I_mA * dev_status.fpga_pcie_12V_V_mV
|
|
+ dev_status.fpga_pcie_3p3V_I_mA * dev_status.fpga_pcie_3p3V_V_mV) / (1000.0 * 1000.0);
|
|
logger.Info("Device {}: Slowest packet: {:8d} Power: {:5.1f} W FPGA Temp: {:d} degC HBM Temp: {:d}/{:d} degC Stalls: {:15d}/{:15d}",
|
|
i, aq_devices[i].Counters().GetSlowestFrameNumber(), power,
|
|
dev_status.fpga_temp_C, dev_status.hbm_0_temp_C, dev_status.hbm_1_temp_C,
|
|
coll_status.pipeline_stalls_hbm, coll_status.pipeline_stalls_host);
|
|
}
|
|
std::this_thread::sleep_for(std::chrono::seconds(1));
|
|
}
|
|
}
|
|
|
|
run_thread.join();
|
|
|
|
double receiving_time = static_cast<double>(output.end_time_ms - output.start_time_ms)/1000.0;
|
|
|
|
logger.Info("Efficiency: {:.2f}%", output.efficiency * 100.f);
|
|
logger.Info("Max delay: {}",output.max_receive_delay);
|
|
logger.Info("Compression factor: {}x", output.compressed_ratio);
|
|
logger.Info("Receiving time: {} s", receiving_time);
|
|
logger.Info("Frame rate: {} Hz", static_cast<double>(nimages)/receiving_time);
|
|
logger.Info("Total throughput: {:.2f} GB/s",
|
|
static_cast<double>(nimages*x.GetModulesNum()*RAW_MODULE_SIZE*sizeof(uint16_t)) / (receiving_time * 1e9));
|
|
|
|
if (!use_mock_device) {
|
|
logger.Info("");
|
|
for (int i = 0; i < nstreams; i++) {
|
|
auto coll_status = aq_devices[i].GetDataCollectionStatus();
|
|
auto stalls_hbm = coll_status.pipeline_stalls_hbm;
|
|
auto stalls_host = coll_status.pipeline_stalls_host;
|
|
|
|
uint64_t throughput_MBs = nimages * x.GetModulesNum(i) * RAW_MODULE_SIZE * sizeof(uint16_t) * clock_MHz /
|
|
(nimages * x.GetModulesNum(i) * 128 * 128 + stalls_hbm);
|
|
double performance = static_cast<double>(throughput_MBs) / 1000;
|
|
|
|
logger.Info("Device {}: stalls HBM: {} stalls host: {} est. performance: {:.2f} GB/s", i, stalls_hbm,
|
|
stalls_host, performance);
|
|
}
|
|
}
|
|
if (ret) {
|
|
logger.Info("");
|
|
logger.Info("Test properly executed! (check stall values manually)");
|
|
exit(EXIT_SUCCESS);
|
|
} else {
|
|
logger.Info("Test finished with errors! (check stall values manually)");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|