Some checks failed
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 8m11s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 9m9s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 9m18s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 10m14s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 10m3s
Build Packages / Generate python client (push) Successful in 15s
Build Packages / Build documentation (push) Successful in 50s
Build Packages / Create release (push) Has been skipped
Build Packages / build:rpm (rocky8) (push) Successful in 8m31s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 8m21s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 7m42s
Build Packages / build:rpm (rocky9) (push) Successful in 9m11s
Build Packages / Unit tests (push) Failing after 1h13m19s
This is an UNSTABLE release and is not recommended for production use (please use rc.96 instead). * jfjoch_broker: For DECTRIS detectors, add dark data collection during initialization for the bad pixel mask * jfjoch_broker: Refactor calibration logic for clearer code (likely to introduce problems) * jfjoch_viewer: Add option to handle user pixel mask (experimental) * jfjoch_viewer: More options for ROI * jfjoch_viewer: Add window to display calibration Reviewed-on: #2 Co-authored-by: Filip Leonarski <filip.leonarski@psi.ch> Co-committed-by: Filip Leonarski <filip.leonarski@psi.ch>
309 lines
12 KiB
C++
309 lines
12 KiB
C++
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include <iostream>
|
|
|
|
#include "../acquisition_device/PCIExpressDevice.h"
|
|
#include "../acquisition_device/HLSSimulatedDevice.h"
|
|
#include "../receiver/JFJochReceiverTest.h"
|
|
#include "../tests/CheckImageOutput.h"
|
|
#include "../common/print_license.h"
|
|
|
|
/// Writes the command-line help of jfjoch_fpga_test to `logger`, one Info
/// message per line: the invocation synopsis first, then every option that
/// main() accepts.
void print_usage(Logger &logger) {
    // Help text kept as a table so the emitting code stays a single loop.
    static const char *const usage_lines[] = {
        "Usage ./jfjoch_fpga_test {<options>} <path to repository>",
        "Options:",
        " -v Verbose",
        " -H Simulation with C HLS model (doesn't require FPGA device)",
        " -E{<num>} EIGER detector mode (with optional bit depth: 8, 16, 32)",
        " -R Raw",
        " -S<num> Number of summed frames",
        " -I Use 32-bit integer",
        " -c Use 8-bit integer",
        " -s<num> Number of data streams (acquisition devices)",
        " -m<num> Number of modules",
        " -i<num> Number of images",
        " -N<num> Number of image processing threads",
        " -P<txt> NUMA Policy (none|n2g2|n8g4|n8g4_hbm), none is default",
        " -B<num> Size of send buffer in MiB (default 2048)",
        " -q<num> Use Poisson lossy compression, with square root of counts",
        " -T<num> Use thresholding for low counts",
        " -M Apply pixel mask",
        " -0 No compression",
        " -Z Fast Zstd (RLE only!) compression",
        " -X<txt> Indexing (none|fft|ffbidx), ffbidx is default",
        " -t<num> Indexing thread pool size (default: 4)",
        " -f<num> FFT indexing search vectors",
        " -Q Quick integration",
    };

    for (const char *text : usage_lines)
        logger.Info(text);
}
|
|
|
|
int main(int argc, char **argv) {
|
|
print_license("jfjoch_fpga_test");
|
|
|
|
Logger logger("jfjoch_fpga_test");
|
|
logger.Verbose(true);
|
|
|
|
constexpr uint64_t clock_MHz = 200;
|
|
uint16_t nstreams = 1;
|
|
uint16_t nmodules = 1;
|
|
uint16_t nsummation = 1;
|
|
size_t nimages = 2;
|
|
uint16_t nthreads = 64;
|
|
IndexingAlgorithmEnum indexing = IndexingAlgorithmEnum::FFBIDX;
|
|
uint16_t indexing_threads = 4;
|
|
std::optional<int64_t> fft_num_vectors;
|
|
|
|
bool verbose = false;
|
|
std::string numa_policy_name;
|
|
bool raw_data = false;
|
|
bool force_32bit = false;
|
|
bool force_8bit = false;
|
|
bool apply_pixel_mask = false;
|
|
bool quick_integrate = false;
|
|
std::optional<int64_t> eiger_bit_depth;
|
|
DetectorType detector_type = DetectorType::JUNGFRAU;
|
|
bool hls_simulation = false;
|
|
size_t send_buffer_size_MiB = 2048;
|
|
std::optional<int64_t> lossy_compression_poisson;
|
|
int64_t thresholding = 0;
|
|
CompressionAlgorithm compr = CompressionAlgorithm::BSHUF_LZ4;
|
|
|
|
if (argc == 1) {
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
int opt;
|
|
while ((opt = getopt(argc, argv, "s:i:m:N:P:vRIS:E::HB:q:T:cM0ZX:t:f:Q")) != -1) {
|
|
switch (opt) {
|
|
case '0':
|
|
compr = CompressionAlgorithm::NO_COMPRESSION;
|
|
break;
|
|
case 'Z':
|
|
compr = CompressionAlgorithm::BSHUF_ZSTD_RLE;
|
|
break;
|
|
case 'i':
|
|
nimages = atol(optarg);
|
|
break;
|
|
case 'S':
|
|
nsummation = atol(optarg);
|
|
break;
|
|
case 'm':
|
|
nmodules = atol(optarg);
|
|
break;
|
|
case 's':
|
|
nstreams = atol(optarg);
|
|
break;
|
|
case 'N':
|
|
nthreads = atol(optarg);
|
|
break;
|
|
case 'v':
|
|
verbose = true;
|
|
break;
|
|
case 'P':
|
|
numa_policy_name = std::string(optarg);
|
|
break;
|
|
case 'R':
|
|
raw_data = true;
|
|
break;
|
|
case 'I':
|
|
force_32bit = true;
|
|
break;
|
|
case 'c':
|
|
force_8bit = true;
|
|
break;
|
|
case 'M':
|
|
apply_pixel_mask = true;
|
|
break;
|
|
case 'E':
|
|
detector_type = DetectorType::EIGER;
|
|
if (optarg != nullptr) {
|
|
eiger_bit_depth = atol(optarg);
|
|
}
|
|
break;
|
|
case 'H':
|
|
hls_simulation = true;
|
|
break;
|
|
case 'B':
|
|
send_buffer_size_MiB = atol(optarg);
|
|
break;
|
|
case 'q':
|
|
lossy_compression_poisson = atol(optarg);
|
|
break;
|
|
case 'T':
|
|
thresholding = atol(optarg);
|
|
break;
|
|
case 'X':
|
|
if (std::string(optarg) == "none")
|
|
indexing = IndexingAlgorithmEnum::None;
|
|
else if (std::string(optarg) == "fft" || std::string(optarg) == "FFT")
|
|
indexing = IndexingAlgorithmEnum::FFT;
|
|
else if (std::string(optarg) == "ffbidx" || std::string(optarg) == "FFBIDX")
|
|
indexing = IndexingAlgorithmEnum::FFBIDX;
|
|
break;
|
|
case 't':
|
|
indexing_threads = atol(optarg);
|
|
break;
|
|
case 'f':
|
|
fft_num_vectors = atol(optarg);
|
|
break;
|
|
case 'Q':
|
|
quick_integrate = true;
|
|
break;
|
|
|
|
default: /* '?' */
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
if (optind != argc - 1) {
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
DiffractionExperiment x(DetectorSetup(DetectorGeometryModular(nmodules, 2, 8, 36, true), detector_type));
|
|
|
|
if (raw_data)
|
|
x.Raw();
|
|
|
|
x.ImagesPerTrigger(nimages).Summation(nsummation).PedestalG0Frames(0).UseInternalPacketGenerator(true).
|
|
IncidentEnergy_keV(12.4).NumTriggers(1);
|
|
x.MaskModuleEdges(false).MaskChipEdges(false).BeamX_pxl(x.GetXPixelsNum() / 2.0).BeamY_pxl(x.GetYPixelsNum() / 2.0).
|
|
DetectorDistance_mm(100);
|
|
x.Compression(compr).DataStreams(nstreams);
|
|
x.SetUnitCell(UnitCell{.a = 79, .b = 79, .c = 37, .alpha = 90.0, .beta = 90.0, .gamma = 90.0});
|
|
x.LossyCompressionPoisson(lossy_compression_poisson);
|
|
x.ApplyPixelMask(apply_pixel_mask);
|
|
x.MaskChipEdges(true).MaskModuleEdges(true);
|
|
PixelMask mask(x);
|
|
|
|
IndexingSettings i_settings;
|
|
i_settings.Algorithm(indexing);
|
|
if (fft_num_vectors)
|
|
i_settings.FFT_NumVectors(fft_num_vectors.value());
|
|
i_settings.IndexingThreads(indexing_threads);
|
|
x.ImportIndexingSettings(i_settings);
|
|
|
|
if (thresholding > 0)
|
|
x.PixelValueLowThreshold(thresholding);
|
|
|
|
if (force_32bit)
|
|
x.BitDepthImage(32);
|
|
else if (force_8bit)
|
|
x.BitDepthImage(8);
|
|
|
|
x.EigerBitDepth(eiger_bit_depth);
|
|
|
|
logger.Info("Data streams {} Total modules {} Total images {} Threads {}", nstreams, nmodules, nimages, nthreads);
|
|
|
|
std::vector<std::string> dev_name = {
|
|
"/dev/jfjoch0",
|
|
"/dev/jfjoch1",
|
|
"/dev/jfjoch2",
|
|
"/dev/jfjoch3",
|
|
"/dev/jfjoch4",
|
|
"/dev/jfjoch5",
|
|
};
|
|
|
|
logger.Verbose(verbose);
|
|
|
|
AcquisitionDeviceGroup aq_devices;
|
|
|
|
std::string image_path = std::string(argv[optind]) + "/tests/test_data/mod5_raw0.bin";
|
|
std::vector<uint16_t> input(RAW_MODULE_SIZE * x.GetModulesNum(), 0);
|
|
std::vector<uint16_t> tmp(RAW_MODULE_SIZE);
|
|
LoadBinaryFile(image_path, tmp.data(), RAW_MODULE_SIZE);
|
|
for (int m = 0; m < x.GetModulesNum(); m++)
|
|
memcpy(input.data() + RAW_MODULE_SIZE * m, tmp.data(), RAW_MODULE_SIZE * sizeof(uint16_t));
|
|
|
|
if (hls_simulation) {
|
|
if (nstreams != 1) {
|
|
logger.Error("HLS simulation can work with only one device");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
auto tmp = std::make_unique<HLSSimulatedDevice>(0, 128);
|
|
tmp->EnableLogging(&logger);
|
|
aq_devices.Add(std::move(tmp));
|
|
} else {
|
|
if (nstreams > dev_name.size()) {
|
|
logger.Error("Only {} data streams allowed on this platform", dev_name.size());
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
for (int i = 0; i < nstreams; i++) {
|
|
auto tmp = std::make_unique<PCIExpressDevice>(i, dev_name[i]);
|
|
tmp->EnableLogging(&logger);
|
|
tmp->SetIPv4Address((i << 24) + 0x010a0a0a);
|
|
aq_devices.Add(std::move(tmp));
|
|
}
|
|
}
|
|
|
|
std::atomic<bool> done = false;
|
|
JFJochReceiverOutput output;
|
|
bool ret;
|
|
std::thread run_thread([&] {
|
|
try {
|
|
ret = JFJochReceiverTest(output, logger, aq_devices, x, mask, input, nthreads, numa_policy_name,
|
|
send_buffer_size_MiB, quick_integrate);
|
|
} catch (std::exception &e) {
|
|
logger.Error(e.what());
|
|
ret = false;
|
|
}
|
|
done = true;
|
|
});
|
|
|
|
while (!done) {
|
|
for (int i = 0; i < nstreams; i++) {
|
|
auto coll_status = aq_devices[i].GetDataCollectionStatus();
|
|
auto dev_status = aq_devices[i].GetDeviceStatus();
|
|
double power_3p3v = (dev_status.fpga_pcie_3p3V_I_mA * dev_status.fpga_pcie_3p3V_V_mV) / (1000.0 * 1000.0);
|
|
double power_12v = (dev_status.fpga_pcie_12V_I_mA * dev_status.fpga_pcie_12V_V_mV) / (1000.0 * 1000.0);
|
|
logger.Info(
|
|
"#{}: Slowest packet: {:8d} Pwr: {:4.1f}+{:4.1f}={:5.1f} W T FPGA/HBM1/HBM2: {:3d}/{:3d}/{:3d} degC Stalls: {:15d}/{:15d}",
|
|
i, aq_devices[i].Counters().GetSlowestFrameNumber(), power_12v, power_3p3v, power_12v + power_3p3v,
|
|
dev_status.fpga_temp_C, dev_status.hbm_0_temp_C, dev_status.hbm_1_temp_C,
|
|
coll_status.pipeline_stalls_hbm, coll_status.pipeline_stalls_host);
|
|
}
|
|
std::this_thread::sleep_for(std::chrono::seconds(1));
|
|
}
|
|
|
|
run_thread.join();
|
|
|
|
double receiving_time = static_cast<double>(output.end_time_ms - output.start_time_ms) / 1000.0;
|
|
|
|
logger.Info("Efficiency: {:.2f}%", output.efficiency * 100.f);
|
|
if (output.status.max_receive_delay)
|
|
logger.Info("Max delay: {}", output.status.max_receive_delay.value());
|
|
if (output.status.compressed_ratio)
|
|
logger.Info("Compression factor: {}x", output.status.compressed_ratio.value());
|
|
logger.Info("Receiving time: {} s", receiving_time);
|
|
logger.Info("Frame rate: {} Hz", static_cast<double>(nimages) / receiving_time);
|
|
logger.Info("Total throughput: {:.2f} GB/s",
|
|
static_cast<double>(nsummation * nimages * x.GetModulesNum() * RAW_MODULE_SIZE *
|
|
x.GetBitDepthReadout() / 8) / (receiving_time * 1e9));
|
|
|
|
logger.Info("");
|
|
for (int i = 0; i < nstreams; i++) {
|
|
auto coll_status = aq_devices[i].GetDataCollectionStatus();
|
|
auto stalls_hbm = coll_status.pipeline_stalls_hbm;
|
|
auto stalls_host = coll_status.pipeline_stalls_host;
|
|
|
|
uint64_t throughput_MBs = nimages * nsummation * x.GetModulesNum(i) * RAW_MODULE_SIZE * sizeof(uint16_t) *
|
|
clock_MHz /
|
|
(nimages * nsummation * x.GetModulesNum(i) * 128 * 128 + stalls_hbm);
|
|
double performance = static_cast<double>(throughput_MBs) / 1000;
|
|
|
|
logger.Info("Device {}: stalls HBM: {} stalls host: {} est. performance: {:.2f} GB/s", i, stalls_hbm,
|
|
stalls_host, performance);
|
|
}
|
|
|
|
if (ret) {
|
|
logger.Info("");
|
|
logger.Info("Test properly executed! (check stall values manually)");
|
|
exit(EXIT_SUCCESS);
|
|
} else {
|
|
logger.Info("Test finished with errors! (check stall values manually)");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|