cc925b2668
Build Packages / build:rpm (ubuntu2404_nocuda) (push) Successful in 8m30s
Build Packages / build:rpm (rocky8_nocuda) (push) Successful in 10m13s
Build Packages / build:rpm (ubuntu2204_nocuda) (push) Successful in 9m45s
Build Packages / build:rpm (rocky9_nocuda) (push) Successful in 11m13s
Build Packages / build:rpm (rocky8_sls9) (push) Successful in 9m51s
Build Packages / build:rpm (rocky8) (push) Successful in 8m29s
Build Packages / build:rpm (rocky9_sls9) (push) Successful in 9m31s
Build Packages / build:rpm (rocky9) (push) Successful in 9m42s
Build Packages / build:rpm (ubuntu2204) (push) Successful in 8m47s
Build Packages / build:rpm (ubuntu2404) (push) Successful in 8m23s
Build Packages / Generate python client (push) Successful in 19s
Build Packages / Build documentation (push) Successful in 38s
Build Packages / Create release (push) Skipped
Build Packages / XDS test (durin plugin) (push) Successful in 6m18s
Build Packages / XDS test (neggia plugin) (push) Successful in 6m4s
Build Packages / XDS test (JFJoch plugin) (push) Successful in 6m35s
Build Packages / DIALS test (push) Successful in 10m39s
Build Packages / Unit tests (push) Successful in 1h24m58s
NUMA CPU/memory pinning is no longer worthwhile: the FPGA DMA buffers are placed device-local by the kernel (dma_alloc_coherent), the big RAM ring buffer is random-access (first-touch handles placement), and GPU work is already spread across all visible devices. So drop the pinning entirely and with it libnuma. - Delete NUMAHWPolicy; the only concern worth keeping - GPU selection - is done directly via pin_gpu() (round-robin over visible GPUs) in the indexer pool and the Lite analysis threads. CPU-only threads (FPGA acquire/pedestal/summation/frame-transform) no longer bind anything. - Drop get_gpu_numa_node() (sysfs lookup) - only SelectGPUAndItsNUMA used it. - numa_policy broker setting is deprecated and ignored (kept in the API for backward compatibility; warns once on startup). - Remove NUMA_LIBRARY / numa.h / numaif.h detection from CMake. - Docs: drop the NUMA dependency, remove the numa_policy config example, and document running multiple brokers on disjoint GPUs via CUDA_VISIBLE_DEVICES. - Remove NUMA_GPU_REVIEW.md (the planning note; this work is now done). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
215 lines
8.2 KiB
C++
215 lines
8.2 KiB
C++
// SPDX-FileCopyrightText: 2025 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include "../common/print_license.h"
|
|
#include "../common/Logger.h"
|
|
#include "../common/DiffractionExperiment.h"
|
|
#include "../writer/HDF5Objects.h"
|
|
#include "../common/PixelMask.h"
|
|
#include "../image_pusher/HDF5FilePusher.h"
|
|
#include "../image_puller/TestImagePuller.h"
|
|
#include "../acquisition_device/AcquisitionDeviceGroup.h"
|
|
#include "../receiver/JFJochReceiverService.h"
|
|
#include "JFJochCompressor.h"
|
|
|
|
|
|
/// Log the command-line synopsis and the list of supported options.
///
/// Every line is emitted at Info level through the supplied logger; the
/// function itself never terminates the program, callers decide whether to
/// exit afterwards.
///
/// @param logger Logger used to emit the help text.
void print_usage(Logger &logger) {
    logger.Info("Usage ./jfjoch_fpga_test {<options>} <path to repository>");
    logger.Info("Options:");
    logger.Info(" -i<num> Number of images");
    logger.Info(" -N<num> Number of image processing threads (default: 8)");
    logger.Info(" -F{<txt>} Write file, optional parameter is name (default: lyso_lite_perf_test)");
    logger.Info(" -X<txt> Indexing (none|fft|fftw|ffbidx), ffbidx is default");
    logger.Info(" -t<num> Indexing thread pool size (default: 4)");
    logger.Info(" -f<num> FFT indexing search vectors");
    logger.Info(" -Q Quick integration");
    logger.Info(" -G Geometry refinement");
    // The two options below are accepted by the getopt string in main() but
    // were previously missing from the help text.
    logger.Info(" -v Verbose logging");
    logger.Info(" -P<txt> NUMA policy (deprecated, ignored)");
}
|
|
|
|
int main(int argc, char **argv) {
|
|
print_license("jfjoch_fpga_test");
|
|
|
|
Logger logger("jfjoch_fpga_test");
|
|
logger.Verbose(false);
|
|
|
|
bool use_geom = false;
|
|
uint16_t nthreads = 8;
|
|
size_t nimages = 1000;
|
|
std::string numa_policy_name;
|
|
std::string filename = "";
|
|
std::string ml_model = "";
|
|
IndexingAlgorithmEnum indexing = IndexingAlgorithmEnum::FFBIDX;
|
|
std::optional<int64_t> fft_num_vectors;
|
|
uint16_t indexing_threads = 4;
|
|
bool quick_integrate = false;
|
|
|
|
RegisterHDF5Filter();
|
|
|
|
if (argc == 1) {
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
int opt;
|
|
while ((opt = getopt(argc, argv, "N:P:i:F::QGvX:t:f:")) != -1) {
|
|
switch (opt) {
|
|
case 'N':
|
|
nthreads = atol(optarg);
|
|
break;
|
|
case 'P':
|
|
logger.Warning("NUMA policy is deprecated and ignored");
|
|
break;
|
|
case 'i':
|
|
nimages = atol(optarg);
|
|
break;
|
|
case 'X':
|
|
if (std::string(optarg) == "none")
|
|
indexing = IndexingAlgorithmEnum::None;
|
|
else if (std::string(optarg) == "fft" || std::string(optarg) == "FFT")
|
|
indexing = IndexingAlgorithmEnum::FFT;
|
|
else if (std::string(optarg) == "ffbidx" || std::string(optarg) == "FFBIDX")
|
|
indexing = IndexingAlgorithmEnum::FFBIDX;
|
|
else if (std::string(optarg) == "fftw" || std::string(optarg) == "FFTW")
|
|
indexing = IndexingAlgorithmEnum::FFTW;
|
|
break;
|
|
case 't':
|
|
indexing_threads = atol(optarg);
|
|
break;
|
|
case 'f':
|
|
fft_num_vectors = atol(optarg);
|
|
break;
|
|
case 'F':
|
|
if (optarg)
|
|
filename = std::string(optarg);
|
|
else
|
|
filename = "lyso_lite_perf_test";
|
|
break;
|
|
case 'Q':
|
|
quick_integrate = true;
|
|
break;
|
|
case 'G':
|
|
use_geom = true;
|
|
break;
|
|
case 'v':
|
|
logger.Verbose(true);
|
|
break;
|
|
default: /* '?' */
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
if (optind != argc - 1) {
|
|
print_usage(logger);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
std::string jfjoch_path = std::string(argv[optind]) + "/";
|
|
|
|
DiffractionExperiment experiment(DetDECTRIS(2068, 2164, "Test", {}));
|
|
|
|
experiment.ImagesPerTrigger(nimages).NumTriggers(1).UseInternalPacketGenerator(true).ImagesPerFile(1000)
|
|
.FilePrefix(filename).JungfrauConvPhotonCnt(false).SetFileWriterFormat(FileWriterFormat::NXmxVDS).OverwriteExistingFiles(true)
|
|
.DetectorDistance_mm(75).BeamY_pxl(1136).BeamX_pxl(1090).IncidentEnergy_keV(12.4)
|
|
.SetUnitCell(UnitCell{.a = 36.9, .b = 78.95, .c = 78.95, .alpha =90, .beta = 90, .gamma = 90}).PixelSigned(true);
|
|
|
|
PixelMask pixel_mask(experiment);
|
|
|
|
experiment.ROI().SetROI(ROIDefinition{.boxes = {ROIBox("ROI1", 123, 180, 500,800) }});
|
|
|
|
// Load example image
|
|
HDF5ReadOnlyFile data(jfjoch_path + "tests/test_data/compression_benchmark.h5");
|
|
HDF5DataSet dataset(data, "/entry/data/data");
|
|
HDF5DataSpace file_space(dataset);
|
|
|
|
std::vector<int16_t> image_conv(file_space.GetDimensions()[1] * file_space.GetDimensions()[2]);
|
|
|
|
std::vector<hsize_t> start = {4,0,0};
|
|
std::vector<hsize_t> file_size = {1, file_space.GetDimensions()[1], file_space.GetDimensions()[2]};
|
|
dataset.ReadVector(image_conv, start, file_size);
|
|
|
|
JFJochBitShuffleCompressor compressor(CompressionAlgorithm::BSHUF_LZ4);
|
|
auto image_compressed = compressor.Compress(image_conv);
|
|
|
|
HDF5FilePusher pusher;
|
|
|
|
auto puller = std::make_shared<TestImagePuller>(nimages + 5);
|
|
|
|
StartMessage start_msg;
|
|
experiment.FillMessage(start_msg);
|
|
|
|
puller->Put(ImagePullerOutput{
|
|
.cbor = std::make_shared<CBORStream2DeserializerOutput>(start_msg)
|
|
});
|
|
|
|
DataMessage data_msg;
|
|
data_msg.image = CompressedImage(image_compressed,
|
|
file_space.GetDimensions()[2],
|
|
file_space.GetDimensions()[1],
|
|
CompressedImageMode::Int16,
|
|
CompressionAlgorithm::BSHUF_LZ4);
|
|
|
|
for (int i = 0; i < experiment.GetImageNum(); i++) {
|
|
data_msg.number = i;
|
|
puller->Put(ImagePullerOutput{
|
|
.cbor = std::make_shared<CBORStream2DeserializerOutput>(data_msg)
|
|
});
|
|
}
|
|
|
|
EndMessage end_msg{};
|
|
|
|
puller->Put(ImagePullerOutput{
|
|
.cbor = std::make_shared<CBORStream2DeserializerOutput>(end_msg)
|
|
});
|
|
|
|
AcquisitionDeviceGroup group;
|
|
JFJochReceiverService service(group, logger, pusher);
|
|
|
|
service.NumThreads(nthreads);
|
|
|
|
IndexingSettings i_settings;
|
|
i_settings.Algorithm(indexing);
|
|
if (fft_num_vectors)
|
|
i_settings.FFT_NumVectors(fft_num_vectors.value());
|
|
i_settings.IndexingThreads(indexing_threads);
|
|
if (use_geom)
|
|
i_settings.GeomRefinementAlgorithm(GeomRefinementAlgorithmEnum::BeamCenter);
|
|
|
|
experiment.ImportIndexingSettings(i_settings);
|
|
service.Indexing(i_settings);
|
|
|
|
SpotFindingSettings settings = DiffractionExperiment::DefaultDataProcessingSettings();
|
|
settings.signal_to_noise_threshold = 2.5;
|
|
settings.photon_count_threshold = 5;
|
|
settings.min_pix_per_spot = 1;
|
|
settings.max_pix_per_spot = 200;
|
|
settings.high_resolution_limit = 2.0;
|
|
settings.low_resolution_limit = 50.0;
|
|
settings.quick_integration = quick_integrate;
|
|
service.SetSpotFindingSettings(settings);
|
|
|
|
auto start_time = std::chrono::system_clock::now();
|
|
service.Start(experiment, pixel_mask, nullptr, puller);
|
|
auto output = service.Stop();
|
|
auto end_time = std::chrono::system_clock::now();
|
|
|
|
double receiving_time_s = static_cast<double>(output.end_time_ms - output.start_time_ms) / 1000.0;
|
|
logger.Info("Throughput {:.1f} Hz", experiment.GetImageNum() / receiving_time_s);
|
|
if (output.status.indexing_rate)
|
|
logger.Info("Indexing rate {:.0f}%", output.status.indexing_rate.value() * 100.0);
|
|
if (output.status.max_receive_delay)
|
|
logger.Info("Max delay {}", output.status.max_receive_delay.value());
|
|
|
|
logger.Info("Per-image time: (mean; microseconds): compression {:.0f} preprocess {:.0f} spot finding {:.0f} indexing {:.0f} refinement {:.0f} indexing analysis {:.0f} prediction {:.0f} integration {:.0f} total {:.0f}",
|
|
output.processing_time.compression * 1e6,
|
|
output.processing_time.preprocessing * 1e6,
|
|
output.processing_time.spot_finding * 1e6,
|
|
output.processing_time.indexing * 1e6,
|
|
output.processing_time.refinement * 1e6,
|
|
output.processing_time.indexing_analysis * 1e6,
|
|
output.processing_time.bragg_prediction * 1e6,
|
|
output.processing_time.integration * 1e6,
|
|
output.processing_time.processing * 1e6);
|
|
|
|
}
|