diff --git a/tools/JFCalibrationPerfTest.cpp b/tools/JFCalibrationPerfTest.cpp index 5aacd351..7bb0586f 100644 --- a/tools/JFCalibrationPerfTest.cpp +++ b/tools/JFCalibrationPerfTest.cpp @@ -4,12 +4,16 @@ #include #include #include +#include + #include "../jungfrau/JFPedestalCalc.h" #include "../common/Logger.h" #include "../jungfrau/JFCalibration.h" #include "../jungfrau/JFConversionFloatingPoint.h" #include "../jungfrau/JFConversionFixedPoint.h" #include "../tests/FPGAUnitTest.h" +#include "../jungfrau/jf_packet.h" +#include "../jungfrau/ProcessJFPacket.h" void test_pedestal(Logger &logger) { size_t nframes = 5000; @@ -147,13 +151,112 @@ template void test_conversion_with_geom(Logger &logger) { ntries * nframes * nmodules * RAW_MODULE_SIZE * sizeof(uint16_t) * 1000 * 1000/ ((double) elapsed.count() * 1024 * 1024 * 1024)); } +void process_thread(std::vector *packets, ProcessJFPacket *process, + uint32_t stride, uint32_t start) { + for (uint32_t i = start; i < packets->size(); i += stride) + process->ProcessPacket(&packets->at(i).jf, packets->at(i).ipv4_header_sour_ip); +} + +void test_packet_processing(Logger &logger, uint32_t nthreads, bool conversion) { + size_t nframes = 128; + int64_t nmodules = 8; + int64_t ntries = 8; + + DiffractionExperiment x(1,{nmodules}); + + std::vector packets(nframes * nmodules * 128); + std::vector output(nframes * nmodules * CONVERTED_MODULE_SIZE); + + std::vector input(RAW_MODULE_SIZE); + std::string image_path = "../../tests/test_data/mod5_raw0.bin"; + LoadBinaryFile(image_path, input.data(), RAW_MODULE_SIZE); + + for (int frame = 0; frame < nframes; frame++) { + for (int m = 0; m < nmodules; m++) { + for (int p = 0; p < 128; p++) { + packets[(frame * nmodules + m) * 128 + p].ipv4_header_sour_ip = (m * 2) << 24; + packets[(frame * nmodules + m) * 128 + p].jf.packetnum = p; + packets[(frame * nmodules + m) * 128 + p].jf.framenum = frame + 1; + memcpy(packets[(frame * nmodules + m) * 128 + p].jf.data, input.data() + 4096 * p, 4096 * sizeof(uint16_t)); + } + } + } + + std::vector v(nmodules); + + JFModuleGainCalibration gain_calib = GainCalibrationFromTestFile(); + + for (int m = 0; m < nmodules; m++) { + JFModulePedestal pedestal_g0; + JFModulePedestal pedestal_g1; + JFModulePedestal pedestal_g2; + + for (int i = 0; i < RAW_MODULE_SIZE; i++) { + pedestal_g0.GetPedestal()[i] = 3000 + i % 50 + m * 135; + pedestal_g1.GetPedestal()[i] = 15000 + i % 50 - m * 135; + pedestal_g2.GetPedestal()[i] = 14000 + i % 50 - m * 135; + } + v[m].Setup(gain_calib, pedestal_g0, pedestal_g1, pedestal_g2, 12.4); + } + + x.Mode(DetectorMode::Conversion); + + logger.Info("JF FP conversion input prepared"); + auto start_time = std::chrono::system_clock::now(); + for (int z = 0; z < ntries; z++) { + ThreadSafeFIFO c; + ThreadSafeFIFO wr; + ProcessJFPacket process(c, wr, nmodules); + + if (conversion) { + for (int i = 0; i < nmodules; i++) + process.RegisterConversion(i, &v[i]); + } + + for (uint32_t i = 0; i < nmodules * nframes; i++) + wr.Put(ProcessWorkRequest{ + .ptr = output.data() + i * RAW_MODULE_SIZE, + .handle = i + }); + + { + std::vector> futures; + for (int i = 0; i < nthreads; i++) + futures.emplace_back(std::async(std::launch::async, &process_thread, &packets, &process, nthreads, i)); + } + } + auto end_time = std::chrono::system_clock::now(); + auto elapsed = std::chrono::duration_cast(end_time - start_time); + + logger.Info("Packet analysis performance: {:5d} us/module {:5.2f} GB/s", std::lround(elapsed.count() / ((double) (ntries * nframes * nmodules))), + ntries * nframes * nmodules * RAW_MODULE_SIZE * sizeof(uint16_t) * 1000 * 1000/ ((double) elapsed.count() * 1024 * 1024 * 1024)); +} + int main () { Logger logger("JFCalibrationPerfTest"); test_pedestal(logger); + logger.Info("Floating point conversion"); test_conversion(logger); + logger.Info("Fixed point conversion"); test_conversion(logger); + logger.Info("Fixed point conversion (with geom)"); test_conversion_with_geom(logger); + + logger.Info("Packet processing with conversion (1 thread)"); + test_packet_processing(logger, 1, true); + + logger.Info("Packet processing with conversion (2 threads)"); + test_packet_processing(logger, 2, true); + + logger.Info("Packet processing with conversion (4 threads)"); + test_packet_processing(logger, 4, true); + + logger.Info("Packet processing with conversion (8 threads)"); + test_packet_processing(logger, 8, true); + + logger.Info("Packet processing without conversion"); + test_packet_processing(logger, 1, false); }