From 95762ae459a751b755cdb010e9bd8d1b33882a48 Mon Sep 17 00:00:00 2001 From: lrlunin Date: Mon, 27 Apr 2026 14:36:23 +0200 Subject: [PATCH] refactored ClusterFinder and find_clusters, add clusterfinder_benchmark, refactored main in benchmarks --- benchmarks/CMakeLists.txt | 21 +++- benchmarks/calculateeta_benchmark.cpp | 4 +- benchmarks/clusterfinder_benchmark.cpp | 113 ++++++++++++++++++++ benchmarks/fit_benchmark.cpp | 4 +- benchmarks/ndarray_benchmark.cpp | 4 +- include/aare/ClusterFinder.hpp | 140 ++++++++++++------------- 6 files changed, 200 insertions(+), 86 deletions(-) create mode 100644 benchmarks/clusterfinder_benchmark.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index ef378e4..8926ab7 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -22,8 +22,9 @@ target_sources( reduce_benchmark.cpp) # Link Google Benchmark and other necessary libraries -target_link_libraries(benchmarks PRIVATE benchmark::benchmark aare_core - aare_compiler_flags) +target_link_libraries( + benchmarks PRIVATE benchmark::benchmark benchmark::benchmark_main aare_core + aare_compiler_flags) # Set output properties set_target_properties( @@ -31,10 +32,22 @@ set_target_properties( OUTPUT_NAME run_benchmarks) add_executable(fit_benchmark fit_benchmark.cpp) -target_link_libraries(fit_benchmark PRIVATE benchmark::benchmark aare_core - aare_compiler_flags) +target_link_libraries( + fit_benchmark PRIVATE benchmark::benchmark benchmark::benchmark_main + aare_core aare_compiler_flags) target_include_directories( fit_benchmark SYSTEM PRIVATE $) set_target_properties(fit_benchmark PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_executable(clusterfinder_benchmark clusterfinder_benchmark.cpp) +target_link_libraries( + clusterfinder_benchmark + PRIVATE benchmark::benchmark benchmark::benchmark_main aare_core + aare_compiler_flags) +target_include_directories( + clusterfinder_benchmark SYSTEM + PRIVATE $) +set_target_properties(clusterfinder_benchmark + PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/benchmarks/calculateeta_benchmark.cpp b/benchmarks/calculateeta_benchmark.cpp index 41a4114..0ca4991 100644 --- a/benchmarks/calculateeta_benchmark.cpp +++ b/benchmarks/calculateeta_benchmark.cpp @@ -99,6 +99,4 @@ BENCHMARK_F(ClusterFixture, Calculate2x2Etawithoutreduction) benchmark::DoNotOptimize(eta); benchmark::DoNotOptimize(eta2); } -} - -// BENCHMARK_MAIN(); \ No newline at end of file +} \ No newline at end of file diff --git a/benchmarks/clusterfinder_benchmark.cpp b/benchmarks/clusterfinder_benchmark.cpp new file mode 100644 index 0000000..6ac8425 --- /dev/null +++ b/benchmarks/clusterfinder_benchmark.cpp @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: MPL-2.0 +#include "aare/ClusterFinder.hpp" +#include "aare/File.hpp" +#include "aare/Frame.hpp" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +namespace { + +using Cluster3x3 = aare::Cluster; +using Cluster5x5 = aare::Cluster; +using Cluster7x7 = aare::Cluster; + +template +using ClusterFinder = aare::ClusterFinder; + +template class BenchmarkData { + public: + aare::Shape<2> image_shape; + std::vector v_pedestal_frames, v_test_frames; + size_t n_pedestal_frames, n_test_frames; + BenchmarkData() { + auto pedestal_filepath = std::getenv("PEDESTAL_FILE"); + auto data_filepath = std::getenv("DATA_FILE"); + auto pedestal_frames = std::getenv("N_PEDESTAL_FRAMES"); + auto test_frames = std::getenv("N_TEST_FRAMES"); + // if not initialized, take 1000 pedestal and 1000 test frames + n_pedestal_frames = + pedestal_frames ? std::stoul(pedestal_frames) : 1000; + n_test_frames = test_frames ? std::stoul(test_frames) : 1000; + if (!pedestal_filepath || !data_filepath) { + throw std::runtime_error("PEDESTAL_FILE and DATA_FILE environment " + "variables must be set"); + } + aare::File pedestal_file(pedestal_filepath); + aare::File data_file(data_filepath); + pedestal_file.seek(0); + try { + v_pedestal_frames = pedestal_file.read_n(n_pedestal_frames); + } catch (const std::exception &e) { + throw std::runtime_error("Error reading pedestal " + + std::to_string(n_pedestal_frames) + + " frames: " + std::string(e.what())); + } + data_file.seek(0); + try { + v_test_frames = data_file.read_n(n_test_frames); + } catch (const std::exception &e) { + throw std::runtime_error("Error reading test " + + std::to_string(n_test_frames) + + " frames: " + std::string(e.what())); + } + std::vector frame_shape = { + static_cast(v_test_frames[0].rows()), + static_cast(v_test_frames[0].cols())}; + image_shape = aare::make_shape<2>(frame_shape); + } + + void initialize_finder(ClusterFinder &finder) { + for (aare::Frame &frame : v_pedestal_frames) { + finder.push_pedestal_frame(frame.view()); + } + } +}; + +template +void run_benchmark(benchmark::State &state, const char *label, + bool update_pedestal) { + BenchmarkData data{}; + ClusterFinder cluster_finder(data.image_shape); + data.initialize_finder(cluster_finder); + std::size_t cluster_count = 0; + for (auto _ : state) { + cluster_count = 0; + for (aare::Frame &frame : data.v_test_frames) { + cluster_finder.find_clusters(frame.view(), 0, + update_pedestal); + + cluster_count += cluster_finder.steal_clusters(true).size(); + } + benchmark::DoNotOptimize(cluster_count); + } + + auto n_test_frames = data.n_test_frames; + state.counters["test_frames"] = n_test_frames; + state.counters["clusters_per_frame"] = + static_cast(cluster_count) / static_cast(n_test_frames); + state.SetItemsProcessed(static_cast(state.iterations()) * + static_cast(n_test_frames)); + state.SetLabel(label); +} +// Wrapper for the old function +void BM_ClusterFinder_3x3(benchmark::State &state) { + run_benchmark(state, "find_clusters 3x3", true); +} +void BM_ClusterFinder_5x5(benchmark::State &state) { + run_benchmark(state, "find_clusters 5x5", true); +} +void BM_ClusterFinder_7x7(benchmark::State &state) { + run_benchmark(state, "find_clusters 7x7", true); +} +} // namespace + +BENCHMARK(BM_ClusterFinder_3x3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_ClusterFinder_5x5)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_ClusterFinder_7x7)->Unit(benchmark::kMillisecond); \ No newline at end of file diff --git a/benchmarks/fit_benchmark.cpp b/benchmarks/fit_benchmark.cpp index 8249bcd..853b620 100644 --- a/benchmarks/fit_benchmark.cpp +++ b/benchmarks/fit_benchmark.cpp @@ -158,6 +158,4 @@ BENCHMARK(BM_FitGausMinuitGrad) BENCHMARK(BM_FitGausMinuitGradHesse) ->DenseRange(0, 5) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK_MAIN(); \ No newline at end of file + ->Unit(benchmark::kMicrosecond); \ No newline at end of file diff --git a/benchmarks/ndarray_benchmark.cpp b/benchmarks/ndarray_benchmark.cpp index 456f4d4..a0f2a03 100644 --- a/benchmarks/ndarray_benchmark.cpp +++ b/benchmarks/ndarray_benchmark.cpp @@ -128,6 +128,4 @@ BENCHMARK_F(TwoArrays, MultiplyAddDivideWithIndex)(benchmark::State &st) { } benchmark::DoNotOptimize(res); } -} - -BENCHMARK_MAIN(); \ No newline at end of file +} \ No newline at end of file diff --git a/include/aare/ClusterFinder.hpp b/include/aare/ClusterFinder.hpp index 83b5c78..c06cc28 100644 --- a/include/aare/ClusterFinder.hpp +++ b/include/aare/ClusterFinder.hpp @@ -29,8 +29,16 @@ class ClusterFinder { Pedestal m_pedestal; ClusterVector m_clusters; - static const uint8_t ClusterSizeX = ClusterType::cluster_size_x; - static const uint8_t ClusterSizeY = ClusterType::cluster_size_y; + static constexpr uint8_t ClusterSizeX = ClusterType::cluster_size_x; + static constexpr uint8_t ClusterSizeY = ClusterType::cluster_size_y; + + static constexpr int dx_c = ClusterSizeX / 2; + static constexpr int dy_c = ClusterSizeY / 2; + + static constexpr int is_odd_x = ClusterSizeX % 2; + static constexpr int is_odd_y = ClusterSizeY % 2; + // for even sized clusters there is no proper cluster center and + // even amount of pixels around the center using CT = typename ClusterType::value_type; public: @@ -81,25 +89,15 @@ class ClusterFinder { m_clusters = ClusterVector{}; return tmp; } - void find_clusters(NDView frame, uint64_t frame_number = 0) { + void find_clusters(NDView frame, uint64_t frame_number = 0, + bool update_pedestal = true) { // // TODO! deal with even size clusters // // currently 3,3 -> +/- 1 // // 4,4 -> +/- 2 - int dy = ClusterSizeY / 2; - int dx = ClusterSizeX / 2; - int has_center_pixel_x = - ClusterSizeX % - 2; // for even sized clusters there is no proper cluster center and - // even amount of pixels around the center - int has_center_pixel_y = ClusterSizeY % 2; m_clusters.set_frame_number(frame_number); for (int iy = 0; iy < frame.shape(0); iy++) { for (int ix = 0; ix < frame.shape(1); ix++) { - - PEDESTAL_TYPE max = std::numeric_limits::min(); - PEDESTAL_TYPE total = 0; - // What can we short circuit here? PEDESTAL_TYPE rms = m_pedestal.std(iy, ix); PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix)); @@ -107,83 +105,79 @@ class ClusterFinder { if (value < -m_nSigma * rms) continue; // NEGATIVE_PEDESTAL go to next pixel // TODO! No pedestal update??? + PEDESTAL_TYPE max = std::numeric_limits::min(); + PEDESTAL_TYPE total = 0; - for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) { - for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) { + for (int ir = -dy_c; ir < dy_c + is_odd_y; ir++) { + for (int ic = -dx_c; ic < dx_c + is_odd_x; ic++) { if (ix + ic >= 0 && ix + ic < frame.shape(1) && iy + ir >= 0 && iy + ir < frame.shape(0)) { PEDESTAL_TYPE val = frame(iy + ir, ix + ic) - m_pedestal.mean(iy + ir, ix + ic); - total += val; max = std::max(max, val); } } } + if (max > m_nSigma * rms || total > c3 * m_nSigma * rms) { + if (value == max) { + ClusterType cluster{}; + cluster.x = ix; + cluster.y = iy; + // Fill the cluster data since we have a photon to store + // It's worth redoing the look since most of the time we + // don't have a photon + int cluster_index = 0; + for (int ir = -dy_c; ir < dy_c + is_odd_y; ir++) { + for (int ic = -dx_c; ic < dx_c + is_odd_x; ic++) { + if (ix + ic >= 0 && ix + ic < frame.shape(1) && + iy + ir >= 0 && iy + ir < frame.shape(0)) { - if ((max > m_nSigma * rms)) { - if (value < max) - continue; // Not max go to the next pixel - // but also no pedestal update - } else if (total > c3 * m_nSigma * rms) { - // pass - } else { - // m_pedestal.push(iy, ix, frame(iy, ix)); // Safe option - m_pedestal.push_fast( - iy, ix, - frame(iy, - ix)); // Assume we have reached n_samples in the - // pedestal, slight performance improvement - continue; // It was a pedestal value nothing to store - } - - // Store cluster - if (value == max) { - ClusterType cluster{}; - cluster.x = ix; - cluster.y = iy; - - // Fill the cluster data since we have a photon to store - // It's worth redoing the look since most of the time we - // don't have a photon - int i = 0; - for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) { - for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) { - if (ix + ic >= 0 && ix + ic < frame.shape(1) && - iy + ir >= 0 && iy + ir < frame.shape(0)) { - - // If the cluster type is an integral type, and - // the pedestal is a floating point type then we - // need to round the value before storing it - if constexpr (std::is_integral_v && - std::is_floating_point_v< - PEDESTAL_TYPE>) { - auto tmp = std::lround( - frame(iy + ir, ix + ic) - - m_pedestal.mean(iy + ir, ix + ic)); - cluster.data[i] = static_cast(tmp); - } - // On the other hand if both are floating point - // or both are integral then we can just static - // cast directly - else { - auto tmp = - frame(iy + ir, ix + ic) - - m_pedestal.mean(iy + ir, ix + ic); - cluster.data[i] = static_cast(tmp); + // If the cluster type is an integral type, + // and the pedestal is a floating point type + // then we need to round the value before + // storing it + if constexpr (std::is_integral_v && + std::is_floating_point_v< + PEDESTAL_TYPE>) { + auto tmp = std::lround( + frame(iy + ir, ix + ic) - + m_pedestal.mean(iy + ir, ix + ic)); + cluster.data[cluster_index] = + static_cast(tmp); + } + // On the other hand if both are floating + // point or both are integral then we can + // just static cast directly + else { + auto tmp = + frame(iy + ir, ix + ic) - + m_pedestal.mean(iy + ir, ix + ic); + cluster.data[cluster_index] = + static_cast(tmp); + } + cluster_index++; } } - i++; } + // Add the cluster to the output ClusterVector + m_clusters.push_back(cluster); + } else { + continue; + } + } else { + if (update_pedestal) { + m_pedestal.push_fast( + iy, ix, + frame( + iy, + ix)); // Assume we have reached n_samples in the + // pedestal already and can use the faster push method } - - // Add the cluster to the output ClusterVector - m_clusters.push_back(cluster); } } } - } + }; }; - } // namespace aare \ No newline at end of file