refactored ClusterFinder and find_clusters, add clusterfinder_benchmark, refactored main in benchmarks

This commit is contained in:
2026-04-27 14:36:23 +02:00
parent 2736d975c5
commit 95762ae459
6 changed files with 200 additions and 86 deletions
+17 -4
View File
@@ -22,8 +22,9 @@ target_sources(
reduce_benchmark.cpp)
# Link Google Benchmark and other necessary libraries
target_link_libraries(benchmarks PRIVATE benchmark::benchmark aare_core
aare_compiler_flags)
target_link_libraries(
benchmarks PRIVATE benchmark::benchmark benchmark::benchmark_main aare_core
aare_compiler_flags)
# Set output properties
set_target_properties(
@@ -31,10 +32,22 @@ set_target_properties(
OUTPUT_NAME run_benchmarks)
add_executable(fit_benchmark fit_benchmark.cpp)
target_link_libraries(fit_benchmark PRIVATE benchmark::benchmark aare_core
aare_compiler_flags)
target_link_libraries(
fit_benchmark PRIVATE benchmark::benchmark benchmark::benchmark_main
aare_core aare_compiler_flags)
target_include_directories(
fit_benchmark SYSTEM
PRIVATE $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(fit_benchmark PROPERTIES RUNTIME_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR})
add_executable(clusterfinder_benchmark clusterfinder_benchmark.cpp)
target_link_libraries(
clusterfinder_benchmark
PRIVATE benchmark::benchmark benchmark::benchmark_main aare_core
aare_compiler_flags)
target_include_directories(
clusterfinder_benchmark SYSTEM
PRIVATE $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(clusterfinder_benchmark
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+1 -3
View File
@@ -99,6 +99,4 @@ BENCHMARK_F(ClusterFixture, Calculate2x2Etawithoutreduction)
benchmark::DoNotOptimize(eta);
benchmark::DoNotOptimize(eta2);
}
}
// BENCHMARK_MAIN();
}
+113
View File
@@ -0,0 +1,113 @@
// SPDX-License-Identifier: MPL-2.0
#include "aare/ClusterFinder.hpp"
#include "aare/File.hpp"
#include "aare/Frame.hpp"
#include <benchmark/benchmark.h>
#include <algorithm>
#include <array>
#include <cstdint>
#include <limits>
#include <numeric>
#include <random>
#include <string>
#include <vector>
namespace {
using Cluster3x3 = aare::Cluster<int32_t, 3, 3>;
using Cluster5x5 = aare::Cluster<int32_t, 5, 5>;
using Cluster7x7 = aare::Cluster<int32_t, 7, 7>;
template <typename ClusterType>
using ClusterFinder = aare::ClusterFinder<ClusterType, uint16_t, double>;
template <typename ClusterType> class BenchmarkData {
public:
aare::Shape<2> image_shape;
std::vector<aare::Frame> v_pedestal_frames, v_test_frames;
size_t n_pedestal_frames, n_test_frames;
BenchmarkData() {
auto pedestal_filepath = std::getenv("PEDESTAL_FILE");
auto data_filepath = std::getenv("DATA_FILE");
auto pedestal_frames = std::getenv("N_PEDESTAL_FRAMES");
auto test_frames = std::getenv("N_TEST_FRAMES");
// if not initialized, take 1000 pedestal and 1000 test frames
n_pedestal_frames =
pedestal_frames ? std::stoul(pedestal_frames) : 1000;
n_test_frames = test_frames ? std::stoul(test_frames) : 1000;
if (!pedestal_filepath || !data_filepath) {
throw std::runtime_error("PEDESTAL_FILE and DATA_FILE environment "
"variables must be set");
}
aare::File pedestal_file(pedestal_filepath);
aare::File data_file(data_filepath);
pedestal_file.seek(0);
try {
v_pedestal_frames = pedestal_file.read_n(n_pedestal_frames);
} catch (const std::exception &e) {
throw std::runtime_error("Error reading pedestal " +
std::to_string(n_pedestal_frames) +
" frames: " + std::string(e.what()));
}
data_file.seek(0);
try {
v_test_frames = data_file.read_n(n_test_frames);
} catch (const std::exception &e) {
throw std::runtime_error("Error reading test " +
std::to_string(n_test_frames) +
" frames: " + std::string(e.what()));
}
std::vector<size_t> frame_shape = {
static_cast<size_t>(v_test_frames[0].rows()),
static_cast<size_t>(v_test_frames[0].cols())};
image_shape = aare::make_shape<2>(frame_shape);
}
void initialize_finder(ClusterFinder<ClusterType> &finder) {
for (aare::Frame &frame : v_pedestal_frames) {
finder.push_pedestal_frame(frame.view<uint16_t>());
}
}
};
template <typename ClusterType>
void run_benchmark(benchmark::State &state, const char *label,
bool update_pedestal) {
BenchmarkData<ClusterType> data{};
ClusterFinder<ClusterType> cluster_finder(data.image_shape);
data.initialize_finder(cluster_finder);
std::size_t cluster_count = 0;
for (auto _ : state) {
cluster_count = 0;
for (aare::Frame &frame : data.v_test_frames) {
cluster_finder.find_clusters(frame.view<uint16_t>(), 0,
update_pedestal);
cluster_count += cluster_finder.steal_clusters(true).size();
}
benchmark::DoNotOptimize(cluster_count);
}
auto n_test_frames = data.n_test_frames;
state.counters["test_frames"] = n_test_frames;
state.counters["clusters_per_frame"] =
static_cast<double>(cluster_count) / static_cast<double>(n_test_frames);
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(n_test_frames));
state.SetLabel(label);
}
// Wrapper for the old function
void BM_ClusterFinder_3x3(benchmark::State &state) {
run_benchmark<Cluster3x3>(state, "find_clusters 3x3", true);
}
void BM_ClusterFinder_5x5(benchmark::State &state) {
run_benchmark<Cluster5x5>(state, "find_clusters 5x5", true);
}
void BM_ClusterFinder_7x7(benchmark::State &state) {
run_benchmark<Cluster7x7>(state, "find_clusters 7x7", true);
}
} // namespace
BENCHMARK(BM_ClusterFinder_3x3)->Unit(benchmark::kMillisecond);
BENCHMARK(BM_ClusterFinder_5x5)->Unit(benchmark::kMillisecond);
BENCHMARK(BM_ClusterFinder_7x7)->Unit(benchmark::kMillisecond);
+1 -3
View File
@@ -158,6 +158,4 @@ BENCHMARK(BM_FitGausMinuitGrad)
BENCHMARK(BM_FitGausMinuitGradHesse)
->DenseRange(0, 5)
->Unit(benchmark::kMicrosecond);
BENCHMARK_MAIN();
->Unit(benchmark::kMicrosecond);
+1 -3
View File
@@ -128,6 +128,4 @@ BENCHMARK_F(TwoArrays, MultiplyAddDivideWithIndex)(benchmark::State &st) {
}
benchmark::DoNotOptimize(res);
}
}
BENCHMARK_MAIN();
}
+67 -73
View File
@@ -29,8 +29,16 @@ class ClusterFinder {
Pedestal<PEDESTAL_TYPE> m_pedestal;
ClusterVector<ClusterType> m_clusters;
static const uint8_t ClusterSizeX = ClusterType::cluster_size_x;
static const uint8_t ClusterSizeY = ClusterType::cluster_size_y;
static constexpr uint8_t ClusterSizeX = ClusterType::cluster_size_x;
static constexpr uint8_t ClusterSizeY = ClusterType::cluster_size_y;
static constexpr int dx_c = ClusterSizeX / 2;
static constexpr int dy_c = ClusterSizeY / 2;
static constexpr int is_odd_x = ClusterSizeX % 2;
static constexpr int is_odd_y = ClusterSizeY % 2;
// for even sized clusters there is no proper cluster center and
// even amount of pixels around the center
using CT = typename ClusterType::value_type;
public:
@@ -81,25 +89,15 @@ class ClusterFinder {
m_clusters = ClusterVector<ClusterType>{};
return tmp;
}
void find_clusters(NDView<FRAME_TYPE, 2> frame, uint64_t frame_number = 0) {
void find_clusters(NDView<FRAME_TYPE, 2> frame, uint64_t frame_number = 0,
bool update_pedestal = true) {
// // TODO! deal with even size clusters
// // currently 3,3 -> +/- 1
// // 4,4 -> +/- 2
int dy = ClusterSizeY / 2;
int dx = ClusterSizeX / 2;
int has_center_pixel_x =
ClusterSizeX %
2; // for even sized clusters there is no proper cluster center and
// even amount of pixels around the center
int has_center_pixel_y = ClusterSizeY % 2;
m_clusters.set_frame_number(frame_number);
for (int iy = 0; iy < frame.shape(0); iy++) {
for (int ix = 0; ix < frame.shape(1); ix++) {
PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
PEDESTAL_TYPE total = 0;
// What can we short circuit here?
PEDESTAL_TYPE rms = m_pedestal.std(iy, ix);
PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix));
@@ -107,83 +105,79 @@ class ClusterFinder {
if (value < -m_nSigma * rms)
continue; // NEGATIVE_PEDESTAL go to next pixel
// TODO! No pedestal update???
PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
PEDESTAL_TYPE total = 0;
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) {
for (int ir = -dy_c; ir < dy_c + is_odd_y; ir++) {
for (int ic = -dx_c; ic < dx_c + is_odd_x; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
PEDESTAL_TYPE val =
frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic);
total += val;
max = std::max(max, val);
}
}
}
if (max > m_nSigma * rms || total > c3 * m_nSigma * rms) {
if (value == max) {
ClusterType cluster{};
cluster.x = ix;
cluster.y = iy;
// Fill the cluster data since we have a photon to store
// It's worth redoing the look since most of the time we
// don't have a photon
int cluster_index = 0;
for (int ir = -dy_c; ir < dy_c + is_odd_y; ir++) {
for (int ic = -dx_c; ic < dx_c + is_odd_x; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
if ((max > m_nSigma * rms)) {
if (value < max)
continue; // Not max go to the next pixel
// but also no pedestal update
} else if (total > c3 * m_nSigma * rms) {
// pass
} else {
// m_pedestal.push(iy, ix, frame(iy, ix)); // Safe option
m_pedestal.push_fast(
iy, ix,
frame(iy,
ix)); // Assume we have reached n_samples in the
// pedestal, slight performance improvement
continue; // It was a pedestal value nothing to store
}
// Store cluster
if (value == max) {
ClusterType cluster{};
cluster.x = ix;
cluster.y = iy;
// Fill the cluster data since we have a photon to store
// It's worth redoing the look since most of the time we
// don't have a photon
int i = 0;
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
// If the cluster type is an integral type, and
// the pedestal is a floating point type then we
// need to round the value before storing it
if constexpr (std::is_integral_v<CT> &&
std::is_floating_point_v<
PEDESTAL_TYPE>) {
auto tmp = std::lround(
frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic));
cluster.data[i] = static_cast<CT>(tmp);
}
// On the other hand if both are floating point
// or both are integral then we can just static
// cast directly
else {
auto tmp =
frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic);
cluster.data[i] = static_cast<CT>(tmp);
// If the cluster type is an integral type,
// and the pedestal is a floating point type
// then we need to round the value before
// storing it
if constexpr (std::is_integral_v<CT> &&
std::is_floating_point_v<
PEDESTAL_TYPE>) {
auto tmp = std::lround(
frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic));
cluster.data[cluster_index] =
static_cast<CT>(tmp);
}
// On the other hand if both are floating
// point or both are integral then we can
// just static cast directly
else {
auto tmp =
frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic);
cluster.data[cluster_index] =
static_cast<CT>(tmp);
}
cluster_index++;
}
}
i++;
}
// Add the cluster to the output ClusterVector
m_clusters.push_back(cluster);
} else {
continue;
}
} else {
if (update_pedestal) {
m_pedestal.push_fast(
iy, ix,
frame(
iy,
ix)); // Assume we have reached n_samples in the
// pedestal already and can use the faster push method
}
// Add the cluster to the output ClusterVector
m_clusters.push_back(cluster);
}
}
}
}
};
};
} // namespace aare