From 8733a1d66f8e62327fb2f6846d4d4ff7e58ce2be Mon Sep 17 00:00:00 2001
From: Alice
Date: Fri, 22 Aug 2025 15:14:05 +0200
Subject: [PATCH] added benchmark

---
Review notes (git am ignores everything between the "---" line above and
the first "diff --git" line):

* The copy of this patch under review had its line breaks collapsed and all
  text inside angle brackets removed. The line structure below is restored
  from the "+" markers. The <benchmark/benchmark.h> include, every template
  argument list, and the three trailing context lines of the Cluster.hpp
  hunk are reconstructed from how the code uses them - TODO confirm against
  include/aare/Cluster.hpp before applying. The template argument lists
  assume Cluster takes <T, SizeX, SizeY, CoordType> with a defaulted
  CoordType - TODO confirm. The author e-mail could not be recovered.
* Fix: reduce_to_3x3 set result.x = c.x + 1 in cases 4, 6 and 7. Those
  windows are centred on data[12], data[16] and data[17], so following the
  pattern of cases 0-3 the x offsets are 0, -1 and 0.
* Added the <algorithm>, <array> and <iterator> includes that
  reduce_benchmark.cpp uses directly, plus doc comments.
* The blob ids on the "index" lines predate these edits and are stale.
* NOTE(review): the 100644 => 100755 mode change on Cluster.hpp looks
  unintended for a header - confirm before merging.

 benchmarks/CMakeLists.txt       |   2 +-
 benchmarks/reduce_benchmark.cpp | 187 ++++++++++++++++++++++++++++++++
 include/aare/Cluster.hpp        |  37 ++++++
 3 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/reduce_benchmark.cpp
 mode change 100644 => 100755 include/aare/Cluster.hpp

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 699b4c6..f749466 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -15,7 +15,7 @@ FetchContent_MakeAvailable(benchmark)
 
 add_executable(benchmarks)
 
-target_sources(benchmarks PRIVATE ndarray_benchmark.cpp calculateeta_benchmark.cpp)
+target_sources(benchmarks PRIVATE ndarray_benchmark.cpp calculateeta_benchmark.cpp reduce_benchmark.cpp)
 
 # Link Google Benchmark and other necessary libraries
 target_link_libraries(benchmarks PRIVATE benchmark::benchmark aare_core aare_compiler_flags)
diff --git a/benchmarks/reduce_benchmark.cpp b/benchmarks/reduce_benchmark.cpp
new file mode 100644
index 0000000..2213624
--- /dev/null
+++ b/benchmarks/reduce_benchmark.cpp
@@ -0,0 +1,187 @@
+#include "aare/Cluster.hpp"
+#include <benchmark/benchmark.h>
+
+#include <algorithm>
+#include <array>
+#include <iterator>
+
+using namespace aare;
+
+// Fixture holding one 5x5 and one 3x3 cluster that the reduce benchmarks
+// below use as input.
+class ClustersForReduceFixture : public benchmark::Fixture {
+  public:
+    Cluster<int, 5, 5> cluster_5x5{};
+    Cluster<int, 3, 3> cluster_3x3{};
+
+  private:
+    using benchmark::Fixture::SetUp;
+
+    void SetUp([[maybe_unused]] const benchmark::State &state) override {
+        // Only 20 of the 25 values are listed; the remaining 5 are zero.
+        int temp_data[25] = {1, 1, 1, 1, 1, 1, 1, 2, 1, 1,
+                             1, 2, 3, 1, 2, 1, 1, 1, 1, 2};
+        std::copy(std::begin(temp_data), std::end(temp_data),
+                  std::begin(cluster_5x5.data));
+
+        cluster_5x5.x = 5;
+        cluster_5x5.y = 5;
+
+        int temp_data2[9] = {1, 1, 1, 2, 3, 1, 2, 2, 1};
+        std::copy(std::begin(temp_data2), std::end(temp_data2),
+                  std::begin(cluster_3x3.data));
+
+        cluster_3x3.x = 5;
+        cluster_3x3.y = 5;
+    }
+
+    // void TearDown(::benchmark::State& state) {
+    // }
+};
+
+/**
+ * @brief Reduce a 5x5 cluster to the 3x3 window with the largest sum.
+ *
+ * The nine candidate windows are numbered row by row: window 0 starts at
+ * data[0], window 8 starts at data[12]. On equal sums std::max_element
+ * returns the first maximum, so the lowest-numbered window is selected.
+ *
+ * @param c 5x5 cluster whose data is indexed as a 5-wide grid
+ * @return the selected window as a 3x3 cluster, with x and y moved from the
+ *         centre of c (data[12]) to the centre of that window
+ */
+template <typename T>
+Cluster<T, 3, 3> reduce_to_3x3(const Cluster<T, 5, 5> &c) {
+    Cluster<T, 3, 3> result;
+
+    // One sum per candidate window, stored in window order
+    std::array<T, 9> sum_3x3_subclusters;
+
+    // Write out the sums in the hope that the compiler can optimize this
+    sum_3x3_subclusters[0] = c.data[0] + c.data[1] + c.data[2] + c.data[5] +
+                             c.data[6] + c.data[7] + c.data[10] + c.data[11] +
+                             c.data[12];
+    sum_3x3_subclusters[1] = c.data[1] + c.data[2] + c.data[3] + c.data[6] +
+                             c.data[7] + c.data[8] + c.data[11] + c.data[12] +
+                             c.data[13];
+    sum_3x3_subclusters[2] = c.data[2] + c.data[3] + c.data[4] + c.data[7] +
+                             c.data[8] + c.data[9] + c.data[12] + c.data[13] +
+                             c.data[14];
+    sum_3x3_subclusters[3] = c.data[5] + c.data[6] + c.data[7] + c.data[10] +
+                             c.data[11] + c.data[12] + c.data[15] + c.data[16] +
+                             c.data[17];
+    sum_3x3_subclusters[4] = c.data[6] + c.data[7] + c.data[8] + c.data[11] +
+                             c.data[12] + c.data[13] + c.data[16] + c.data[17] +
+                             c.data[18];
+    sum_3x3_subclusters[5] = c.data[7] + c.data[8] + c.data[9] + c.data[12] +
+                             c.data[13] + c.data[14] + c.data[17] + c.data[18] +
+                             c.data[19];
+    sum_3x3_subclusters[6] = c.data[10] + c.data[11] + c.data[12] + c.data[15] +
+                             c.data[16] + c.data[17] + c.data[20] + c.data[21] +
+                             c.data[22];
+    sum_3x3_subclusters[7] = c.data[11] + c.data[12] + c.data[13] + c.data[16] +
+                             c.data[17] + c.data[18] + c.data[21] + c.data[22] +
+                             c.data[23];
+    sum_3x3_subclusters[8] = c.data[12] + c.data[13] + c.data[14] + c.data[17] +
+                             c.data[18] + c.data[19] + c.data[22] + c.data[23] +
+                             c.data[24];
+
+    auto index = std::max_element(sum_3x3_subclusters.begin(),
+                                  sum_3x3_subclusters.end()) -
+                 sum_3x3_subclusters.begin();
+
+    // Move the coordinate from the centre of the 5x5 cluster (data[12]) to
+    // the centre of the selected window.
+    switch (index) {
+    case 0:
+        result.x = c.x - 1;
+        result.y = c.y + 1;
+        result.data = {c.data[0], c.data[1],  c.data[2],  c.data[5], c.data[6],
+                       c.data[7], c.data[10], c.data[11], c.data[12]};
+        break;
+    case 1:
+        result.x = c.x;
+        result.y = c.y + 1;
+        result.data = {c.data[1], c.data[2],  c.data[3],  c.data[6], c.data[7],
+                       c.data[8], c.data[11], c.data[12], c.data[13]};
+        break;
+    case 2:
+        result.x = c.x + 1;
+        result.y = c.y + 1;
+        result.data = {c.data[2], c.data[3],  c.data[4],  c.data[7], c.data[8],
+                       c.data[9], c.data[12], c.data[13], c.data[14]};
+        break;
+    case 3:
+        result.x = c.x - 1;
+        result.y = c.y;
+        result.data = {c.data[5],  c.data[6],  c.data[7],
+                       c.data[10], c.data[11], c.data[12],
+                       c.data[15], c.data[16], c.data[17]};
+        break;
+    case 4:
+        result.x = c.x;
+        result.y = c.y;
+        result.data = {c.data[6],  c.data[7],  c.data[8],
+                       c.data[11], c.data[12], c.data[13],
+                       c.data[16], c.data[17], c.data[18]};
+        break;
+    case 5:
+        result.x = c.x + 1;
+        result.y = c.y;
+        result.data = {c.data[7],  c.data[8],  c.data[9],
+                       c.data[12], c.data[13], c.data[14],
+                       c.data[17], c.data[18], c.data[19]};
+        break;
+    case 6:
+        result.x = c.x - 1;
+        result.y = c.y - 1;
+        result.data = {c.data[10], c.data[11], c.data[12],
+                       c.data[15], c.data[16], c.data[17],
+                       c.data[20], c.data[21], c.data[22]};
+        break;
+    case 7:
+        result.x = c.x;
+        result.y = c.y - 1;
+        result.data = {c.data[11], c.data[12], c.data[13],
+                       c.data[16], c.data[17], c.data[18],
+                       c.data[21], c.data[22], c.data[23]};
+        break;
+    case 8:
+        result.x = c.x + 1;
+        result.y = c.y - 1;
+        result.data = {c.data[12], c.data[13], c.data[14],
+                       c.data[17], c.data[18], c.data[19],
+                       c.data[22], c.data[23], c.data[24]};
+        break;
+    }
+    return result;
+}
+
+BENCHMARK_F(ClustersForReduceFixture, Reduce2x2)(benchmark::State &st) {
+    for (auto _ : st) {
+        // This code gets timed
+        benchmark::DoNotOptimize(reduce_to_2x2<int, 3, 3>(
+            cluster_3x3)); // make sure compiler evaluates the expression
+    }
+}
+
+BENCHMARK_F(ClustersForReduceFixture, SpecificReduce2x2)(benchmark::State &st) {
+    for (auto _ : st) {
+        // This code gets timed
+        benchmark::DoNotOptimize(reduce_to_2x2(cluster_3x3));
+    }
+}
+
+BENCHMARK_F(ClustersForReduceFixture, Reduce3x3)(benchmark::State &st) {
+    for (auto _ : st) {
+        // This code gets timed
+        benchmark::DoNotOptimize(reduce_to_3x3<int, 5, 5>(cluster_5x5));
+    }
+}
+
+BENCHMARK_F(ClustersForReduceFixture, SpecificReduce3x3)(benchmark::State &st) {
+    for (auto _ : st) {
+        // This code gets timed
+        benchmark::DoNotOptimize(reduce_to_3x3(cluster_5x5));
+    }
+}
\ No newline at end of file
diff --git a/include/aare/Cluster.hpp b/include/aare/Cluster.hpp
old mode 100644
new mode 100755
index c4dff2f..32ab359
--- a/include/aare/Cluster.hpp
+++ b/include/aare/Cluster.hpp
@@ -116,6 +116,43 @@ reduce_to_2x2(const Cluster<T, ClusterSizeX, ClusterSizeY, CoordType> &c) {
     return result;
 }
 
+/**
+ * @brief reduce_to_2x2 written out for 3x3 clusters.
+ *
+ * Copies the 2x2 window whose index c.max_sum_2x2() returns. Window 3 keeps
+ * c.x and c.y; the other windows shift x by -1 and/or y by +1.
+ */
+template <typename T>
+Cluster<T, 2, 2> reduce_to_2x2(const Cluster<T, 3, 3> &c) {
+    Cluster<T, 2, 2> result;
+
+    auto [s, i] = c.max_sum_2x2();
+    switch (i) {
+    case 0:
+        result.x = c.x - 1;
+        result.y = c.y + 1;
+        result.data = {c.data[0], c.data[1], c.data[3], c.data[4]};
+        break;
+    case 1:
+        result.x = c.x;
+        result.y = c.y + 1;
+        result.data = {c.data[1], c.data[2], c.data[4], c.data[5]};
+        break;
+    case 2:
+        result.x = c.x - 1;
+        result.y = c.y;
+        result.data = {c.data[3], c.data[4], c.data[6], c.data[7]};
+        break;
+    case 3:
+        result.x = c.x;
+        result.y = c.y;
+        result.data = {c.data[4], c.data[5], c.data[7], c.data[8]};
+        break;
+    }
+
+    return result;
+}
+
 template <typename T, uint8_t ClusterSizeX, uint8_t ClusterSizeY,
           typename CoordType>
 inline std::pair<T, uint16_t>