// SPDX-License-Identifier: MPL-2.0
#include "aare/Cluster.hpp"

#include <benchmark/benchmark.h>

#include <algorithm>
#include <array>

using namespace aare;

class ClustersForReduceFixture : public benchmark::Fixture {
  public:
    Cluster<int, 5, 5> cluster_5x5{};
    Cluster<int, 3, 3> cluster_3x3{};

  private:
    using benchmark::Fixture::SetUp;

    void SetUp([[maybe_unused]] const benchmark::State &state) override {
        int temp_data[25] = {1, 1, 1, 1, 1, 1, 1, 2, 1, 1,
                             1, 2, 3, 1, 2, 1, 1, 1, 1, 2};
        std::copy(std::begin(temp_data), std::end(temp_data),
                  std::begin(cluster_5x5.data));
        cluster_5x5.x = 5;
        cluster_5x5.y = 5;

        int temp_data2[9] = {1, 1, 1, 2, 3, 1, 2, 2, 1};
        std::copy(std::begin(temp_data2), std::end(temp_data2),
                  std::begin(cluster_3x3.data));
        cluster_3x3.x = 5;
        cluster_3x3.y = 5;
    }

    // void TearDown(::benchmark::State& state) {
    // }
};

template <typename T>
Cluster<T, 3, 3> reduce_to_3x3(const Cluster<T, 5, 5> &c) {
    Cluster<T, 3, 3> result;

    // Write out the sums in the hope that the compiler can optimize this.
    // sum_3x3_subclusters[i] holds the sum of the 3x3 window whose top-left
    // corner sits at column (i % 3), row (i / 3) of the 5x5 cluster.
    std::array<T, 9> sum_3x3_subclusters;
    sum_3x3_subclusters[0] = c.data[0] + c.data[1] + c.data[2] + c.data[5] +
                             c.data[6] + c.data[7] + c.data[10] + c.data[11] +
                             c.data[12];
    sum_3x3_subclusters[1] = c.data[1] + c.data[2] + c.data[3] + c.data[6] +
                             c.data[7] + c.data[8] + c.data[11] + c.data[12] +
                             c.data[13];
    sum_3x3_subclusters[2] = c.data[2] + c.data[3] + c.data[4] + c.data[7] +
                             c.data[8] + c.data[9] + c.data[12] + c.data[13] +
                             c.data[14];
    sum_3x3_subclusters[3] = c.data[5] + c.data[6] + c.data[7] + c.data[10] +
                             c.data[11] + c.data[12] + c.data[15] +
                             c.data[16] + c.data[17];
    sum_3x3_subclusters[4] = c.data[6] + c.data[7] + c.data[8] + c.data[11] +
                             c.data[12] + c.data[13] + c.data[16] +
                             c.data[17] + c.data[18];
    sum_3x3_subclusters[5] = c.data[7] + c.data[8] + c.data[9] + c.data[12] +
                             c.data[13] + c.data[14] + c.data[17] +
                             c.data[18] + c.data[19];
    sum_3x3_subclusters[6] = c.data[10] + c.data[11] + c.data[12] +
                             c.data[15] + c.data[16] + c.data[17] +
                             c.data[20] + c.data[21] + c.data[22];
    sum_3x3_subclusters[7] = c.data[11] + c.data[12] + c.data[13] +
                             c.data[16] + c.data[17] + c.data[18] +
                             c.data[21] + c.data[22] + c.data[23];
    sum_3x3_subclusters[8] = c.data[12] + c.data[13] + c.data[14] +
                             c.data[17] + c.data[18] + c.data[19] +
                             c.data[22] + c.data[23] + c.data[24];

    auto index = std::max_element(sum_3x3_subclusters.begin(),
                                  sum_3x3_subclusters.end()) -
                 sum_3x3_subclusters.begin();

    // Shift the cluster coordinate towards the selected window and copy its
    // pixels.
    switch (index) {
    case 0:
        result.x = c.x - 1;
        result.y = c.y + 1;
        result.data = {c.data[0],  c.data[1],  c.data[2],
                       c.data[5],  c.data[6],  c.data[7],
                       c.data[10], c.data[11], c.data[12]};
        break;
    case 1:
        result.x = c.x;
        result.y = c.y + 1;
        result.data = {c.data[1],  c.data[2],  c.data[3],
                       c.data[6],  c.data[7],  c.data[8],
                       c.data[11], c.data[12], c.data[13]};
        break;
    case 2:
        result.x = c.x + 1;
        result.y = c.y + 1;
        result.data = {c.data[2],  c.data[3],  c.data[4],
                       c.data[7],  c.data[8],  c.data[9],
                       c.data[12], c.data[13], c.data[14]};
        break;
    case 3:
        result.x = c.x - 1;
        result.y = c.y;
        result.data = {c.data[5],  c.data[6],  c.data[7],
                       c.data[10], c.data[11], c.data[12],
                       c.data[15], c.data[16], c.data[17]};
        break;
    case 4:
        result.x = c.x;
        result.y = c.y;
        result.data = {c.data[6],  c.data[7],  c.data[8],
                       c.data[11], c.data[12], c.data[13],
                       c.data[16], c.data[17], c.data[18]};
        break;
    case 5:
        result.x = c.x + 1;
        result.y = c.y;
        result.data = {c.data[7],  c.data[8],  c.data[9],
                       c.data[12], c.data[13], c.data[14],
                       c.data[17], c.data[18], c.data[19]};
        break;
    case 6:
        result.x = c.x - 1;
        result.y = c.y - 1;
        result.data = {c.data[10], c.data[11], c.data[12],
                       c.data[15], c.data[16], c.data[17],
                       c.data[20], c.data[21], c.data[22]};
        break;
    case 7:
        result.x = c.x;
        result.y = c.y - 1;
        result.data = {c.data[11], c.data[12], c.data[13],
                       c.data[16], c.data[17], c.data[18],
                       c.data[21], c.data[22], c.data[23]};
        break;
    case 8:
        result.x = c.x + 1;
        result.y = c.y - 1;
        result.data = {c.data[12], c.data[13], c.data[14],
                       c.data[17], c.data[18], c.data[19],
                       c.data[22], c.data[23], c.data[24]};
        break;
    }
    return result;
}

BENCHMARK_F(ClustersForReduceFixture, Reduce2x2)(benchmark::State &st) {
    for (auto _ : st) {
        // This code gets timed
        benchmark::DoNotOptimize(reduce_to_2x2(
            cluster_3x3)); // make sure compiler evaluates the expression
    }
}

BENCHMARK_F(ClustersForReduceFixture, SpecificReduce2x2)(benchmark::State &st) {
    for (auto _ : st) {
        // This code gets timed
        benchmark::DoNotOptimize(reduce_to_2x2(cluster_3x3));
    }
}

BENCHMARK_F(ClustersForReduceFixture, Reduce3x3)(benchmark::State &st) {
    for (auto _ : st) {
        // This code gets timed
        benchmark::DoNotOptimize(reduce_to_3x3(cluster_5x5));
    }
}

BENCHMARK_F(ClustersForReduceFixture, SpecificReduce3x3)(benchmark::State &st) {
    for (auto _ : st) {
        // This code gets timed
        benchmark::DoNotOptimize(reduce_to_3x3(cluster_5x5));
    }
}
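
// Entry point: this file contains no BENCHMARK_MAIN(), so it presumably relies
// on the build system to provide main() (for example by linking the target
// against Google Benchmark's benchmark::benchmark_main). This is an assumption
// about the build setup; when building the benchmark standalone instead, a
// main() can be generated by uncommenting the standard macro below.
//
// BENCHMARK_MAIN();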