mirror of
https://github.com/slsdetectorgroup/aare.git
synced 2026-06-04 12:18:54 +02:00
refactored ClusterFinder and find_clusters, add clusterfinder_benchmark, refactored main in benchmarks
This commit is contained in:
@@ -22,8 +22,9 @@ target_sources(
|
||||
reduce_benchmark.cpp)
|
||||
|
||||
# Link Google Benchmark and other necessary libraries
|
||||
target_link_libraries(benchmarks PRIVATE benchmark::benchmark aare_core
|
||||
aare_compiler_flags)
|
||||
target_link_libraries(
|
||||
benchmarks PRIVATE benchmark::benchmark benchmark::benchmark_main aare_core
|
||||
aare_compiler_flags)
|
||||
|
||||
# Set output properties
|
||||
set_target_properties(
|
||||
@@ -31,10 +32,22 @@ set_target_properties(
|
||||
OUTPUT_NAME run_benchmarks)
|
||||
|
||||
add_executable(fit_benchmark fit_benchmark.cpp)
|
||||
target_link_libraries(fit_benchmark PRIVATE benchmark::benchmark aare_core
|
||||
aare_compiler_flags)
|
||||
target_link_libraries(
|
||||
fit_benchmark PRIVATE benchmark::benchmark benchmark::benchmark_main
|
||||
aare_core aare_compiler_flags)
|
||||
target_include_directories(
|
||||
fit_benchmark SYSTEM
|
||||
PRIVATE $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
set_target_properties(fit_benchmark PROPERTIES RUNTIME_OUTPUT_DIRECTORY
|
||||
${CMAKE_BINARY_DIR})
|
||||
|
||||
add_executable(clusterfinder_benchmark clusterfinder_benchmark.cpp)
|
||||
target_link_libraries(
|
||||
clusterfinder_benchmark
|
||||
PRIVATE benchmark::benchmark benchmark::benchmark_main aare_core
|
||||
aare_compiler_flags)
|
||||
target_include_directories(
|
||||
clusterfinder_benchmark SYSTEM
|
||||
PRIVATE $<TARGET_PROPERTY:Minuit2::Minuit2,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
set_target_properties(clusterfinder_benchmark
|
||||
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
@@ -99,6 +99,4 @@ BENCHMARK_F(ClusterFixture, Calculate2x2Etawithoutreduction)
|
||||
benchmark::DoNotOptimize(eta);
|
||||
benchmark::DoNotOptimize(eta2);
|
||||
}
|
||||
}
|
||||
|
||||
// BENCHMARK_MAIN();
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
#include "aare/ClusterFinder.hpp"
|
||||
#include "aare/File.hpp"
|
||||
#include "aare/Frame.hpp"
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
namespace {
|
||||
|
||||
using Cluster3x3 = aare::Cluster<int32_t, 3, 3>;
|
||||
using Cluster5x5 = aare::Cluster<int32_t, 5, 5>;
|
||||
using Cluster7x7 = aare::Cluster<int32_t, 7, 7>;
|
||||
|
||||
template <typename ClusterType>
|
||||
using ClusterFinder = aare::ClusterFinder<ClusterType, uint16_t, double>;
|
||||
|
||||
template <typename ClusterType> class BenchmarkData {
|
||||
public:
|
||||
aare::Shape<2> image_shape;
|
||||
std::vector<aare::Frame> v_pedestal_frames, v_test_frames;
|
||||
size_t n_pedestal_frames, n_test_frames;
|
||||
BenchmarkData() {
|
||||
auto pedestal_filepath = std::getenv("PEDESTAL_FILE");
|
||||
auto data_filepath = std::getenv("DATA_FILE");
|
||||
auto pedestal_frames = std::getenv("N_PEDESTAL_FRAMES");
|
||||
auto test_frames = std::getenv("N_TEST_FRAMES");
|
||||
// if not initialized, take 1000 pedestal and 1000 test frames
|
||||
n_pedestal_frames =
|
||||
pedestal_frames ? std::stoul(pedestal_frames) : 1000;
|
||||
n_test_frames = test_frames ? std::stoul(test_frames) : 1000;
|
||||
if (!pedestal_filepath || !data_filepath) {
|
||||
throw std::runtime_error("PEDESTAL_FILE and DATA_FILE environment "
|
||||
"variables must be set");
|
||||
}
|
||||
aare::File pedestal_file(pedestal_filepath);
|
||||
aare::File data_file(data_filepath);
|
||||
pedestal_file.seek(0);
|
||||
try {
|
||||
v_pedestal_frames = pedestal_file.read_n(n_pedestal_frames);
|
||||
} catch (const std::exception &e) {
|
||||
throw std::runtime_error("Error reading pedestal " +
|
||||
std::to_string(n_pedestal_frames) +
|
||||
" frames: " + std::string(e.what()));
|
||||
}
|
||||
data_file.seek(0);
|
||||
try {
|
||||
v_test_frames = data_file.read_n(n_test_frames);
|
||||
} catch (const std::exception &e) {
|
||||
throw std::runtime_error("Error reading test " +
|
||||
std::to_string(n_test_frames) +
|
||||
" frames: " + std::string(e.what()));
|
||||
}
|
||||
std::vector<size_t> frame_shape = {
|
||||
static_cast<size_t>(v_test_frames[0].rows()),
|
||||
static_cast<size_t>(v_test_frames[0].cols())};
|
||||
image_shape = aare::make_shape<2>(frame_shape);
|
||||
}
|
||||
|
||||
void initialize_finder(ClusterFinder<ClusterType> &finder) {
|
||||
for (aare::Frame &frame : v_pedestal_frames) {
|
||||
finder.push_pedestal_frame(frame.view<uint16_t>());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ClusterType>
|
||||
void run_benchmark(benchmark::State &state, const char *label,
|
||||
bool update_pedestal) {
|
||||
BenchmarkData<ClusterType> data{};
|
||||
ClusterFinder<ClusterType> cluster_finder(data.image_shape);
|
||||
data.initialize_finder(cluster_finder);
|
||||
std::size_t cluster_count = 0;
|
||||
for (auto _ : state) {
|
||||
cluster_count = 0;
|
||||
for (aare::Frame &frame : data.v_test_frames) {
|
||||
cluster_finder.find_clusters(frame.view<uint16_t>(), 0,
|
||||
update_pedestal);
|
||||
|
||||
cluster_count += cluster_finder.steal_clusters(true).size();
|
||||
}
|
||||
benchmark::DoNotOptimize(cluster_count);
|
||||
}
|
||||
|
||||
auto n_test_frames = data.n_test_frames;
|
||||
state.counters["test_frames"] = n_test_frames;
|
||||
state.counters["clusters_per_frame"] =
|
||||
static_cast<double>(cluster_count) / static_cast<double>(n_test_frames);
|
||||
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
||||
static_cast<int64_t>(n_test_frames));
|
||||
state.SetLabel(label);
|
||||
}
|
||||
// Wrapper for the old function
|
||||
void BM_ClusterFinder_3x3(benchmark::State &state) {
|
||||
run_benchmark<Cluster3x3>(state, "find_clusters 3x3", true);
|
||||
}
|
||||
void BM_ClusterFinder_5x5(benchmark::State &state) {
|
||||
run_benchmark<Cluster5x5>(state, "find_clusters 5x5", true);
|
||||
}
|
||||
void BM_ClusterFinder_7x7(benchmark::State &state) {
|
||||
run_benchmark<Cluster7x7>(state, "find_clusters 7x7", true);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
BENCHMARK(BM_ClusterFinder_3x3)->Unit(benchmark::kMillisecond);
|
||||
BENCHMARK(BM_ClusterFinder_5x5)->Unit(benchmark::kMillisecond);
|
||||
BENCHMARK(BM_ClusterFinder_7x7)->Unit(benchmark::kMillisecond);
|
||||
@@ -158,6 +158,4 @@ BENCHMARK(BM_FitGausMinuitGrad)
|
||||
|
||||
BENCHMARK(BM_FitGausMinuitGradHesse)
|
||||
->DenseRange(0, 5)
|
||||
->Unit(benchmark::kMicrosecond);
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
->Unit(benchmark::kMicrosecond);
|
||||
@@ -128,6 +128,4 @@ BENCHMARK_F(TwoArrays, MultiplyAddDivideWithIndex)(benchmark::State &st) {
|
||||
}
|
||||
benchmark::DoNotOptimize(res);
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
}
|
||||
@@ -29,8 +29,16 @@ class ClusterFinder {
|
||||
Pedestal<PEDESTAL_TYPE> m_pedestal;
|
||||
ClusterVector<ClusterType> m_clusters;
|
||||
|
||||
static const uint8_t ClusterSizeX = ClusterType::cluster_size_x;
|
||||
static const uint8_t ClusterSizeY = ClusterType::cluster_size_y;
|
||||
static constexpr uint8_t ClusterSizeX = ClusterType::cluster_size_x;
|
||||
static constexpr uint8_t ClusterSizeY = ClusterType::cluster_size_y;
|
||||
|
||||
static constexpr int dx_c = ClusterSizeX / 2;
|
||||
static constexpr int dy_c = ClusterSizeY / 2;
|
||||
|
||||
static constexpr int is_odd_x = ClusterSizeX % 2;
|
||||
static constexpr int is_odd_y = ClusterSizeY % 2;
|
||||
// for even sized clusters there is no proper cluster center and
|
||||
// even amount of pixels around the center
|
||||
using CT = typename ClusterType::value_type;
|
||||
|
||||
public:
|
||||
@@ -81,25 +89,15 @@ class ClusterFinder {
|
||||
m_clusters = ClusterVector<ClusterType>{};
|
||||
return tmp;
|
||||
}
|
||||
void find_clusters(NDView<FRAME_TYPE, 2> frame, uint64_t frame_number = 0) {
|
||||
void find_clusters(NDView<FRAME_TYPE, 2> frame, uint64_t frame_number = 0,
|
||||
bool update_pedestal = true) {
|
||||
// // TODO! deal with even size clusters
|
||||
// // currently 3,3 -> +/- 1
|
||||
// // 4,4 -> +/- 2
|
||||
int dy = ClusterSizeY / 2;
|
||||
int dx = ClusterSizeX / 2;
|
||||
int has_center_pixel_x =
|
||||
ClusterSizeX %
|
||||
2; // for even sized clusters there is no proper cluster center and
|
||||
// even amount of pixels around the center
|
||||
int has_center_pixel_y = ClusterSizeY % 2;
|
||||
|
||||
m_clusters.set_frame_number(frame_number);
|
||||
for (int iy = 0; iy < frame.shape(0); iy++) {
|
||||
for (int ix = 0; ix < frame.shape(1); ix++) {
|
||||
|
||||
PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
|
||||
PEDESTAL_TYPE total = 0;
|
||||
|
||||
// What can we short circuit here?
|
||||
PEDESTAL_TYPE rms = m_pedestal.std(iy, ix);
|
||||
PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix));
|
||||
@@ -107,83 +105,79 @@ class ClusterFinder {
|
||||
if (value < -m_nSigma * rms)
|
||||
continue; // NEGATIVE_PEDESTAL go to next pixel
|
||||
// TODO! No pedestal update???
|
||||
PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
|
||||
PEDESTAL_TYPE total = 0;
|
||||
|
||||
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
|
||||
for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) {
|
||||
for (int ir = -dy_c; ir < dy_c + is_odd_y; ir++) {
|
||||
for (int ic = -dx_c; ic < dx_c + is_odd_x; ic++) {
|
||||
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
|
||||
iy + ir >= 0 && iy + ir < frame.shape(0)) {
|
||||
PEDESTAL_TYPE val =
|
||||
frame(iy + ir, ix + ic) -
|
||||
m_pedestal.mean(iy + ir, ix + ic);
|
||||
|
||||
total += val;
|
||||
max = std::max(max, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (max > m_nSigma * rms || total > c3 * m_nSigma * rms) {
|
||||
if (value == max) {
|
||||
ClusterType cluster{};
|
||||
cluster.x = ix;
|
||||
cluster.y = iy;
|
||||
// Fill the cluster data since we have a photon to store
|
||||
// It's worth redoing the look since most of the time we
|
||||
// don't have a photon
|
||||
int cluster_index = 0;
|
||||
for (int ir = -dy_c; ir < dy_c + is_odd_y; ir++) {
|
||||
for (int ic = -dx_c; ic < dx_c + is_odd_x; ic++) {
|
||||
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
|
||||
iy + ir >= 0 && iy + ir < frame.shape(0)) {
|
||||
|
||||
if ((max > m_nSigma * rms)) {
|
||||
if (value < max)
|
||||
continue; // Not max go to the next pixel
|
||||
// but also no pedestal update
|
||||
} else if (total > c3 * m_nSigma * rms) {
|
||||
// pass
|
||||
} else {
|
||||
// m_pedestal.push(iy, ix, frame(iy, ix)); // Safe option
|
||||
m_pedestal.push_fast(
|
||||
iy, ix,
|
||||
frame(iy,
|
||||
ix)); // Assume we have reached n_samples in the
|
||||
// pedestal, slight performance improvement
|
||||
continue; // It was a pedestal value nothing to store
|
||||
}
|
||||
|
||||
// Store cluster
|
||||
if (value == max) {
|
||||
ClusterType cluster{};
|
||||
cluster.x = ix;
|
||||
cluster.y = iy;
|
||||
|
||||
// Fill the cluster data since we have a photon to store
|
||||
// It's worth redoing the look since most of the time we
|
||||
// don't have a photon
|
||||
int i = 0;
|
||||
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
|
||||
for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) {
|
||||
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
|
||||
iy + ir >= 0 && iy + ir < frame.shape(0)) {
|
||||
|
||||
// If the cluster type is an integral type, and
|
||||
// the pedestal is a floating point type then we
|
||||
// need to round the value before storing it
|
||||
if constexpr (std::is_integral_v<CT> &&
|
||||
std::is_floating_point_v<
|
||||
PEDESTAL_TYPE>) {
|
||||
auto tmp = std::lround(
|
||||
frame(iy + ir, ix + ic) -
|
||||
m_pedestal.mean(iy + ir, ix + ic));
|
||||
cluster.data[i] = static_cast<CT>(tmp);
|
||||
}
|
||||
// On the other hand if both are floating point
|
||||
// or both are integral then we can just static
|
||||
// cast directly
|
||||
else {
|
||||
auto tmp =
|
||||
frame(iy + ir, ix + ic) -
|
||||
m_pedestal.mean(iy + ir, ix + ic);
|
||||
cluster.data[i] = static_cast<CT>(tmp);
|
||||
// If the cluster type is an integral type,
|
||||
// and the pedestal is a floating point type
|
||||
// then we need to round the value before
|
||||
// storing it
|
||||
if constexpr (std::is_integral_v<CT> &&
|
||||
std::is_floating_point_v<
|
||||
PEDESTAL_TYPE>) {
|
||||
auto tmp = std::lround(
|
||||
frame(iy + ir, ix + ic) -
|
||||
m_pedestal.mean(iy + ir, ix + ic));
|
||||
cluster.data[cluster_index] =
|
||||
static_cast<CT>(tmp);
|
||||
}
|
||||
// On the other hand if both are floating
|
||||
// point or both are integral then we can
|
||||
// just static cast directly
|
||||
else {
|
||||
auto tmp =
|
||||
frame(iy + ir, ix + ic) -
|
||||
m_pedestal.mean(iy + ir, ix + ic);
|
||||
cluster.data[cluster_index] =
|
||||
static_cast<CT>(tmp);
|
||||
}
|
||||
cluster_index++;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
// Add the cluster to the output ClusterVector
|
||||
m_clusters.push_back(cluster);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (update_pedestal) {
|
||||
m_pedestal.push_fast(
|
||||
iy, ix,
|
||||
frame(
|
||||
iy,
|
||||
ix)); // Assume we have reached n_samples in the
|
||||
// pedestal already and can use the faster push method
|
||||
}
|
||||
|
||||
// Add the cluster to the output ClusterVector
|
||||
m_clusters.push_back(cluster);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace aare
|
||||
Reference in New Issue
Block a user