From 1f78c9f7d14e80cb0917b2d7900c09ca5d631628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Erik=20Fr=C3=B6jd?= Date: Wed, 27 May 2026 17:12:10 +0200 Subject: [PATCH] removed use of boost histogram --- CMakeLists.txt | 55 ------- etc/dev-env.yml | 1 - .../aare/PedestalTrackingPixelHistogram.hpp | 11 +- include/aare/PixelHistogram.hpp | 11 +- include/aare/PixelHistogramImpl.hpp | 137 ++++++++++++++++++ src/PedestalTrackingPixelHistogram.cpp | 69 +++------ src/PixelHistogram.cpp | 71 +++------ 7 files changed, 182 insertions(+), 173 deletions(-) create mode 100644 include/aare/PixelHistogramImpl.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fe9c50..219da34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,10 +64,6 @@ option(AARE_FETCH_JSON "Use FetchContent to download nlohmann::json" ON) option(AARE_FETCH_ZMQ "Use FetchContent to download libzmq" ON) option(AARE_FETCH_LMFIT "Use FetchContent to download lmfit" ON) option(AARE_FETCH_MINUIT2 "Use FetchContent to download Minuit2" ON) -option( - AARE_FETCH_BOOST_HISTOGRAM - "Use FetchContent to download Boost.Histogram and its required Boost sub-modules" - ON) # Convenience option to use system libraries only (no FetchContent) option(AARE_SYSTEM_LIBRARIES "Use system libraries" OFF) @@ -88,9 +84,6 @@ if(AARE_SYSTEM_LIBRARIES) set(AARE_FETCH_ZMQ OFF CACHE BOOL "Disabled FetchContent for libzmq" FORCE) - set(AARE_FETCH_BOOST_HISTOGRAM - OFF - CACHE BOOL "Disabled FetchContent for Boost.Histogram" FORCE) # Still fetch lmfit and Minuit2 when setting AARE_SYSTEM_LIBRARIES since these # are not available on conda-forge endif() @@ -267,53 +260,6 @@ else() find_package(nlohmann_json 3.11.3 REQUIRED) endif() -if(AARE_FETCH_BOOST_HISTOGRAM) - # Fetch Boost.Histogram and the minimal set of Boost sub-modules it links - # against (per upstream histogram/CMakeLists.txt: config, core, mp11, - # throw_exception, variant2, with assert pulled in transitively by core). - # We populate the repos without add_subdirectory and aggregate their include - # directories into a single INTERFACE target, so we don't drag in the - # upstream Boost test wiring or BOOST_SUPERPROJECT_VERSION machinery. - set(AARE_BOOST_TAG boost-1.91.0) - set(_aare_boost_modules histogram config core assert mp11 throw_exception - variant2) - - if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.30") - cmake_policy(SET CMP0169 OLD) # FetchContent_Populate still needed - endif() - - foreach(_mod IN LISTS _aare_boost_modules) - FetchContent_Declare( - boost_${_mod} - GIT_REPOSITORY https://github.com/boostorg/${_mod}.git - GIT_TAG ${AARE_BOOST_TAG} - GIT_SHALLOW TRUE - UPDATE_DISCONNECTED 1) - FetchContent_GetProperties(boost_${_mod}) - if(NOT boost_${_mod}_POPULATED) - FetchContent_Populate(boost_${_mod}) - endif() - endforeach() - - add_library(boost_histogram_headers INTERFACE) - foreach(_mod IN LISTS _aare_boost_modules) - target_include_directories( - boost_histogram_headers SYSTEM - INTERFACE $) - endforeach() - target_compile_features(boost_histogram_headers INTERFACE cxx_std_14) - add_library(Boost::histogram ALIAS boost_histogram_headers) - - unset(_aare_boost_modules) -else() - find_package(Boost 1.70 REQUIRED) - if(NOT TARGET Boost::histogram) - # Boost's CMake config exports Boost::headers; alias it so downstream - # consumption is uniform regardless of whether we fetched or not. - add_library(Boost::histogram ALIAS Boost::headers) - endif() -endif() - include(GNUInstallDirs) # If conda build, always set lib dir to 'lib' @@ -492,7 +438,6 @@ target_link_libraries( nlohmann_json::nlohmann_json ${STD_FS_LIB} # from helpers.cmake Minuit2::Minuit2 - $ PRIVATE aare_compiler_flags Threads::Threads $) target_include_directories( diff --git a/etc/dev-env.yml b/etc/dev-env.yml index 7177e46..6873e22 100644 --- a/etc/dev-env.yml +++ b/etc/dev-env.yml @@ -17,6 +17,5 @@ dependencies: - nlohmann_json - pytest - pytest-check - - boost - boost-histogram diff --git a/include/aare/PedestalTrackingPixelHistogram.hpp b/include/aare/PedestalTrackingPixelHistogram.hpp index e3f4588..3879fc3 100644 --- a/include/aare/PedestalTrackingPixelHistogram.hpp +++ b/include/aare/PedestalTrackingPixelHistogram.hpp @@ -2,10 +2,9 @@ #include "aare/NDArray.hpp" #include "aare/NDView.hpp" #include "aare/Pedestal.hpp" +#include "aare/PixelHistogramImpl.hpp" #include "aare/ProducerConsumerQueue.hpp" -// Lets see if we need to hide it behind a pimpl -#include #include #include #include @@ -15,7 +14,6 @@ #include #include #include -namespace bh = boost::histogram; namespace aare { @@ -27,12 +25,7 @@ class PedestalTrackingPixelHistogram { using FrameType = uint16_t; private: - using Axes = std::tuple< - bh::axis::regular, - bh::axis::integer, - bh::axis::integer>; - using Hist = bh::histogram>; + using Hist = PixelHistogramImpl; using AsyncQueue = ProducerConsumerQueue>; // What kind of fan-out work the worker pool should currently do. diff --git a/include/aare/PixelHistogram.hpp b/include/aare/PixelHistogram.hpp index 3a3560e..9159af6 100644 --- a/include/aare/PixelHistogram.hpp +++ b/include/aare/PixelHistogram.hpp @@ -1,10 +1,9 @@ #pragma once #include "aare/NDArray.hpp" #include "aare/NDView.hpp" +#include "aare/PixelHistogramImpl.hpp" #include "aare/ProducerConsumerQueue.hpp" -//Lets see if we need to hide it behind a pimpl -#include #include #include #include @@ -14,7 +13,6 @@ #include #include #include -namespace bh = boost::histogram; namespace aare { class PixelHistogram { @@ -23,12 +21,7 @@ class PixelHistogram { using AxisType = float; private: - using Axes = std::tuple< - bh::axis::regular, - bh::axis::integer, - bh::axis::integer>; - using Hist = bh::histogram>; + using Hist = PixelHistogramImpl; using AsyncQueue = ProducerConsumerQueue>; int rows_; diff --git a/include/aare/PixelHistogramImpl.hpp b/include/aare/PixelHistogramImpl.hpp new file mode 100644 index 0000000..a8a5fa9 --- /dev/null +++ b/include/aare/PixelHistogramImpl.hpp @@ -0,0 +1,137 @@ +#pragma once +/* +Implmenetation of a basic pixel histogram class with templated axis and storage type. +row, col are integers and the per pixel histogram axis is AxisType. + +Storage layout matches the existing PixelHistogram/PedestalTrackingPixelHistogram +hdata() shape: NDArray indexed as (row, col, bin) in row-major +order, i.e. bin is the fastest-varying dimension. This keeps downstream +consumers (memcpy stitching across thread shards, numpy reshaping in the +python bindings) unchanged. + +Bin policy: regular binning on [xmin, xmax). Values outside this range are +silently dropped (matches boost::histogram axis option::none_t used by the +boost-backed classes). +*/ + +#include "aare/NDArray.hpp" +#include "aare/NDView.hpp" + +#include +#include + +namespace aare { + +template class PixelHistogramImpl { + NDArray m_hdata; + NDArray m_edges; + int m_rows; + int m_cols; + int m_n_bins; + T m_xmin; + T m_xmax; + // n_bins / (xmax - xmin), precomputed to keep the hot path + // division-free. + T m_scale; + + public: + PixelHistogramImpl(int rows, int cols, int n_bins, T xmin, T xmax); + + void fill(const NDView &frame); + void fill(int row, int col, T value); + + NDArray hdata() const; + // Zero-copy view of the underlying [rows x cols x n_bins] storage. + // Lifetime is tied to *this. Use for low-level merge/stitching paths; + // prefer hdata() for the public API where you want an owned copy. + NDView view() const; + NDArray bin_centers() const; + NDArray bin_edges() const; +}; + +template +PixelHistogramImpl::PixelHistogramImpl(int rows, int cols, + int n_bins, T xmin, + T xmax) + : m_hdata(NDArray({static_cast(rows), + static_cast(cols), + static_cast(n_bins)}, + StorageType{0})), + m_edges(NDArray({static_cast(n_bins + 1)})), m_rows(rows), + m_cols(cols), m_n_bins(n_bins), m_xmin(xmin), m_xmax(xmax), + m_scale(static_cast(n_bins) / (xmax - xmin)) { + if (rows < 1 || cols < 1 || n_bins < 1) { + throw std::invalid_argument( + "PixelHistogramImpl requires positive rows, cols and bins"); + } + if (!(xmax > xmin)) { + throw std::invalid_argument("PixelHistogramImpl requires xmax > xmin"); + } + + const T range = xmax - xmin; + for (int i = 0; i <= n_bins; ++i) { + m_edges(i) = + xmin + (static_cast(i) * range) / static_cast(n_bins); + } +} + +template +void PixelHistogramImpl::fill(const NDView &frame) { + if (frame.shape(0) != m_rows || frame.shape(1) != m_cols) { + throw std::invalid_argument( + "PixelHistogramImpl::fill: frame shape does not match histogram"); + } + for (int row = 0; row < m_rows; ++row) { + for (int col = 0; col < m_cols; ++col) { + const T val = frame(row, col); + if (val < m_xmin || val >= m_xmax) { + continue; + } + int bin = static_cast((val - m_xmin) * m_scale); + // Guard against floating-point rounding pushing val just below + // xmax to bin == n_bins. + if (bin >= m_n_bins) { + bin = m_n_bins - 1; + } + ++m_hdata(row, col, bin); + } + } +} + +template +void PixelHistogramImpl::fill(int row, int col, T value) { + if (value < m_xmin || value >= m_xmax) { + return; + } + int bin = static_cast((value - m_xmin) * m_scale); + if (bin >= m_n_bins) { + bin = m_n_bins - 1; + } + ++m_hdata(row, col, bin); +} + +template +NDArray PixelHistogramImpl::hdata() const { + return m_hdata; +} + +template +NDView PixelHistogramImpl::view() const { + return m_hdata.view(); +} + +template +NDArray PixelHistogramImpl::bin_centers() const { + NDArray centers({static_cast(m_n_bins)}); + for (int i = 0; i < m_n_bins; ++i) { + centers(i) = (m_edges(i) + m_edges(i + 1)) / T(2); + } + return centers; +} + +template +NDArray PixelHistogramImpl::bin_edges() const { + return m_edges; +} + +} // namespace aare diff --git a/src/PedestalTrackingPixelHistogram.cpp b/src/PedestalTrackingPixelHistogram.cpp index 26ede28..464c661 100644 --- a/src/PedestalTrackingPixelHistogram.cpp +++ b/src/PedestalTrackingPixelHistogram.cpp @@ -1,9 +1,6 @@ #include "aare/PedestalTrackingPixelHistogram.hpp" #include -#include -#include -#include #include #include #include @@ -60,14 +57,7 @@ PedestalTrackingPixelHistogram::PedestalTrackingPixelHistogram( partial_std_.reserve(n_threads_); for (int i = 0; i < n_threads_; ++i) { const auto local_rows = row_count(i); - partial_hists_.emplace_back( - bh::axis::regular(n_bins, xmin, xmax, - "value"), - bh::axis::integer( - 0, cols, "y"), - bh::axis::integer( - 0, local_rows, "x")); + partial_hists_.emplace_back(local_rows, cols, n_bins, xmin, xmax); partial_pedestals_.emplace_back(static_cast(local_rows), static_cast(cols)); partial_std_.emplace_back(NDArray( @@ -188,7 +178,8 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) { case WorkKind::Fill: { // Histogram the pedestal-subtracted residual for each pixel // in this thread's row slice. `my_pedestal` is sized to the - // local row range and indexed by (local_row, col). + // local row range and indexed by (local_row, col). The + // [xmin, xmax) range gate lives inside PixelHistogramImpl::fill. for (int local_row = 0; local_row < local_rows; ++local_row) { const auto row = static_cast(first_row + local_row); for (ssize_t col = 0; col < image->shape(1); ++col) { @@ -197,10 +188,7 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) { static_cast(my_pedestal.mean( static_cast(local_row), static_cast(col))); - if (val < xmin_ || val >= xmax_) { - continue; // Skip out-of-range residuals - } - my_hist(val, col, local_row); + my_hist.fill(local_row, static_cast(col), val); } } break; @@ -242,6 +230,8 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) { // back into the pedestal shard. With n_sigma == 0 the gate // never fires, which makes this case behave identically to // WorkKind::Fill (modulo the per-pixel gate evaluation). + // The [xmin, xmax) histogram gate lives inside + // PixelHistogramImpl::fill. auto &my_std = partial_std_[thread_id]; const double n_sigma = n_sigma_.load(std::memory_order_relaxed); for (int local_row = 0; local_row < local_rows; ++local_row) { @@ -253,9 +243,7 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) { static_cast(my_pedestal.mean( static_cast(local_row), static_cast(col))); - if (val >= xmin_ && val < xmax_) { - my_hist(val, col, local_row); - } + my_hist.fill(local_row, static_cast(col), val); const double sigma = my_std(local_row, col); if (sigma > 0.0 && std::abs(static_cast(val)) < n_sigma * sigma) { @@ -287,28 +275,27 @@ PedestalTrackingPixelHistogram::hdata() const { // the partial histograms. Cheap when the queue is already drained. flush(); - const auto &hist = partial_hists_.front(); - const auto bins = static_cast(hist.axis(0).size()); - const auto cols = static_cast(hist.axis(1).size()); + const auto first_shard_view = partial_hists_.front().view(); + const auto cols = static_cast(first_shard_view.shape(1)); + const auto bins = static_cast(first_shard_view.shape(2)); const auto rows = static_cast(rows_); NDArray data({rows, cols, bins}); - // Each thread owns a disjoint, contiguous range of rows and its dense - // storage layout [local_row][col][bin] is identical to the slice - // [first_row .. first_row + local_rows)[col][bin] of `data`. So the + // Each thread owns a disjoint, contiguous range of rows. The shard's + // dense storage layout [local_row][col][bin] is identical to the slice + // [first_row .. first_row + local_rows)[col][bin] of `data`, so the // merge is just one bulk copy per thread; no per-element accumulation // and no upfront zeroing of `data` is needed. const size_t pixel_stride = static_cast(cols) * bins; for (int t = 0; t < n_threads_; ++t) { - const auto &storage = bh::unsafe_access::storage(partial_hists_[t]); - const StorageType *partial_data = storage.data(); const auto first_row = static_cast(row_start(t)); const auto local_rows = static_cast(row_count(t)); if (local_rows == 0) continue; - std::memcpy(data.data() + first_row * pixel_stride, partial_data, + const auto shard_view = partial_hists_[t].view(); + std::memcpy(data.data() + first_row * pixel_stride, shard_view.data(), local_rows * pixel_stride * sizeof(StorageType)); } @@ -434,32 +421,14 @@ void PedestalTrackingPixelHistogram::coordinator_loop() { NDArray PedestalTrackingPixelHistogram::bin_centers() const { - const auto &value_axis = partial_hists_.front().axis(0); - const auto n_bins = static_cast(value_axis.size()); - - NDArray centers({n_bins}); - - for (ssize_t i = 0; i < n_bins; ++i) { - AxisType left = value_axis.value(i); - AxisType right = value_axis.value(i + 1); - centers(i) = (left + right) / AxisType(2.0); - } - - return centers; + // All shards share the same value-axis configuration, so any one will + // do; pick the first. + return partial_hists_.front().bin_centers(); } NDArray PedestalTrackingPixelHistogram::bin_edges() const { - const auto &value_axis = partial_hists_.front().axis(0); - const auto n_bins = static_cast(value_axis.size()); - - NDArray edges({n_bins + 1}); - - for (ssize_t i = 0; i <= n_bins; ++i) { - edges(i) = value_axis.value(i); - } - - return edges; + return partial_hists_.front().bin_edges(); } } // namespace aare diff --git a/src/PixelHistogram.cpp b/src/PixelHistogram.cpp index a0aadfd..4bcb53a 100644 --- a/src/PixelHistogram.cpp +++ b/src/PixelHistogram.cpp @@ -1,9 +1,6 @@ #include "aare/PixelHistogram.hpp" #include -#include -#include -#include #include #include #include @@ -49,12 +46,9 @@ PixelHistogram::PixelHistogram(int rows, int cols, int n_bins, double xmin, doub partial_hists_.reserve(n_threads_); for (int i = 0; i < n_threads_; ++i) { const auto local_rows = row_count(i); - partial_hists_.emplace_back( - bh::axis::regular(n_bins, xmin, xmax, "value"), - bh::axis::integer(0, cols, "y"), - bh::axis::integer(0, local_rows, "x") - ); + partial_hists_.emplace_back(local_rows, cols, n_bins, + static_cast(xmin), + static_cast(xmax)); } // Spawn worker threads @@ -121,15 +115,14 @@ void PixelHistogram::worker_loop(int thread_id) { lock.unlock(); - // Do the work: fill this thread's partial histogram + // Do the work: fill this thread's partial histogram. The + // [xmin, xmax) range gate lives inside PixelHistogramImpl::fill. + auto &my_hist = partial_hists_[thread_id]; for (int local_row = 0; local_row < local_rows; ++local_row) { const auto row = static_cast(first_row + local_row); for (ssize_t col = 0; col < image.shape(1); ++col) { const auto val = image(row, col); - if (val < xmin_ || val >= xmax_) { - continue; // Skip out-of-range values - } - partial_hists_[thread_id](val, col, local_row); + my_hist.fill(local_row, static_cast(col), val); } } @@ -150,28 +143,27 @@ NDArray PixelHistogram::hdata() const { // the partial histograms. Cheap when the queue is already drained. flush(); - const auto &hist = partial_hists_.front(); - const auto bins = static_cast(hist.axis(0).size()); - const auto cols = static_cast(hist.axis(1).size()); + const auto first_shard_view = partial_hists_.front().view(); + const auto cols = static_cast(first_shard_view.shape(1)); + const auto bins = static_cast(first_shard_view.shape(2)); const auto rows = static_cast(rows_); NDArray data({rows, cols, bins}); - // Each thread owns a disjoint, contiguous range of rows and its dense - // storage layout [local_row][col][bin] is identical to the slice - // [first_row .. first_row + local_rows)[col][bin] of `data`. So the + // Each thread owns a disjoint, contiguous range of rows. The shard's + // dense storage layout [local_row][col][bin] is identical to the slice + // [first_row .. first_row + local_rows)[col][bin] of `data`, so the // merge is just one bulk copy per thread; no per-element accumulation // and no upfront zeroing of `data` is needed. const size_t pixel_stride = static_cast(cols) * bins; for (int t = 0; t < n_threads_; ++t) { - const auto &storage = bh::unsafe_access::storage(partial_hists_[t]); - const StorageType *partial_data = storage.data(); - const auto first_row = static_cast(row_start(t)); + const auto first_row = static_cast(row_start(t)); const auto local_rows = static_cast(row_count(t)); - if (local_rows == 0) continue; + if (local_rows == 0) + continue; - std::memcpy(data.data() + first_row * pixel_stride, - partial_data, + const auto shard_view = partial_hists_[t].view(); + std::memcpy(data.data() + first_row * pixel_stride, shard_view.data(), local_rows * pixel_stride * sizeof(StorageType)); } @@ -257,32 +249,13 @@ void PixelHistogram::coordinator_loop() { } NDArray PixelHistogram::bin_centers() const { - const auto& value_axis = partial_hists_.front().axis(0); - const auto n_bins = static_cast(value_axis.size()); - - NDArray centers({n_bins}); - - for (ssize_t i = 0; i < n_bins; ++i) { - // Get the left and right edges of the bin and compute the center - AxisType left = value_axis.value(i); - AxisType right = value_axis.value(i + 1); - centers(i) = (left + right) / AxisType(2.0); - } - - return centers; + // All shards share the same value-axis configuration, so any one will + // do; pick the first. + return partial_hists_.front().bin_centers(); } NDArray PixelHistogram::bin_edges() const { - const auto& value_axis = partial_hists_.front().axis(0); - const auto n_bins = static_cast(value_axis.size()); - - NDArray edges({n_bins + 1}); - - for (ssize_t i = 0; i <= n_bins; ++i) { - edges(i) = value_axis.value(i); - } - - return edges; + return partial_hists_.front().bin_edges(); } } // namespace aare