removed sync fill

2026-07-20 09:12:51 +02:00 · 2026-06-01 14:59:57 +02:00
parent 1f78c9f7d1
commit c10bcb8075
8 changed files with 103 additions and 216 deletions
@@ -21,7 +21,7 @@ namespace aare {
 class PedestalTrackingPixelHistogram {
  public:
    using StorageType = uint16_t;
-    using AxisType = float; //TODO: template on pedesta type if needed
+    using AxisType = float; //TODO: template on pedestal type if needed
    using FrameType = uint16_t;

  private:
@@ -30,7 +30,7 @@ class PedestalTrackingPixelHistogram {

    // What kind of fan-out work the worker pool should currently do.
    // Set under work_mutex_; read by worker_loop after wakeup.
-    enum class WorkKind { Fill, PushPedestal, UpdateMean, FillWithThreshold };
+    enum class WorkKind { PushPedestal, UpdateMean, FillWithThreshold };

    int rows_;
    int cols_;
@@ -58,10 +58,9 @@ class PedestalTrackingPixelHistogram {
    std::atomic<bool> stop_workers_;
    int work_generation_;

-    // Serialises all fan-outs: `fill`, `push_pedestal_no_update`,
-    // `update_mean`, and the async coordinator's calls into
-    // `fill_with_threshold_`. Always the outermost lock; work_mutex_
-    // is taken briefly inside it.
+    // Serialises all fan-outs: `push_pedestal_no_update`, `update_mean`,
+    // and the async coordinator's calls into `fill_with_threshold_`.
+    // Always the outermost lock; work_mutex_ is taken briefly inside it.
    mutable std::mutex fill_mutex_;

    // Async producer/consumer pipeline. SPSC queue feeds the coordinator
@@ -85,7 +84,8 @@ class PedestalTrackingPixelHistogram {
    void dispatch_(WorkKind kind, const NDView<FrameType, 2> *image);

    // Coordinator-facing entry point: takes fill_mutex_ and dispatches
-    // FillWithThreshold to the worker pool. Same shape as fill().
+    // FillWithThreshold to the worker pool. Only ever called by the
+    // coordinator thread, on images already shape-checked by fill_async.
    void fill_with_threshold_(const NDView<FrameType, 2> &image);

  public:
@@ -101,13 +101,6 @@ class PedestalTrackingPixelHistogram {
    void update_mean();
    NDArray<AxisType, 2> pedestal_mean() const;

-    // Synchronous fill: blocks until the pedestal-subtracted residual
-    // for `image` has been merged into the accumulators. Safe to call
-    // concurrently with `fill_async` and the
-    // pedestal-update API (calls are serialised through `fill_mutex_`).
-    // Histogram-only - independent of `n_sigma()`.
-    void fill(const NDView<FrameType, 2> &image);
-
    void fill_async(NDArray<FrameType, 2> image);

    // Sigma multiplier for the pedestal-update gate in
@@ -125,7 +118,7 @@ class PedestalTrackingPixelHistogram {

    // Implicitly flushes pending async fills first so the snapshot is
    // consistent with everything that was submitted up to the call.
-    NDArray<StorageType, 3> hdata() const;
+    NDArray<StorageType, 3> values() const;
    NDArray<AxisType, 1> bin_centers() const;
    NDArray<AxisType, 1> bin_edges() const;
 };
@@ -46,15 +46,8 @@ class PixelHistogram {
    std::atomic<bool> stop_workers_;
    int work_generation_;

-    // Serialises calls into the synchronous fan-out (`fill`). The
-    // coordinator thread acquires it for the duration of each item it
-    // processes, and direct callers of `fill` also acquire it. Without
-    // this, concurrent sync + async fills would race on `current_image_`
-    // and `work_generation_`.
-    std::mutex fill_mutex_;
-
    // Async producer/consumer pipeline. SPSC queue feeds the coordinator
-    // thread, which calls the synchronous `fill` one image at a time.
+    // thread, which fans each image out to the worker pool one at a time.
    std::unique_ptr<AsyncQueue> async_queue_;
    std::thread coordinator_;
    std::atomic<bool> stop_coordinator_{false};
@@ -64,6 +57,10 @@ class PixelHistogram {
    // Private worker thread method
    void worker_loop(int thread_id);
    void coordinator_loop();
+    // Fan a single image out to the worker pool and block until every
+    // worker has merged its row band. Only ever called by the coordinator
+    // thread, so no caller-serialisation lock is needed.
+    void dispatch(const NDView<AxisType, 2> &image);
    int row_start(int thread_id) const;
    int row_count(int thread_id) const;

@@ -72,11 +69,6 @@ class PixelHistogram {
                   int n_threads = 1, std::size_t max_pending = 16);
    ~PixelHistogram();

-    // Synchronous fill: blocks until the image has been merged into the
-    // accumulators. Safe to call concurrently with `fill_async` (calls are
-    // serialised through `fill_mutex_`).
-    void fill(const NDView<AxisType, 2> &image);
-
    // Asynchronous fill: takes ownership of `image`, enqueues it for the
    // coordinator thread, and returns. Blocks the caller only if the queue
    // is full (single-producer, single-consumer queue with a sleep-poll
@@ -92,7 +84,7 @@ class PixelHistogram {

    // Implicitly flushes pending async fills first so the snapshot is
    // consistent with everything that was submitted up to the call.
-    NDArray<StorageType, 3> hdata() const;
+    NDArray<StorageType, 3> values() const;
    NDArray<AxisType, 1> bin_centers() const;
    NDArray<AxisType, 1> bin_edges() const;
 };
@@ -1,17 +1,10 @@
 #pragma once
 /*
-Implmenetation of a basic pixel histogram class with templated axis and storage type.
+Basic pixel histogram class with templated axis and storage type.
 row, col are integers and the per pixel histogram axis is AxisType.

-Storage layout matches the existing PixelHistogram/PedestalTrackingPixelHistogram
-hdata() shape: NDArray<StorageType, 3> indexed as (row, col, bin) in row-major
-order, i.e. bin is the fastest-varying dimension. This keeps downstream
-consumers (memcpy stitching across thread shards, numpy reshaping in the
-python bindings) unchanged.
-
 Bin policy: regular binning on [xmin, xmax). Values outside this range are
-silently dropped (matches boost::histogram axis option::none_t used by the
-boost-backed classes).
+silently dropped.
 */

 #include "aare/NDArray.hpp"
@@ -23,7 +16,7 @@ boost-backed classes).
 namespace aare {

 template <typename T, typename StorageType> class PixelHistogramImpl {
-    NDArray<StorageType, 3> m_hdata;
+    NDArray<StorageType, 3> m_values;
    NDArray<T, 1> m_edges;
    int m_rows;
    int m_cols;
@@ -40,10 +33,10 @@ template <typename T, typename StorageType> class PixelHistogramImpl {
    void fill(const NDView<T, 2> &frame);
    void fill(int row, int col, T value);

-    NDArray<StorageType, 3> hdata() const;
+    NDArray<StorageType, 3> values() const;
    // Zero-copy view of the underlying [rows x cols x n_bins] storage.
    // Lifetime is tied to *this. Use for low-level merge/stitching paths;
-    // prefer hdata() for the public API where you want an owned copy.
+    // prefer values() for the public API where you want an owned copy.
    NDView<StorageType, 3> view() const;
    NDArray<T, 1> bin_centers() const;
    NDArray<T, 1> bin_edges() const;
@@ -53,7 +46,7 @@ template <typename T, typename StorageType>
 PixelHistogramImpl<T, StorageType>::PixelHistogramImpl(int rows, int cols,
                                                       int n_bins, T xmin,
                                                       T xmax)
-    : m_hdata(NDArray<StorageType, 3>({static_cast<ssize_t>(rows),
+    : m_values(NDArray<StorageType, 3>({static_cast<ssize_t>(rows),
                                       static_cast<ssize_t>(cols),
                                       static_cast<ssize_t>(n_bins)},
                                      StorageType{0})),
@@ -83,41 +76,34 @@ void PixelHistogramImpl<T, StorageType>::fill(const NDView<T, 2> &frame) {
    }
    for (int row = 0; row < m_rows; ++row) {
        for (int col = 0; col < m_cols; ++col) {
-            const T val = frame(row, col);
-            if (val < m_xmin || val >= m_xmax) {
-                continue;
-            }
-            int bin = static_cast<int>((val - m_xmin) * m_scale);
-            // Guard against floating-point rounding pushing val just below
-            // xmax to bin == n_bins.
-            if (bin >= m_n_bins) {
-                bin = m_n_bins - 1;
-            }
-            ++m_hdata(row, col, bin);
+            fill(row, col, frame(row, col));
        }
    }
 }

 template <typename T, typename StorageType>
 void PixelHistogramImpl<T, StorageType>::fill(int row, int col, T value) {
+    // TODO: add an out-of-bounds check on row and col.
    if (value < m_xmin || value >= m_xmax) {
        return;
    }
    int bin = static_cast<int>((value - m_xmin) * m_scale);
+    // Guard against floating-point rounding pushing a value just below
+    // xmax to bin == n_bins.
    if (bin >= m_n_bins) {
        bin = m_n_bins - 1;
    }
-    ++m_hdata(row, col, bin);
+    ++m_values(row, col, bin);
 }

 template <typename T, typename StorageType>
-NDArray<StorageType, 3> PixelHistogramImpl<T, StorageType>::hdata() const {
-    return m_hdata;
+NDArray<StorageType, 3> PixelHistogramImpl<T, StorageType>::values() const {
+    return m_values;
 }

 template <typename T, typename StorageType>
 NDView<StorageType, 3> PixelHistogramImpl<T, StorageType>::view() const {
-    return m_hdata.view();
+    return m_values.view();
 }

 template <typename T, typename StorageType>
@@ -73,7 +73,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
             its running sums. Drains pending async fills first, then
             dispatches the update to the worker pool so the writes to
             each shard happen on the same thread that reads them in
-             fill().
+             fill_async().
             )",
             py::call_guard<py::gil_scoped_release>())

@@ -98,27 +98,6 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
                 containing the current cached pedestal mean.
             )")

-        .def("fill",
-             [](PedestalTrackingPixelHistogram &self,
-                py::array_t<PedestalTrackingPixelHistogram::FrameType, 0>
-                    image) {
-                 auto view = make_view_2d(image);
-                 self.fill(view);
-             },
-             R"(
-             Fill the histogram with image data (blocking).
-
-             The pedestal-subtracted residual `image - pedestal_mean`
-             is computed per pixel inside the worker loop, so the
-             pedestal must already have been bootstrapped via
-             push_pedestal_no_update() + update_mean() for this to be
-             meaningful.
-
-             Args:
-                 image: A 2D numpy array of raw pixel values (dtype: uint16)
-             )",
-             py::arg("image").noconvert())
-
        .def("fill_async",
             [](PedestalTrackingPixelHistogram &self,
                py::array_t<PedestalTrackingPixelHistogram::FrameType, 0>
@@ -189,9 +168,9 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
             for monitoring/diagnostics.
             )")

-        .def("hdata",
+        .def("values",
             [](const PedestalTrackingPixelHistogram &self) {
-                 // hdata() implicitly flushes - release the GIL while it
+                 // values() implicitly flushes - release the GIL while it
                 // does so. Allocation/copy into the NDArray runs without
                 // the GIL too; only the numpy wrapping needs it.
                 NDArray<PedestalTrackingPixelHistogram::StorageType, 3>
@@ -200,7 +179,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
                     py::gil_scoped_release release;
                     ptr = new NDArray<
                         PedestalTrackingPixelHistogram::StorageType, 3>(
-                         self.hdata());
+                         self.values());
                 }
                 return return_image_data(ptr);
             },
@@ -32,20 +32,6 @@ void define_pixel_histogram_bindings(py::module &m) {
             py::arg("xmin"), py::arg("xmax"), py::arg("n_threads") = 1,
             py::arg("max_pending") = std::size_t{16})

-        .def("fill",
-             [](PixelHistogram &self,
-                py::array_t<PixelHistogram::AxisType, 0> image) {
-                 auto view = make_view_2d(image);
-                 self.fill(view);
-             },
-             R"(
-             Fill the histogram with image data (blocking).
-
-             Args:
-                 image: A 2D numpy array of pixel values (dtype: float32)
-             )",
-             py::arg("image").noconvert())
-
        .def("fill_async",
             [](PixelHistogram &self,
                py::array_t<PixelHistogram::AxisType, 0> image) {
@@ -89,15 +75,15 @@ void define_pixel_histogram_bindings(py::module &m) {
             for monitoring/diagnostics.
             )")

-        .def("hdata",
+        .def("values",
             [](const PixelHistogram &self) {
-                 // hdata() implicitly flushes - release the GIL while it
+                 // values() implicitly flushes - release the GIL while it
                 // does so. Allocation/copy into the NDArray runs without
                 // the GIL too; only the numpy wrapping needs it.
                 NDArray<PixelHistogram::StorageType, 3>* ptr = nullptr;
                 {
                     py::gil_scoped_release release;
-                     ptr = new NDArray<PixelHistogram::StorageType, 3>(self.hdata());
+                     ptr = new NDArray<PixelHistogram::StorageType, 3>(self.values());
                 }
                 return return_image_data(ptr);
             },
@@ -3,8 +3,6 @@
 #include <algorithm>
 #include <cmath>
 #include <cstring>
-#include <exception>
-#include <iostream>
 #include <stdexcept>
 #include <utility>
 #include <vector>
@@ -15,7 +13,7 @@ PedestalTrackingPixelHistogram::PedestalTrackingPixelHistogram(
    int rows, int cols, int n_bins, AxisType xmin, AxisType xmax, int n_threads,
    std::size_t max_pending, double n_sigma)
    : rows_(rows), cols_(cols), n_threads_(n_threads), xmin_(xmin), xmax_(xmax),
-      current_work_kind_(WorkKind::Fill), current_image_(nullptr),
+      current_work_kind_(WorkKind::FillWithThreshold), current_image_(nullptr),
      completed_threads_(0), stop_workers_(false), work_generation_(0),
      n_sigma_(n_sigma) {
    if (rows_ < 1 || cols_ < 1 || n_bins < 1) {
@@ -175,24 +173,6 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
        auto &my_hist = partial_hists_[thread_id];

        switch (kind) {
-        case WorkKind::Fill: {
-            // Histogram the pedestal-subtracted residual for each pixel
-            // in this thread's row slice. `my_pedestal` is sized to the
-            // local row range and indexed by (local_row, col). The
-            // [xmin, xmax) range gate lives inside PixelHistogramImpl::fill.
-            for (int local_row = 0; local_row < local_rows; ++local_row) {
-                const auto row = static_cast<ssize_t>(first_row + local_row);
-                for (ssize_t col = 0; col < image->shape(1); ++col) {
-                    const AxisType val =
-                        static_cast<AxisType>((*image)(row, col)) -
-                        static_cast<AxisType>(my_pedestal.mean(
-                            static_cast<uint32_t>(local_row),
-                            static_cast<uint32_t>(col)));
-                    my_hist.fill(local_row, static_cast<int>(col), val);
-                }
-            }
-            break;
-        }
        case WorkKind::PushPedestal: {
            // Accumulate raw frame values into this thread's pedestal
            // shard. Uses the pixel-level push_no_update which only
@@ -228,10 +208,9 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
            // whose residual is consistent with noise
            // (|residual| < n_sigma * cached_std), feed the raw value
            // back into the pedestal shard. With n_sigma == 0 the gate
-            // never fires, which makes this case behave identically to
-            // WorkKind::Fill (modulo the per-pixel gate evaluation).
-            // The [xmin, xmax) histogram gate lives inside
-            // PixelHistogramImpl::fill.
+            // never fires, recovering plain histogram-only behaviour
+            // (modulo the per-pixel gate evaluation). The [xmin, xmax)
+            // histogram gate lives inside PixelHistogramImpl::fill.
            auto &my_std = partial_std_[thread_id];
            const double n_sigma = n_sigma_.load(std::memory_order_relaxed);
            for (int local_row = 0; local_row < local_rows; ++local_row) {
@@ -270,7 +249,7 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
 }

 NDArray<PedestalTrackingPixelHistogram::StorageType, 3>
-PedestalTrackingPixelHistogram::hdata() const {
+PedestalTrackingPixelHistogram::values() const {
    // Make sure any pending async fills are merged in before we snapshot
    // the partial histograms. Cheap when the queue is already drained.
    flush();
@@ -332,23 +311,12 @@ NDArray<PedestalTrackingPixelHistogram::AxisType, 2> PedestalTrackingPixelHistog
    return data;
 }

-void PedestalTrackingPixelHistogram::fill(const NDView<FrameType, 2> &image) {
-    if (image.shape(0) != rows_ || image.shape(1) != cols_) {
-        throw std::invalid_argument(
-            "PedestalTrackingPixelHistogram image shape does not match "
-            "constructor shape");
-    }
-    std::lock_guard<std::mutex> fill_lock(fill_mutex_);
-    dispatch_(WorkKind::Fill, &image);
-}
-
 void PedestalTrackingPixelHistogram::fill_with_threshold_(
    const NDView<FrameType, 2> &image) {
-    if (image.shape(0) != rows_ || image.shape(1) != cols_) {
-        throw std::invalid_argument(
-            "PedestalTrackingPixelHistogram image shape does not match "
-            "constructor shape");
-    }
+    // Called only by the coordinator thread on images already shape-checked
+    // by fill_async, so there is no need to re-validate. fill_mutex_ is
+    // still required: push_pedestal_no_update / update_mean / pedestal_mean
+    // can run concurrently and must not race this fan-out.
    std::lock_guard<std::mutex> fill_lock(fill_mutex_);
    dispatch_(WorkKind::FillWithThreshold, &image);
 }
@@ -398,20 +366,7 @@ void PedestalTrackingPixelHistogram::coordinator_loop() {
           !async_queue_->isEmpty()) {
        if (async_queue_->read(item)) {
            coordinator_busy_.store(true, std::memory_order_release);
-            try {
-                fill_with_threshold_(item.view());
-            } catch (const std::exception &e) {
-                // fill_async pre-validates shape, so this
-                // is purely defensive. Log to stderr and keep the
-                // coordinator alive.
-                std::cerr << "PedestalTrackingPixelHistogram::"
-                             "fill_async error: "
-                          << e.what() << std::endl;
-            } catch (...) {
-                std::cerr << "PedestalTrackingPixelHistogram::"
-                             "fill_async error: unknown"
-                          << std::endl;
-            }
+            fill_with_threshold_(item.view());
            coordinator_busy_.store(false, std::memory_order_release);
        } else {
            std::this_thread::sleep_for(async_wait_);
@@ -2,8 +2,6 @@

 #include <algorithm>
 #include <cstring>
-#include <exception>
-#include <iostream>
 #include <stdexcept>
 #include <utility>
 #include <vector>
@@ -138,7 +136,7 @@ void PixelHistogram::worker_loop(int thread_id) {
    }
 }

-NDArray<PixelHistogram::StorageType, 3> PixelHistogram::hdata() const {
+NDArray<PixelHistogram::StorageType, 3> PixelHistogram::values() const {
    // Make sure any pending async fills are merged in before we snapshot
    // the partial histograms. Cheap when the queue is already drained.
    flush();
@@ -170,14 +168,10 @@ NDArray<PixelHistogram::StorageType, 3> PixelHistogram::hdata() const {
    return data;
 }

-void PixelHistogram::fill(const NDView<AxisType, 2> &image) {
-    if (image.shape(0) != rows_ || image.shape(1) != cols_) {
-        throw std::invalid_argument("PixelHistogram image shape does not match constructor shape");
-    }
-
-    // Serialise all calls into the fan-out. fill_mutex_ is always the
-    // outermost lock; work_mutex_ is taken briefly inside it.
-    std::lock_guard<std::mutex> fill_lock(fill_mutex_);
+void PixelHistogram::dispatch(const NDView<AxisType, 2> &image) {
+    // Called only by the coordinator thread on images already shape-checked
+    // by fill_async, so there is no need to re-validate or to serialise
+    // against other callers.

    // Reset counters and set work
    {
@@ -230,17 +224,7 @@ void PixelHistogram::coordinator_loop() {
    while (!stop_coordinator_.load(std::memory_order_acquire) || !async_queue_->isEmpty()) {
        if (async_queue_->read(item)) {
            coordinator_busy_.store(true, std::memory_order_release);
-            try {
-                fill(item.view());
-            } catch (const std::exception& e) {
-                // fill_async pre-validates shape, so this is purely
-                // defensive. Log to stderr and keep the coordinator alive.
-                std::cerr << "PixelHistogram::fill_async error: "
-                          << e.what() << std::endl;
-            } catch (...) {
-                std::cerr << "PixelHistogram::fill_async error: unknown"
-                          << std::endl;
-            }
+            dispatch(item.view());
            coordinator_busy_.store(false, std::memory_order_release);
        } else {
            std::this_thread::sleep_for(async_wait_);
@@ -16,28 +16,39 @@ using aare::PixelHistogram;
 using aare::NDArray;
 using aare::NDView;

+namespace {
+// The synchronous fill() has been removed; fill_async() is the only entry
+// point. This helper submits one frame and blocks until it has been merged
+// so the tests can keep their straightforward, ordered expectations.
+void fill_blocking(PixelHistogram &hist, const NDView<float, 2> &image) {
+    NDArray<float, 2> owned(image);
+    hist.fill_async(std::move(owned));
+    hist.flush();
+}
+} // namespace
+
 TEST_CASE("Fill one pixel of a 5x10 histogram"){
    PixelHistogram hist(5, 10, 20, 0.0, 10.0);
    NDArray<float, 2> image({5, 10}, -1.0); //Need to fill with -1 to not generate counts
    
    image(2, 3) = 5.7; // This should go into bin 11 (since bins are [0-0.5), [0.5-1.0), ..., [9.5-10.0))
    
-    hist.fill(image.view());
+    fill_blocking(hist, image.view());
    
-    auto hdata = hist.hdata();
-    REQUIRE(hdata.shape(0) == 5);
-    REQUIRE(hdata.shape(1) == 10);
-    REQUIRE(hdata.shape(2) == 20);
+    auto values = hist.values();
+    REQUIRE(values.shape(0) == 5);
+    REQUIRE(values.shape(1) == 10);
+    REQUIRE(values.shape(2) == 20);
    
    // Check that the correct bin for pixel (2,3) has count 1
-    CHECK(hdata(2, 3, 11) == 1);
+    CHECK(values(2, 3, 11) == 1);
    
    // Check that all other bins are zero
-    for (ssize_t row = 0; row < hdata.shape(0); ++row) {
-        for (ssize_t col = 0; col < hdata.shape(1); ++col) {
-            for (ssize_t bin = 0; bin < hdata.shape(2); ++bin) {
+    for (ssize_t row = 0; row < values.shape(0); ++row) {
+        for (ssize_t col = 0; col < values.shape(1); ++col) {
+            for (ssize_t bin = 0; bin < values.shape(2); ++bin) {
                if (!(row == 2 && col == 3 && bin == 11)) {
-                    CHECK(hdata(row, col, bin) == 0);
+                    CHECK(values(row, col, bin) == 0);
                }
            }
        }
@@ -54,29 +65,29 @@ TEST_CASE("Fill pixels with uneven partial histogram row slices"){
    image(3, 3) = 3.2;
    image(4, 0) = 4.2;

-    hist.fill(image.view());
+    fill_blocking(hist, image.view());

-    auto hdata = hist.hdata();
-    REQUIRE(hdata.shape(0) == 5);
-    REQUIRE(hdata.shape(1) == 4);
-    REQUIRE(hdata.shape(2) == 10);
+    auto values = hist.values();
+    REQUIRE(values.shape(0) == 5);
+    REQUIRE(values.shape(1) == 4);
+    REQUIRE(values.shape(2) == 10);

-    CHECK(hdata(0, 0, 0) == 1);
-    CHECK(hdata(1, 1, 1) == 1);
-    CHECK(hdata(2, 2, 2) == 1);
-    CHECK(hdata(3, 3, 3) == 1);
-    CHECK(hdata(4, 0, 4) == 1);
+    CHECK(values(0, 0, 0) == 1);
+    CHECK(values(1, 1, 1) == 1);
+    CHECK(values(2, 2, 2) == 1);
+    CHECK(values(3, 3, 3) == 1);
+    CHECK(values(4, 0, 4) == 1);

-    for (ssize_t row = 0; row < hdata.shape(0); ++row) {
-        for (ssize_t col = 0; col < hdata.shape(1); ++col) {
-            for (ssize_t bin = 0; bin < hdata.shape(2); ++bin) {
+    for (ssize_t row = 0; row < values.shape(0); ++row) {
+        for (ssize_t col = 0; col < values.shape(1); ++col) {
+            for (ssize_t bin = 0; bin < values.shape(2); ++bin) {
                const bool expected = (row == 0 && col == 0 && bin == 0) ||
                                      (row == 1 && col == 1 && bin == 1) ||
                                      (row == 2 && col == 2 && bin == 2) ||
                                      (row == 3 && col == 3 && bin == 3) ||
                                      (row == 4 && col == 0 && bin == 4);
                if (!expected) {
-                    CHECK(hdata(row, col, bin) == 0);
+                    CHECK(values(row, col, bin) == 0);
                }
            }
        }
@@ -113,9 +124,9 @@ TEST_CASE("Row partitioning handles rows < n_threads * ceil(rows/n_threads)") {
    for (ssize_t r = 0; r < rows; ++r) {
        img(r, 0) = static_cast<float>(r % n_bins) + 0.5f;
    }
-    hist.fill(img.view());
+    fill_blocking(hist, img.view());

-    auto h = hist.hdata();
+    auto h = hist.values();
    REQUIRE(h.shape(0) == rows);
    REQUIRE(h.shape(1) == cols);
    REQUIRE(h.shape(2) == n_bins);
@@ -183,9 +194,9 @@ TEST_CASE("Random fills match a reference implementation") {
    }

    for (const auto& img : frames) {
-        hist.fill(img.view());
+        fill_blocking(hist, img.view());
    }
-    auto h = hist.hdata();
+    auto h = hist.values();

    REQUIRE(h.shape(0) == rows);
    REQUIRE(h.shape(1) == cols);
@@ -209,9 +220,10 @@ TEST_CASE("Random fills match a reference implementation") {
    CHECK(all_match);
 }

-TEST_CASE("Async fill matches sync fill") {
+TEST_CASE("Streamed async fill matches per-frame flushed fill") {
    // Submit a stream of random frames through fill_async and compare
-    // against the same frames processed by fill() on a separate histogram.
+    // against the same frames submitted one-at-a-time with a flush between
+    // each (via fill_blocking) on a separate histogram.
    constexpr int rows = 19;
    constexpr int cols = 23;
    constexpr int n_bins = 16;
@@ -234,15 +246,15 @@ TEST_CASE("Async fill matches sync fill") {
        for (ssize_t r = 0; r < rows; ++r)
            for (ssize_t c = 0; c < cols; ++c)
                img(r, c) = dist(rng);
-        sync_hist.fill(img.view());
+        fill_blocking(sync_hist, img.view());
        async_hist.fill_async(std::move(img));
    }
-    // hdata() calls flush() internally, but exercise the explicit path too.
+    // values() calls flush() internally, but exercise the explicit path too.
    async_hist.flush();
    CHECK(async_hist.pending() == 0);

-    auto a = async_hist.hdata();
-    auto s = sync_hist.hdata();
+    auto a = async_hist.values();
+    auto s = sync_hist.values();
    REQUIRE(a.shape(0) == s.shape(0));
    REQUIRE(a.shape(1) == s.shape(1));
    REQUIRE(a.shape(2) == s.shape(2));
@@ -272,7 +284,7 @@ TEST_CASE("fill_async with mismatched shape throws") {
 TEST_CASE("Destructor drains pending async fills") {
    // Submit more frames than the queue can hold so backpressure kicks in,
    // then immediately let the histogram go out of scope and verify that
-    // the merged hdata() matches the reference computed sequentially.
+    // the merged values() matches the reference computed sequentially.
    constexpr int rows = 11;
    constexpr int cols = 7;
    constexpr int n_bins = 8;
@@ -303,14 +315,14 @@ TEST_CASE("Destructor drains pending async fills") {
            hist.fill_async(std::move(copy));
        }
        // No explicit flush(); destructor must drain.
-        // Capture hdata() *after* the loop but inside the scope so it
-        // observes everything that was submitted (hdata flushes too).
-        snapshot = hist.hdata();
+        // Capture values() *after* the loop but inside the scope so it
+        // observes everything that was submitted (values flushes too).
+        snapshot = hist.values();
    }

    PixelHistogram reference(rows, cols, n_bins, xmin, xmax, 2);
-    for (const auto& img : frames) reference.fill(img.view());
-    auto expected = reference.hdata();
+    for (const auto& img : frames) fill_blocking(reference, img.view());
+    auto expected = reference.values();

    bool all_match = true;
    for (ssize_t r = 0; r < rows && all_match; ++r) {