mirror of
https://github.com/slsdetectorgroup/aare.git
synced 2026-06-04 13:28:41 +02:00
removed sync fill
This commit is contained in:
@@ -21,7 +21,7 @@ namespace aare {
|
||||
class PedestalTrackingPixelHistogram {
|
||||
public:
|
||||
using StorageType = uint16_t;
|
||||
using AxisType = float; //TODO: template on pedesta type if needed
|
||||
using AxisType = float; //TODO: template on pedestal type if needed
|
||||
using FrameType = uint16_t;
|
||||
|
||||
private:
|
||||
@@ -30,7 +30,7 @@ class PedestalTrackingPixelHistogram {
|
||||
|
||||
// What kind of fan-out work the worker pool should currently do.
|
||||
// Set under work_mutex_; read by worker_loop after wakeup.
|
||||
enum class WorkKind { Fill, PushPedestal, UpdateMean, FillWithThreshold };
|
||||
enum class WorkKind { PushPedestal, UpdateMean, FillWithThreshold };
|
||||
|
||||
int rows_;
|
||||
int cols_;
|
||||
@@ -58,10 +58,9 @@ class PedestalTrackingPixelHistogram {
|
||||
std::atomic<bool> stop_workers_;
|
||||
int work_generation_;
|
||||
|
||||
// Serialises all fan-outs: `fill`, `push_pedestal_no_update`,
|
||||
// `update_mean`, and the async coordinator's calls into
|
||||
// `fill_with_threshold_`. Always the outermost lock; work_mutex_
|
||||
// is taken briefly inside it.
|
||||
// Serialises all fan-outs: `push_pedestal_no_update`, `update_mean`,
|
||||
// and the async coordinator's calls into `fill_with_threshold_`.
|
||||
// Always the outermost lock; work_mutex_ is taken briefly inside it.
|
||||
mutable std::mutex fill_mutex_;
|
||||
|
||||
// Async producer/consumer pipeline. SPSC queue feeds the coordinator
|
||||
@@ -85,7 +84,8 @@ class PedestalTrackingPixelHistogram {
|
||||
void dispatch_(WorkKind kind, const NDView<FrameType, 2> *image);
|
||||
|
||||
// Coordinator-facing entry point: takes fill_mutex_ and dispatches
|
||||
// FillWithThreshold to the worker pool. Same shape as fill().
|
||||
// FillWithThreshold to the worker pool. Only ever called by the
|
||||
// coordinator thread, on images already shape-checked by fill_async.
|
||||
void fill_with_threshold_(const NDView<FrameType, 2> &image);
|
||||
|
||||
public:
|
||||
@@ -101,13 +101,6 @@ class PedestalTrackingPixelHistogram {
|
||||
void update_mean();
|
||||
NDArray<AxisType, 2> pedestal_mean() const;
|
||||
|
||||
// Synchronous fill: blocks until the pedestal-subtracted residual
|
||||
// for `image` has been merged into the accumulators. Safe to call
|
||||
// concurrently with `fill_async` and the
|
||||
// pedestal-update API (calls are serialised through `fill_mutex_`).
|
||||
// Histogram-only - independent of `n_sigma()`.
|
||||
void fill(const NDView<FrameType, 2> &image);
|
||||
|
||||
void fill_async(NDArray<FrameType, 2> image);
|
||||
|
||||
// Sigma multiplier for the pedestal-update gate in
|
||||
@@ -125,7 +118,7 @@ class PedestalTrackingPixelHistogram {
|
||||
|
||||
// Implicitly flushes pending async fills first so the snapshot is
|
||||
// consistent with everything that was submitted up to the call.
|
||||
NDArray<StorageType, 3> hdata() const;
|
||||
NDArray<StorageType, 3> values() const;
|
||||
NDArray<AxisType, 1> bin_centers() const;
|
||||
NDArray<AxisType, 1> bin_edges() const;
|
||||
};
|
||||
|
||||
@@ -46,15 +46,8 @@ class PixelHistogram {
|
||||
std::atomic<bool> stop_workers_;
|
||||
int work_generation_;
|
||||
|
||||
// Serialises calls into the synchronous fan-out (`fill`). The
|
||||
// coordinator thread acquires it for the duration of each item it
|
||||
// processes, and direct callers of `fill` also acquire it. Without
|
||||
// this, concurrent sync + async fills would race on `current_image_`
|
||||
// and `work_generation_`.
|
||||
std::mutex fill_mutex_;
|
||||
|
||||
// Async producer/consumer pipeline. SPSC queue feeds the coordinator
|
||||
// thread, which calls the synchronous `fill` one image at a time.
|
||||
// thread, which fans each image out to the worker pool one at a time.
|
||||
std::unique_ptr<AsyncQueue> async_queue_;
|
||||
std::thread coordinator_;
|
||||
std::atomic<bool> stop_coordinator_{false};
|
||||
@@ -64,6 +57,10 @@ class PixelHistogram {
|
||||
// Private worker thread method
|
||||
void worker_loop(int thread_id);
|
||||
void coordinator_loop();
|
||||
// Fan a single image out to the worker pool and block until every
|
||||
// worker has merged its row band. Only ever called by the coordinator
|
||||
// thread, so no caller-serialisation lock is needed.
|
||||
void dispatch(const NDView<AxisType, 2> &image);
|
||||
int row_start(int thread_id) const;
|
||||
int row_count(int thread_id) const;
|
||||
|
||||
@@ -72,11 +69,6 @@ class PixelHistogram {
|
||||
int n_threads = 1, std::size_t max_pending = 16);
|
||||
~PixelHistogram();
|
||||
|
||||
// Synchronous fill: blocks until the image has been merged into the
|
||||
// accumulators. Safe to call concurrently with `fill_async` (calls are
|
||||
// serialised through `fill_mutex_`).
|
||||
void fill(const NDView<AxisType, 2> &image);
|
||||
|
||||
// Asynchronous fill: takes ownership of `image`, enqueues it for the
|
||||
// coordinator thread, and returns. Blocks the caller only if the queue
|
||||
// is full (single-producer, single-consumer queue with a sleep-poll
|
||||
@@ -92,7 +84,7 @@ class PixelHistogram {
|
||||
|
||||
// Implicitly flushes pending async fills first so the snapshot is
|
||||
// consistent with everything that was submitted up to the call.
|
||||
NDArray<StorageType, 3> hdata() const;
|
||||
NDArray<StorageType, 3> values() const;
|
||||
NDArray<AxisType, 1> bin_centers() const;
|
||||
NDArray<AxisType, 1> bin_edges() const;
|
||||
};
|
||||
|
||||
@@ -1,17 +1,10 @@
|
||||
#pragma once
|
||||
/*
|
||||
Implmenetation of a basic pixel histogram class with templated axis and storage type.
|
||||
Basic pixel histogram class with templated axis and storage type.
|
||||
row, col are integers and the per pixel histogram axis is AxisType.
|
||||
|
||||
Storage layout matches the existing PixelHistogram/PedestalTrackingPixelHistogram
|
||||
hdata() shape: NDArray<StorageType, 3> indexed as (row, col, bin) in row-major
|
||||
order, i.e. bin is the fastest-varying dimension. This keeps downstream
|
||||
consumers (memcpy stitching across thread shards, numpy reshaping in the
|
||||
python bindings) unchanged.
|
||||
|
||||
Bin policy: regular binning on [xmin, xmax). Values outside this range are
|
||||
silently dropped (matches boost::histogram axis option::none_t used by the
|
||||
boost-backed classes).
|
||||
silently dropped.
|
||||
*/
|
||||
|
||||
#include "aare/NDArray.hpp"
|
||||
@@ -23,7 +16,7 @@ boost-backed classes).
|
||||
namespace aare {
|
||||
|
||||
template <typename T, typename StorageType> class PixelHistogramImpl {
|
||||
NDArray<StorageType, 3> m_hdata;
|
||||
NDArray<StorageType, 3> m_values;
|
||||
NDArray<T, 1> m_edges;
|
||||
int m_rows;
|
||||
int m_cols;
|
||||
@@ -40,10 +33,10 @@ template <typename T, typename StorageType> class PixelHistogramImpl {
|
||||
void fill(const NDView<T, 2> &frame);
|
||||
void fill(int row, int col, T value);
|
||||
|
||||
NDArray<StorageType, 3> hdata() const;
|
||||
NDArray<StorageType, 3> values() const;
|
||||
// Zero-copy view of the underlying [rows x cols x n_bins] storage.
|
||||
// Lifetime is tied to *this. Use for low-level merge/stitching paths;
|
||||
// prefer hdata() for the public API where you want an owned copy.
|
||||
// prefer values() for the public API where you want an owned copy.
|
||||
NDView<StorageType, 3> view() const;
|
||||
NDArray<T, 1> bin_centers() const;
|
||||
NDArray<T, 1> bin_edges() const;
|
||||
@@ -53,7 +46,7 @@ template <typename T, typename StorageType>
|
||||
PixelHistogramImpl<T, StorageType>::PixelHistogramImpl(int rows, int cols,
|
||||
int n_bins, T xmin,
|
||||
T xmax)
|
||||
: m_hdata(NDArray<StorageType, 3>({static_cast<ssize_t>(rows),
|
||||
: m_values(NDArray<StorageType, 3>({static_cast<ssize_t>(rows),
|
||||
static_cast<ssize_t>(cols),
|
||||
static_cast<ssize_t>(n_bins)},
|
||||
StorageType{0})),
|
||||
@@ -83,41 +76,34 @@ void PixelHistogramImpl<T, StorageType>::fill(const NDView<T, 2> &frame) {
|
||||
}
|
||||
for (int row = 0; row < m_rows; ++row) {
|
||||
for (int col = 0; col < m_cols; ++col) {
|
||||
const T val = frame(row, col);
|
||||
if (val < m_xmin || val >= m_xmax) {
|
||||
continue;
|
||||
}
|
||||
int bin = static_cast<int>((val - m_xmin) * m_scale);
|
||||
// Guard against floating-point rounding pushing val just below
|
||||
// xmax to bin == n_bins.
|
||||
if (bin >= m_n_bins) {
|
||||
bin = m_n_bins - 1;
|
||||
}
|
||||
++m_hdata(row, col, bin);
|
||||
fill(row, col, frame(row, col));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename StorageType>
|
||||
void PixelHistogramImpl<T, StorageType>::fill(int row, int col, T value) {
|
||||
//TODO! add out of bounds check on row and col???
|
||||
if (value < m_xmin || value >= m_xmax) {
|
||||
return;
|
||||
}
|
||||
int bin = static_cast<int>((value - m_xmin) * m_scale);
|
||||
// Guard against floating-point rounding pushing val just below
|
||||
// xmax to bin == n_bins.
|
||||
if (bin >= m_n_bins) {
|
||||
bin = m_n_bins - 1;
|
||||
}
|
||||
++m_hdata(row, col, bin);
|
||||
++m_values(row, col, bin);
|
||||
}
|
||||
|
||||
template <typename T, typename StorageType>
|
||||
NDArray<StorageType, 3> PixelHistogramImpl<T, StorageType>::hdata() const {
|
||||
return m_hdata;
|
||||
NDArray<StorageType, 3> PixelHistogramImpl<T, StorageType>::values() const {
|
||||
return m_values;
|
||||
}
|
||||
|
||||
template <typename T, typename StorageType>
|
||||
NDView<StorageType, 3> PixelHistogramImpl<T, StorageType>::view() const {
|
||||
return m_hdata.view();
|
||||
return m_values.view();
|
||||
}
|
||||
|
||||
template <typename T, typename StorageType>
|
||||
|
||||
@@ -73,7 +73,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
|
||||
its running sums. Drains pending async fills first, then
|
||||
dispatches the update to the worker pool so the writes to
|
||||
each shard happen on the same thread that reads them in
|
||||
fill().
|
||||
fill_async().
|
||||
)",
|
||||
py::call_guard<py::gil_scoped_release>())
|
||||
|
||||
@@ -98,27 +98,6 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
|
||||
containing the current cached pedestal mean.
|
||||
)")
|
||||
|
||||
.def("fill",
|
||||
[](PedestalTrackingPixelHistogram &self,
|
||||
py::array_t<PedestalTrackingPixelHistogram::FrameType, 0>
|
||||
image) {
|
||||
auto view = make_view_2d(image);
|
||||
self.fill(view);
|
||||
},
|
||||
R"(
|
||||
Fill the histogram with image data (blocking).
|
||||
|
||||
The pedestal-subtracted residual `image - pedestal_mean`
|
||||
is computed per pixel inside the worker loop, so the
|
||||
pedestal must already have been bootstrapped via
|
||||
push_pedestal_no_update() + update_mean() for this to be
|
||||
meaningful.
|
||||
|
||||
Args:
|
||||
image: A 2D numpy array of raw pixel values (dtype: uint16)
|
||||
)",
|
||||
py::arg("image").noconvert())
|
||||
|
||||
.def("fill_async",
|
||||
[](PedestalTrackingPixelHistogram &self,
|
||||
py::array_t<PedestalTrackingPixelHistogram::FrameType, 0>
|
||||
@@ -189,9 +168,9 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
|
||||
for monitoring/diagnostics.
|
||||
)")
|
||||
|
||||
.def("hdata",
|
||||
.def("values",
|
||||
[](const PedestalTrackingPixelHistogram &self) {
|
||||
// hdata() implicitly flushes - release the GIL while it
|
||||
// values() implicitly flushes - release the GIL while it
|
||||
// does so. Allocation/copy into the NDArray runs without
|
||||
// the GIL too; only the numpy wrapping needs it.
|
||||
NDArray<PedestalTrackingPixelHistogram::StorageType, 3>
|
||||
@@ -200,7 +179,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
|
||||
py::gil_scoped_release release;
|
||||
ptr = new NDArray<
|
||||
PedestalTrackingPixelHistogram::StorageType, 3>(
|
||||
self.hdata());
|
||||
self.values());
|
||||
}
|
||||
return return_image_data(ptr);
|
||||
},
|
||||
|
||||
@@ -32,20 +32,6 @@ void define_pixel_histogram_bindings(py::module &m) {
|
||||
py::arg("xmin"), py::arg("xmax"), py::arg("n_threads") = 1,
|
||||
py::arg("max_pending") = std::size_t{16})
|
||||
|
||||
.def("fill",
|
||||
[](PixelHistogram &self,
|
||||
py::array_t<PixelHistogram::AxisType, 0> image) {
|
||||
auto view = make_view_2d(image);
|
||||
self.fill(view);
|
||||
},
|
||||
R"(
|
||||
Fill the histogram with image data (blocking).
|
||||
|
||||
Args:
|
||||
image: A 2D numpy array of pixel values (dtype: float32)
|
||||
)",
|
||||
py::arg("image").noconvert())
|
||||
|
||||
.def("fill_async",
|
||||
[](PixelHistogram &self,
|
||||
py::array_t<PixelHistogram::AxisType, 0> image) {
|
||||
@@ -89,15 +75,15 @@ void define_pixel_histogram_bindings(py::module &m) {
|
||||
for monitoring/diagnostics.
|
||||
)")
|
||||
|
||||
.def("hdata",
|
||||
.def("values",
|
||||
[](const PixelHistogram &self) {
|
||||
// hdata() implicitly flushes - release the GIL while it
|
||||
// values() implicitly flushes - release the GIL while it
|
||||
// does so. Allocation/copy into the NDArray runs without
|
||||
// the GIL too; only the numpy wrapping needs it.
|
||||
NDArray<PixelHistogram::StorageType, 3>* ptr = nullptr;
|
||||
{
|
||||
py::gil_scoped_release release;
|
||||
ptr = new NDArray<PixelHistogram::StorageType, 3>(self.hdata());
|
||||
ptr = new NDArray<PixelHistogram::StorageType, 3>(self.values());
|
||||
}
|
||||
return return_image_data(ptr);
|
||||
},
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@@ -15,7 +13,7 @@ PedestalTrackingPixelHistogram::PedestalTrackingPixelHistogram(
|
||||
int rows, int cols, int n_bins, AxisType xmin, AxisType xmax, int n_threads,
|
||||
std::size_t max_pending, double n_sigma)
|
||||
: rows_(rows), cols_(cols), n_threads_(n_threads), xmin_(xmin), xmax_(xmax),
|
||||
current_work_kind_(WorkKind::Fill), current_image_(nullptr),
|
||||
current_work_kind_(WorkKind::FillWithThreshold), current_image_(nullptr),
|
||||
completed_threads_(0), stop_workers_(false), work_generation_(0),
|
||||
n_sigma_(n_sigma) {
|
||||
if (rows_ < 1 || cols_ < 1 || n_bins < 1) {
|
||||
@@ -175,24 +173,6 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
|
||||
auto &my_hist = partial_hists_[thread_id];
|
||||
|
||||
switch (kind) {
|
||||
case WorkKind::Fill: {
|
||||
// Histogram the pedestal-subtracted residual for each pixel
|
||||
// in this thread's row slice. `my_pedestal` is sized to the
|
||||
// local row range and indexed by (local_row, col). The
|
||||
// [xmin, xmax) range gate lives inside PixelHistogramImpl::fill.
|
||||
for (int local_row = 0; local_row < local_rows; ++local_row) {
|
||||
const auto row = static_cast<ssize_t>(first_row + local_row);
|
||||
for (ssize_t col = 0; col < image->shape(1); ++col) {
|
||||
const AxisType val =
|
||||
static_cast<AxisType>((*image)(row, col)) -
|
||||
static_cast<AxisType>(my_pedestal.mean(
|
||||
static_cast<uint32_t>(local_row),
|
||||
static_cast<uint32_t>(col)));
|
||||
my_hist.fill(local_row, static_cast<int>(col), val);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case WorkKind::PushPedestal: {
|
||||
// Accumulate raw frame values into this thread's pedestal
|
||||
// shard. Uses the pixel-level push_no_update which only
|
||||
@@ -228,10 +208,9 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
|
||||
// whose residual is consistent with noise
|
||||
// (|residual| < n_sigma * cached_std), feed the raw value
|
||||
// back into the pedestal shard. With n_sigma == 0 the gate
|
||||
// never fires, which makes this case behave identically to
|
||||
// WorkKind::Fill (modulo the per-pixel gate evaluation).
|
||||
// The [xmin, xmax) histogram gate lives inside
|
||||
// PixelHistogramImpl::fill.
|
||||
// never fires, recovering plain histogram-only behaviour
|
||||
// (modulo the per-pixel gate evaluation). The [xmin, xmax)
|
||||
// histogram gate lives inside PixelHistogramImpl::fill.
|
||||
auto &my_std = partial_std_[thread_id];
|
||||
const double n_sigma = n_sigma_.load(std::memory_order_relaxed);
|
||||
for (int local_row = 0; local_row < local_rows; ++local_row) {
|
||||
@@ -270,7 +249,7 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
|
||||
}
|
||||
|
||||
NDArray<PedestalTrackingPixelHistogram::StorageType, 3>
|
||||
PedestalTrackingPixelHistogram::hdata() const {
|
||||
PedestalTrackingPixelHistogram::values() const {
|
||||
// Make sure any pending async fills are merged in before we snapshot
|
||||
// the partial histograms. Cheap when the queue is already drained.
|
||||
flush();
|
||||
@@ -332,23 +311,12 @@ NDArray<PedestalTrackingPixelHistogram::AxisType, 2> PedestalTrackingPixelHistog
|
||||
return data;
|
||||
}
|
||||
|
||||
void PedestalTrackingPixelHistogram::fill(const NDView<FrameType, 2> &image) {
|
||||
if (image.shape(0) != rows_ || image.shape(1) != cols_) {
|
||||
throw std::invalid_argument(
|
||||
"PedestalTrackingPixelHistogram image shape does not match "
|
||||
"constructor shape");
|
||||
}
|
||||
std::lock_guard<std::mutex> fill_lock(fill_mutex_);
|
||||
dispatch_(WorkKind::Fill, &image);
|
||||
}
|
||||
|
||||
void PedestalTrackingPixelHistogram::fill_with_threshold_(
|
||||
const NDView<FrameType, 2> &image) {
|
||||
if (image.shape(0) != rows_ || image.shape(1) != cols_) {
|
||||
throw std::invalid_argument(
|
||||
"PedestalTrackingPixelHistogram image shape does not match "
|
||||
"constructor shape");
|
||||
}
|
||||
// Called only by the coordinator thread on images already shape-checked
|
||||
// by fill_async, so there is no need to re-validate. fill_mutex_ is
|
||||
// still required: push_pedestal_no_update / update_mean / pedestal_mean
|
||||
// can run concurrently and must not race this fan-out.
|
||||
std::lock_guard<std::mutex> fill_lock(fill_mutex_);
|
||||
dispatch_(WorkKind::FillWithThreshold, &image);
|
||||
}
|
||||
@@ -398,20 +366,7 @@ void PedestalTrackingPixelHistogram::coordinator_loop() {
|
||||
!async_queue_->isEmpty()) {
|
||||
if (async_queue_->read(item)) {
|
||||
coordinator_busy_.store(true, std::memory_order_release);
|
||||
try {
|
||||
fill_with_threshold_(item.view());
|
||||
} catch (const std::exception &e) {
|
||||
// fill_async pre-validates shape, so this
|
||||
// is purely defensive. Log to stderr and keep the
|
||||
// coordinator alive.
|
||||
std::cerr << "PedestalTrackingPixelHistogram::"
|
||||
"fill_async error: "
|
||||
<< e.what() << std::endl;
|
||||
} catch (...) {
|
||||
std::cerr << "PedestalTrackingPixelHistogram::"
|
||||
"fill_async error: unknown"
|
||||
<< std::endl;
|
||||
}
|
||||
fill_with_threshold_(item.view());
|
||||
coordinator_busy_.store(false, std::memory_order_release);
|
||||
} else {
|
||||
std::this_thread::sleep_for(async_wait_);
|
||||
|
||||
+6
-22
@@ -2,8 +2,6 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@@ -138,7 +136,7 @@ void PixelHistogram::worker_loop(int thread_id) {
|
||||
}
|
||||
}
|
||||
|
||||
NDArray<PixelHistogram::StorageType, 3> PixelHistogram::hdata() const {
|
||||
NDArray<PixelHistogram::StorageType, 3> PixelHistogram::values() const {
|
||||
// Make sure any pending async fills are merged in before we snapshot
|
||||
// the partial histograms. Cheap when the queue is already drained.
|
||||
flush();
|
||||
@@ -170,14 +168,10 @@ NDArray<PixelHistogram::StorageType, 3> PixelHistogram::hdata() const {
|
||||
return data;
|
||||
}
|
||||
|
||||
void PixelHistogram::fill(const NDView<AxisType, 2> &image) {
|
||||
if (image.shape(0) != rows_ || image.shape(1) != cols_) {
|
||||
throw std::invalid_argument("PixelHistogram image shape does not match constructor shape");
|
||||
}
|
||||
|
||||
// Serialise all calls into the fan-out. fill_mutex_ is always the
|
||||
// outermost lock; work_mutex_ is taken briefly inside it.
|
||||
std::lock_guard<std::mutex> fill_lock(fill_mutex_);
|
||||
void PixelHistogram::dispatch(const NDView<AxisType, 2> &image) {
|
||||
// Called only by the coordinator thread on images already shape-checked
|
||||
// by fill_async, so there is no need to re-validate or to serialise
|
||||
// against other callers.
|
||||
|
||||
// Reset counters and set work
|
||||
{
|
||||
@@ -230,17 +224,7 @@ void PixelHistogram::coordinator_loop() {
|
||||
while (!stop_coordinator_.load(std::memory_order_acquire) || !async_queue_->isEmpty()) {
|
||||
if (async_queue_->read(item)) {
|
||||
coordinator_busy_.store(true, std::memory_order_release);
|
||||
try {
|
||||
fill(item.view());
|
||||
} catch (const std::exception& e) {
|
||||
// fill_async pre-validates shape, so this is purely
|
||||
// defensive. Log to stderr and keep the coordinator alive.
|
||||
std::cerr << "PixelHistogram::fill_async error: "
|
||||
<< e.what() << std::endl;
|
||||
} catch (...) {
|
||||
std::cerr << "PixelHistogram::fill_async error: unknown"
|
||||
<< std::endl;
|
||||
}
|
||||
dispatch(item.view());
|
||||
coordinator_busy_.store(false, std::memory_order_release);
|
||||
} else {
|
||||
std::this_thread::sleep_for(async_wait_);
|
||||
|
||||
+52
-40
@@ -16,28 +16,39 @@ using aare::PixelHistogram;
|
||||
using aare::NDArray;
|
||||
using aare::NDView;
|
||||
|
||||
namespace {
|
||||
// The synchronous fill() has been removed; fill_async() is the only entry
|
||||
// point. This helper submits one frame and blocks until it has been merged
|
||||
// so the tests can keep their straightforward, ordered expectations.
|
||||
void fill_blocking(PixelHistogram &hist, const NDView<float, 2> &image) {
|
||||
NDArray<float, 2> owned(image);
|
||||
hist.fill_async(std::move(owned));
|
||||
hist.flush();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_CASE("Fill one pixel of a 5x10 histogram"){
|
||||
PixelHistogram hist(5, 10, 20, 0.0, 10.0);
|
||||
NDArray<float, 2> image({5, 10}, -1.0); //Need to fill with -1 to not generate counts
|
||||
|
||||
image(2, 3) = 5.7; // This should go into bin 11 (since bins are [0-0.5), [0.5-1.0), ..., [9.5-10.0))
|
||||
|
||||
hist.fill(image.view());
|
||||
fill_blocking(hist, image.view());
|
||||
|
||||
auto hdata = hist.hdata();
|
||||
REQUIRE(hdata.shape(0) == 5);
|
||||
REQUIRE(hdata.shape(1) == 10);
|
||||
REQUIRE(hdata.shape(2) == 20);
|
||||
auto values = hist.values();
|
||||
REQUIRE(values.shape(0) == 5);
|
||||
REQUIRE(values.shape(1) == 10);
|
||||
REQUIRE(values.shape(2) == 20);
|
||||
|
||||
// Check that the correct bin for pixel (2,3) has count 1
|
||||
CHECK(hdata(2, 3, 11) == 1);
|
||||
CHECK(values(2, 3, 11) == 1);
|
||||
|
||||
// Check that all other bins are zero
|
||||
for (ssize_t row = 0; row < hdata.shape(0); ++row) {
|
||||
for (ssize_t col = 0; col < hdata.shape(1); ++col) {
|
||||
for (ssize_t bin = 0; bin < hdata.shape(2); ++bin) {
|
||||
for (ssize_t row = 0; row < values.shape(0); ++row) {
|
||||
for (ssize_t col = 0; col < values.shape(1); ++col) {
|
||||
for (ssize_t bin = 0; bin < values.shape(2); ++bin) {
|
||||
if (!(row == 2 && col == 3 && bin == 11)) {
|
||||
CHECK(hdata(row, col, bin) == 0);
|
||||
CHECK(values(row, col, bin) == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -54,29 +65,29 @@ TEST_CASE("Fill pixels with uneven partial histogram row slices"){
|
||||
image(3, 3) = 3.2;
|
||||
image(4, 0) = 4.2;
|
||||
|
||||
hist.fill(image.view());
|
||||
fill_blocking(hist, image.view());
|
||||
|
||||
auto hdata = hist.hdata();
|
||||
REQUIRE(hdata.shape(0) == 5);
|
||||
REQUIRE(hdata.shape(1) == 4);
|
||||
REQUIRE(hdata.shape(2) == 10);
|
||||
auto values = hist.values();
|
||||
REQUIRE(values.shape(0) == 5);
|
||||
REQUIRE(values.shape(1) == 4);
|
||||
REQUIRE(values.shape(2) == 10);
|
||||
|
||||
CHECK(hdata(0, 0, 0) == 1);
|
||||
CHECK(hdata(1, 1, 1) == 1);
|
||||
CHECK(hdata(2, 2, 2) == 1);
|
||||
CHECK(hdata(3, 3, 3) == 1);
|
||||
CHECK(hdata(4, 0, 4) == 1);
|
||||
CHECK(values(0, 0, 0) == 1);
|
||||
CHECK(values(1, 1, 1) == 1);
|
||||
CHECK(values(2, 2, 2) == 1);
|
||||
CHECK(values(3, 3, 3) == 1);
|
||||
CHECK(values(4, 0, 4) == 1);
|
||||
|
||||
for (ssize_t row = 0; row < hdata.shape(0); ++row) {
|
||||
for (ssize_t col = 0; col < hdata.shape(1); ++col) {
|
||||
for (ssize_t bin = 0; bin < hdata.shape(2); ++bin) {
|
||||
for (ssize_t row = 0; row < values.shape(0); ++row) {
|
||||
for (ssize_t col = 0; col < values.shape(1); ++col) {
|
||||
for (ssize_t bin = 0; bin < values.shape(2); ++bin) {
|
||||
const bool expected = (row == 0 && col == 0 && bin == 0) ||
|
||||
(row == 1 && col == 1 && bin == 1) ||
|
||||
(row == 2 && col == 2 && bin == 2) ||
|
||||
(row == 3 && col == 3 && bin == 3) ||
|
||||
(row == 4 && col == 0 && bin == 4);
|
||||
if (!expected) {
|
||||
CHECK(hdata(row, col, bin) == 0);
|
||||
CHECK(values(row, col, bin) == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -113,9 +124,9 @@ TEST_CASE("Row partitioning handles rows < n_threads * ceil(rows/n_threads)") {
|
||||
for (ssize_t r = 0; r < rows; ++r) {
|
||||
img(r, 0) = static_cast<float>(r % n_bins) + 0.5f;
|
||||
}
|
||||
hist.fill(img.view());
|
||||
fill_blocking(hist, img.view());
|
||||
|
||||
auto h = hist.hdata();
|
||||
auto h = hist.values();
|
||||
REQUIRE(h.shape(0) == rows);
|
||||
REQUIRE(h.shape(1) == cols);
|
||||
REQUIRE(h.shape(2) == n_bins);
|
||||
@@ -183,9 +194,9 @@ TEST_CASE("Random fills match a reference implementation") {
|
||||
}
|
||||
|
||||
for (const auto& img : frames) {
|
||||
hist.fill(img.view());
|
||||
fill_blocking(hist, img.view());
|
||||
}
|
||||
auto h = hist.hdata();
|
||||
auto h = hist.values();
|
||||
|
||||
REQUIRE(h.shape(0) == rows);
|
||||
REQUIRE(h.shape(1) == cols);
|
||||
@@ -209,9 +220,10 @@ TEST_CASE("Random fills match a reference implementation") {
|
||||
CHECK(all_match);
|
||||
}
|
||||
|
||||
TEST_CASE("Async fill matches sync fill") {
|
||||
TEST_CASE("Streamed async fill matches per-frame flushed fill") {
|
||||
// Submit a stream of random frames through fill_async and compare
|
||||
// against the same frames processed by fill() on a separate histogram.
|
||||
// against the same frames submitted one-at-a-time with a flush between
|
||||
// each (via fill_blocking) on a separate histogram.
|
||||
constexpr int rows = 19;
|
||||
constexpr int cols = 23;
|
||||
constexpr int n_bins = 16;
|
||||
@@ -234,15 +246,15 @@ TEST_CASE("Async fill matches sync fill") {
|
||||
for (ssize_t r = 0; r < rows; ++r)
|
||||
for (ssize_t c = 0; c < cols; ++c)
|
||||
img(r, c) = dist(rng);
|
||||
sync_hist.fill(img.view());
|
||||
fill_blocking(sync_hist, img.view());
|
||||
async_hist.fill_async(std::move(img));
|
||||
}
|
||||
// hdata() calls flush() internally, but exercise the explicit path too.
|
||||
// values() calls flush() internally, but exercise the explicit path too.
|
||||
async_hist.flush();
|
||||
CHECK(async_hist.pending() == 0);
|
||||
|
||||
auto a = async_hist.hdata();
|
||||
auto s = sync_hist.hdata();
|
||||
auto a = async_hist.values();
|
||||
auto s = sync_hist.values();
|
||||
REQUIRE(a.shape(0) == s.shape(0));
|
||||
REQUIRE(a.shape(1) == s.shape(1));
|
||||
REQUIRE(a.shape(2) == s.shape(2));
|
||||
@@ -272,7 +284,7 @@ TEST_CASE("fill_async with mismatched shape throws") {
|
||||
TEST_CASE("Destructor drains pending async fills") {
|
||||
// Submit more frames than the queue can hold so backpressure kicks in,
|
||||
// then immediately let the histogram go out of scope and verify that
|
||||
// the merged hdata() matches the reference computed sequentially.
|
||||
// the merged values() matches the reference computed sequentially.
|
||||
constexpr int rows = 11;
|
||||
constexpr int cols = 7;
|
||||
constexpr int n_bins = 8;
|
||||
@@ -303,14 +315,14 @@ TEST_CASE("Destructor drains pending async fills") {
|
||||
hist.fill_async(std::move(copy));
|
||||
}
|
||||
// No explicit flush(); destructor must drain.
|
||||
// Capture hdata() *after* the loop but inside the scope so it
|
||||
// observes everything that was submitted (hdata flushes too).
|
||||
snapshot = hist.hdata();
|
||||
// Capture values() *after* the loop but inside the scope so it
|
||||
// observes everything that was submitted (values flushes too).
|
||||
snapshot = hist.values();
|
||||
}
|
||||
|
||||
PixelHistogram reference(rows, cols, n_bins, xmin, xmax, 2);
|
||||
for (const auto& img : frames) reference.fill(img.view());
|
||||
auto expected = reference.hdata();
|
||||
for (const auto& img : frames) fill_blocking(reference, img.view());
|
||||
auto expected = reference.values();
|
||||
|
||||
bool all_match = true;
|
||||
for (ssize_t r = 0; r < rows && all_match; ++r) {
|
||||
|
||||
Reference in New Issue
Block a user