diff --git a/etc/dev-env.yml b/etc/dev-env.yml index 6873e22..7177e46 100644 --- a/etc/dev-env.yml +++ b/etc/dev-env.yml @@ -17,5 +17,6 @@ dependencies: - nlohmann_json - pytest - pytest-check + - boost - boost-histogram diff --git a/include/aare/PedestalTrackingPixelHistogram.hpp b/include/aare/PedestalTrackingPixelHistogram.hpp index 505bbfd..e3f4588 100644 --- a/include/aare/PedestalTrackingPixelHistogram.hpp +++ b/include/aare/PedestalTrackingPixelHistogram.hpp @@ -18,39 +18,12 @@ namespace bh = boost::histogram; namespace aare { -// PedestalTrackingPixelHistogram histograms `frame - pedestal_mean` per -// pixel. Both the pedestal and the histogram are sharded by row range: -// thread t exclusively owns the slice of rows -// [row_offsets_[t], row_offsets_[t + 1]) of BOTH `partial_pedestals_[t]` -// and `partial_hists_[t]`, so no two threads ever touch the same memory -// while a fan-out is in flight. -// -// All four pedestal/histogram-mutating operations (`fill`, -// `push_pedestal_no_update`, `update_mean`, FillWithThreshold) are -// dispatched through the same worker pool via a `WorkKind` switch in -// `worker_loop`. They are serialised against each other by -// `fill_mutex_`, which also serialises the async coordinator's calls -// into `fill_with_threshold_`. -// -// The single async entry point is `fill_async_with_threshold`, which -// histograms the residual AND additionally pushes raw pixel samples -// whose residual is within `n_sigma_ * cached_std` of zero back into -// the per-thread pedestal shard (sigma-clipped pedestal tracking). -// Setting `n_sigma_ = 0.0` disables that pedestal-update side effect, -// recovering plain histogram-only async filling. 
-// -// Typical usage: -// -// for (auto& f : pedestal_frames) pth.push_pedestal_no_update(f); -// pth.update_mean(); -// for (auto& f : measurement_frames) -// pth.fill_async_with_threshold(std::move(f)); -// pth.flush(); -// auto data = pth.hdata(); + + class PedestalTrackingPixelHistogram { public: using StorageType = uint16_t; - using AxisType = float; + using AxisType = float; //TODO: template on pedestal type if needed using FrameType = uint16_t; private: @@ -71,24 +44,15 @@ class PedestalTrackingPixelHistogram { int n_threads_; const AxisType xmin_; const AxisType xmax_; - // Cumulative row offsets so that thread t owns rows - // [row_offsets_[t], row_offsets_[t + 1]) - // Length is n_threads_ + 1; partition is balanced (the first - // rows_ % n_threads_ threads get one extra row each). + std::vector row_offsets_; - // Per-thread histograms over (residual, col, local_row). std::vector partial_hists_; // Per-thread pedestal sized [local_rows x cols]. Indexed by the // worker using the LOCAL row index (i.e. 0..row_count(t)-1), NOT the // global row index. Owned exclusively by worker `t` during a // dispatched fan-out. std::vector> partial_pedestals_; - // Per-thread cached std, sized [local_rows x cols]. Written by worker - // thread t inside the UpdateMean case of worker_loop (after the - // shard's m_mean has been refreshed); read by worker t in the - // FillWithThreshold case. Same shard-locality contract as - // partial_pedestals_. - std::vector> partial_std_; + std::vector> partial_std_; //cached for pedestal tracking // Thread pool members std::vector workers_; @@ -114,11 +78,6 @@ class PedestalTrackingPixelHistogram { std::atomic stop_coordinator_{false}; std::atomic coordinator_busy_{false}; std::chrono::microseconds async_wait_{100}; - - // Sigma multiplier used as the pedestal-update gate in - // FillWithThreshold. Atomic so the setter can update it without - // taking fill_mutex_; workers do relaxed loads on each pixel. 
- // Setting it to 0.0 disables the pedestal update entirely. std::atomic n_sigma_; // Private worker thread method @@ -144,46 +103,22 @@ class PedestalTrackingPixelHistogram { double n_sigma = 1.0); ~PedestalTrackingPixelHistogram(); - // Accumulate `frame` into the running pedestal estimate without - // refreshing the cached mean (the cheap path used while - // bootstrapping the pedestal). Workers update their own shard. - // Call `update_mean()` once you're done before starting to - // `fill`/`fill_async_with_threshold`. + void push_pedestal_no_update(const NDView &frame); - - // Refresh each partial pedestal's cached per-pixel mean from its - // running sums. Serialises with all other fan-outs through - // `fill_mutex_` so worker reads of the pedestal mean cannot race. void update_mean(); - - // Snapshot of the per-pixel pedestal mean, stitched together from - // all shards into a single `[rows x cols]` array. Implicitly - // drains pending async fills and takes `fill_mutex_` so it cannot - // tear against an in-flight `update_mean()` (which is the only - // operation that overwrites `m_mean`). NDArray pedestal_mean() const; // Synchronous fill: blocks until the pedestal-subtracted residual // for `image` has been merged into the accumulators. Safe to call - // concurrently with `fill_async_with_threshold` and the + // concurrently with `fill_async` and the // pedestal-update API (calls are serialised through `fill_mutex_`). // Histogram-only - independent of `n_sigma()`. void fill(const NDView &image); - // Asynchronous fill with sigma-clipped pedestal tracking. Takes - // ownership of `image`, enqueues it for the coordinator thread, and - // returns. The worker pool histograms each in-range residual AND - // additionally pushes the raw pixel value into the pedestal shard - // when `abs(residual) < n_sigma() * cached_std` (per-pixel gate). - // `n_sigma() = 0.0` disables the pedestal update, recovering plain - // histogram-only async filling. 
Blocks the caller only if the - // queue is full (single-producer, single-consumer queue with a - // sleep-poll backpressure loop, matching the convention in - // ClusterFinderMT). - void fill_async_with_threshold(NDArray image); + void fill_async(NDArray image); // Sigma multiplier for the pedestal-update gate in - // fill_async_with_threshold. Atomic; safe to read/write at any + // fill_async. Atomic; safe to read/write at any // time (the new value takes effect on subsequent pixel evaluations). double n_sigma() const; void set_n_sigma(double n_sigma); diff --git a/python/src/bind_PedestalTrackingPixelHistogram.hpp b/python/src/bind_PedestalTrackingPixelHistogram.hpp index 79ff42c..865f3f2 100644 --- a/python/src/bind_PedestalTrackingPixelHistogram.hpp +++ b/python/src/bind_PedestalTrackingPixelHistogram.hpp @@ -32,11 +32,11 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { determines per-thread memory usage. max_pending: Maximum number of frames that can be queued for asynchronous filling before - fill_async_with_threshold() applies backpressure + fill_async() applies backpressure on the caller (default: 16). n_sigma: Sigma multiplier used as the gate for the pedestal-update side effect of - fill_async_with_threshold(): a pixel sample is + fill_async(): a pixel sample is pushed back into the pedestal estimate iff ``abs(residual) < n_sigma * cached_std``. Set to ``0.0`` to disable the pedestal update and get @@ -119,7 +119,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { )", py::arg("image").noconvert()) - .def("fill_async_with_threshold", + .def("fill_async", [](PedestalTrackingPixelHistogram &self, py::array_t image) { @@ -130,10 +130,10 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { NDArray owned( view); // Release the GIL while enqueueing - - // fill_async_with_threshold can block on backpressure + // fill_async can block on backpressure // when the queue is full. 
py::gil_scoped_release release; - self.fill_async_with_threshold(std::move(owned)); + self.fill_async(std::move(owned)); }, R"( Submit an image for asynchronous filling with sigma-clipped @@ -167,7 +167,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { &PedestalTrackingPixelHistogram::set_n_sigma, R"( Sigma multiplier used as the pedestal-update gate in - fill_async_with_threshold(). Atomic; safe to read or write + fill_async(). Atomic; safe to read or write from any thread. Setting it to 0.0 disables the pedestal update entirely. The new value takes effect on subsequent per-pixel evaluations inside the worker pool. @@ -176,7 +176,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { .def("flush", &PedestalTrackingPixelHistogram::flush, R"( Block until all images submitted via - fill_async_with_threshold() have been merged into the + fill_async() have been merged into the accumulators. Cheap when nothing is pending. )", py::call_guard()) @@ -185,7 +185,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { R"( Return the number of images either waiting in the queue or currently being processed by the background thread (i.e. - still in flight after fill_async_with_threshold()). Useful + still in flight after fill_async()). Useful for monitoring/diagnostics. 
)") diff --git a/src/PedestalTrackingPixelHistogram.cpp b/src/PedestalTrackingPixelHistogram.cpp index 208fb88..26ede28 100644 --- a/src/PedestalTrackingPixelHistogram.cpp +++ b/src/PedestalTrackingPixelHistogram.cpp @@ -366,7 +366,7 @@ void PedestalTrackingPixelHistogram::fill_with_threshold_( dispatch_(WorkKind::FillWithThreshold, &image); } -void PedestalTrackingPixelHistogram::fill_async_with_threshold( +void PedestalTrackingPixelHistogram::fill_async( NDArray image) { if (image.shape(0) != rows_ || image.shape(1) != cols_) { throw std::invalid_argument( @@ -414,15 +414,15 @@ void PedestalTrackingPixelHistogram::coordinator_loop() { try { fill_with_threshold_(item.view()); } catch (const std::exception &e) { - // fill_async_with_threshold pre-validates shape, so this + // fill_async pre-validates shape, so this // is purely defensive. Log to stderr and keep the // coordinator alive. std::cerr << "PedestalTrackingPixelHistogram::" - "fill_async_with_threshold error: " + "fill_async error: " << e.what() << std::endl; } catch (...) { std::cerr << "PedestalTrackingPixelHistogram::" - "fill_async_with_threshold error: unknown" + "fill_async error: unknown" << std::endl; } coordinator_busy_.store(false, std::memory_order_release);