cleanup and updated dev env
Build on RHEL9 / build (push) Successful in 2m32s
Build on RHEL8 / build (push) Successful in 3m28s
Run tests using data on local RHEL8 / build (push) Successful in 4m5s

This commit is contained in:
Lars Erik Fröjd
2026-05-27 16:05:00 +02:00
parent 7b55a94dd9
commit dbe68fd963
4 changed files with 22 additions and 86 deletions
+1
View File
@@ -17,5 +17,6 @@ dependencies:
- nlohmann_json
- pytest
- pytest-check
- boost
- boost-histogram
@@ -18,39 +18,12 @@
namespace bh = boost::histogram;
namespace aare {
// PedestalTrackingPixelHistogram histograms `frame - pedestal_mean` per
// pixel. Both the pedestal and the histogram are sharded by row range:
// thread t exclusively owns the slice of rows
// [row_offsets_[t], row_offsets_[t + 1]) of BOTH `partial_pedestals_[t]`
// and `partial_hists_[t]`, so no two threads ever touch the same memory
// while a fan-out is in flight.
//
// All four pedestal/histogram-mutating operations (`fill`,
// `push_pedestal_no_update`, `update_mean`, FillWithThreshold) are
// dispatched through the same worker pool via a `WorkKind` switch in
// `worker_loop`. They are serialised against each other by
// `fill_mutex_`, which also serialises the async coordinator's calls
// into `fill_with_threshold_`.
//
// The single async entry point is `fill_async_with_threshold`, which
// histograms the residual AND additionally pushes raw pixel samples
// whose residual is within `n_sigma_ * cached_std` of zero back into
// the per-thread pedestal shard (sigma-clipped pedestal tracking).
// Setting `n_sigma_ = 0.0` disables that pedestal-update side effect,
// recovering plain histogram-only async filling.
//
// Typical usage:
//
// for (auto& f : pedestal_frames) pth.push_pedestal_no_update(f);
// pth.update_mean();
// for (auto& f : measurement_frames)
// pth.fill_async_with_threshold(std::move(f));
// pth.flush();
// auto data = pth.hdata();
class PedestalTrackingPixelHistogram {
public:
using StorageType = uint16_t;
using AxisType = float;
using AxisType = float; // TODO: template on pedestal type if needed
using FrameType = uint16_t;
private:
@@ -71,24 +44,15 @@ class PedestalTrackingPixelHistogram {
int n_threads_;
const AxisType xmin_;
const AxisType xmax_;
// Cumulative row offsets so that thread t owns rows
// [row_offsets_[t], row_offsets_[t + 1])
// Length is n_threads_ + 1; partition is balanced (the first
// rows_ % n_threads_ threads get one extra row each).
std::vector<int> row_offsets_;
// Per-thread histograms over (residual, col, local_row).
std::vector<Hist> partial_hists_;
// Per-thread pedestal sized [local_rows x cols]. Indexed by the
// worker using the LOCAL row index (i.e. 0..row_count(t)-1), NOT the
// global row index. Owned exclusively by worker `t` during a
// dispatched fan-out.
std::vector<Pedestal<AxisType>> partial_pedestals_;
// Per-thread cached std, sized [local_rows x cols]. Written by worker
// thread t inside the UpdateMean case of worker_loop (after the
// shard's m_mean has been refreshed); read by worker t in the
// FillWithThreshold case. Same shard-locality contract as
// partial_pedestals_.
std::vector<NDArray<double, 2>> partial_std_;
std::vector<NDArray<double, 2>> partial_std_; //cached for pedestal tracking
// Thread pool members
std::vector<std::thread> workers_;
@@ -114,11 +78,6 @@ class PedestalTrackingPixelHistogram {
std::atomic<bool> stop_coordinator_{false};
std::atomic<bool> coordinator_busy_{false};
std::chrono::microseconds async_wait_{100};
// Sigma multiplier used as the pedestal-update gate in
// FillWithThreshold. Atomic so the setter can update it without
// taking fill_mutex_; workers do relaxed loads on each pixel.
// Setting it to 0.0 disables the pedestal update entirely.
std::atomic<double> n_sigma_;
// Private worker thread method
@@ -144,46 +103,22 @@ class PedestalTrackingPixelHistogram {
double n_sigma = 1.0);
~PedestalTrackingPixelHistogram();
// Accumulate `frame` into the running pedestal estimate without
// refreshing the cached mean (the cheap path used while
// bootstrapping the pedestal). Workers update their own shard.
// Call `update_mean()` once you're done before starting to
// `fill`/`fill_async_with_threshold`.
void push_pedestal_no_update(const NDView<FrameType, 2> &frame);
// Refresh each partial pedestal's cached per-pixel mean from its
// running sums. Serialises with all other fan-outs through
// `fill_mutex_` so worker reads of the pedestal mean cannot race.
void update_mean();
// Snapshot of the per-pixel pedestal mean, stitched together from
// all shards into a single `[rows x cols]` array. Implicitly
// drains pending async fills and takes `fill_mutex_` so it cannot
// tear against an in-flight `update_mean()` (which is the only
// operation that overwrites `m_mean`).
NDArray<AxisType, 2> pedestal_mean() const;
// Synchronous fill: blocks until the pedestal-subtracted residual
// for `image` has been merged into the accumulators. Safe to call
// concurrently with `fill_async_with_threshold` and the
// concurrently with `fill_async` and the
// pedestal-update API (calls are serialised through `fill_mutex_`).
// Histogram-only - independent of `n_sigma()`.
void fill(const NDView<FrameType, 2> &image);
// Asynchronous fill with sigma-clipped pedestal tracking. Takes
// ownership of `image`, enqueues it for the coordinator thread, and
// returns. The worker pool histograms each in-range residual AND
// additionally pushes the raw pixel value into the pedestal shard
// when `abs(residual) < n_sigma() * cached_std` (per-pixel gate).
// `n_sigma() = 0.0` disables the pedestal update, recovering plain
// histogram-only async filling. Blocks the caller only if the
// queue is full (single-producer, single-consumer queue with a
// sleep-poll backpressure loop, matching the convention in
// ClusterFinderMT).
void fill_async_with_threshold(NDArray<FrameType, 2> image);
void fill_async(NDArray<FrameType, 2> image);
// Sigma multiplier for the pedestal-update gate in
// fill_async_with_threshold. Atomic; safe to read/write at any
// fill_async. Atomic; safe to read/write at any
// time (the new value takes effect on subsequent pixel evaluations).
double n_sigma() const;
void set_n_sigma(double n_sigma);
@@ -32,11 +32,11 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
determines per-thread memory usage.
max_pending: Maximum number of frames that can be
queued for asynchronous filling before
fill_async_with_threshold() applies backpressure
fill_async() applies backpressure
on the caller (default: 16).
n_sigma: Sigma multiplier used as the gate for the
pedestal-update side effect of
fill_async_with_threshold(): a pixel sample is
fill_async(): a pixel sample is
pushed back into the pedestal estimate iff
``abs(residual) < n_sigma * cached_std``. Set to
``0.0`` to disable the pedestal update and get
@@ -119,7 +119,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
)",
py::arg("image").noconvert())
.def("fill_async_with_threshold",
.def("fill_async",
[](PedestalTrackingPixelHistogram &self,
py::array_t<PedestalTrackingPixelHistogram::FrameType, 0>
image) {
@@ -130,10 +130,10 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
NDArray<PedestalTrackingPixelHistogram::FrameType, 2> owned(
view);
// Release the GIL while enqueueing -
// fill_async_with_threshold can block on backpressure
// fill_async can block on backpressure
// when the queue is full.
py::gil_scoped_release release;
self.fill_async_with_threshold(std::move(owned));
self.fill_async(std::move(owned));
},
R"(
Submit an image for asynchronous filling with sigma-clipped
@@ -167,7 +167,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
&PedestalTrackingPixelHistogram::set_n_sigma,
R"(
Sigma multiplier used as the pedestal-update gate in
fill_async_with_threshold(). Atomic; safe to read or write
fill_async(). Atomic; safe to read or write
from any thread. Setting it to 0.0 disables the pedestal
update entirely. The new value takes effect on subsequent
per-pixel evaluations inside the worker pool.
@@ -176,7 +176,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
.def("flush", &PedestalTrackingPixelHistogram::flush,
R"(
Block until all images submitted via
fill_async_with_threshold() have been merged into the
fill_async() have been merged into the
accumulators. Cheap when nothing is pending.
)",
py::call_guard<py::gil_scoped_release>())
@@ -185,7 +185,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) {
R"(
Return the number of images either waiting in the queue or
currently being processed by the background thread (i.e.
still in flight after fill_async_with_threshold()). Useful
still in flight after fill_async()). Useful
for monitoring/diagnostics.
)")
+4 -4
View File
@@ -366,7 +366,7 @@ void PedestalTrackingPixelHistogram::fill_with_threshold_(
dispatch_(WorkKind::FillWithThreshold, &image);
}
void PedestalTrackingPixelHistogram::fill_async_with_threshold(
void PedestalTrackingPixelHistogram::fill_async(
NDArray<FrameType, 2> image) {
if (image.shape(0) != rows_ || image.shape(1) != cols_) {
throw std::invalid_argument(
@@ -414,15 +414,15 @@ void PedestalTrackingPixelHistogram::coordinator_loop() {
try {
fill_with_threshold_(item.view());
} catch (const std::exception &e) {
// fill_async_with_threshold pre-validates shape, so this
// fill_async pre-validates shape, so this
// is purely defensive. Log to stderr and keep the
// coordinator alive.
std::cerr << "PedestalTrackingPixelHistogram::"
"fill_async_with_threshold error: "
"fill_async error: "
<< e.what() << std::endl;
} catch (...) {
std::cerr << "PedestalTrackingPixelHistogram::"
"fill_async_with_threshold error: unknown"
"fill_async error: unknown"
<< std::endl;
}
coordinator_busy_.store(false, std::memory_order_release);