diff --git a/CMakeLists.txt b/CMakeLists.txt index 219da34..7918257 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,9 +434,8 @@ find_package(Threads REQUIRED) target_link_libraries( aare_core - PUBLIC fmt::fmt - nlohmann_json::nlohmann_json - ${STD_FS_LIB} # from helpers.cmake + PUBLIC fmt::fmt nlohmann_json::nlohmann_json ${STD_FS_LIB} # from + # helpers.cmake Minuit2::Minuit2 PRIVATE aare_compiler_flags Threads::Threads $) diff --git a/include/aare/Pedestal.hpp b/include/aare/Pedestal.hpp index 9d53c5e..a23cf71 100644 --- a/include/aare/Pedestal.hpp +++ b/include/aare/Pedestal.hpp @@ -112,7 +112,9 @@ template class Pedestal { } } - template void push_with_threshold(const NDView frame, const NDView threshold) { + template + void push_with_threshold(const NDView frame, + const NDView threshold) { assert(frame.size() == m_rows * m_cols); // TODO! move away from m_rows, m_cols @@ -123,7 +125,8 @@ template class Pedestal { for (size_t row = 0; row < m_rows; row++) { for (size_t col = 0; col < m_cols; col++) { - if (fabs(frame(row, col)-mean(row, col)) < threshold(row, col)) { + if (fabs(frame(row, col) - mean(row, col)) < + threshold(row, col)) { push(row, col, frame(row, col)); } } diff --git a/include/aare/PedestalTrackingPixelHistogram.hpp b/include/aare/PedestalTrackingPixelHistogram.hpp index 9f6684c..92e5c35 100644 --- a/include/aare/PedestalTrackingPixelHistogram.hpp +++ b/include/aare/PedestalTrackingPixelHistogram.hpp @@ -10,19 +10,18 @@ #include #include #include +#include #include #include #include #include -#include namespace aare { - class PedestalTrackingPixelHistogram { public: using StorageType = uint16_t; - using AxisType = float; //TODO: template on pedestal type if needed + using AxisType = float; // TODO: template on pedestal type if needed using FrameType = uint16_t; private: @@ -46,7 +45,8 @@ class PedestalTrackingPixelHistogram { // global row index. Owned exclusively by worker `t` during a // dispatched fan-out. std::vector> partial_pedestals_; - std::vector> partial_std_; //cached for pedestal tracking + std::vector> partial_std_; // cached for pedestal + // tracking // Thread pool members std::vector workers_; @@ -97,16 +97,17 @@ class PedestalTrackingPixelHistogram { double n_sigma = 1.0); ~PedestalTrackingPixelHistogram(); - void push_pedestal_no_update(const NDView &frame); void update_mean(); NDArray pedestal_mean() const; void fill_async(NDArray image); - void fill_from_file(const std::filesystem::path &fname, ssize_t max_frames = -1, bool verbose = false); + void fill_from_file(const std::filesystem::path &fname, + ssize_t max_frames = -1, bool verbose = false); - void process_pedestal_file(const std::filesystem::path &fname, ssize_t max_frames = -1, bool verbose = false); + void process_pedestal_file(const std::filesystem::path &fname, + ssize_t max_frames = -1, bool verbose = false); // Sigma multiplier for the pedestal-update gate in // fill_async. Atomic; safe to read/write at any diff --git a/include/aare/PixelHistogram.hpp b/include/aare/PixelHistogram.hpp index 241684c..c08271c 100644 --- a/include/aare/PixelHistogram.hpp +++ b/include/aare/PixelHistogram.hpp @@ -41,7 +41,7 @@ class PixelHistogram { std::mutex work_mutex_; std::condition_variable work_cv_; std::condition_variable done_cv_; - const NDView* current_image_; + const NDView *current_image_; std::atomic completed_threads_; std::atomic stop_workers_; int work_generation_; diff --git a/include/aare/PixelHistogramImpl.hpp b/include/aare/PixelHistogramImpl.hpp index 468bdb1..f876ca6 100644 --- a/include/aare/PixelHistogramImpl.hpp +++ b/include/aare/PixelHistogramImpl.hpp @@ -47,9 +47,9 @@ PixelHistogramImpl::PixelHistogramImpl(int rows, int cols, int n_bins, T xmin, T xmax) : m_values(NDArray({static_cast(rows), - static_cast(cols), - static_cast(n_bins)}, - StorageType{0})), + static_cast(cols), + static_cast(n_bins)}, + StorageType{0})), m_edges(NDArray({static_cast(n_bins + 1)})), m_rows(rows), m_cols(cols), m_n_bins(n_bins), m_xmin(xmin), m_xmax(xmax), m_scale(static_cast(n_bins) / (xmax - xmin)) { @@ -83,14 +83,14 @@ void PixelHistogramImpl::fill(const NDView &frame) { template void PixelHistogramImpl::fill(int row, int col, T value) { - //TODO! add out of bounds check on row and col??? - + // TODO! add out of bounds check on row and col??? + if (value < m_xmin || value >= m_xmax) { return; } int bin = static_cast((value - m_xmin) * m_scale); // Guard against floating-point rounding pushing val just below - // xmax to bin == n_bins. + // xmax to bin == n_bins. if (bin >= m_n_bins) { bin = m_n_bins - 1; } diff --git a/python/src/bind_PedestalTrackingPixelHistogram.hpp b/python/src/bind_PedestalTrackingPixelHistogram.hpp index c42b556..c44f594 100644 --- a/python/src/bind_PedestalTrackingPixelHistogram.hpp +++ b/python/src/bind_PedestalTrackingPixelHistogram.hpp @@ -15,9 +15,9 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { m, "PedestalTrackingPixelHistogram", "A pixel-wise histogram of frame - pedestal residuals, with a " "per-pixel running pedestal estimate sharded across worker threads") - .def(py::init(), - R"( + .def( + py::init(), + R"( Initialize a PedestalTrackingPixelHistogram. Args: @@ -43,19 +43,19 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { histogram-only async behaviour (default: 1.0). Also exposed live via the ``n_sigma`` property. )", - py::arg("rows"), py::arg("cols"), py::arg("n_bins"), - py::arg("xmin"), py::arg("xmax"), py::arg("n_threads") = 1, - py::arg("max_pending") = std::size_t{16}, - py::arg("n_sigma") = 1.0) + py::arg("rows"), py::arg("cols"), py::arg("n_bins"), + py::arg("xmin"), py::arg("xmax"), py::arg("n_threads") = 1, + py::arg("max_pending") = std::size_t{16}, py::arg("n_sigma") = 1.0) - .def("push_pedestal_no_update", - [](PedestalTrackingPixelHistogram &self, - py::array_t - frame) { - auto view = make_view_2d(frame); - self.push_pedestal_no_update(view); - }, - R"( + .def( + "push_pedestal_no_update", + [](PedestalTrackingPixelHistogram &self, + py::array_t + frame) { + auto view = make_view_2d(frame); + self.push_pedestal_no_update(view); + }, + R"( Accumulate `frame` into the per-pixel running pedestal estimate without refreshing the cached mean. @@ -65,7 +65,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { Args: frame: A 2D numpy array of raw pixel values (dtype: uint16) )", - py::arg("frame").noconvert()) + py::arg("frame").noconvert()) .def("update_mean", &PedestalTrackingPixelHistogram::update_mean, R"( @@ -77,19 +77,22 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { )", py::call_guard()) - .def("pedestal_mean", - [](const PedestalTrackingPixelHistogram &self) { - // pedestal_mean() flushes + locks + memcpys; do all of - // that without the GIL, only reacquire to wrap into a - // numpy array. - NDArray *ptr = nullptr; - { - py::gil_scoped_release release; - ptr = new NDArray(self.pedestal_mean()); - } - return return_image_data(ptr); - }, - R"( + .def( + "pedestal_mean", + [](const PedestalTrackingPixelHistogram &self) { + // pedestal_mean() flushes + locks + memcpys; do all of + // that without the GIL, only reacquire to wrap into a + // numpy array. + NDArray *ptr = + nullptr; + { + py::gil_scoped_release release; + ptr = new NDArray(self.pedestal_mean()); + } + return return_image_data(ptr); + }, + R"( Snapshot the per-pixel pedestal mean stitched together from all shards. @@ -98,23 +101,24 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { containing the current cached pedestal mean. )") - .def("fill_async", - [](PedestalTrackingPixelHistogram &self, - py::array_t - image) { - // Copy the numpy buffer into an owned NDArray while we - // still hold the GIL so we don't depend on the array's - // backing storage outliving this call. - auto view = make_view_2d(image); - NDArray owned( - view); - // Release the GIL while enqueueing - - // fill_async can block on backpressure - // when the queue is full. - py::gil_scoped_release release; - self.fill_async(std::move(owned)); - }, - R"( + .def( + "fill_async", + [](PedestalTrackingPixelHistogram &self, + py::array_t + image) { + // Copy the numpy buffer into an owned NDArray while we + // still hold the GIL so we don't depend on the array's + // backing storage outliving this call. + auto view = make_view_2d(image); + NDArray owned( + view); + // Release the GIL while enqueueing - + // fill_async can block on backpressure + // when the queue is full. + py::gil_scoped_release release; + self.fill_async(std::move(owned)); + }, + R"( Submit an image for asynchronous filling with sigma-clipped pedestal tracking. @@ -140,8 +144,7 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { Args: image: A 2D numpy array of raw pixel values (dtype: uint16) )", - py::arg("image").noconvert()) - + py::arg("image").noconvert()) .def("fill_from_file", &PedestalTrackingPixelHistogram::fill_from_file, R"( @@ -150,17 +153,20 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { Args: file_path: Path to the file to fill from max_frames: Maximum number of frames to fill from the file (default: -1) - )",py::call_guard(), - py::arg("fname"), py::arg("max_frames") = -1, py::arg("verbose") = false) - .def("process_pedestal_file", &PedestalTrackingPixelHistogram::process_pedestal_file, + )", + py::call_guard(), py::arg("fname"), + py::arg("max_frames") = -1, py::arg("verbose") = false) + .def("process_pedestal_file", + &PedestalTrackingPixelHistogram::process_pedestal_file, R"( Process a pedestal file. Args: file_path: Path to the file to process max_frames: Maximum number of frames to process from the file (default: -1) - )",py::call_guard(), - py::arg("fname"), py::arg("max_frames") = -1, py::arg("verbose") = false) + )", + py::call_guard(), py::arg("fname"), + py::arg("max_frames") = -1, py::arg("verbose") = false) .def_property("n_sigma", &PedestalTrackingPixelHistogram::n_sigma, &PedestalTrackingPixelHistogram::set_n_sigma, R"( @@ -187,22 +193,23 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { for monitoring/diagnostics. )") - .def("values", - [](const PedestalTrackingPixelHistogram &self) { - // values() implicitly flushes - release the GIL while it - // does so. Allocation/copy into the NDArray runs without - // the GIL too; only the numpy wrapping needs it. - NDArray - *ptr = nullptr; - { - py::gil_scoped_release release; - ptr = new NDArray< - PedestalTrackingPixelHistogram::StorageType, 3>( - self.values()); - } - return return_image_data(ptr); - }, - R"( + .def( + "values", + [](const PedestalTrackingPixelHistogram &self) { + // values() implicitly flushes - release the GIL while it + // does so. Allocation/copy into the NDArray runs without + // the GIL too; only the numpy wrapping needs it. + NDArray *ptr = + nullptr; + { + py::gil_scoped_release release; + ptr = + new NDArray(self.values()); + } + return return_image_data(ptr); + }, + R"( Get the histogram data as a numpy array. Implicitly flushes any pending asynchronous fills before @@ -214,28 +221,30 @@ void define_pedestal_tracking_pixel_histogram_bindings(py::module &m) { containing the histogram bins for each pixel. )") - .def("bin_centers", - [](const PedestalTrackingPixelHistogram &self) { - auto ptr = new NDArray< - PedestalTrackingPixelHistogram::AxisType, 1>( - self.bin_centers()); - return return_image_data(ptr); - }, - R"( + .def( + "bin_centers", + [](const PedestalTrackingPixelHistogram &self) { + auto ptr = + new NDArray( + self.bin_centers()); + return return_image_data(ptr); + }, + R"( Get the bin centers along the residual axis. Returns: A 1D numpy array (dtype: float32) of bin center values. )") - .def("bin_edges", - [](const PedestalTrackingPixelHistogram &self) { - auto ptr = new NDArray< - PedestalTrackingPixelHistogram::AxisType, 1>( - self.bin_edges()); - return return_image_data(ptr); - }, - R"( + .def( + "bin_edges", + [](const PedestalTrackingPixelHistogram &self) { + auto ptr = + new NDArray( + self.bin_edges()); + return return_image_data(ptr); + }, + R"( Get the bin edges along the residual axis. Returns: diff --git a/python/src/bind_PixelHistogram.hpp b/python/src/bind_PixelHistogram.hpp index 1e41cc7..aea8469 100644 --- a/python/src/bind_PixelHistogram.hpp +++ b/python/src/bind_PixelHistogram.hpp @@ -12,7 +12,7 @@ using namespace ::aare; void define_pixel_histogram_bindings(py::module &m) { py::class_(m, "PixelHistogram", - "A histogram for pixel-wise statistics") + "A histogram for pixel-wise statistics") .def(py::init(), R"( Initialize a PixelHistogram. @@ -32,20 +32,21 @@ void define_pixel_histogram_bindings(py::module &m) { py::arg("xmin"), py::arg("xmax"), py::arg("n_threads") = 1, py::arg("max_pending") = std::size_t{16}) - .def("fill_async", - [](PixelHistogram &self, - py::array_t image) { - // Copy the numpy buffer into an owned NDArray while we - // still hold the GIL so we don't depend on the array's - // backing storage outliving this call. - auto view = make_view_2d(image); - NDArray owned(view); - // Release the GIL while enqueueing - fill_async can block - // on backpressure when the queue is full. - py::gil_scoped_release release; - self.fill_async(std::move(owned)); - }, - R"( + .def( + "fill_async", + [](PixelHistogram &self, + py::array_t image) { + // Copy the numpy buffer into an owned NDArray while we + // still hold the GIL so we don't depend on the array's + // backing storage outliving this call. + auto view = make_view_2d(image); + NDArray owned(view); + // Release the GIL while enqueueing - fill_async can block + // on backpressure when the queue is full. + py::gil_scoped_release release; + self.fill_async(std::move(owned)); + }, + R"( Submit an image for asynchronous filling. The image is copied into an internal buffer before this call @@ -57,37 +58,37 @@ void define_pixel_histogram_bindings(py::module &m) { Args: image: A 2D numpy array of pixel values (dtype: float32) )", - py::arg("image").noconvert()) + py::arg("image").noconvert()) - .def("flush", - &PixelHistogram::flush, + .def("flush", &PixelHistogram::flush, R"( Block until all images submitted via fill_async() have been merged into the accumulators. Cheap when nothing is pending. )", py::call_guard()) - .def("pending", - &PixelHistogram::pending, + .def("pending", &PixelHistogram::pending, R"( Return the number of images either waiting in the queue or currently being processed by the background thread. Useful for monitoring/diagnostics. )") - .def("values", - [](const PixelHistogram &self) { - // values() implicitly flushes - release the GIL while it - // does so. Allocation/copy into the NDArray runs without - // the GIL too; only the numpy wrapping needs it. - NDArray* ptr = nullptr; - { - py::gil_scoped_release release; - ptr = new NDArray(self.values()); - } - return return_image_data(ptr); - }, - R"( + .def( + "values", + [](const PixelHistogram &self) { + // values() implicitly flushes - release the GIL while it + // does so. Allocation/copy into the NDArray runs without + // the GIL too; only the numpy wrapping needs it. + NDArray *ptr = nullptr; + { + py::gil_scoped_release release; + ptr = new NDArray( + self.values()); + } + return return_image_data(ptr); + }, + R"( Get the histogram data as a numpy array. Implicitly flushes any pending asynchronous fills before @@ -98,23 +99,27 @@ void define_pixel_histogram_bindings(py::module &m) { A 3D numpy array containing the histogram bins for each pixel )") - .def("bin_centers", - [](const PixelHistogram &self) { - auto ptr = new NDArray(self.bin_centers()); - return return_image_data(ptr); - }, - R"( + .def( + "bin_centers", + [](const PixelHistogram &self) { + auto ptr = new NDArray( + self.bin_centers()); + return return_image_data(ptr); + }, + R"( Get the bin centers along the value axis. Returns: A 1D numpy array containing the center values for each histogram bin )") - .def("bin_edges", - [](const PixelHistogram &self) { - auto ptr = new NDArray(self.bin_edges()); - return return_image_data(ptr); - }, - R"( + .def( + "bin_edges", + [](const PixelHistogram &self) { + auto ptr = + new NDArray(self.bin_edges()); + return return_image_data(ptr); + }, + R"( Get the bin edges along the value axis. Returns: diff --git a/python/src/pedestal.hpp b/python/src/pedestal.hpp index f0b27e2..6f8fc06 100644 --- a/python/src/pedestal.hpp +++ b/python/src/pedestal.hpp @@ -33,8 +33,8 @@ void define_pedestal_bindings(py::module &m, const std::string &name) { static_cast(sizeof(SUM_TYPE)), static_cast(v.strides()[1]) * static_cast(sizeof(SUM_TYPE))}; - auto arr = py::array_t(shape, byte_strides, - v.data(), self_py); + auto arr = py::array_t(shape, byte_strides, v.data(), + self_py); arr.attr("setflags")(py::arg("write") = false); return arr; }) diff --git a/src/PedestalTrackingPixelHistogram.cpp b/src/PedestalTrackingPixelHistogram.cpp index f1733b1..cc17823 100644 --- a/src/PedestalTrackingPixelHistogram.cpp +++ b/src/PedestalTrackingPixelHistogram.cpp @@ -20,8 +20,8 @@ PedestalTrackingPixelHistogram::PedestalTrackingPixelHistogram( completed_threads_(0), stop_workers_(false), work_generation_(0), n_sigma_(n_sigma) { if (rows_ < 1 || cols_ < 1 || n_bins < 1) { - throw std::invalid_argument( - "PedestalTrackingPixelHistogram requires positive rows, cols and bins"); + throw std::invalid_argument("PedestalTrackingPixelHistogram requires " + "positive rows, cols and bins"); } if (n_threads < 1) { throw std::invalid_argument( @@ -73,8 +73,8 @@ PedestalTrackingPixelHistogram::PedestalTrackingPixelHistogram( // Async pipeline. The PCQ holds (size - 1) usable slots, so size up by // one to honour the requested max_pending. - async_queue_ = - std::make_unique(static_cast(max_pending + 1)); + async_queue_ = std::make_unique( + static_cast(max_pending + 1)); coordinator_ = std::thread([this]() { this->coordinator_loop(); }); } @@ -220,11 +220,10 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) { const auto row = static_cast(first_row + local_row); for (ssize_t col = 0; col < image->shape(1); ++col) { const FrameType raw = (*image)(row, col); - const AxisType val = - static_cast(raw) - - static_cast(my_pedestal.mean( - static_cast(local_row), - static_cast(col))); + const AxisType val = static_cast(raw) - + static_cast(my_pedestal.mean( + static_cast(local_row), + static_cast(col))); my_hist.fill(local_row, static_cast(col), val); const double sigma = my_std(local_row, col); if (sigma > 0.0 && @@ -284,7 +283,8 @@ PedestalTrackingPixelHistogram::values() const { return data; } -NDArray PedestalTrackingPixelHistogram::pedestal_mean() const { +NDArray +PedestalTrackingPixelHistogram::pedestal_mean() const { // Drain in-flight async fills and serialise with all other fan-outs // (Fill / PushPedestal / UpdateMean). m_mean is overwritten wholesale // by Pedestal::update_mean, so without the lock we could read torn @@ -324,8 +324,7 @@ void PedestalTrackingPixelHistogram::fill_with_threshold_( dispatch_(WorkKind::FillWithThreshold, &image); } -void PedestalTrackingPixelHistogram::fill_async( - NDArray image) { +void PedestalTrackingPixelHistogram::fill_async(NDArray image) { if (image.shape(0) != rows_ || image.shape(1) != cols_) { throw std::invalid_argument( "PedestalTrackingPixelHistogram image shape does not match " @@ -389,50 +388,49 @@ PedestalTrackingPixelHistogram::bin_edges() const { return partial_hists_.front().bin_edges(); } -void PedestalTrackingPixelHistogram::fill_from_file(const std::filesystem::path &fname, ssize_t max_frames, bool verbose) { +void PedestalTrackingPixelHistogram::fill_from_file( + const std::filesystem::path &fname, ssize_t max_frames, bool verbose) { constexpr std::size_t progress_interval = 66; auto last = std::chrono::steady_clock::now(); - + File f(fname); - //check that row col matches constructor - if (f.rows() != static_cast(rows_) || f.cols() != static_cast(cols_)) { - throw std::invalid_argument( - "PedestalTrackingPixelHistogram: Frame in file {} has shape ({}, {}) does not match " - "constructor shape"); + // check that row col matches constructor + if (f.rows() != static_cast(rows_) || + f.cols() != static_cast(cols_)) { + throw std::invalid_argument("PedestalTrackingPixelHistogram: Frame in " + "file {} has shape ({}, {}) does not match " + "constructor shape"); } - const ssize_t total_frames = f.total_frames(); - const ssize_t n_frames = max_frames == -1 ? total_frames : std::min(max_frames, total_frames); + const ssize_t n_frames = + max_frames == -1 ? total_frames : std::min(max_frames, total_frames); for (ssize_t i = 0; i < n_frames; ++i) { aare::NDArray frame({rows_, cols_}); f.read_into(reinterpret_cast(frame.data())); fill_async(frame); // print progress - if (verbose && ((i + 1) % progress_interval == 0 || (i + 1 == n_frames))) { + if (verbose && + ((i + 1) % progress_interval == 0 || (i + 1 == n_frames))) { const auto now = std::chrono::steady_clock::now(); - const double dt = - std::chrono::duration(now - last).count(); + const double dt = std::chrono::duration(now - last).count(); const std::size_t done_in_interval = - (i + 1) % progress_interval == 0 - ? progress_interval - : (i + 1) % progress_interval; + (i + 1) % progress_interval == 0 ? progress_interval + : (i + 1) % progress_interval; const double fps = - dt > 0.0 ? static_cast(done_in_interval) / dt - : 0.0; + dt > 0.0 ? static_cast(done_in_interval) / dt : 0.0; // Carriage return (no newline) so the line is rewritten // in place; flush since stdout is line-buffered. - - fmt::print("\rProgress: {}/{} ({:.1f}%) {:.1f} FPS ", - i + 1, n_frames, + + fmt::print("\rProgress: {}/{} ({:.1f}%) {:.1f} FPS ", i + 1, + n_frames, 100.0 * static_cast(i + 1) / static_cast(n_frames), fps); std::fflush(stdout); last = now; } - } flush(); if (verbose) { @@ -441,38 +439,39 @@ void PedestalTrackingPixelHistogram::fill_from_file(const std::filesystem::path } } -void PedestalTrackingPixelHistogram::process_pedestal_file(const std::filesystem::path &fname, ssize_t max_frames, bool verbose) { +void PedestalTrackingPixelHistogram::process_pedestal_file( + const std::filesystem::path &fname, ssize_t max_frames, bool verbose) { constexpr std::size_t progress_interval = 66; auto last = std::chrono::steady_clock::now(); - + File f(fname); - //check that row col matches constructor - if (f.rows() != static_cast(rows_) || f.cols() != static_cast(cols_)) { - throw std::invalid_argument( - "PedestalTrackingPixelHistogram: Frame in file {} has shape ({}, {}) does not match " - "constructor shape"); + // check that row col matches constructor + if (f.rows() != static_cast(rows_) || + f.cols() != static_cast(cols_)) { + throw std::invalid_argument("PedestalTrackingPixelHistogram: Frame in " + "file {} has shape ({}, {}) does not match " + "constructor shape"); } const ssize_t total_frames = f.total_frames(); - const ssize_t n_frames = max_frames == -1 ? total_frames : std::min(max_frames, total_frames); + const ssize_t n_frames = + max_frames == -1 ? total_frames : std::min(max_frames, total_frames); aare::NDArray frame({rows_, cols_}); for (ssize_t i = 0; i < n_frames; ++i) { f.read_into(reinterpret_cast(frame.data())); push_pedestal_no_update(frame.view()); - if (verbose && ((i + 1) % progress_interval == 0 || (i + 1 == n_frames))) { + if (verbose && + ((i + 1) % progress_interval == 0 || (i + 1 == n_frames))) { const auto now = std::chrono::steady_clock::now(); - const double dt = - std::chrono::duration(now - last).count(); + const double dt = std::chrono::duration(now - last).count(); const std::size_t done_in_interval = - (i + 1) % progress_interval == 0 - ? progress_interval - : (i + 1) % progress_interval; + (i + 1) % progress_interval == 0 ? progress_interval + : (i + 1) % progress_interval; const double fps = - dt > 0.0 ? static_cast(done_in_interval) / dt - : 0.0; - fmt::print("\rProgress: {}/{} ({:.1f}%) {:.1f} FPS ", - i + 1, n_frames, + dt > 0.0 ? static_cast(done_in_interval) / dt : 0.0; + fmt::print("\rProgress: {}/{} ({:.1f}%) {:.1f} FPS ", i + 1, + n_frames, 100.0 * static_cast(i + 1) / static_cast(n_frames), fps); diff --git a/src/PixelHistogram.cpp b/src/PixelHistogram.cpp index e249a44..bd7c94a 100644 --- a/src/PixelHistogram.cpp +++ b/src/PixelHistogram.cpp @@ -8,16 +8,19 @@ namespace aare { - -PixelHistogram::PixelHistogram(int rows, int cols, int n_bins, double xmin, double xmax, - int n_threads, std::size_t max_pending): - rows_(rows), cols_(cols), n_threads_(n_threads), xmin_(xmin), xmax_(xmax), current_image_(nullptr), - completed_threads_(0), stop_workers_(false), work_generation_(0) { +PixelHistogram::PixelHistogram(int rows, int cols, int n_bins, double xmin, + double xmax, int n_threads, + std::size_t max_pending) + : rows_(rows), cols_(cols), n_threads_(n_threads), xmin_(xmin), xmax_(xmax), + current_image_(nullptr), completed_threads_(0), stop_workers_(false), + work_generation_(0) { if (rows_ < 1 || cols_ < 1 || n_bins < 1) { - throw std::invalid_argument("PixelHistogram requires positive rows, cols and bins"); + throw std::invalid_argument( + "PixelHistogram requires positive rows, cols and bins"); } if (n_threads < 1) { - throw std::invalid_argument("PixelHistogram requires at least one thread"); + throw std::invalid_argument( + "PixelHistogram requires at least one thread"); } if (max_pending < 1) { throw std::invalid_argument("PixelHistogram requires max_pending >= 1"); @@ -31,7 +34,7 @@ PixelHistogram::PixelHistogram(int rows, int cols, int n_bins, double xmin, doub // ceil(rows/n_threads) scheme leaving trailing threads with zero or // negative row counts (e.g. rows=17, n_threads=8). row_offsets_.resize(n_threads_ + 1); - const int base = rows_ / n_threads_; + const int base = rows_ / n_threads_; const int extra = rows_ % n_threads_; int offset = 0; for (int i = 0; i < n_threads_; ++i) { @@ -56,7 +59,8 @@ PixelHistogram::PixelHistogram(int rows, int cols, int n_bins, double xmin, doub // Async pipeline. The PCQ holds (size - 1) usable slots, so size up by // one to honour the requested max_pending. - async_queue_ = std::make_unique(static_cast(max_pending + 1)); + async_queue_ = std::make_unique( + static_cast(max_pending + 1)); coordinator_ = std::thread([this]() { this->coordinator_loop(); }); } @@ -77,7 +81,7 @@ PixelHistogram::~PixelHistogram() { work_cv_.notify_all(); // Join all worker threads - for (auto& thread : workers_) { + for (auto &thread : workers_) { if (thread.joinable()) { thread.join(); } @@ -100,19 +104,19 @@ void PixelHistogram::worker_loop(int thread_id) { work_cv_.wait(lock, [this, last_generation]() { return work_generation_ != last_generation || stop_workers_; }); - + if (stop_workers_) { break; } - + // Get work assignment - const NDView& image = *current_image_; + const NDView &image = *current_image_; const int generation = work_generation_; const int first_row = row_start(thread_id); const int local_rows = row_count(thread_id); - + lock.unlock(); - + // Do the work: fill this thread's partial histogram. The // [xmin, xmax) range gate lives inside PixelHistogramImpl::fill. auto &my_hist = partial_hists_[thread_id]; @@ -123,7 +127,7 @@ void PixelHistogram::worker_loop(int thread_id) { my_hist.fill(local_row, static_cast(col), val); } } - + // Signal completion { std::unique_lock done_lock(work_mutex_); @@ -187,14 +191,16 @@ void PixelHistogram::dispatch(const NDView &image) { // Wait for all workers to complete { std::unique_lock lock(work_mutex_); - done_cv_.wait(lock, [this]() { return completed_threads_ == n_threads_; }); - current_image_ = nullptr; // Clear work assignment + done_cv_.wait(lock, + [this]() { return completed_threads_ == n_threads_; }); + current_image_ = nullptr; // Clear work assignment } } void PixelHistogram::fill_async(NDArray image) { if (image.shape(0) != rows_ || image.shape(1) != cols_) { - throw std::invalid_argument("PixelHistogram image shape does not match constructor shape"); + throw std::invalid_argument( + "PixelHistogram image shape does not match constructor shape"); } // SPSC backpressure: spin with a short sleep until a slot frees up. @@ -206,7 +212,8 @@ void PixelHistogram::fill_async(NDArray image) { } void PixelHistogram::flush() const { - while (!async_queue_->isEmpty() || coordinator_busy_.load(std::memory_order_acquire)) { + while (!async_queue_->isEmpty() || + coordinator_busy_.load(std::memory_order_acquire)) { std::this_thread::sleep_for(async_wait_); } } @@ -221,7 +228,8 @@ std::size_t PixelHistogram::pending() const { void PixelHistogram::coordinator_loop() { NDArray item; - while (!stop_coordinator_.load(std::memory_order_acquire) || !async_queue_->isEmpty()) { + while (!stop_coordinator_.load(std::memory_order_acquire) || + !async_queue_->isEmpty()) { if (async_queue_->read(item)) { coordinator_busy_.store(true, std::memory_order_release); dispatch(item.view()); diff --git a/src/PixelHistogram.test.cpp b/src/PixelHistogram.test.cpp index dba04e2..07de22f 100644 --- a/src/PixelHistogram.test.cpp +++ b/src/PixelHistogram.test.cpp @@ -12,9 +12,9 @@ #include "aare/PixelHistogram.hpp" -using aare::PixelHistogram; using aare::NDArray; using aare::NDView; +using aare::PixelHistogram; namespace { // The synchronous fill() has been removed; fill_async() is the only entry @@ -27,22 +27,24 @@ void fill_blocking(PixelHistogram &hist, const NDView &image) { } } // namespace -TEST_CASE("Fill one pixel of a 5x10 histogram"){ +TEST_CASE("Fill one pixel of a 5x10 histogram") { PixelHistogram hist(5, 10, 20, 0.0, 10.0); - NDArray image({5, 10}, -1.0); //Need to fill with -1 to not generate counts - - image(2, 3) = 5.7; // This should go into bin 11 (since bins are [0-0.5), [0.5-1.0), ..., [9.5-10.0)) - + NDArray image( + {5, 10}, -1.0); // Need to fill with -1 to not generate counts + + image(2, 3) = 5.7; // This should go into bin 11 (since bins are [0-0.5), + // [0.5-1.0), ..., [9.5-10.0)) + fill_blocking(hist, image.view()); - + auto values = hist.values(); REQUIRE(values.shape(0) == 5); REQUIRE(values.shape(1) == 10); REQUIRE(values.shape(2) == 20); - + // Check that the correct bin for pixel (2,3) has count 1 CHECK(values(2, 3, 11) == 1); - + // Check that all other bins are zero for (ssize_t row = 0; row < values.shape(0); ++row) { for (ssize_t col = 0; col < values.shape(1); ++col) { @@ -55,7 +57,7 @@ TEST_CASE("Fill one pixel of a 5x10 histogram"){ } } -TEST_CASE("Fill pixels with uneven partial histogram row slices"){ +TEST_CASE("Fill pixels with uneven partial histogram row slices") { PixelHistogram hist(5, 4, 10, 0.0, 10.0, 3); NDArray image({5, 4}, -1.0); @@ -103,12 +105,12 @@ TEST_CASE("Row partitioning handles rows < n_threads * ceil(rows/n_threads)") { // at least floor(rows / n_threads) rows. We also exercise the case // where n_threads > rows, which is clamped down to rows. const auto p = GENERATE(table({ - {17, 8}, // previously broken: 8 threads * ceil(17/8) = 24 > 17 - {17, 5}, // previously broken: 5 threads * ceil(17/5) = 20 > 17 - {13, 4}, // previously broken: 4 * ceil(13/4) = 16 > 13 - {17, 32}, // n_threads > rows -> clamped to rows - {1, 8}, // n_threads > rows -> single thread, single row - {6, 6}, // balanced + {17, 8}, // previously broken: 8 threads * ceil(17/8) = 24 > 17 + {17, 5}, // previously broken: 5 threads * ceil(17/5) = 20 > 17 + {13, 4}, // previously broken: 4 * ceil(13/4) = 16 > 13 + {17, 32}, // n_threads > rows -> clamped to rows + {1, 8}, // n_threads > rows -> single thread, single row + {6, 6}, // balanced })); const int rows = std::get<0>(p); const int n_threads = std::get<1>(p); @@ -139,8 +141,8 @@ TEST_CASE("Row partitioning handles rows < n_threads * ceil(rows/n_threads)") { (c == 0 && b == expected_bin) ? uint16_t{1} : uint16_t{0}; if (h(r, c, b) != want) { INFO("rows=" << rows << " n_threads=" << n_threads - << " r=" << r << " c=" << c << " b=" << b - << " got=" << h(r, c, b) << " want=" << want); + << " r=" << r << " c=" << c << " b=" << b + << " got=" << h(r, c, b) << " want=" << want); CHECK(h(r, c, b) == want); } } @@ -180,20 +182,23 @@ TEST_CASE("Random fills match a reference implementation") { NDArray expected({rows, cols, n_bins}, 0u); const float inv_range = static_cast(n_bins) / (xmax - xmin); - for (const auto& img : frames) { + for (const auto &img : frames) { for (ssize_t r = 0; r < rows; ++r) { for (ssize_t c = 0; c < cols; ++c) { const float v = img(r, c); - if (!(v >= xmin) || !(v < xmax)) continue; + if (!(v >= xmin) || !(v < xmax)) + continue; int bin = static_cast((v - xmin) * inv_range); - if (bin >= n_bins) bin = n_bins - 1; - if (bin < 0) bin = 0; + if (bin >= n_bins) + bin = n_bins - 1; + if (bin < 0) + bin = 0; expected(r, c, bin) += 1; } } } - for (const auto& img : frames) { + for (const auto &img : frames) { fill_blocking(hist, img.view()); } auto h = hist.values(); @@ -208,10 +213,9 @@ TEST_CASE("Random fills match a reference implementation") { for (ssize_t b = 0; b < n_bins && all_match; ++b) { if (h(r, c, b) != expected(r, c, b)) { all_match = false; - INFO("n_threads=" << n_threads << " r=" << r - << " c=" << c << " b=" << b - << " got=" << h(r, c, b) - << " expected=" << expected(r, c, b)); + INFO("n_threads=" << n_threads << " r=" << r << " c=" << c + << " b=" << b << " got=" << h(r, c, b) + << " expected=" << expected(r, c, b)); CHECK(h(r, c, b) == expected(r, c, b)); } } @@ -235,7 +239,8 @@ TEST_CASE("Streamed async fill matches per-frame flushed fill") { // path at least a few times during the run. constexpr std::size_t max_pending = 4; - PixelHistogram async_hist(rows, cols, n_bins, xmin, xmax, n_threads, max_pending); + PixelHistogram async_hist(rows, cols, n_bins, xmin, xmax, n_threads, + max_pending); PixelHistogram sync_hist(rows, cols, n_bins, xmin, xmax, n_threads); std::mt19937 rng(0xA5A5A5A5); @@ -265,8 +270,8 @@ TEST_CASE("Streamed async fill matches per-frame flushed fill") { for (ssize_t b = 0; b < n_bins && all_match; ++b) { if (a(r, c, b) != s(r, c, b)) { all_match = false; - INFO("r=" << r << " c=" << c << " b=" << b - << " async=" << a(r, c, b) << " sync=" << s(r, c, b)); + INFO("r=" << r << " c=" << c << " b=" << b << " async=" + << a(r, c, b) << " sync=" << s(r, c, b)); CHECK(a(r, c, b) == s(r, c, b)); } } @@ -305,10 +310,11 @@ TEST_CASE("Destructor drains pending async fills") { frames.push_back(std::move(img)); } - NDArray snapshot({rows, cols, n_bins}, uint16_t{0}); + NDArray snapshot({rows, cols, n_bins}, + uint16_t{0}); { PixelHistogram hist(rows, cols, n_bins, xmin, xmax, 2, max_pending); - for (auto& img : frames) { + for (auto &img : frames) { // Move a copy so we can also build the reference below. NDArray copy({rows, cols}, 0.0f); std::memcpy(copy.data(), img.data(), copy.total_bytes()); @@ -321,7 +327,8 @@ TEST_CASE("Destructor drains pending async fills") { } PixelHistogram reference(rows, cols, n_bins, xmin, xmax, 2); - for (const auto& img : frames) fill_blocking(reference, img.view()); + for (const auto &img : frames) + fill_blocking(reference, img.view()); auto expected = reference.values(); bool all_match = true; @@ -331,8 +338,8 @@ TEST_CASE("Destructor drains pending async fills") { if (snapshot(r, c, b) != expected(r, c, b)) { all_match = false; INFO("r=" << r << " c=" << c << " b=" << b - << " got=" << snapshot(r, c, b) - << " expected=" << expected(r, c, b)); + << " got=" << snapshot(r, c, b) + << " expected=" << expected(r, c, b)); CHECK(snapshot(r, c, b) == expected(r, c, b)); } }