diff --git a/CMakeLists.txt b/CMakeLists.txt index bedb54e..6d3f369 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -490,6 +490,7 @@ if(AARE_TESTS) ${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFile.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFinderMT.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Pedestal.test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/PixelHistogram.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/JungfrauDataFile.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp diff --git a/include/aare/PixelHistogram.hpp b/include/aare/PixelHistogram.hpp index c54133a..17f3408 100644 --- a/include/aare/PixelHistogram.hpp +++ b/include/aare/PixelHistogram.hpp @@ -5,6 +5,11 @@ //Lets see if we need to hide it behind a pimpl #include #include +#include +#include +#include +#include +#include namespace bh = boost::histogram; namespace aare { @@ -20,10 +25,29 @@ class PixelHistogram { bh::axis::integer, bh::axis::integer>; using Hist = bh::histogram>; - Hist hist_; + int rows_; + int cols_; + int n_threads_; + std::vector partial_hists_; + + // Thread pool members + std::vector workers_; + std::mutex work_mutex_; + std::condition_variable work_cv_; + std::condition_variable done_cv_; + const NDView* current_image_; + std::atomic completed_threads_; + std::atomic stop_workers_; + int work_generation_; + + // Private worker thread method + void worker_loop(int thread_id); + int row_start(int thread_id) const; + int row_count(int thread_id) const; public: - PixelHistogram(int rows, int cols, int n_bins, double xmin, double xmax); + PixelHistogram(int rows, int cols, int n_bins, double xmin, double xmax, int n_threads = 1); + ~PixelHistogram(); void fill(const NDView &image); NDArray hdata() const; NDArray bin_centers() const; diff --git a/python/src/bind_PixelHistogram.hpp b/python/src/bind_PixelHistogram.hpp index 23ec6d1..65b2451 100644 --- a/python/src/bind_PixelHistogram.hpp +++ b/python/src/bind_PixelHistogram.hpp @@ -13,7 +13,7 @@ using namespace ::aare; void define_pixel_histogram_bindings(py::module &m) { py::class_(m, "PixelHistogram", "A histogram for pixel-wise statistics") - .def(py::init(), + .def(py::init(), R"( Initialize a PixelHistogram. @@ -23,9 +23,10 @@ void define_pixel_histogram_bindings(py::module &m) { n_bins: Number of histogram bins xmin: Minimum value for histogram range xmax: Maximum value for histogram range + n_threads: Number of threads for parallel filling (default: 1) )", py::arg("rows"), py::arg("cols"), py::arg("n_bins"), - py::arg("xmin"), py::arg("xmax")) + py::arg("xmin"), py::arg("xmax"), py::arg("n_threads") = 1) .def("fill", [](PixelHistogram &self, diff --git a/src/PixelHistogram.cpp b/src/PixelHistogram.cpp index 2d95b54..7b5da28 100644 --- a/src/PixelHistogram.cpp +++ b/src/PixelHistogram.cpp @@ -1,45 +1,170 @@ #include "aare/PixelHistogram.hpp" +#include #include #include #include #include +#include +#include namespace aare { -PixelHistogram::PixelHistogram(int rows, int cols, int n_bins, double xmin, double xmax): hist_( - bh::axis::regular(n_bins, xmin, xmax, "value"), - bh::axis::integer(0, cols, "y"), - bh::axis::integer(0, rows, "x") - ) { - // Constructor implementation +PixelHistogram::PixelHistogram(int rows, int cols, int n_bins, double xmin, double xmax, int n_threads): + rows_(rows), cols_(cols), n_threads_(n_threads), current_image_(nullptr), + completed_threads_(0), stop_workers_(false), work_generation_(0) { + if (rows_ < 1 || cols_ < 1 || n_bins < 1) { + throw std::invalid_argument("PixelHistogram requires positive rows, cols and bins"); + } + if (n_threads < 1) { + throw std::invalid_argument("PixelHistogram requires at least one thread"); + } + + n_threads_ = std::min(n_threads_, rows_); + + // Initialize partial histograms for each thread + for (int i = 0; i < n_threads_; ++i) { + const auto local_rows = row_count(i); + partial_hists_.emplace_back( + bh::axis::regular(n_bins, xmin, xmax, "value"), + bh::axis::integer(0, cols, "y"), + bh::axis::integer(0, local_rows, "x") + ); + } + + // Spawn worker threads + for (int i = 0; i < n_threads_; ++i) { + workers_.emplace_back([this, i]() { this->worker_loop(i); }); + } +} + +PixelHistogram::~PixelHistogram() { + // Signal all workers to stop + { + std::unique_lock lock(work_mutex_); + stop_workers_ = true; + } + work_cv_.notify_all(); + + // Join all worker threads + for (auto& thread : workers_) { + if (thread.joinable()) { + thread.join(); + } + } +} + +int PixelHistogram::row_start(int thread_id) const { + const int rows_per_thread = (rows_ + n_threads_ - 1) / n_threads_; + return thread_id * rows_per_thread; +} + +int PixelHistogram::row_count(int thread_id) const { + const int start = row_start(thread_id); + const int end = std::min(start + (rows_ + n_threads_ - 1) / n_threads_, rows_); + return end - start; +} + +void PixelHistogram::worker_loop(int thread_id) { + int last_generation = 0; + + while (true) { + std::unique_lock lock(work_mutex_); + work_cv_.wait(lock, [this, last_generation]() { + return work_generation_ != last_generation || stop_workers_; + }); + + if (stop_workers_) { + break; + } + + // Get work assignment + const NDView& image = *current_image_; + const int generation = work_generation_; + const int first_row = row_start(thread_id); + const int local_rows = row_count(thread_id); + + lock.unlock(); + + // Do the work: fill this thread's partial histogram + for (int local_row = 0; local_row < local_rows; ++local_row) { + const auto row = static_cast(first_row + local_row); + for (ssize_t col = 0; col < image.shape(1); ++col) { + partial_hists_[thread_id](image(row, col), col, local_row); + } + } + + // Signal completion + { + std::unique_lock done_lock(work_mutex_); + last_generation = generation; + completed_threads_++; + if (completed_threads_ == n_threads_) { + done_cv_.notify_one(); + } + } + } } NDArray PixelHistogram::hdata() const { - const auto bins = static_cast(hist_.axis(0).size()); - const auto cols = static_cast(hist_.axis(1).size()); - const auto rows = static_cast(hist_.axis(2).size()); + const auto &hist = partial_hists_.front(); + const auto bins = static_cast(hist.axis(0).size()); + const auto cols = static_cast(hist.axis(1).size()); + const auto rows = static_cast(rows_); NDArray data({rows, cols, bins}); - const auto &storage = bh::unsafe_access::storage(hist_); - std::memcpy(data.data(), storage.data(), data.total_bytes()); + // Merge all partial histograms into data array + std::memset(data.data(), 0, data.total_bytes()); + + for (int t = 0; t < n_threads_; ++t) { + const auto &storage = bh::unsafe_access::storage(partial_hists_[t]); + const StorageType *partial_data = storage.data(); + const auto first_row = static_cast(row_start(t)); + const auto local_rows = static_cast(row_count(t)); + + for (ssize_t local_row = 0; local_row < local_rows; ++local_row) { + for (ssize_t col = 0; col < cols; ++col) { + const auto src_offset = (local_row * cols + col) * bins; + const auto dst_offset = ((first_row + local_row) * cols + col) * bins; + for (ssize_t bin = 0; bin < bins; ++bin) { + data.data()[dst_offset + bin] += partial_data[src_offset + bin]; + } + } + } + } return data; } void PixelHistogram::fill(const NDView &image) { - for (ssize_t row = 0; row < image.shape(0); ++row) { - for (ssize_t col = 0; col < image.shape(1); ++col) { - hist_(image(row, col), col, row); - } + if (image.shape(0) != rows_ || image.shape(1) != cols_) { + throw std::invalid_argument("PixelHistogram image shape does not match constructor shape"); + } + + // Reset counters and set work + { + std::unique_lock lock(work_mutex_); + completed_threads_ = 0; + current_image_ = ℑ + ++work_generation_; + } + + // Signal all worker threads to start + work_cv_.notify_all(); + + // Wait for all workers to complete + { + std::unique_lock lock(work_mutex_); + done_cv_.wait(lock, [this]() { return completed_threads_ == n_threads_; }); + current_image_ = nullptr; // Clear work assignment } } NDArray PixelHistogram::bin_centers() const { - const auto& value_axis = hist_.axis(0); + const auto& value_axis = partial_hists_.front().axis(0); const auto n_bins = static_cast(value_axis.size()); NDArray centers({n_bins}); @@ -55,7 +180,7 @@ NDArray PixelHistogram::bin_centers() const { } NDArray PixelHistogram::bin_edges() const { - const auto& value_axis = hist_.axis(0); + const auto& value_axis = partial_hists_.front().axis(0); const auto n_bins = static_cast(value_axis.size()); NDArray edges({n_bins + 1}); diff --git a/src/PixelHistogram.test.cpp b/src/PixelHistogram.test.cpp new file mode 100644 index 0000000..6daae60 --- /dev/null +++ b/src/PixelHistogram.test.cpp @@ -0,0 +1,79 @@ +#include +#include + + +#include "test_config.hpp" +#include "test_macros.hpp" + +#include "aare/PixelHistogram.hpp" + +using aare::PixelHistogram; +using aare::NDArray; +using aare::NDView; + +TEST_CASE("Fill one pixel of a 5x10 histogram"){ + PixelHistogram hist(5, 10, 20, 0.0, 10.0); + NDArray image({5, 10}, -1.0); //Need to fill with -1 to not generate counts + + image(2, 3) = 5.7; // This should go into bin 11 (since bins are [0-0.5), [0.5-1.0), ..., [9.5-10.0)) + + hist.fill(image.view()); + + auto hdata = hist.hdata(); + REQUIRE(hdata.shape(0) == 5); + REQUIRE(hdata.shape(1) == 10); + REQUIRE(hdata.shape(2) == 20); + + // Check that the correct bin for pixel (2,3) has count 1 + CHECK(hdata(2, 3, 11) == 1); + + // Check that all other bins are zero + for (ssize_t row = 0; row < hdata.shape(0); ++row) { + for (ssize_t col = 0; col < hdata.shape(1); ++col) { + for (ssize_t bin = 0; bin < hdata.shape(2); ++bin) { + if (!(row == 2 && col == 3 && bin == 11)) { + CHECK(hdata(row, col, bin) == 0); + } + } + } + } +} + +TEST_CASE("Fill pixels with uneven partial histogram row slices"){ + PixelHistogram hist(5, 4, 10, 0.0, 10.0, 3); + NDArray image({5, 4}, -1.0); + + image(0, 0) = 0.2; + image(1, 1) = 1.2; + image(2, 2) = 2.2; + image(3, 3) = 3.2; + image(4, 0) = 4.2; + + hist.fill(image.view()); + + auto hdata = hist.hdata(); + REQUIRE(hdata.shape(0) == 5); + REQUIRE(hdata.shape(1) == 4); + REQUIRE(hdata.shape(2) == 10); + + CHECK(hdata(0, 0, 0) == 1); + CHECK(hdata(1, 1, 1) == 1); + CHECK(hdata(2, 2, 2) == 1); + CHECK(hdata(3, 3, 3) == 1); + CHECK(hdata(4, 0, 4) == 1); + + for (ssize_t row = 0; row < hdata.shape(0); ++row) { + for (ssize_t col = 0; col < hdata.shape(1); ++col) { + for (ssize_t bin = 0; bin < hdata.shape(2); ++bin) { + const bool expected = (row == 0 && col == 0 && bin == 0) || + (row == 1 && col == 1 && bin == 1) || + (row == 2 && col == 2 && bin == 2) || + (row == 3 && col == 3 && bin == 3) || + (row == 4 && col == 0 && bin == 4); + if (!expected) { + CHECK(hdata(row, col, bin) == 0); + } + } + } + } +}