diff --git a/CMakeLists.txt b/CMakeLists.txt index d9c07b0..5f291be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -393,6 +393,7 @@ set(PUBLICHEADERS include/aare/Models.hpp include/aare/FileInterface.hpp include/aare/FilePtr.hpp + include/aare/FastPedestal.hpp include/aare/Frame.hpp include/aare/hist/PixelHistogram.hpp include/aare/hist/PixelHistogramImpl.hpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index ef378e4..7803127 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -18,8 +18,8 @@ FetchContent_MakeAvailable(benchmark) add_executable(benchmarks) target_sources( - benchmarks PRIVATE ndarray_benchmark.cpp calculateeta_benchmark.cpp - reduce_benchmark.cpp) + benchmarks PRIVATE ndarray_benchmark.cpp ndview_benchmark.cpp + calculateeta_benchmark.cpp reduce_benchmark.cpp) # Link Google Benchmark and other necessary libraries target_link_libraries(benchmarks PRIVATE benchmark::benchmark aare_core diff --git a/benchmarks/ndview_benchmark.cpp b/benchmarks/ndview_benchmark.cpp new file mode 100644 index 0000000..b065319 --- /dev/null +++ b/benchmarks/ndview_benchmark.cpp @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MPL-2.0 +#include "aare/NDArray.hpp" +#include "aare/NDView.hpp" +#include + +using aare::NDArray; +using aare::NDView; + +static void BM_CreateNDView(benchmark::State &st) { + NDArray arr{{1024, 1024}, 0}; + for (auto _ : st) { + // This code gets timed + auto res = arr.view(); + benchmark::DoNotOptimize(res); + } +} +BENCHMARK(BM_CreateNDView); \ No newline at end of file diff --git a/include/aare/FastPedestal.hpp b/include/aare/FastPedestal.hpp new file mode 100644 index 0000000..ab3aa96 --- /dev/null +++ b/include/aare/FastPedestal.hpp @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: MPL-2.0 +#pragma once +#include "aare/Frame.hpp" +#include "aare/NDArray.hpp" +#include "aare/NDView.hpp" +#include + +namespace aare { + +/** + * @brief Calculate the pedestal of a series of frames. Can be used as + * standalone but mostly used in the ClusterFinder. + * + * @tparam SUM_TYPE type of the sum + */ +template class FastPedestal { + // TODO! Force floating point sum type? + // how does the internal calculation work with integers? + + uint32_t m_rows; + uint32_t m_cols; + + uint32_t m_samples; + SUM_TYPE m_inv_samples; // precompute 1/m_samples for faster division + uint32_t m_cur_samples = 0; // TODO! do we need this when we have m_samples? + + // for cache we want to keep sum and sum2 close + struct Entry { + SUM_TYPE sum; + SUM_TYPE sum2; + }; + // TODO! in case of int needs to be changed to uint64_t + NDArray m_sum; + + // Cache mean since it is used over and over in the ClusterFinder + // This optimization is related to the access pattern of the ClusterFinder + // Relies on having more reads than pushes to the pedestal + NDArray m_mean; + + public: + FastPedestal(uint32_t rows, uint32_t cols, uint32_t n_samples = 1000) + : m_rows(rows), m_cols(cols), m_samples(n_samples), + m_inv_samples(1.0 / n_samples), + m_sum(NDArray({rows, cols})), + m_mean(NDArray({rows, cols})) { + assert(rows > 0 && cols > 0 && n_samples > 0); + m_sum = Entry{SUM_TYPE(0), SUM_TYPE(0)}; + m_mean = SUM_TYPE(0); + } + ~FastPedestal() = default; + + NDArray mean() { return m_mean; } + + const NDView view() const { return m_mean.view(); } + + SUM_TYPE mean(const uint32_t row, const uint32_t col) const { + return m_mean(row, col); + } + + SUM_TYPE variance(const uint32_t row, const uint32_t col) const { + auto &entry = m_sum(row, col); + auto m2 = (entry.sum * m_inv_samples) * ((entry.sum * m_inv_samples)); + return entry.sum2 * m_inv_samples - m2; + } + + NDArray variance() { + NDArray res({m_rows, m_cols}); + for (ssize_t row = 0; row < m_rows; ++row) { + for (ssize_t col = 0; col < m_cols; ++col) { + res(row, col) = variance(row, col); + } + } + return res; + } + + SUM_TYPE std(const uint32_t row, const uint32_t col) const { + return std::sqrt(variance(row, col)); + } + + NDArray std() { + NDArray res({m_rows, m_cols}); + for (ssize_t row = 0; row < m_rows; ++row) { + for (ssize_t col = 0; col < m_cols; ++col) { + res(row, col) = std(row, col); + } + } + return res; + } + + bool ready() { return m_cur_samples == m_samples; } + + uint32_t cur_samples() { return m_cur_samples; } + + void clear() { + m_sum = Entry{SUM_TYPE(0), SUM_TYPE(0)}; + m_mean = SUM_TYPE(0); + } + + void clear(const uint32_t row, const uint32_t col) { + m_sum(row, col) = Entry{SUM_TYPE(0), SUM_TYPE(0)}; + m_mean(row, col) = 0; + } + + template void push(NDView frame) { + assert(frame.size() == m_rows * m_cols); + + // TODO! move away from m_rows, m_cols + if (frame.shape() != std::array{m_rows, m_cols}) { + throw std::runtime_error( + "Frame shape does not match pedestal shape"); + } + + for (size_t row = 0; row < m_rows; row++) { + for (size_t col = 0; col < m_cols; col++) { + push(row, col, frame(row, col)); + } + } + } + template void push_init(NDView frame) { + assert(frame.size() == m_rows * m_cols); + + // TODO! move away from m_rows, m_cols + if (frame.shape() != std::array{m_rows, m_cols}) { + throw std::runtime_error( + "Frame shape does not match pedestal shape"); + } + + for (size_t row = 0; row < m_rows; row++) { + for (size_t col = 0; col < m_cols; col++) { + const auto val = static_cast(frame(row, col)); + auto &entry = m_sum(row, col); + entry.sum += val; + entry.sum2 += val * val; + } + } + m_cur_samples += 1; + } + + template void push(Frame &frame) { + assert(frame.rows() == static_cast(m_rows) && + frame.cols() == static_cast(m_cols)); + push(frame.view()); + } + + // getter functions + uint32_t rows() const { return m_rows; } + uint32_t cols() const { return m_cols; } + uint32_t n_samples() const { return m_samples; } + + // pixel level operations (should be refactored to allow users to implement + // their own pixel level operations) + template + void push(const uint32_t row, const uint32_t col, const T val_) { + SUM_TYPE val = static_cast(val_); + auto &entry = m_sum(row, col); + entry.sum += val - entry.sum * m_inv_samples; + entry.sum2 += val * val - entry.sum2 * m_inv_samples; + m_mean(row, col) = entry.sum * m_inv_samples; + } + + template + void push_no_update(const uint32_t row, const uint32_t col, const T val_) { + SUM_TYPE val = static_cast(val_); + auto &entry = m_sum(row, col); + entry.sum += val - entry.sum * m_inv_samples; + entry.sum2 += val * val - entry.sum2 * m_inv_samples; + } + + /** + * @brief Update the mean of the pedestal. This is used after having done + * push_no_update. It is not necessary to call this function after push. + */ + void update_mean() { + for (size_t row = 0; row < m_rows; row++) { + for (size_t col = 0; col < m_cols; col++) { + const auto &entry = m_sum(row, col); + m_mean(row, col) = entry.sum * m_inv_samples; + } + } + } +}; +} // namespace aare diff --git a/include/aare/hist/PedestalTrackingPixelHistogram.hpp b/include/aare/hist/PedestalTrackingPixelHistogram.hpp index 263ea45..9cea031 100644 --- a/include/aare/hist/PedestalTrackingPixelHistogram.hpp +++ b/include/aare/hist/PedestalTrackingPixelHistogram.hpp @@ -1,4 +1,5 @@ #pragma once +#include "aare/FastPedestal.hpp" #include "aare/NDArray.hpp" #include "aare/NDView.hpp" #include "aare/Pedestal.hpp" @@ -44,7 +45,7 @@ class PedestalTrackingPixelHistogram { // worker using the LOCAL row index (i.e. 0..row_count(t)-1), NOT the // global row index. Owned exclusively by worker `t` during a // dispatched fan-out. - std::vector> partial_pedestals_; + std::vector> partial_pedestals_; std::vector> partial_std_; // cached for pedestal // tracking diff --git a/python/aare/__init__.py b/python/aare/__init__.py index 10872f8..36e1220 100644 --- a/python/aare/__init__.py +++ b/python/aare/__init__.py @@ -5,7 +5,14 @@ from . import _aare from . import transform from ._aare import File, RawMasterFile, RawSubFile, JungfrauDataFile -from ._aare import Pedestal_d, Pedestal_f, ClusterFinder_Cluster3x3i, VarClusterFinder +from ._aare import ( + FastPedestal_d, + FastPedestal_f, + Pedestal_d, + Pedestal_f, + ClusterFinder_Cluster3x3i, + VarClusterFinder, +) from ._aare import DetectorType, ReadoutMode from ._aare import hitmap from ._aare import ROI diff --git a/python/src/fast_pedestal.hpp b/python/src/fast_pedestal.hpp new file mode 100644 index 0000000..b8815bf --- /dev/null +++ b/python/src/fast_pedestal.hpp @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: MPL-2.0 + +#include "aare/FastPedestal.hpp" +#include "np_helper.hpp" + +#include +#include +#include +#include + +namespace py = pybind11; + +template +void define_fast_pedestal_bindings(py::module &m, const std::string &name) { + + py::class_>(m, name.c_str(), py::buffer_protocol()) + .def(py::init()) + .def(py::init()) + .def("mean", + [](FastPedestal &self) { + auto mean = new NDArray{}; + *mean = self.mean(); + return return_image_data(mean); + }) + .def("view", + [](py::object self_py) { + auto &self = self_py.cast &>(); + auto view = self.view(); + std::array shape{ + static_cast(view.shape(0)), + static_cast(view.shape(1))}; + std::array byte_strides{ + static_cast(view.strides()[0]) * + static_cast(sizeof(SUM_TYPE)), + static_cast(view.strides()[1]) * + static_cast(sizeof(SUM_TYPE))}; + auto array = py::array_t(shape, byte_strides, + view.data(), self_py); + array.attr("setflags")(py::arg("write") = false); + return array; + }) + .def("variance", + [](FastPedestal &self) { + auto variance = new NDArray{}; + *variance = self.variance(); + return return_image_data(variance); + }) + .def("std", + [](FastPedestal &self) { + auto standard_deviation = new NDArray{}; + *standard_deviation = self.std(); + return return_image_data(standard_deviation); + }) + .def( + "__array_ufunc__", + [](py::object self, py::object ufunc, const std::string &method, + py::args inputs, py::kwargs kwargs) -> py::object { + if (method != "__call__" || inputs.size() != 2 || + inputs[1].ptr() != self.ptr() || + py::cast(ufunc.attr("__name__")) != + "subtract") { + return py::reinterpret_borrow( + Py_NotImplemented); + } + + auto mean = + py::module_::import("builtins").attr("memoryview")(self); + return ufunc(inputs[0], mean, **kwargs); + }, + "Support subtracting a FastPedestal from a NumPy array.") + .def("clear", py::overload_cast<>(&FastPedestal::clear)) + .def_property_readonly("rows", &FastPedestal::rows) + .def_property_readonly("cols", &FastPedestal::cols) + .def_property_readonly("cur_samples", + &FastPedestal::cur_samples) + .def_property_readonly("ready", &FastPedestal::ready) + .def_property_readonly("n_samples", &FastPedestal::n_samples) + // .def_property_readonly("sum", &FastPedestal::get_sum) + .def("clone", + [](FastPedestal &pedestal) { + return FastPedestal(pedestal); + }) + .def( + "push", + [](FastPedestal &pedestal, + py::array_t &frame) { + pedestal.push(make_view_2d(frame)); + }, + py::arg("frame").noconvert()) + .def( + "push_init", + [](FastPedestal &pedestal, + py::array_t &frame) { + pedestal.push_init(make_view_2d(frame)); + }, + py::arg("frame").noconvert()) + // .def( + // "push_no_update", + // [](FastPedestal &pedestal, + // py::array_t &frame) { + // pedestal.push_no_update(make_view_2d(frame)); + // }, + // py::arg("frame").noconvert()) + .def("update_mean", &FastPedestal::update_mean) + .def_buffer([](FastPedestal &self) { + auto mean = self.view(); + return py::buffer_info( + const_cast(mean.data()), sizeof(SUM_TYPE), + py::format_descriptor::format(), 2, + {static_cast(mean.shape(0)), + static_cast(mean.shape(1))}, + {static_cast(mean.strides()[0] * sizeof(SUM_TYPE)), + static_cast(mean.strides()[1] * + sizeof(SUM_TYPE))}, + true); + }); +} diff --git a/python/src/module.cpp b/python/src/module.cpp index 023a19e..8d9e1a9 100644 --- a/python/src/module.cpp +++ b/python/src/module.cpp @@ -20,6 +20,7 @@ // TODO! migrate the other names #include "ctb_raw_file.hpp" +#include "fast_pedestal.hpp" #include "file.hpp" #include "fit.hpp" #include "jungfrau_data_file.hpp" @@ -70,6 +71,8 @@ PYBIND11_MODULE(_aare, m) { define_pedestal_tracking_pixel_histogram_bindings(m); define_pedestal_bindings(m, "Pedestal_d"); define_pedestal_bindings(m, "Pedestal_f"); + define_fast_pedestal_bindings(m, "FastPedestal_d"); + define_fast_pedestal_bindings(m, "FastPedestal_f"); define_fit_bindings(m); define_interpolation_bindings(m); define_jungfrau_data_file_io_bindings(m); diff --git a/python/tests/test_FastPedestal.py b/python/tests/test_FastPedestal.py new file mode 100644 index 0000000..377e54b --- /dev/null +++ b/python/tests/test_FastPedestal.py @@ -0,0 +1,56 @@ +import numpy as np +import pytest + +from aare import FastPedestal_d, FastPedestal_f + + +@pytest.mark.parametrize( + ("pedestal_type", "expected_dtype"), + [(FastPedestal_d, np.float64), (FastPedestal_f, np.float32)], +) +def test_fast_pedestal_initialization(pedestal_type, expected_dtype): + pedestal = pedestal_type(2, 3, 2) + first = np.array([[2, 4, 6], [8, 10, 12]], dtype=np.uint16) + second = np.array([[4, 6, 8], [10, 12, 14]], dtype=np.uint16) + + pedestal.push_init(first) + pedestal.push_init(second) + pedestal.update_mean() + + expected_mean = np.array( + [[3, 5, 7], [9, 11, 13]], dtype=expected_dtype + ) + np.testing.assert_array_equal(pedestal.mean(), expected_mean) + np.testing.assert_array_equal(pedestal.std(), np.ones((2, 3))) + + +def test_fast_pedestal_steady_state_push(): + pedestal = FastPedestal_d(1, 2, 2) + pedestal.push_init(np.array([[2, 4]], dtype=np.uint16)) + pedestal.push_init(np.array([[4, 6]], dtype=np.uint16)) + pedestal.update_mean() + + pedestal.push(np.array([[6, 8]], dtype=np.uint16)) + + np.testing.assert_array_equal(pedestal.mean(), [[4.5, 6.5]]) + + +def test_fast_pedestal_exposes_read_only_buffer_and_subtraction(): + pedestal = FastPedestal_d(1, 2, 1) + pedestal.push_init(np.array([[2, 4]], dtype=np.uint16)) + pedestal.update_mean() + + view = np.asarray(pedestal) + result = np.array([[12, 14]], dtype=np.uint16) - pedestal + + np.testing.assert_array_equal(view, [[2, 4]]) + np.testing.assert_array_equal(result, [[10, 10]]) + assert np.shares_memory(view, pedestal.view()) + assert not view.flags.writeable + + +def test_fast_pedestal_rejects_wrong_shape(): + pedestal = FastPedestal_d(2, 3) + + with pytest.raises(RuntimeError, match="shape"): + pedestal.push_init(np.zeros((2, 2), dtype=np.uint16)) diff --git a/src/hist/PedestalTrackingPixelHistogram.cpp b/src/hist/PedestalTrackingPixelHistogram.cpp index b444cef..9f4093c 100644 --- a/src/hist/PedestalTrackingPixelHistogram.cpp +++ b/src/hist/PedestalTrackingPixelHistogram.cpp @@ -202,17 +202,8 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) { switch (kind) { case WorkKind::PushPedestal: { - // Accumulate raw frame values into this thread's pedestal - // shard. Uses the pixel-level push_no_update which only - // touches m_sum/m_sum2/m_cur_samples (no m_mean writes). - for (int local_row = 0; local_row < local_rows; ++local_row) { - const auto row = static_cast(first_row + local_row); - for (ssize_t col = 0; col < image->shape(1); ++col) { - my_pedestal.template push_no_update( - static_cast(local_row), - static_cast(col), (*image)(row, col)); - } - } + auto frame = image->sub_view(first_row, first_row + local_rows); + my_pedestal.push_init(frame); break; } case WorkKind::UpdateMean: { @@ -266,30 +257,31 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) { auto &my_std = partial_std_[thread_id]; for (const auto &frame : *images) { - const auto cols = frame.shape(1); - for (int local_row = 0; local_row < local_rows; - ++local_row) { - const auto row = - static_cast(first_row + local_row); + const auto view = + frame.sub_view(first_row, first_row + local_rows); + const auto cols = view.shape(1); + const auto rows = view.shape(0); + + // in this function I need raw, mean and std + for (int row = 0; row < rows; ++row) { for (ssize_t col = 0; col < cols; ++col) { - const FrameType raw = frame(row, col); + const FrameType raw = view(row, col); const AxisType val = static_cast(raw) - - static_cast(my_pedestal.mean( - static_cast(local_row), - static_cast(col))); - my_hist.fill_unchecked(local_row, - static_cast(col), val); - const AxisType sigma = my_std(local_row, col); + my_pedestal.mean(static_cast(row), + static_cast(col)); + my_hist.fill_unchecked(row, static_cast(col), + val); + const AxisType sigma = my_std(row, col); if (sigma > AxisType{0.0} && - std::abs(static_cast(val)) < - n_sigma * sigma) { - my_pedestal.template push( - static_cast(local_row), + std::abs(val) < n_sigma * sigma) { + my_pedestal.push_no_update( + static_cast(row), static_cast(col), raw); } } } + my_pedestal.update_mean(); } break; }