removed use of boost histogram
Build on RHEL9 / build (push) Successful in 2m35s
Build on RHEL8 / build (push) Successful in 3m6s
Run tests using data on local RHEL8 / build (push) Successful in 3m59s

This commit is contained in:
Lars Erik Fröjd
2026-05-27 17:12:10 +02:00
parent e90977cfc8
commit 1f78c9f7d1
7 changed files with 182 additions and 173 deletions
+19 -50
View File
@@ -1,9 +1,6 @@
#include "aare/PedestalTrackingPixelHistogram.hpp"
#include <algorithm>
#include <boost/histogram.hpp>
#include <boost/histogram/storage_adaptor.hpp>
#include <boost/histogram/unsafe_access.hpp>
#include <cmath>
#include <cstring>
#include <exception>
@@ -60,14 +57,7 @@ PedestalTrackingPixelHistogram::PedestalTrackingPixelHistogram(
partial_std_.reserve(n_threads_);
for (int i = 0; i < n_threads_; ++i) {
const auto local_rows = row_count(i);
partial_hists_.emplace_back(
bh::axis::regular<AxisType, bh::use_default, bh::use_default,
bh::axis::option::none_t>(n_bins, xmin, xmax,
"value"),
bh::axis::integer<int, bh::use_default, bh::axis::option::none_t>(
0, cols, "y"),
bh::axis::integer<int, bh::use_default, bh::axis::option::none_t>(
0, local_rows, "x"));
partial_hists_.emplace_back(local_rows, cols, n_bins, xmin, xmax);
partial_pedestals_.emplace_back(static_cast<uint32_t>(local_rows),
static_cast<uint32_t>(cols));
partial_std_.emplace_back(NDArray<double, 2>(
@@ -188,7 +178,8 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
case WorkKind::Fill: {
// Histogram the pedestal-subtracted residual for each pixel
// in this thread's row slice. `my_pedestal` is sized to the
// local row range and indexed by (local_row, col).
// local row range and indexed by (local_row, col). The
// [xmin, xmax) range gate lives inside PixelHistogramImpl::fill.
for (int local_row = 0; local_row < local_rows; ++local_row) {
const auto row = static_cast<ssize_t>(first_row + local_row);
for (ssize_t col = 0; col < image->shape(1); ++col) {
@@ -197,10 +188,7 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
static_cast<AxisType>(my_pedestal.mean(
static_cast<uint32_t>(local_row),
static_cast<uint32_t>(col)));
if (val < xmin_ || val >= xmax_) {
continue; // Skip out-of-range residuals
}
my_hist(val, col, local_row);
my_hist.fill(local_row, static_cast<int>(col), val);
}
}
break;
@@ -242,6 +230,8 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
// back into the pedestal shard. With n_sigma == 0 the gate
// never fires, which makes this case behave identically to
// WorkKind::Fill (modulo the per-pixel gate evaluation).
// The [xmin, xmax) histogram gate lives inside
// PixelHistogramImpl::fill.
auto &my_std = partial_std_[thread_id];
const double n_sigma = n_sigma_.load(std::memory_order_relaxed);
for (int local_row = 0; local_row < local_rows; ++local_row) {
@@ -253,9 +243,7 @@ void PedestalTrackingPixelHistogram::worker_loop(int thread_id) {
static_cast<AxisType>(my_pedestal.mean(
static_cast<uint32_t>(local_row),
static_cast<uint32_t>(col)));
if (val >= xmin_ && val < xmax_) {
my_hist(val, col, local_row);
}
my_hist.fill(local_row, static_cast<int>(col), val);
const double sigma = my_std(local_row, col);
if (sigma > 0.0 &&
std::abs(static_cast<double>(val)) < n_sigma * sigma) {
@@ -287,28 +275,27 @@ PedestalTrackingPixelHistogram::hdata() const {
// the partial histograms. Cheap when the queue is already drained.
flush();
const auto &hist = partial_hists_.front();
const auto bins = static_cast<ssize_t>(hist.axis(0).size());
const auto cols = static_cast<ssize_t>(hist.axis(1).size());
const auto first_shard_view = partial_hists_.front().view();
const auto cols = static_cast<ssize_t>(first_shard_view.shape(1));
const auto bins = static_cast<ssize_t>(first_shard_view.shape(2));
const auto rows = static_cast<ssize_t>(rows_);
NDArray<StorageType, 3> data({rows, cols, bins});
// Each thread owns a disjoint, contiguous range of rows and its dense
// storage layout [local_row][col][bin] is identical to the slice
// [first_row .. first_row + local_rows)[col][bin] of `data`. So the
// Each thread owns a disjoint, contiguous range of rows. The shard's
// dense storage layout [local_row][col][bin] is identical to the slice
// [first_row .. first_row + local_rows)[col][bin] of `data`, so the
// merge is just one bulk copy per thread; no per-element accumulation
// and no upfront zeroing of `data` is needed.
const size_t pixel_stride = static_cast<size_t>(cols) * bins;
for (int t = 0; t < n_threads_; ++t) {
const auto &storage = bh::unsafe_access::storage(partial_hists_[t]);
const StorageType *partial_data = storage.data();
const auto first_row = static_cast<size_t>(row_start(t));
const auto local_rows = static_cast<size_t>(row_count(t));
if (local_rows == 0)
continue;
std::memcpy(data.data() + first_row * pixel_stride, partial_data,
const auto shard_view = partial_hists_[t].view();
std::memcpy(data.data() + first_row * pixel_stride, shard_view.data(),
local_rows * pixel_stride * sizeof(StorageType));
}
@@ -434,32 +421,14 @@ void PedestalTrackingPixelHistogram::coordinator_loop() {
NDArray<PedestalTrackingPixelHistogram::AxisType, 1>
PedestalTrackingPixelHistogram::bin_centers() const {
const auto &value_axis = partial_hists_.front().axis(0);
const auto n_bins = static_cast<ssize_t>(value_axis.size());
NDArray<AxisType, 1> centers({n_bins});
for (ssize_t i = 0; i < n_bins; ++i) {
AxisType left = value_axis.value(i);
AxisType right = value_axis.value(i + 1);
centers(i) = (left + right) / AxisType(2.0);
}
return centers;
// All shards share the same value-axis configuration, so any one will
// do; pick the first.
return partial_hists_.front().bin_centers();
}
NDArray<PedestalTrackingPixelHistogram::AxisType, 1>
PedestalTrackingPixelHistogram::bin_edges() const {
const auto &value_axis = partial_hists_.front().axis(0);
const auto n_bins = static_cast<ssize_t>(value_axis.size());
NDArray<AxisType, 1> edges({n_bins + 1});
for (ssize_t i = 0; i <= n_bins; ++i) {
edges(i) = value_axis.value(i);
}
return edges;
return partial_hists_.front().bin_edges();
}
} // namespace aare