Fixed slow cluster finding with new chunked pedestals (10x slower with multithreading). The pedestal data is now accessed as a 1D pointer

This commit is contained in:
2025-08-12 16:04:31 +02:00
parent 7ea20c6b9d
commit 12498dacaa
2 changed files with 33 additions and 18 deletions

View File

@@ -36,7 +36,7 @@ template <typename SUM_TYPE = double> class ChunkedPedestal {
public:
ChunkedPedestal(uint32_t rows, uint32_t cols, uint32_t chunk_size = 50000, uint32_t n_chunks = 10)
: m_rows(rows), m_cols(cols), m_chunk_size(chunk_size), m_n_chunks(n_chunks),
m_mean(NDArray<SUM_TYPE, 3>({rows, cols, n_chunks})), m_std(NDArray<SUM_TYPE, 3>({rows, cols, n_chunks})) {
m_mean(NDArray<SUM_TYPE, 3>({n_chunks, rows, cols})), m_std(NDArray<SUM_TYPE, 3>({n_chunks, rows, cols})) {
assert(rows > 0 && cols > 0 && chunk_size > 0);
m_mean = 0;
m_std = 0;
@@ -61,11 +61,19 @@ template <typename SUM_TYPE = double> class ChunkedPedestal {
}
SUM_TYPE mean(const uint32_t row, const uint32_t col) const {
return m_mean(row, col, m_current_chunk_number);
return m_mean(m_current_chunk_number, row, col);
}
SUM_TYPE std(const uint32_t row, const uint32_t col) const {
return m_std(row, col, m_current_chunk_number);
return m_std(m_current_chunk_number, row, col);
}
SUM_TYPE* get_mean_chunk_ptr() {
return &m_mean(m_current_chunk_number, 0, 0);
}
SUM_TYPE* get_std_chunk_ptr() {
return &m_std(m_current_chunk_number, 0, 0);
}
void clear() {
@@ -121,12 +129,12 @@ template <typename SUM_TYPE = double> class ChunkedPedestal {
// their own pixel level operations)
template <typename T>
void push_mean(const uint32_t row, const uint32_t col, const uint32_t chunk_number, const T val_) {
m_mean(row, col, chunk_number) = val_;
m_mean(chunk_number, row, col) = val_;
}
template <typename T>
void push_std(const uint32_t row, const uint32_t col, const uint32_t chunk_number, const T val_) {
m_std(row, col, chunk_number) = val_;
m_std(chunk_number, row, col) = val_;
}
// getter functions

View File

@@ -93,31 +93,38 @@ class ClusterFinder {
m_clusters.set_frame_number(frame_number);
m_pedestal.set_frame_number(frame_number);
auto mean_ptr = m_pedestal.get_mean_chunk_ptr();
auto std_ptr = m_pedestal.get_std_chunk_ptr();
for (int iy = 0; iy < frame.shape(0); iy++) {
size_t row_offset = iy * frame.shape(1);
for (int ix = 0; ix < frame.shape(1); ix++) {
PEDESTAL_TYPE rms = m_pedestal.std(iy, ix);
// PEDESTAL_TYPE rms = m_pedestal.std(iy, ix);
PEDESTAL_TYPE rms = std_ptr[row_offset + ix];
if (rms == 0) continue;
PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
PEDESTAL_TYPE total = 0;
// What can we short circuit here?
PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix));
// PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix));
PEDESTAL_TYPE value = (frame(iy, ix) - mean_ptr[row_offset + ix]);
if (value < -m_nSigma * rms)
continue; // NEGATIVE_PEDESTAL go to next pixel
// TODO! No pedestal update???
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
size_t inner_row_offset = row_offset + (ir * frame.shape(1));
for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
if (m_pedestal.std(iy + ir, ix + ic) == 0) continue;
// if (m_pedestal.std(iy + ir, ix + ic) == 0) continue;
if (std_ptr[inner_row_offset + ix + ic] == 0) continue;
PEDESTAL_TYPE val =
frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic);
// PEDESTAL_TYPE val = frame(iy + ir, ix + ic) - m_pedestal.mean(iy + ir, ix + ic);
PEDESTAL_TYPE val = frame(iy + ir, ix + ic) - mean_ptr[inner_row_offset + ix + ic];
total += val;
max = std::max(max, val);
@@ -173,17 +180,17 @@ class ClusterFinder {
// don't have a photon
int i = 0;
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
size_t inner_row_offset = row_offset + (ir * frame.shape(1));
for (int ic = -dx; ic < dx + has_center_pixel_y; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
if (m_pedestal.std(iy + ir, ix + ic) == 0) continue;
// if (m_pedestal.std(iy + ir, ix + ic) == 0) continue;
if (std_ptr[inner_row_offset + ix + ic] == 0) continue;
CT tmp =
static_cast<CT>(frame(iy + ir, ix + ic)) -
static_cast<CT>(
m_pedestal.mean(iy + ir, ix + ic));
cluster.data[i] =
tmp; // Watch for out of bounds access
// CT tmp = static_cast<CT>(frame(iy + ir, ix + ic)) - static_cast<CT>(m_pedestal.mean(iy + ir, ix + ic));
CT tmp = static_cast<CT>(frame(iy + ir, ix + ic)) - static_cast<CT>(mean_ptr[inner_row_offset + ix + ic]);
cluster.data[i] = tmp; // Watch for out of bounds access
i++;
}
}