Fixed slow cluster finding with new chunked pedestals (10x slower with multithreading). The pedestal data is now accessed as a 1D pointer

This commit is contained in:
2025-08-12 16:04:31 +02:00
parent 7ea20c6b9d
commit 12498dacaa
2 changed files with 33 additions and 18 deletions

View File

@@ -36,7 +36,7 @@ template <typename SUM_TYPE = double> class ChunkedPedestal {
public: public:
ChunkedPedestal(uint32_t rows, uint32_t cols, uint32_t chunk_size = 50000, uint32_t n_chunks = 10) ChunkedPedestal(uint32_t rows, uint32_t cols, uint32_t chunk_size = 50000, uint32_t n_chunks = 10)
: m_rows(rows), m_cols(cols), m_chunk_size(chunk_size), m_n_chunks(n_chunks), : m_rows(rows), m_cols(cols), m_chunk_size(chunk_size), m_n_chunks(n_chunks),
m_mean(NDArray<SUM_TYPE, 3>({rows, cols, n_chunks})), m_std(NDArray<SUM_TYPE, 3>({rows, cols, n_chunks})) { m_mean(NDArray<SUM_TYPE, 3>({n_chunks, rows, cols})), m_std(NDArray<SUM_TYPE, 3>({n_chunks, rows, cols})) {
assert(rows > 0 && cols > 0 && chunk_size > 0); assert(rows > 0 && cols > 0 && chunk_size > 0);
m_mean = 0; m_mean = 0;
m_std = 0; m_std = 0;
@@ -61,11 +61,19 @@ template <typename SUM_TYPE = double> class ChunkedPedestal {
} }
SUM_TYPE mean(const uint32_t row, const uint32_t col) const { SUM_TYPE mean(const uint32_t row, const uint32_t col) const {
return m_mean(row, col, m_current_chunk_number); return m_mean(m_current_chunk_number, row, col);
} }
SUM_TYPE std(const uint32_t row, const uint32_t col) const { SUM_TYPE std(const uint32_t row, const uint32_t col) const {
return m_std(row, col, m_current_chunk_number); return m_std(m_current_chunk_number, row, col);
}
SUM_TYPE* get_mean_chunk_ptr() {
return &m_mean(m_current_chunk_number, 0, 0);
}
SUM_TYPE* get_std_chunk_ptr() {
return &m_std(m_current_chunk_number, 0, 0);
} }
void clear() { void clear() {
@@ -121,12 +129,12 @@ template <typename SUM_TYPE = double> class ChunkedPedestal {
// their own pixel level operations) // their own pixel level operations)
template <typename T> template <typename T>
void push_mean(const uint32_t row, const uint32_t col, const uint32_t chunk_number, const T val_) { void push_mean(const uint32_t row, const uint32_t col, const uint32_t chunk_number, const T val_) {
m_mean(row, col, chunk_number) = val_; m_mean(chunk_number, row, col) = val_;
} }
template <typename T> template <typename T>
void push_std(const uint32_t row, const uint32_t col, const uint32_t chunk_number, const T val_) { void push_std(const uint32_t row, const uint32_t col, const uint32_t chunk_number, const T val_) {
m_std(row, col, chunk_number) = val_; m_std(chunk_number, row, col) = val_;
} }
// getter functions // getter functions

View File

@@ -93,31 +93,38 @@ class ClusterFinder {
m_clusters.set_frame_number(frame_number); m_clusters.set_frame_number(frame_number);
m_pedestal.set_frame_number(frame_number); m_pedestal.set_frame_number(frame_number);
auto mean_ptr = m_pedestal.get_mean_chunk_ptr();
auto std_ptr = m_pedestal.get_std_chunk_ptr();
for (int iy = 0; iy < frame.shape(0); iy++) { for (int iy = 0; iy < frame.shape(0); iy++) {
size_t row_offset = iy * frame.shape(1);
for (int ix = 0; ix < frame.shape(1); ix++) { for (int ix = 0; ix < frame.shape(1); ix++) {
PEDESTAL_TYPE rms = m_pedestal.std(iy, ix); // PEDESTAL_TYPE rms = m_pedestal.std(iy, ix);
PEDESTAL_TYPE rms = std_ptr[row_offset + ix];
if (rms == 0) continue; if (rms == 0) continue;
PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min(); PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
PEDESTAL_TYPE total = 0; PEDESTAL_TYPE total = 0;
// What can we short circuit here? // What can we short circuit here?
PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix)); // PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix));
PEDESTAL_TYPE value = (frame(iy, ix) - mean_ptr[row_offset + ix]);
if (value < -m_nSigma * rms) if (value < -m_nSigma * rms)
continue; // NEGATIVE_PEDESTAL go to next pixel continue; // NEGATIVE_PEDESTAL go to next pixel
// TODO! No pedestal update??? // TODO! No pedestal update???
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) { for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
size_t inner_row_offset = row_offset + (ir * frame.shape(1));
for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) { for (int ic = -dx; ic < dx + has_center_pixel_x; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) && if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) { iy + ir >= 0 && iy + ir < frame.shape(0)) {
if (m_pedestal.std(iy + ir, ix + ic) == 0) continue; // if (m_pedestal.std(iy + ir, ix + ic) == 0) continue;
if (std_ptr[inner_row_offset + ix + ic] == 0) continue;
PEDESTAL_TYPE val = // PEDESTAL_TYPE val = frame(iy + ir, ix + ic) - m_pedestal.mean(iy + ir, ix + ic);
frame(iy + ir, ix + ic) - PEDESTAL_TYPE val = frame(iy + ir, ix + ic) - mean_ptr[inner_row_offset + ix + ic];
m_pedestal.mean(iy + ir, ix + ic);
total += val; total += val;
max = std::max(max, val); max = std::max(max, val);
@@ -173,17 +180,17 @@ class ClusterFinder {
// don't have a photon // don't have a photon
int i = 0; int i = 0;
for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) { for (int ir = -dy; ir < dy + has_center_pixel_y; ir++) {
size_t inner_row_offset = row_offset + (ir * frame.shape(1));
for (int ic = -dx; ic < dx + has_center_pixel_y; ic++) { for (int ic = -dx; ic < dx + has_center_pixel_y; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) && if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) { iy + ir >= 0 && iy + ir < frame.shape(0)) {
if (m_pedestal.std(iy + ir, ix + ic) == 0) continue; // if (m_pedestal.std(iy + ir, ix + ic) == 0) continue;
if (std_ptr[inner_row_offset + ix + ic] == 0) continue;
CT tmp = // CT tmp = static_cast<CT>(frame(iy + ir, ix + ic)) - static_cast<CT>(m_pedestal.mean(iy + ir, ix + ic));
static_cast<CT>(frame(iy + ir, ix + ic)) - CT tmp = static_cast<CT>(frame(iy + ir, ix + ic)) - static_cast<CT>(mean_ptr[inner_row_offset + ix + ic]);
static_cast<CT>(
m_pedestal.mean(iy + ir, ix + ic)); cluster.data[i] = tmp; // Watch for out of bounds access
cluster.data[i] =
tmp; // Watch for out of bounds access
i++; i++;
} }
} }