Cluster finder improvements (#112)

Erik Fröjdh 2024-12-16 14:26:35 +01:00 committed by GitHub
commit da67f58323
26 changed files with 1045 additions and 384 deletions

View File

@ -40,7 +40,7 @@ option(AARE_DOCS "Build documentation" OFF)
option(AARE_VERBOSE "Verbose output" OFF)
option(AARE_CUSTOM_ASSERT "Use custom assert" OFF)
option(AARE_INSTALL_PYTHONEXT "Install the python extension in the install tree under CMAKE_INSTALL_PREFIX/aare/" OFF)
option(AARE_ASAN "Enable AddressSanitizer" OFF)
# Configure which of the dependencies to use FetchContent for
option(AARE_FETCH_FMT "Use FetchContent to download fmt" ON)
@ -225,13 +225,6 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
target_compile_options(aare_compiler_flags INTERFACE -O3)
else()
message(STATUS "Debug build")
target_compile_options(
aare_compiler_flags
INTERFACE
-Og
-ggdb3
)
endif()
# Common flags for GCC and Clang
@ -256,7 +249,21 @@ target_compile_options(
endif() #GCC/Clang specific
if(AARE_ASAN)
message(STATUS "AddressSanitizer enabled")
target_compile_options(
aare_compiler_flags
INTERFACE
-fsanitize=address,undefined,pointer-compare
-fno-omit-frame-pointer
)
target_link_libraries(
aare_compiler_flags
INTERFACE
-fsanitize=address,undefined,pointer-compare
-fno-omit-frame-pointer
)
endif()
@ -275,6 +282,7 @@ set(PUBLICHEADERS
include/aare/ClusterFinder.hpp
include/aare/ClusterFile.hpp
include/aare/CtbRawFile.hpp
include/aare/ClusterVector.hpp
include/aare/defs.hpp
include/aare/Dtype.hpp
include/aare/File.hpp
@ -316,6 +324,8 @@ target_include_directories(aare_core PUBLIC
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)
target_link_libraries(
aare_core
PUBLIC
@ -344,6 +354,7 @@ if(AARE_TESTS)
${CMAKE_CURRENT_SOURCE_DIR}/src/NDArray.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NDView.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterFinder.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/ClusterVector.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/Pedestal.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp

View File

@ -1,6 +1,6 @@
package:
name: aare
version: 2024.11.28.dev0 #TODO! how to not duplicate this?
version: 2024.12.16.dev0 #TODO! how to not duplicate this?
source:

View File

@ -29,7 +29,6 @@ version = '@PROJECT_VERSION@'
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['breathe',
'sphinx_rtd_theme',
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
]

View File

@ -0,0 +1,6 @@
ClusterVector
=============
.. doxygenclass:: aare::ClusterVector
:members:
:undoc-members:

View File

@ -46,6 +46,7 @@ AARE
Dtype
ClusterFinder
ClusterFile
ClusterVector
Pedestal
RawFile
RawSubFile

View File

@ -1,12 +1,14 @@
#pragma once
#include "aare/ClusterVector.hpp"
#include "aare/NDArray.hpp"
#include "aare/defs.hpp"
#include <filesystem>
#include <fstream>
namespace aare {
struct Cluster {
struct Cluster3x3 {
int16_t x;
int16_t y;
int32_t data[9];
@ -38,30 +40,42 @@ struct ClusterAnalysis {
double etay;
};
/*
Binary cluster file. Expects data to be laid out as:
int32_t frame_number
uint32_t number_of_clusters
int16_t x, int16_t y, int32_t data[9] x number_of_clusters
int32_t frame_number
uint32_t number_of_clusters
....
*/
class ClusterFile {
FILE *fp{};
uint32_t m_num_left{};
size_t m_chunk_size{};
const std::string m_mode;
public:
ClusterFile(const std::filesystem::path &fname, size_t chunk_size = 1000);
ClusterFile(const std::filesystem::path &fname, size_t chunk_size = 1000,
const std::string &mode = "r");
~ClusterFile();
std::vector<Cluster> read_clusters(size_t n_clusters);
std::vector<Cluster> read_frame(int32_t &out_fnum);
std::vector<Cluster>
std::vector<Cluster3x3> read_clusters(size_t n_clusters);
std::vector<Cluster3x3> read_frame(int32_t &out_fnum);
void write_frame(int32_t frame_number,
const ClusterVector<int32_t> &clusters);
std::vector<Cluster3x3>
read_cluster_with_cut(size_t n_clusters, double *noise_map, int nx, int ny);
int analyze_data(int32_t *data, int32_t *t2, int32_t *t3, char *quad,
double *eta2x, double *eta2y, double *eta3x, double *eta3y);
int analyze_cluster(Cluster cl, int32_t *t2, int32_t *t3, char *quad,
double *eta2x, double *eta2y, double *eta3x,
double *eta3y);
size_t chunk_size() const { return m_chunk_size; }
void close();
};
int analyze_data(int32_t *data, int32_t *t2, int32_t *t3, char *quad,
double *eta2x, double *eta2y, double *eta3x, double *eta3y);
int analyze_cluster(Cluster3x3& cl, int32_t *t2, int32_t *t3, char *quad,
double *eta2x, double *eta2y, double *eta3x, double *eta3y);
NDArray<double, 2> calculate_eta2( ClusterVector<int>& clusters);
std::array<double,2> calculate_eta2( Cluster3x3& cl);
} // namespace aare
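
For orientation, a minimal sketch (not part of the commit) of reading one frame record in the layout described in the ClusterFile comment above. The struct, file name and error handling are illustrative assumptions; in practice the ClusterFile class does this for you. With this member order there is no padding, so sizeof(RawCluster3x3) is 40 bytes and matches the on-disk record.

#include <cstdint>
#include <cstdio>
#include <vector>

struct RawCluster3x3 {  // hypothetical mirror of aare::Cluster3x3, for illustration only
    int16_t x;
    int16_t y;
    int32_t data[9];
};

int main() {
    std::FILE *fp = std::fopen("clusters.clust", "rb"); // hypothetical file name
    if (!fp)
        return 1;
    int32_t frame_number{};
    uint32_t n_clusters{};
    // One record per frame: frame number, cluster count, then the packed clusters
    if (std::fread(&frame_number, sizeof(frame_number), 1, fp) == 1 &&
        std::fread(&n_clusters, sizeof(n_clusters), 1, fp) == 1) {
        std::vector<RawCluster3x3> clusters(n_clusters);
        std::fread(clusters.data(), sizeof(RawCluster3x3), n_clusters, fp);
        std::printf("frame %d: %u clusters\n", frame_number, n_clusters);
    }
    std::fclose(fp);
}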

View File

@ -1,4 +1,6 @@
#pragma once
#include "aare/ClusterFile.hpp"
#include "aare/ClusterVector.hpp"
#include "aare/Dtype.hpp"
#include "aare/NDArray.hpp"
#include "aare/NDView.hpp"
@ -9,7 +11,7 @@
namespace aare {
/** enum to define the event types */
enum eventType {
enum class eventType {
PEDESTAL, /** pedestal */
NEIGHBOUR, /** neighbour i.e. below threshold, but in the cluster of a
photon */
@ -21,239 +23,248 @@ enum eventType {
UNDEFINED_EVENT = -1 /** undefined */
};
template <typename FRAME_TYPE = uint16_t, typename PEDESTAL_TYPE = double>
template <typename FRAME_TYPE = uint16_t, typename PEDESTAL_TYPE = double,
typename CT = int32_t>
class ClusterFinder {
Shape<2> m_image_size;
const int m_cluster_sizeX;
const int m_cluster_sizeY;
const double m_threshold;
const double m_nSigma;
const double c2;
const double c3;
// const PEDESTAL_TYPE m_threshold;
const PEDESTAL_TYPE m_nSigma;
const PEDESTAL_TYPE c2;
const PEDESTAL_TYPE c3;
Pedestal<PEDESTAL_TYPE> m_pedestal;
ClusterVector<CT> m_clusters;
public:
ClusterFinder(Shape<2> image_size, Shape<2>cluster_size, double nSigma = 5.0,
double threshold = 0.0)
: m_image_size(image_size), m_cluster_sizeX(cluster_size[0]), m_cluster_sizeY(cluster_size[1]),
m_threshold(threshold), m_nSigma(nSigma),
/**
* @brief Construct a new ClusterFinder object
* @param image_size size of the image
* @param cluster_size size of the cluster (x, y)
* @param nSigma number of sigma above the pedestal to consider a photon
* @param capacity initial capacity of the cluster vector
*
*/
ClusterFinder(Shape<2> image_size, Shape<2> cluster_size,
PEDESTAL_TYPE nSigma = 5.0, size_t capacity = 1000000)
: m_image_size(image_size), m_cluster_sizeX(cluster_size[0]),
m_cluster_sizeY(cluster_size[1]),
m_nSigma(nSigma),
c2(sqrt((m_cluster_sizeY + 1) / 2 * (m_cluster_sizeX + 1) / 2)),
c3(sqrt(m_cluster_sizeX * m_cluster_sizeY)),
m_pedestal(image_size[0], image_size[1]) {
// c2 = sqrt((cluster_sizeY + 1) / 2 * (cluster_sizeX + 1) / 2);
// c3 = sqrt(cluster_sizeX * cluster_sizeY);
};
m_pedestal(image_size[0], image_size[1]),
m_clusters(m_cluster_sizeX, m_cluster_sizeY, capacity) {};
void push_pedestal_frame(NDView<FRAME_TYPE, 2> frame) {
m_pedestal.push(frame);
}
NDArray<PEDESTAL_TYPE, 2> pedestal() {
return m_pedestal.mean();
NDArray<PEDESTAL_TYPE, 2> pedestal() { return m_pedestal.mean(); }
NDArray<PEDESTAL_TYPE, 2> noise() { return m_pedestal.std(); }
/**
* @brief Move the clusters from the ClusterVector in the ClusterFinder to a
* new ClusterVector and return it.
* @param realloc_same_capacity if true the new ClusterVector will have the
* same capacity as the old one
*
*/
ClusterVector<CT> steal_clusters(bool realloc_same_capacity = false) {
ClusterVector<CT> tmp = std::move(m_clusters);
if (realloc_same_capacity)
m_clusters = ClusterVector<CT>(m_cluster_sizeX, m_cluster_sizeY,
tmp.capacity());
else
m_clusters = ClusterVector<CT>(m_cluster_sizeX, m_cluster_sizeY);
return tmp;
}
void find_clusters(NDView<FRAME_TYPE, 2> frame) {
// // TODO! deal with even size clusters
// // currently 3,3 -> +/- 1
// // 4,4 -> +/- 2
int dy = m_cluster_sizeY / 2;
int dx = m_cluster_sizeX / 2;
std::vector<DynamicCluster>
find_clusters_without_threshold(NDView<FRAME_TYPE, 2> frame,
// Pedestal<PEDESTAL_TYPE> &pedestal,
bool late_update = false) {
struct pedestal_update {
int x;
int y;
FRAME_TYPE value;
};
std::vector<pedestal_update> pedestal_updates;
std::vector<DynamicCluster> clusters;
std::vector<std::vector<eventType>> eventMask;
for (int i = 0; i < frame.shape(0); i++) {
eventMask.push_back(std::vector<eventType>(frame.shape(1)));
}
long double val;
long double max;
std::vector<CT> cluster_data(m_cluster_sizeX * m_cluster_sizeY);
for (int iy = 0; iy < frame.shape(0); iy++) {
for (int ix = 0; ix < frame.shape(1); ix++) {
// initialize max and total
max = std::numeric_limits<FRAME_TYPE>::min();
long double total = 0;
eventMask[iy][ix] = PEDESTAL;
for (short ir = -(m_cluster_sizeY / 2);
ir < (m_cluster_sizeY / 2) + 1; ir++) {
for (short ic = -(m_cluster_sizeX / 2);
ic < (m_cluster_sizeX / 2) + 1; ic++) {
PEDESTAL_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
PEDESTAL_TYPE total = 0;
// What can we short circuit here?
PEDESTAL_TYPE rms = m_pedestal.std(iy, ix);
PEDESTAL_TYPE value = (frame(iy, ix) - m_pedestal.mean(iy, ix));
if (value < -m_nSigma * rms)
continue; // NEGATIVE_PEDESTAL go to next pixel
// TODO! No pedestal update???
for (int ir = -dy; ir < dy + 1; ir++) {
for (int ic = -dx; ic < dx + 1; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
val = frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic);
PEDESTAL_TYPE val =
frame(iy + ir, ix + ic) -
m_pedestal.mean(iy + ir, ix + ic);
total += val;
if (val > max) {
max = val;
}
max = std::max(max, val);
}
}
}
auto rms = m_pedestal.std(iy, ix);
if (frame(iy, ix) - m_pedestal.mean(iy, ix) < -m_nSigma * rms) {
eventMask[iy][ix] = NEGATIVE_PEDESTAL;
continue;
} else if (max > m_nSigma * rms) {
eventMask[iy][ix] = PHOTON;
if ((max > m_nSigma * rms)) {
if (value < max)
continue; // Not max go to the next pixel
// but also no pedestal update
} else if (total > c3 * m_nSigma * rms) {
eventMask[iy][ix] = PHOTON;
// pass
} else {
if (late_update) {
pedestal_updates.push_back({ix, iy, frame(iy, ix)});
} else {
m_pedestal.push(iy, ix, frame(iy, ix));
}
continue;
// m_pedestal.push(iy, ix, frame(iy, ix));
m_pedestal.push_fast(iy, ix, frame(iy, ix));
continue; // It was a pedestal value nothing to store
}
if (eventMask[iy][ix] == PHOTON &&
(frame(iy, ix) - m_pedestal.mean(iy, ix)) >= max) {
eventMask[iy][ix] = PHOTON_MAX;
DynamicCluster cluster(m_cluster_sizeX, m_cluster_sizeY,
Dtype(typeid(PEDESTAL_TYPE)));
cluster.x = ix;
cluster.y = iy;
short i = 0;
for (short ir = -(m_cluster_sizeY / 2);
ir < (m_cluster_sizeY / 2) + 1; ir++) {
for (short ic = -(m_cluster_sizeX / 2);
ic < (m_cluster_sizeX / 2) + 1; ic++) {
// Store cluster
if (value == max) {
// Zero out the cluster data
std::fill(cluster_data.begin(), cluster_data.end(), 0);
// Fill the cluster data since we have a photon to store
// It's worth redoing the loop since most of the time we
// don't have a photon
int i = 0;
for (int ir = -dy; ir < dy + 1; ir++) {
for (int ic = -dx; ic < dx + 1; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
PEDESTAL_TYPE tmp =
static_cast<PEDESTAL_TYPE>(
frame(iy + ir, ix + ic)) -
CT tmp =
static_cast<CT>(frame(iy + ir, ix + ic)) -
m_pedestal.mean(iy + ir, ix + ic);
cluster.set<PEDESTAL_TYPE>(i, tmp);
cluster_data[i] =
tmp; // Watch for out of bounds access
i++;
}
}
}
clusters.push_back(cluster);
// Add the cluster to the output ClusterVector
m_clusters.push_back(
ix, iy,
reinterpret_cast<std::byte *>(cluster_data.data()));
}
}
}
if (late_update) {
for (auto &update : pedestal_updates) {
m_pedestal.push(update.y, update.x, update.value);
}
}
return clusters;
}
// template <typename FRAME_TYPE, typename PEDESTAL_TYPE>
std::vector<DynamicCluster>
find_clusters_with_threshold(NDView<FRAME_TYPE, 2> frame,
Pedestal<PEDESTAL_TYPE> &pedestal) {
assert(m_threshold > 0);
std::vector<DynamicCluster> clusters;
std::vector<std::vector<eventType>> eventMask;
for (int i = 0; i < frame.shape(0); i++) {
eventMask.push_back(std::vector<eventType>(frame.shape(1)));
}
double tthr, tthr1, tthr2;
// // template <typename FRAME_TYPE, typename PEDESTAL_TYPE>
// std::vector<DynamicCluster>
// find_clusters_with_threshold(NDView<FRAME_TYPE, 2> frame,
// Pedestal<PEDESTAL_TYPE> &pedestal) {
// assert(m_threshold > 0);
// std::vector<DynamicCluster> clusters;
// std::vector<std::vector<eventType>> eventMask;
// for (int i = 0; i < frame.shape(0); i++) {
// eventMask.push_back(std::vector<eventType>(frame.shape(1)));
// }
// double tthr, tthr1, tthr2;
NDArray<FRAME_TYPE, 2> rest({frame.shape(0), frame.shape(1)});
NDArray<int, 2> nph({frame.shape(0), frame.shape(1)});
// convert to n photons
// nph = (frame-pedestal.mean()+0.5*m_threshold)/m_threshold; // can be
// optimized with expression templates?
for (int iy = 0; iy < frame.shape(0); iy++) {
for (int ix = 0; ix < frame.shape(1); ix++) {
auto val = frame(iy, ix) - pedestal.mean(iy, ix);
nph(iy, ix) = (val + 0.5 * m_threshold) / m_threshold;
nph(iy, ix) = nph(iy, ix) < 0 ? 0 : nph(iy, ix);
rest(iy, ix) = val - nph(iy, ix) * m_threshold;
}
}
// iterate over frame pixels
for (int iy = 0; iy < frame.shape(0); iy++) {
for (int ix = 0; ix < frame.shape(1); ix++) {
eventMask[iy][ix] = PEDESTAL;
// initialize max and total
FRAME_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
long double total = 0;
if (rest(iy, ix) <= 0.25 * m_threshold) {
pedestal.push(iy, ix, frame(iy, ix));
continue;
}
eventMask[iy][ix] = NEIGHBOUR;
// iterate over cluster pixels around the current pixel (ix,iy)
for (short ir = -(m_cluster_sizeY / 2);
ir < (m_cluster_sizeY / 2) + 1; ir++) {
for (short ic = -(m_cluster_sizeX / 2);
ic < (m_cluster_sizeX / 2) + 1; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
auto val = frame(iy + ir, ix + ic) -
pedestal.mean(iy + ir, ix + ic);
total += val;
if (val > max) {
max = val;
}
}
}
}
// NDArray<FRAME_TYPE, 2> rest({frame.shape(0), frame.shape(1)});
// NDArray<int, 2> nph({frame.shape(0), frame.shape(1)});
// // convert to n photons
// // nph = (frame-pedestal.mean()+0.5*m_threshold)/m_threshold; // can
// be
// // optimized with expression templates?
// for (int iy = 0; iy < frame.shape(0); iy++) {
// for (int ix = 0; ix < frame.shape(1); ix++) {
// auto val = frame(iy, ix) - pedestal.mean(iy, ix);
// nph(iy, ix) = (val + 0.5 * m_threshold) / m_threshold;
// nph(iy, ix) = nph(iy, ix) < 0 ? 0 : nph(iy, ix);
// rest(iy, ix) = val - nph(iy, ix) * m_threshold;
// }
// }
// // iterate over frame pixels
// for (int iy = 0; iy < frame.shape(0); iy++) {
// for (int ix = 0; ix < frame.shape(1); ix++) {
// eventMask[iy][ix] = eventType::PEDESTAL;
// // initialize max and total
// FRAME_TYPE max = std::numeric_limits<FRAME_TYPE>::min();
// long double total = 0;
// if (rest(iy, ix) <= 0.25 * m_threshold) {
// pedestal.push(iy, ix, frame(iy, ix));
// continue;
// }
// eventMask[iy][ix] = eventType::NEIGHBOUR;
// // iterate over cluster pixels around the current pixel
// (ix,iy) for (short ir = -(m_cluster_sizeY / 2);
// ir < (m_cluster_sizeY / 2) + 1; ir++) {
// for (short ic = -(m_cluster_sizeX / 2);
// ic < (m_cluster_sizeX / 2) + 1; ic++) {
// if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
// iy + ir >= 0 && iy + ir < frame.shape(0)) {
// auto val = frame(iy + ir, ix + ic) -
// pedestal.mean(iy + ir, ix + ic);
// total += val;
// if (val > max) {
// max = val;
// }
// }
// }
// }
auto rms = pedestal.std(iy, ix);
if (m_nSigma == 0) {
tthr = m_threshold;
tthr1 = m_threshold;
tthr2 = m_threshold;
} else {
tthr = m_nSigma * rms;
tthr1 = m_nSigma * rms * c3;
tthr2 = m_nSigma * rms * c2;
// auto rms = pedestal.std(iy, ix);
// if (m_nSigma == 0) {
// tthr = m_threshold;
// tthr1 = m_threshold;
// tthr2 = m_threshold;
// } else {
// tthr = m_nSigma * rms;
// tthr1 = m_nSigma * rms * c3;
// tthr2 = m_nSigma * rms * c2;
if (m_threshold > 2 * tthr)
tthr = m_threshold - tthr;
if (m_threshold > 2 * tthr1)
tthr1 = tthr - tthr1;
if (m_threshold > 2 * tthr2)
tthr2 = tthr - tthr2;
}
if (total > tthr1 || max > tthr) {
eventMask[iy][ix] = PHOTON;
nph(iy, ix) += 1;
rest(iy, ix) -= m_threshold;
} else {
pedestal.push(iy, ix, frame(iy, ix));
continue;
}
if (eventMask[iy][ix] == PHOTON &&
frame(iy, ix) - pedestal.mean(iy, ix) >= max) {
eventMask[iy][ix] = PHOTON_MAX;
DynamicCluster cluster(m_cluster_sizeX, m_cluster_sizeY,
Dtype(typeid(FRAME_TYPE)));
cluster.x = ix;
cluster.y = iy;
short i = 0;
for (short ir = -(m_cluster_sizeY / 2);
ir < (m_cluster_sizeY / 2) + 1; ir++) {
for (short ic = -(m_cluster_sizeX / 2);
ic < (m_cluster_sizeX / 2) + 1; ic++) {
if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
iy + ir >= 0 && iy + ir < frame.shape(0)) {
auto tmp = frame(iy + ir, ix + ic) -
pedestal.mean(iy + ir, ix + ic);
cluster.set<FRAME_TYPE>(i, tmp);
i++;
}
}
}
clusters.push_back(cluster);
}
}
}
return clusters;
}
// if (m_threshold > 2 * tthr)
// tthr = m_threshold - tthr;
// if (m_threshold > 2 * tthr1)
// tthr1 = tthr - tthr1;
// if (m_threshold > 2 * tthr2)
// tthr2 = tthr - tthr2;
// }
// if (total > tthr1 || max > tthr) {
// eventMask[iy][ix] = eventType::PHOTON;
// nph(iy, ix) += 1;
// rest(iy, ix) -= m_threshold;
// } else {
// pedestal.push(iy, ix, frame(iy, ix));
// continue;
// }
// if (eventMask[iy][ix] == eventType::PHOTON &&
// frame(iy, ix) - pedestal.mean(iy, ix) >= max) {
// eventMask[iy][ix] = eventType::PHOTON_MAX;
// DynamicCluster cluster(m_cluster_sizeX, m_cluster_sizeY,
// Dtype(typeid(FRAME_TYPE)));
// cluster.x = ix;
// cluster.y = iy;
// short i = 0;
// for (short ir = -(m_cluster_sizeY / 2);
// ir < (m_cluster_sizeY / 2) + 1; ir++) {
// for (short ic = -(m_cluster_sizeX / 2);
// ic < (m_cluster_sizeX / 2) + 1; ic++) {
// if (ix + ic >= 0 && ix + ic < frame.shape(1) &&
// iy + ir >= 0 && iy + ir < frame.shape(0)) {
// auto tmp = frame(iy + ir, ix + ic) -
// pedestal.mean(iy + ir, ix + ic);
// cluster.set<FRAME_TYPE>(i, tmp);
// i++;
// }
// }
// }
// clusters.push_back(cluster);
// }
// }
// }
// return clusters;
// }
};
} // namespace aare
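
A minimal sketch (not part of the commit) of the new accumulate-then-move workflow from C++: pedestal frames are pushed first, each data frame is clustered into the internal ClusterVector, and steal_clusters() moves the result out. The helper name, image size and the way the frames are obtained are illustrative assumptions.

#include "aare/ClusterFinder.hpp"
#include <vector>

using namespace aare;

// Hypothetical helper: cluster a set of frames and take ownership of the result
ClusterVector<int32_t> cluster_frames(std::vector<NDView<uint16_t, 2>> &pedestal_frames,
                                      std::vector<NDView<uint16_t, 2>> &data_frames) {
    ClusterFinder<uint16_t, double> cf({400, 400}, {3, 3}, /*nSigma=*/5.0);
    for (auto &f : pedestal_frames)
        cf.push_pedestal_frame(f);   // build the pedestal before looking for photons
    for (auto &f : data_frames)
        cf.find_clusters(f);         // clusters accumulate in the internal ClusterVector
    return cf.steal_clusters();      // move them out; the finder keeps an empty buffer
}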

View File

@ -0,0 +1,180 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>
#include <fmt/core.h>
namespace aare {
/**
* @brief ClusterVector is a container for clusters of various sizes. It uses a
* contiguous memory buffer to store the clusters.
* @note push_back can invalidate pointers to elements in the container
* @tparam T data type of the pixels in the cluster
* @tparam CoordType data type of the x and y coordinates of the cluster (normally int16_t)
*/
template <typename T, typename CoordType=int16_t> class ClusterVector {
using value_type = T;
size_t m_cluster_size_x;
size_t m_cluster_size_y;
std::byte *m_data{};
size_t m_size{0};
size_t m_capacity;
/*
Format string used in the python bindings to create a numpy
array from the buffer
= - native byte order
h - short
d - double
i - int
*/
constexpr static char m_fmt_base[] = "=h:x:\nh:y:\n({},{}){}:data:" ;
public:
/**
* @brief Construct a new ClusterVector object
* @param cluster_size_x size of the cluster in x direction
* @param cluster_size_y size of the cluster in y direction
* @param capacity initial capacity of the buffer in number of clusters
*/
ClusterVector(size_t cluster_size_x, size_t cluster_size_y,
size_t capacity = 1024)
: m_cluster_size_x(cluster_size_x), m_cluster_size_y(cluster_size_y),
m_capacity(capacity) {
allocate_buffer(capacity);
}
~ClusterVector() {
delete[] m_data;
}
//Move constructor
ClusterVector(ClusterVector &&other) noexcept
: m_cluster_size_x(other.m_cluster_size_x),
m_cluster_size_y(other.m_cluster_size_y), m_data(other.m_data),
m_size(other.m_size), m_capacity(other.m_capacity) {
other.m_data = nullptr;
other.m_size = 0;
other.m_capacity = 0;
}
//Move assignment operator
ClusterVector& operator=(ClusterVector &&other) noexcept {
if (this != &other) {
delete[] m_data;
m_cluster_size_x = other.m_cluster_size_x;
m_cluster_size_y = other.m_cluster_size_y;
m_data = other.m_data;
m_size = other.m_size;
m_capacity = other.m_capacity;
other.m_data = nullptr;
other.m_size = 0;
other.m_capacity = 0;
}
return *this;
}
/**
* @brief Reserve space for at least capacity clusters
* @param capacity number of clusters to reserve space for
* @note If capacity is less than the current capacity, the function does nothing.
*/
void reserve(size_t capacity) {
if (capacity > m_capacity) {
allocate_buffer(capacity);
}
}
/**
* @brief Add a cluster to the vector
* @param x x-coordinate of the cluster
* @param y y-coordinate of the cluster
* @param data pointer to the data of the cluster
* @warning The data pointer must point to a buffer of size cluster_size_x * cluster_size_y * sizeof(T)
*/
void push_back(CoordType x, CoordType y, const std::byte *data) {
if (m_size == m_capacity) {
allocate_buffer(m_capacity * 2);
}
std::byte *ptr = element_ptr(m_size);
*reinterpret_cast<CoordType *>(ptr) = x;
ptr += sizeof(CoordType);
*reinterpret_cast<CoordType *>(ptr) = y;
ptr += sizeof(CoordType);
std::copy(data, data + m_cluster_size_x * m_cluster_size_y * sizeof(T),
ptr);
m_size++;
}
/**
* @brief Sum the pixels in each cluster
* @return std::vector<T> vector of sums for each cluster
*/
std::vector<T> sum() {
std::vector<T> sums(m_size);
const size_t stride = element_offset();
const size_t n_pixels = m_cluster_size_x * m_cluster_size_y;
std::byte *ptr = m_data + 2 * sizeof(CoordType); // skip x and y
for (size_t i = 0; i < m_size; i++) {
sums[i] =
std::accumulate(reinterpret_cast<T *>(ptr),
reinterpret_cast<T *>(ptr) + n_pixels, T{});
ptr += stride;
}
return sums;
}
size_t size() const { return m_size; }
size_t capacity() const { return m_capacity; }
/**
* @brief Return the offset in bytes for a single cluster
*/
size_t element_offset() const {
return 2*sizeof(CoordType) +
m_cluster_size_x * m_cluster_size_y * sizeof(T);
}
/**
* @brief Return the offset in bytes for the i-th cluster
*/
size_t element_offset(size_t i) const { return element_offset() * i; }
/**
* @brief Return a pointer to the i-th cluster
*/
std::byte *element_ptr(size_t i) { return m_data + element_offset(i); }
const std::byte * element_ptr(size_t i) const { return m_data + element_offset(i); }
size_t cluster_size_x() const { return m_cluster_size_x; }
size_t cluster_size_y() const { return m_cluster_size_y; }
std::byte *data() { return m_data; }
std::byte const *data() const { return m_data; }
template<typename V>
V& at(size_t i) {
return *reinterpret_cast<V*>(element_ptr(i));
}
const std::string_view fmt_base() const {
//TODO! how do we match on coord_t?
return m_fmt_base;
}
private:
void allocate_buffer(size_t new_capacity) {
size_t num_bytes = element_offset() * new_capacity;
std::byte *new_data = new std::byte[num_bytes]{};
std::copy(m_data, m_data + element_offset() * m_size, new_data);
delete[] m_data;
m_data = new_data;
m_capacity = new_capacity;
}
};
} // namespace aare
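
A small illustration (not part of the commit) of the element layout documented above: every element is [x][y][data], so with the default CoordType of int16_t a 3x3 int32_t cluster has a stride of 2 + 2 + 9*4 = 40 bytes, and the python buffer format expands accordingly.

#include "aare/ClusterVector.hpp"
#include <cassert>

int main() {
    aare::ClusterVector<int32_t> cv(3, 3);   // 3x3 clusters, default capacity
    // [ int16_t x ][ int16_t y ][ int32_t data[9] ] -> 4 + 36 bytes per element
    assert(cv.element_offset() == 40);
    // fmt_base() formatted with (3, 3, "i") yields "=h:x:\nh:y:\n(3,3)i:data:"
}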

View File

@ -44,6 +44,7 @@ class File {
void read_into(std::byte *image_buf);
void read_into(std::byte *image_buf, size_t n_frames);
size_t frame_number(); //!< get the frame number at the current position
size_t frame_number(size_t frame_index); //!< get the frame number at the given frame index
size_t bytes_per_frame() const;
size_t pixels_per_frame() const;

View File

@ -87,7 +87,7 @@ class NDArray : public ArrayExpr<NDArray<T, Ndim>, Ndim> {
// Conversion operator from array expression to array
template <typename E>
NDArray(ArrayExpr<E, Ndim> &&expr) : NDArray(expr.shape()) {
for (int i = 0; i < size_; ++i) {
for (size_t i = 0; i < size_; ++i) {
data_[i] = expr[i];
}
}
@ -159,11 +159,11 @@ class NDArray : public ArrayExpr<NDArray<T, Ndim>, Ndim> {
}
// TODO! is int the right type for index?
T &operator()(int i) { return data_[i]; }
const T &operator()(int i) const { return data_[i]; }
T &operator()(int64_t i) { return data_[i]; }
const T &operator()(int64_t i) const { return data_[i]; }
T &operator[](int i) { return data_[i]; }
const T &operator[](int i) const { return data_[i]; }
T &operator[](int64_t i) { return data_[i]; }
const T &operator[](int64_t i) const { return data_[i]; }
T *data() { return data_; }
std::byte *buffer() { return reinterpret_cast<std::byte *>(data_); }

View File

@ -18,34 +18,48 @@ template <typename SUM_TYPE = double> class Pedestal {
uint32_t m_samples;
NDArray<uint32_t, 2> m_cur_samples;
//TODO! in case of int needs to be changed to uint64_t
NDArray<SUM_TYPE, 2> m_sum;
NDArray<SUM_TYPE, 2> m_sum2;
//Cache mean since it is used over and over in the ClusterFinder
//This optimization is related to the access pattern of the ClusterFinder
//Relies on having more reads than pushes to the pedestal
NDArray<SUM_TYPE, 2> m_mean;
public:
Pedestal(uint32_t rows, uint32_t cols, uint32_t n_samples = 1000)
: m_rows(rows), m_cols(cols), m_samples(n_samples),
m_cur_samples(NDArray<uint32_t, 2>({rows, cols}, 0)),
m_sum(NDArray<SUM_TYPE, 2>({rows, cols})),
m_sum2(NDArray<SUM_TYPE, 2>({rows, cols})) {
m_sum2(NDArray<SUM_TYPE, 2>({rows, cols})),
m_mean(NDArray<SUM_TYPE, 2>({rows, cols})) {
assert(rows > 0 && cols > 0 && n_samples > 0);
m_sum = 0;
m_sum2 = 0;
m_mean = 0;
}
~Pedestal() = default;
NDArray<SUM_TYPE, 2> mean() {
NDArray<SUM_TYPE, 2> mean_array({m_rows, m_cols});
for (uint32_t i = 0; i < m_rows * m_cols; i++) {
mean_array(i / m_cols, i % m_cols) = mean(i / m_cols, i % m_cols);
}
return mean_array;
return m_mean;
}
SUM_TYPE mean(const uint32_t row, const uint32_t col) const {
return m_mean(row, col);
}
SUM_TYPE std(const uint32_t row, const uint32_t col) const {
return std::sqrt(variance(row, col));
}
SUM_TYPE variance(const uint32_t row, const uint32_t col) const {
if (m_cur_samples(row, col) == 0) {
return 0.0;
}
return m_sum(row, col) / m_cur_samples(row, col);
return m_sum2(row, col) / m_cur_samples(row, col) -
mean(row, col) * mean(row, col);
}
NDArray<SUM_TYPE, 2> variance() {
@ -57,13 +71,7 @@ template <typename SUM_TYPE = double> class Pedestal {
return variance_array;
}
SUM_TYPE variance(const uint32_t row, const uint32_t col) const {
if (m_cur_samples(row, col) == 0) {
return 0.0;
}
return m_sum2(row, col) / m_cur_samples(row, col) -
mean(row, col) * mean(row, col);
}
NDArray<SUM_TYPE, 2> std() {
NDArray<SUM_TYPE, 2> standard_deviation_array({m_rows, m_cols});
@ -75,14 +83,12 @@ template <typename SUM_TYPE = double> class Pedestal {
return standard_deviation_array;
}
SUM_TYPE std(const uint32_t row, const uint32_t col) const {
return std::sqrt(variance(row, col));
}
void clear() {
for (uint32_t i = 0; i < m_rows * m_cols; i++) {
clear(i / m_cols, i % m_cols);
}
m_sum = 0;
m_sum2 = 0;
m_cur_samples = 0;
}
@ -92,7 +98,9 @@ template <typename SUM_TYPE = double> class Pedestal {
m_sum2(row, col) = 0;
m_cur_samples(row, col) = 0;
}
// frame level operations
template <typename T> void push(NDView<T, 2> frame) {
assert(frame.size() == m_rows * m_cols);
@ -102,17 +110,37 @@ template <typename SUM_TYPE = double> class Pedestal {
"Frame shape does not match pedestal shape");
}
for (uint32_t row = 0; row < m_rows; row++) {
for (uint32_t col = 0; col < m_cols; col++) {
for (size_t row = 0; row < m_rows; row++) {
for (size_t col = 0; col < m_cols; col++) {
push<T>(row, col, frame(row, col));
}
}
// // TODO: test the effect of #pragma omp parallel for
// for (uint32_t index = 0; index < m_rows * m_cols; index++) {
// push<T>(index / m_cols, index % m_cols, frame(index));
// }
}
/**
* Push but don't update the cached mean. Speeds up the process
* when initializing the pedestal.
*
*/
template <typename T> void push_no_update(NDView<T, 2> frame) {
assert(frame.size() == m_rows * m_cols);
// TODO! move away from m_rows, m_cols
if (frame.shape() != std::array<int64_t, 2>{m_rows, m_cols}) {
throw std::runtime_error(
"Frame shape does not match pedestal shape");
}
for (size_t row = 0; row < m_rows; row++) {
for (size_t col = 0; col < m_cols; col++) {
push_no_update<T>(row, col, frame(row, col));
}
}
}
template <typename T> void push(Frame &frame) {
assert(frame.rows() == static_cast<size_t>(m_rows) &&
frame.cols() == static_cast<size_t>(m_cols));
@ -132,18 +160,48 @@ template <typename SUM_TYPE = double> class Pedestal {
template <typename T>
void push(const uint32_t row, const uint32_t col, const T val_) {
SUM_TYPE val = static_cast<SUM_TYPE>(val_);
const uint32_t idx = index(row, col);
if (m_cur_samples(idx) < m_samples) {
m_sum(idx) += val;
m_sum2(idx) += val * val;
m_cur_samples(idx)++;
if (m_cur_samples(row, col) < m_samples) {
m_sum(row, col) += val;
m_sum2(row, col) += val * val;
m_cur_samples(row, col)++;
} else {
m_sum(idx) += val - m_sum(idx) / m_cur_samples(idx);
m_sum2(idx) += val * val - m_sum2(idx) / m_cur_samples(idx);
m_sum(row, col) += val - m_sum(row, col) / m_cur_samples(row, col);
m_sum2(row, col) += val * val - m_sum2(row, col) / m_cur_samples(row, col);
}
//Since we just did a push we know that m_cur_samples(row, col) is at least 1
m_mean(row, col) = m_sum(row, col) / m_cur_samples(row, col);
}
template <typename T>
void push_no_update(const uint32_t row, const uint32_t col, const T val_) {
SUM_TYPE val = static_cast<SUM_TYPE>(val_);
if (m_cur_samples(row, col) < m_samples) {
m_sum(row, col) += val;
m_sum2(row, col) += val * val;
m_cur_samples(row, col)++;
} else {
m_sum(row, col) += val - m_sum(row, col) / m_cur_samples(row, col);
m_sum2(row, col) += val * val - m_sum2(row, col) / m_cur_samples(row, col);
}
}
uint32_t index(const uint32_t row, const uint32_t col) const {
return row * m_cols + col;
};
/**
* @brief Update the mean of the pedestal. This is used after having done
* push_no_update. It is not necessary to call this function after push.
*/
void update_mean(){
m_mean = m_sum / m_cur_samples;
}
template<typename T>
void push_fast(const uint32_t row, const uint32_t col, const T val_){
//Assume we reached the steady state where all pixels have
//m_samples samples
SUM_TYPE val = static_cast<SUM_TYPE>(val_);
m_sum(row, col) += val - m_sum(row, col) / m_samples;
m_sum2(row, col) += val * val - m_sum2(row, col) / m_samples;
m_mean(row, col) = m_sum(row, col) / m_samples;
}
};
} // namespace aare
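
A short sketch (not part of the commit) of the faster initialization path enabled by the new methods: push_no_update() accumulates the sums without recomputing the cached mean per sample, and a single update_mean() afterwards makes the cache consistent. The helper and the frame container are illustrative assumptions.

#include "aare/Pedestal.hpp"
#include <vector>

using namespace aare;

void initialize_pedestal(Pedestal<double> &pd,
                         std::vector<NDView<uint16_t, 2>> &pedestal_frames) {
    for (auto &frame : pedestal_frames)
        pd.push_no_update(frame);  // accumulate sum and sum2 only
    pd.update_mean();              // recompute the cached mean once at the end
    // From here on, push()/push_fast() keep the cached mean current per sample.
}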

View File

@ -47,7 +47,7 @@ class DynamicCluster {
int cluster_sizeY;
int16_t x;
int16_t y;
Dtype dt;
Dtype dt; // 4 bytes
private:
std::byte *m_data;

View File

@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
[project]
name = "aare"
version = "2024.11.28.dev0"
version = "2024.12.16.dev0"
[tool.scikit-build]
cmake.verbose = true

View File

@ -3,9 +3,10 @@ from . import _aare
from ._aare import File, RawMasterFile, RawSubFile
from ._aare import Pedestal, ClusterFinder, VarClusterFinder
from ._aare import Pedestal_d, Pedestal_f, ClusterFinder, VarClusterFinder
from ._aare import DetectorType
from ._aare import ClusterFile
from ._aare import hitmap
from .CtbRawFile import CtbRawFile
from .RawFile import RawFile

View File

@ -1,15 +1,58 @@
import sys
sys.path.append('/home/l_msdetect/erik/aare/build')
#Our normal python imports
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
plt.ion()
from pathlib import Path
from aare import ClusterFile
import boost_histogram as bh
import time
base = Path('~/data/aare_test_data/clusters').expanduser()
from aare import File, ClusterFinder, VarClusterFinder
f = ClusterFile(base / 'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust')
# f = ClusterFile(base / 'single_frame_97_clustrers.clust')
base = Path('/mnt/sls_det_storage/matterhorn_data/aare_test_data/')
f = File(base/'Moench03new/cu_half_speed_master_4.json')
cf = ClusterFinder((400,400), (3,3))
for i in range(1000):
cf.push_pedestal_frame(f.read_frame())
fig, ax = plt.subplots()
im = ax.imshow(cf.pedestal())
cf.pedestal()
cf.noise()
for i in range(10):
fn, cl = f.read_frame()
print(fn, cl.size)
N = 500
t0 = time.perf_counter()
hist1 = bh.Histogram(bh.axis.Regular(40, -2, 4000))
f.seek(0)
t0 = time.perf_counter()
data = f.read_n(N)
t_elapsed = time.perf_counter()-t0
n_bytes = data.itemsize*data.size
print(f'Reading {N} frames took {t_elapsed:.3f}s {N/t_elapsed:.0f} FPS, {n_bytes/1024**2:.4f} GB/s')
for frame in data:
a = cf.find_clusters(frame)
clusters = cf.steal_clusters()
# t_elapsed = time.perf_counter()-t0
# print(f'Clustering {N} frames took {t_elapsed:.2f}s {N/t_elapsed:.0f} FPS')
# t0 = time.perf_counter()
# total_clusters = clusters.size
# hist1.fill(clusters.sum())
# t_elapsed = time.perf_counter()-t0
# print(f'Filling histogram with the sum of {total_clusters} clusters took: {t_elapsed:.3f}s, {total_clusters/t_elapsed:.3g} clust/s')
# print(f'Average number of clusters per frame {total_clusters/N:.3f}')

View File

@ -1,4 +1,5 @@
#include "aare/ClusterFinder.hpp"
#include "aare/ClusterVector.hpp"
#include "aare/NDView.hpp"
#include "aare/Pedestal.hpp"
#include "np_helper.hpp"
@ -9,30 +10,101 @@
#include <pybind11/stl.h>
namespace py = pybind11;
using pd_type = float;
template <typename T>
void define_cluster_vector(py::module &m, const std::string &typestr) {
auto class_name = fmt::format("ClusterVector_{}", typestr);
py::class_<ClusterVector<T>>(m, class_name.c_str(), py::buffer_protocol())
.def(py::init<int, int>())
.def_property_readonly("size", &ClusterVector<T>::size)
.def("element_offset",
py::overload_cast<>(&ClusterVector<T>::element_offset, py::const_))
.def_property_readonly("fmt",
[typestr](ClusterVector<T> &self) {
return fmt::format(
self.fmt_base(), self.cluster_size_x(),
self.cluster_size_y(), typestr);
})
.def("sum", [](ClusterVector<T> &self) {
auto *vec = new std::vector<T>(self.sum());
return return_vector(vec);
})
.def_buffer([typestr](ClusterVector<T> &self) -> py::buffer_info {
return py::buffer_info(
self.data(), /* Pointer to buffer */
self.element_offset(), /* Size of one scalar */
fmt::format(self.fmt_base(), self.cluster_size_x(),
self.cluster_size_y(),
typestr), /* Format descriptor */
1, /* Number of dimensions */
{self.size()}, /* Buffer dimensions */
{self.element_offset()} /* Strides (in bytes) for each index */
);
});
}
void define_cluster_finder_bindings(py::module &m) {
py::class_<ClusterFinder<uint16_t, double>>(m, "ClusterFinder")
.def(py::init<Shape<2>, Shape<2>>())
py::class_<ClusterFinder<uint16_t, pd_type>>(m, "ClusterFinder")
.def(py::init<Shape<2>, Shape<2>, pd_type, size_t>(), py::arg("image_size"),
py::arg("cluster_size"), py::arg("n_sigma") = 5.0,
py::arg("capacity") = 1'000'000)
.def("push_pedestal_frame",
[](ClusterFinder<uint16_t, double> &self,
[](ClusterFinder<uint16_t, pd_type> &self,
py::array_t<uint16_t> frame) {
auto view = make_view_2d(frame);
self.push_pedestal_frame(view);
})
.def("pedestal",
[](ClusterFinder<uint16_t, double> &self) {
auto pd = new NDArray<double, 2>{};
[](ClusterFinder<uint16_t, pd_type> &self) {
auto pd = new NDArray<pd_type, 2>{};
*pd = self.pedestal();
return return_image_data(pd);
})
.def("find_clusters_without_threshold",
[](ClusterFinder<uint16_t, double> &self,
.def("noise",
[](ClusterFinder<uint16_t, pd_type> &self) {
auto arr = new NDArray<pd_type, 2>{};
*arr = self.noise();
return return_image_data(arr);
})
.def("steal_clusters",
[](ClusterFinder<uint16_t, pd_type> &self, bool realloc_same_capacity) {
auto v = new ClusterVector<int>(self.steal_clusters(realloc_same_capacity));
return v;
}, py::arg("realloc_same_capacity") = false)
.def("find_clusters",
[](ClusterFinder<uint16_t, pd_type> &self,
py::array_t<uint16_t> frame) {
auto view = make_view_2d(frame);
auto clusters = self.find_clusters_without_threshold(view);
return clusters;
self.find_clusters(view);
return;
});
m.def("hitmap", [](std::array<size_t, 2> image_size, ClusterVector<int32_t>& cv){
py::array_t<int32_t> hitmap(image_size);
auto r = hitmap.mutable_unchecked<2>();
// Initialize hitmap to 0
for (py::ssize_t i = 0; i < r.shape(0); i++)
for (py::ssize_t j = 0; j < r.shape(1); j++)
r(i, j) = 0;
size_t stride = cv.element_offset();
auto ptr = cv.data();
for(size_t i=0; i<cv.size(); i++){
auto x = *reinterpret_cast<int16_t*>(ptr);
auto y = *reinterpret_cast<int16_t*>(ptr+sizeof(int16_t));
r(y, x) += 1;
ptr += stride;
}
return hitmap;
});
define_cluster_vector<int>(m, "i");
define_cluster_vector<double>(m, "d");
define_cluster_vector<float>(m, "f");
py::class_<DynamicCluster>(m, "DynamicCluster", py::buffer_protocol())
.def(py::init<int, int, Dtype>())
.def("size", &DynamicCluster::size)

View File

@ -1,7 +1,6 @@
#include "aare/ClusterFile.hpp"
#include "aare/defs.hpp"
#include <cstdint>
#include <filesystem>
#include <pybind11/iostream.h>
@ -11,40 +10,67 @@
#include <pybind11/stl/filesystem.h>
#include <string>
//Disable warnings for unused parameters, as we ignore some
//in the __exit__ method
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
namespace py = pybind11;
using namespace ::aare;
void define_cluster_file_io_bindings(py::module &m) {
PYBIND11_NUMPY_DTYPE(Cluster, x, y, data);
PYBIND11_NUMPY_DTYPE(Cluster3x3, x, y, data);
py::class_<ClusterFile>(m, "ClusterFile")
.def(py::init<const std::filesystem::path &, size_t>(), py::arg(), py::arg("chunk_size") = 1000)
.def(py::init<const std::filesystem::path &, size_t,
const std::string &>(),
py::arg(), py::arg("chunk_size") = 1000, py::arg("mode") = "r")
.def("read_clusters",
[](ClusterFile &self, size_t n_clusters) {
auto* vec = new std::vector<Cluster>(self.read_clusters(n_clusters));
auto *vec =
new std::vector<Cluster3x3>(self.read_clusters(n_clusters));
return return_vector(vec);
})
.def("read_frame",
[](ClusterFile &self) {
int32_t frame_number;
auto* vec = new std::vector<Cluster>(self.read_frame(frame_number));
auto *vec =
new std::vector<Cluster3x3>(self.read_frame(frame_number));
return py::make_tuple(frame_number, return_vector(vec));
})
.def("write_frame", &ClusterFile::write_frame)
.def("read_cluster_with_cut",
[](ClusterFile &self, size_t n_clusters, py::array_t<double> noise_map, int nx, int ny) {
[](ClusterFile &self, size_t n_clusters,
py::array_t<double> noise_map, int nx, int ny) {
auto view = make_view_2d(noise_map);
auto* vec = new std::vector<Cluster>(self.read_cluster_with_cut(n_clusters, view.data(), nx, ny));
auto *vec =
new std::vector<Cluster3x3>(self.read_cluster_with_cut(
n_clusters, view.data(), nx, ny));
return return_vector(vec);
})
.def("__enter__", [](ClusterFile &self) { return &self; })
.def("__exit__", [](ClusterFile &self) { self.close();})
.def("__exit__",
[](ClusterFile &self,
const std::optional<pybind11::type> &exc_type,
const std::optional<pybind11::object> &exc_value,
const std::optional<pybind11::object> &traceback) {
self.close();
})
.def("__iter__", [](ClusterFile &self) { return &self; })
.def("__next__", [](ClusterFile &self) {
auto vec = new std::vector<Cluster>(self.read_clusters(self.chunk_size()));
if(vec->size() == 0) {
auto vec =
new std::vector<Cluster3x3>(self.read_clusters(self.chunk_size()));
if (vec->size() == 0) {
throw py::stop_iteration();
}
return return_vector(vec);
});
}
m.def("calculate_eta2", []( aare::ClusterVector<int32_t> &clusters) {
auto eta2 = new NDArray<double, 2>(calculate_eta2(clusters));
return return_image_data(eta2);
});
}
#pragma GCC diagnostic pop

View File

@ -51,7 +51,8 @@ void define_file_io_bindings(py::module &m) {
.def(py::init<const std::filesystem::path &, const std::string &,
const FileConfig &>())
.def("frame_number", &File::frame_number)
.def("frame_number", py::overload_cast<>(&File::frame_number))
.def("frame_number", py::overload_cast<size_t>(&File::frame_number))
.def_property_readonly("bytes_per_frame", &File::bytes_per_frame)
.def_property_readonly("pixels_per_frame", &File::pixels_per_frame)
.def("seek", &File::seek)

View File

@ -22,8 +22,8 @@ PYBIND11_MODULE(_aare, m) {
define_raw_master_file_bindings(m);
define_var_cluster_finder_bindings(m);
define_pixel_map_bindings(m);
define_pedestal_bindings<double>(m, "Pedestal");
define_pedestal_bindings<float>(m, "Pedestal_float32");
define_pedestal_bindings<double>(m, "Pedestal_d");
define_pedestal_bindings<float>(m, "Pedestal_f");
define_cluster_finder_bindings(m);
define_cluster_file_io_bindings(m);
}

View File

@ -43,5 +43,10 @@ template <typename SUM_TYPE> void define_pedestal_bindings(py::module &m, const
.def("push", [](Pedestal<SUM_TYPE> &pedestal, py::array_t<uint16_t> &f) {
auto v = make_view_2d(f);
pedestal.push(v);
});
})
.def("push_no_update", [](Pedestal<SUM_TYPE> &pedestal, py::array_t<uint16_t, py::array::c_style> &f) {
auto v = make_view_2d(f);
pedestal.push_no_update(v);
}, py::arg().noconvert())
.def("update_mean", &Pedestal<SUM_TYPE>::update_mean);
}

View File

@ -1,34 +1,68 @@
#include "aare/ClusterFile.hpp"
#include <algorithm>
namespace aare {
ClusterFile::ClusterFile(const std::filesystem::path &fname, size_t chunk_size): m_chunk_size(chunk_size) {
fp = fopen(fname.c_str(), "rb");
if (!fp) {
throw std::runtime_error("Could not open file: " + fname.string());
ClusterFile::ClusterFile(const std::filesystem::path &fname, size_t chunk_size,
const std::string &mode)
: m_chunk_size(chunk_size), m_mode(mode) {
if (mode == "r") {
fp = fopen(fname.c_str(), "rb");
if (!fp) {
throw std::runtime_error("Could not open file for reading: " +
fname.string());
}
} else if (mode == "w") {
fp = fopen(fname.c_str(), "wb");
if (!fp) {
throw std::runtime_error("Could not open file for writing: " +
fname.string());
}
} else {
throw std::runtime_error("Unsupported mode: " + mode);
}
}
ClusterFile::~ClusterFile() {
close();
}
ClusterFile::~ClusterFile() { close(); }
void ClusterFile::close(){
if (fp){
void ClusterFile::close() {
if (fp) {
fclose(fp);
fp = nullptr;
}
}
}
std::vector<Cluster> ClusterFile::read_clusters(size_t n_clusters) {
std::vector<Cluster> clusters(n_clusters);
void ClusterFile::write_frame(int32_t frame_number,
const ClusterVector<int32_t> &clusters) {
if (m_mode != "w") {
throw std::runtime_error("File not opened for writing");
}
if (!(clusters.cluster_size_x() == 3) &&
!(clusters.cluster_size_y() == 3)) {
throw std::runtime_error("Only 3x3 clusters are supported");
}
fwrite(&frame_number, sizeof(frame_number), 1, fp);
uint32_t n_clusters = clusters.size();
fwrite(&n_clusters, sizeof(n_clusters), 1, fp);
fwrite(clusters.data(), clusters.element_offset(), clusters.size(), fp);
// write clusters
// fwrite(clusters.data(), sizeof(Cluster), clusters.size(), fp);
}
std::vector<Cluster3x3> ClusterFile::read_clusters(size_t n_clusters) {
if (m_mode != "r") {
throw std::runtime_error("File not opened for reading");
}
std::vector<Cluster3x3> clusters(n_clusters);
int32_t iframe = 0; // frame number needs to be 4 bytes!
size_t nph_read = 0;
uint32_t nn = m_num_left;
uint32_t nph = m_num_left; // number of clusters in frame needs to be 4
auto buf = reinterpret_cast<Cluster *>(clusters.data());
auto buf = reinterpret_cast<Cluster3x3 *>(clusters.data());
// if there are photons left from previous frame read them first
if (nph) {
if (nph > n_clusters) {
@ -38,7 +72,8 @@ std::vector<Cluster> ClusterFile::read_clusters(size_t n_clusters) {
} else {
nn = nph;
}
nph_read += fread(reinterpret_cast<void *>(buf + nph_read), sizeof(Cluster), nn, fp);
nph_read += fread(reinterpret_cast<void *>(buf + nph_read),
sizeof(Cluster3x3), nn, fp);
m_num_left = nph - nn; // write back the number of photons left
}
@ -52,8 +87,8 @@ std::vector<Cluster> ClusterFile::read_clusters(size_t n_clusters) {
else
nn = nph;
nph_read +=
fread(reinterpret_cast<void *>(buf + nph_read), sizeof(Cluster), nn, fp);
nph_read += fread(reinterpret_cast<void *>(buf + nph_read),
sizeof(Cluster3x3), nn, fp);
m_num_left = nph - nn;
}
if (nph_read >= n_clusters)
@ -67,9 +102,13 @@ std::vector<Cluster> ClusterFile::read_clusters(size_t n_clusters) {
return clusters;
}
std::vector<Cluster> ClusterFile::read_frame(int32_t &out_fnum) {
std::vector<Cluster3x3> ClusterFile::read_frame(int32_t &out_fnum) {
if (m_mode != "r") {
throw std::runtime_error("File not opened for reading");
}
if (m_num_left) {
throw std::runtime_error("There are still photons left in the last frame");
throw std::runtime_error(
"There are still photons left in the last frame");
}
if (fread(&out_fnum, sizeof(out_fnum), 1, fp) != 1) {
@ -80,20 +119,22 @@ std::vector<Cluster> ClusterFile::read_frame(int32_t &out_fnum) {
if (fread(&n_clusters, sizeof(n_clusters), 1, fp) != 1) {
throw std::runtime_error("Could not read number of clusters");
}
std::vector<Cluster> clusters(n_clusters);
std::vector<Cluster3x3> clusters(n_clusters);
if (fread(clusters.data(), sizeof(Cluster), n_clusters, fp) != static_cast<size_t>(n_clusters)) {
if (fread(clusters.data(), sizeof(Cluster3x3), n_clusters, fp) !=
static_cast<size_t>(n_clusters)) {
throw std::runtime_error("Could not read clusters");
}
return clusters;
}
std::vector<Cluster> ClusterFile::read_cluster_with_cut(size_t n_clusters,
double *noise_map,
int nx, int ny) {
std::vector<Cluster> clusters(n_clusters);
std::vector<Cluster3x3> ClusterFile::read_cluster_with_cut(size_t n_clusters,
double *noise_map,
int nx, int ny) {
if (m_mode != "r") {
throw std::runtime_error("File not opened for reading");
}
std::vector<Cluster3x3> clusters(n_clusters);
// size_t read_clusters_with_cut(FILE *fp, size_t n_clusters, Cluster *buf,
// uint32_t *n_left, double *noise_map, int
// nx, int ny) {
@ -107,7 +148,7 @@ std::vector<Cluster> ClusterFile::read_cluster_with_cut(size_t n_clusters,
int32_t t2max, tot1;
int32_t tot3;
// Cluster *ptr = buf;
Cluster *ptr = clusters.data();
Cluster3x3 *ptr = clusters.data();
int good = 1;
double noise;
// read photons left from previous frame
@ -124,7 +165,8 @@ std::vector<Cluster> ClusterFile::read_cluster_with_cut(size_t n_clusters,
}
for (size_t iph = 0; iph < nn; iph++) {
// read photons 1 by 1
size_t n_read = fread(reinterpret_cast<void *>(ptr), sizeof(Cluster), 1, fp);
size_t n_read =
fread(reinterpret_cast<void *>(ptr), sizeof(Cluster3x3), 1, fp);
if (n_read != 1) {
clusters.resize(nph_read);
return clusters;
@ -158,70 +200,132 @@ std::vector<Cluster> ClusterFile::read_cluster_with_cut(size_t n_clusters,
break;
}
}
if (nph_read < n_clusters) {
// // keep on reading frames and photons until reaching n_clusters
while (fread(&iframe, sizeof(iframe), 1, fp)) {
// // printf("%d\n",nph_read);
if (nph_read < n_clusters) {
// // keep on reading frames and photons until reaching
// n_clusters
while (fread(&iframe, sizeof(iframe), 1, fp)) {
// // printf("%d\n",nph_read);
if (fread(&nph, sizeof(nph), 1, fp)) {
// // printf("** %d\n",nph);
m_num_left = nph;
for (size_t iph = 0; iph < nph; iph++) {
// // read photons 1 by 1
size_t n_read =
fread(reinterpret_cast<void *>(ptr), sizeof(Cluster), 1, fp);
if (n_read != 1) {
clusters.resize(nph_read);
return clusters;
// return nph_read;
}
good = 1;
if (noise_map) {
if (ptr->x >= 0 && ptr->x < nx && ptr->y >= 0 &&
ptr->y < ny) {
tot1 = ptr->data[4];
analyze_cluster(*ptr, &t2max, &tot3, NULL,
NULL,
NULL, NULL, NULL);
// noise = noise_map[ptr->y * nx + ptr->x];
noise = noise_map[ptr->y + ny * ptr->x];
if (tot1 > noise || t2max > 2 * noise ||
tot3 > 3 * noise) {
;
} else
good = 0;
} else {
printf("Bad pixel number %d %d\n", ptr->x,
ptr->y); good = 0;
}
}
if (good) {
ptr++;
nph_read++;
}
(m_num_left)--;
if (nph_read >= n_clusters)
break;
if (fread(&nph, sizeof(nph), 1, fp)) {
// // printf("** %d\n",nph);
m_num_left = nph;
for (size_t iph = 0; iph < nph; iph++) {
// // read photons 1 by 1
size_t n_read = fread(reinterpret_cast<void *>(ptr),
sizeof(Cluster3x3), 1, fp);
if (n_read != 1) {
clusters.resize(nph_read);
return clusters;
// return nph_read;
}
good = 1;
if (noise_map) {
if (ptr->x >= 0 && ptr->x < nx && ptr->y >= 0 &&
ptr->y < ny) {
tot1 = ptr->data[4];
analyze_cluster(*ptr, &t2max, &tot3, NULL, NULL,
NULL, NULL, NULL);
// noise = noise_map[ptr->y * nx + ptr->x];
noise = noise_map[ptr->y + ny * ptr->x];
if (tot1 > noise || t2max > 2 * noise ||
tot3 > 3 * noise) {
;
} else
good = 0;
} else {
printf("Bad pixel number %d %d\n", ptr->x, ptr->y);
good = 0;
}
}
if (good) {
ptr++;
nph_read++;
}
(m_num_left)--;
if (nph_read >= n_clusters)
break;
}
if (nph_read >= n_clusters)
break;
}
if (nph_read >= n_clusters)
break;
}
// printf("%d\n",nph_read);
clusters.resize(nph_read);
return clusters;
}
// printf("%d\n",nph_read);
clusters.resize(nph_read);
return clusters;
}
int ClusterFile::analyze_cluster(Cluster cl, int32_t *t2, int32_t *t3, char *quad,
NDArray<double, 2> calculate_eta2(ClusterVector<int> &clusters) {
NDArray<double, 2> eta2({clusters.size(), 2});
for (size_t i = 0; i < clusters.size(); i++) {
// int32_t t2;
// auto* ptr = reinterpret_cast<int32_t*> (clusters.element_ptr(i) + 2 *
// sizeof(int16_t)); analyze_cluster(clusters.at<Cluster3x3>(i), &t2,
// nullptr, nullptr, &eta2(i,0), &eta2(i,1) , nullptr, nullptr);
auto [x, y] = calculate_eta2(clusters.at<Cluster3x3>(i));
eta2(i, 0) = x;
eta2(i, 1) = y;
}
return eta2;
}
std::array<double, 2> calculate_eta2(Cluster3x3 &cl) {
std::array<double, 2> eta2{};
std::array<int32_t, 4> tot2;
tot2[0] = cl.data[0] + cl.data[1] + cl.data[3] + cl.data[4];
tot2[1] = cl.data[1] + cl.data[2] + cl.data[4] + cl.data[5];
tot2[2] = cl.data[3] + cl.data[4] + cl.data[6] + cl.data[7];
tot2[3] = cl.data[4] + cl.data[5] + cl.data[7] + cl.data[8];
auto c = std::max_element(tot2.begin(), tot2.end()) - tot2.begin();
switch (c) {
case cBottomLeft:
if ((cl.data[3] + cl.data[4]) != 0)
eta2[0] =
static_cast<double>(cl.data[4]) / (cl.data[3] + cl.data[4]);
if ((cl.data[1] + cl.data[4]) != 0)
eta2[1] =
static_cast<double>(cl.data[4]) / (cl.data[1] + cl.data[4]);
break;
case cBottomRight:
if ((cl.data[2] + cl.data[5]) != 0)
eta2[0] =
static_cast<double>(cl.data[5]) / (cl.data[4] + cl.data[5]);
if ((cl.data[1] + cl.data[4]) != 0)
eta2[1] =
static_cast<double>(cl.data[4]) / (cl.data[1] + cl.data[4]);
break;
case cTopLeft:
if ((cl.data[7] + cl.data[4]) != 0)
eta2[0] =
static_cast<double>(cl.data[4]) / (cl.data[3] + cl.data[4]);
if ((cl.data[7] + cl.data[4]) != 0)
eta2[1] =
static_cast<double>(cl.data[7]) / (cl.data[7] + cl.data[4]);
break;
case cTopRight:
if ((cl.data[5] + cl.data[4]) != 0)
eta2[0] =
static_cast<double>(cl.data[5]) / (cl.data[5] + cl.data[4]);
if ((cl.data[7] + cl.data[4]) != 0)
eta2[1] =
static_cast<double>(cl.data[7]) / (cl.data[7] + cl.data[4]);
break;
// default:;
}
return eta2;
}
int analyze_cluster(Cluster3x3 &cl, int32_t *t2, int32_t *t3, char *quad,
double *eta2x, double *eta2y, double *eta3x,
double *eta3y) {
return analyze_data(cl.data, t2, t3, quad, eta2x, eta2y, eta3x, eta3y);
}
int ClusterFile::analyze_data(int32_t *data, int32_t *t2, int32_t *t3, char *quad,
int analyze_data(int32_t *data, int32_t *t2, int32_t *t3, char *quad,
double *eta2x, double *eta2y, double *eta3x, double *eta3y) {
int ok = 1;
@ -263,12 +367,14 @@ int ClusterFile::analyze_data(int32_t *data, int32_t *t2, int32_t *t3, char *qua
c = i;
}
}
//printf("*** %d %d %d %d -- %d\n",tot2[0],tot2[1],tot2[2],tot2[3],t2max);
// printf("*** %d %d %d %d --
// %d\n",tot2[0],tot2[1],tot2[2],tot2[3],t2max);
if (quad)
*quad = c;
if (t2)
*t2 = t2max;
}
if (t3)
*t3 = tot3;
@ -318,6 +424,4 @@ int ClusterFile::analyze_data(int32_t *data, int32_t *t2, int32_t *t3, char *qua
return ok;
}
} // namespace aare
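
A small worked example (not part of the commit) of the free calculate_eta2 function added above. For this cluster the strongest 2x2 sub-cluster is the bottom-left one (tot2 = {13, 12, 13, 12}), so eta2x = 10/12 and eta2y = 10/11; the values and the quadrant ordering from defs.hpp are assumptions used only for illustration.

#include "aare/ClusterFile.hpp"
#include <cstdio>

int main() {
    // Centre pixel 10, neighbours 1, 2, 1, 1 (row-major data array)
    aare::Cluster3x3 cl{0, 0, {0, 1, 0, 2, 10, 1, 0, 1, 0}};
    auto eta = aare::calculate_eta2(cl);
    std::printf("eta2x = %.3f, eta2y = %.3f\n", eta[0], eta[1]); // expected ~0.833, ~0.909
}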

src/ClusterVector.test.cpp (new file, 108 lines)
View File

@ -0,0 +1,108 @@
#include <cstdint>
#include "aare/ClusterVector.hpp"
#include <catch2/matchers/catch_matchers_floating_point.hpp>
#include <catch2/catch_test_macros.hpp>
using aare::ClusterVector;
TEST_CASE("ClusterVector 2x2 int32_t capacity 4, push back then read") {
struct Cluster_i2x2 {
int16_t x;
int16_t y;
int32_t data[4];
};
ClusterVector<int32_t> cv(2, 2, 4);
REQUIRE(cv.capacity() == 4);
REQUIRE(cv.size() == 0);
REQUIRE(cv.cluster_size_x() == 2);
REQUIRE(cv.cluster_size_y() == 2);
// int16_t, int16_t, 2x2 int32_t = 20 bytes
REQUIRE(cv.element_offset() == 20);
//Create a cluster and push back into the vector
Cluster_i2x2 c1 = {1, 2, {3, 4, 5, 6}};
cv.push_back(c1.x, c1.y, reinterpret_cast<std::byte*>(&c1.data[0]));
REQUIRE(cv.size() == 1);
REQUIRE(cv.capacity() == 4);
//Read the cluster back out using copy. TODO! Can we improve the API?
Cluster_i2x2 c2;
std::byte *ptr = cv.element_ptr(0);
std::copy(ptr, ptr + cv.element_offset(), reinterpret_cast<std::byte*>(&c2));
//Check that the data is the same
REQUIRE(c1.x == c2.x);
REQUIRE(c1.y == c2.y);
for(size_t i = 0; i < 4; i++) {
REQUIRE(c1.data[i] == c2.data[i]);
}
}
TEST_CASE("Summing 3x1 clusters of int64"){
struct Cluster_l3x1{
int16_t x;
int16_t y;
int32_t data[3];
};
ClusterVector<int32_t> cv(3, 1, 2);
REQUIRE(cv.capacity() == 2);
REQUIRE(cv.size() == 0);
REQUIRE(cv.cluster_size_x() == 3);
REQUIRE(cv.cluster_size_y() == 1);
//Create a cluster and push back into the vector
Cluster_l3x1 c1 = {1, 2, {3, 4, 5}};
cv.push_back(c1.x, c1.y, reinterpret_cast<std::byte*>(&c1.data[0]));
REQUIRE(cv.capacity() == 2);
REQUIRE(cv.size() == 1);
Cluster_l3x1 c2 = {6, 7, {8, 9, 10}};
cv.push_back(c2.x, c2.y, reinterpret_cast<std::byte*>(&c2.data[0]));
REQUIRE(cv.capacity() == 2);
REQUIRE(cv.size() == 2);
Cluster_l3x1 c3 = {11, 12, {13, 14, 15}};
cv.push_back(c3.x, c3.y, reinterpret_cast<std::byte*>(&c3.data[0]));
REQUIRE(cv.capacity() == 4);
REQUIRE(cv.size() == 3);
auto sums = cv.sum();
REQUIRE(sums.size() == 3);
REQUIRE(sums[0] == 12);
REQUIRE(sums[1] == 27);
REQUIRE(sums[2] == 42);
}
TEST_CASE("Storing floats"){
struct Cluster_f4x2{
int16_t x;
int16_t y;
float data[8];
};
ClusterVector<float> cv(2, 4, 2);
REQUIRE(cv.capacity() == 2);
REQUIRE(cv.size() == 0);
REQUIRE(cv.cluster_size_x() == 2);
REQUIRE(cv.cluster_size_y() == 4);
//Create a cluster and push back into the vector
Cluster_f4x2 c1 = {1, 2, {3.0, 4.0, 5.0, 6.0,3.0, 4.0, 5.0, 6.0}};
cv.push_back(c1.x, c1.y, reinterpret_cast<std::byte*>(&c1.data[0]));
REQUIRE(cv.capacity() == 2);
REQUIRE(cv.size() == 1);
Cluster_f4x2 c2 = {6, 7, {8.0, 9.0, 10.0, 11.0,8.0, 9.0, 10.0, 11.0}};
cv.push_back(c2.x, c2.y, reinterpret_cast<std::byte*>(&c2.data[0]));
REQUIRE(cv.capacity() == 2);
REQUIRE(cv.size() == 2);
auto sums = cv.sum();
REQUIRE(sums.size() == 2);
REQUIRE_THAT(sums[0], Catch::Matchers::WithinAbs(36.0, 1e-6));
REQUIRE_THAT(sums[1], Catch::Matchers::WithinAbs(76.0, 1e-6));
}

View File

@ -58,6 +58,8 @@ void File::read_into(std::byte *image_buf) { file_impl->read_into(image_buf); }
void File::read_into(std::byte *image_buf, size_t n_frames) {
file_impl->read_into(image_buf, n_frames);
}
size_t File::frame_number() { return file_impl->frame_number(tell()); }
size_t File::frame_number(size_t frame_index) {
return file_impl->frame_number(frame_index);
}

View File

@ -19,7 +19,7 @@ TEST_CASE("Construct a frame") {
// data should be initialized to 0
for (size_t i = 0; i < rows; i++) {
for (size_t j = 0; j < cols; j++) {
uint8_t *data = (uint8_t *)frame.pixel_ptr(i, j);
uint8_t *data = reinterpret_cast<uint8_t *>(frame.pixel_ptr(i, j));
REQUIRE(data != nullptr);
REQUIRE(*data == 0);
}
@ -40,7 +40,7 @@ TEST_CASE("Set a value in a 8 bit frame") {
// only the value we did set should be non-zero
for (size_t i = 0; i < rows; i++) {
for (size_t j = 0; j < cols; j++) {
uint8_t *data = (uint8_t *)frame.pixel_ptr(i, j);
uint8_t *data = reinterpret_cast<uint8_t *>(frame.pixel_ptr(i, j));
REQUIRE(data != nullptr);
if (i == 5 && j == 7) {
REQUIRE(*data == value);
@ -65,7 +65,7 @@ TEST_CASE("Set a value in a 64 bit frame") {
// only the value we did set should be non-zero
for (size_t i = 0; i < rows; i++) {
for (size_t j = 0; j < cols; j++) {
uint64_t *data = (uint64_t *)frame.pixel_ptr(i, j);
uint64_t *data = reinterpret_cast<uint64_t *>(frame.pixel_ptr(i, j));
REQUIRE(data != nullptr);
if (i == 5 && j == 7) {
REQUIRE(*data == value);
@ -149,4 +149,5 @@ TEST_CASE("test explicit copy constructor") {
REQUIRE(frame2.bitdepth() == bitdepth);
REQUIRE(frame2.bytes() == rows * cols * bitdepth / 8);
REQUIRE(frame2.data() != data);
}
}

View File

@ -17,8 +17,8 @@ endif()
list(APPEND CMAKE_MODULE_PATH ${Catch2_SOURCE_DIR}/extras)
add_executable(tests test.cpp)
target_link_libraries(tests PRIVATE Catch2::Catch2WithMain)
target_link_libraries(tests PRIVATE Catch2::Catch2WithMain aare_core aare_compiler_flags)
# target_compile_options(tests PRIVATE -fno-omit-frame-pointer -fsanitize=address)
set_target_properties(tests PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
OUTPUT_NAME run_tests
@ -34,7 +34,7 @@ set(TestSources
target_sources(tests PRIVATE ${TestSources} )
#Work around to remove, this is not the way to do it =)
target_link_libraries(tests PRIVATE aare_core aare_compiler_flags)
# target_link_libraries(tests PRIVATE aare_core aare_compiler_flags)
#configure a header to pass test file paths

View File

@ -3,6 +3,7 @@
#include <climits>
#include <filesystem>
#include <fstream>
#include <fmt/core.h>
TEST_CASE("Test suite can find data assets", "[.integration]") {
auto fpath = test_data_path() / "numpy" / "test_numpy_file.npy";
@ -18,4 +19,20 @@ TEST_CASE("Test suite can open data assets", "[.integration]") {
TEST_CASE("Test float32 and char8") {
REQUIRE(sizeof(float) == 4);
REQUIRE(CHAR_BIT == 8);
}
}
/**
* Uncomment the following tests to verify that asan is working
*/
// TEST_CASE("trigger asan stack"){
// int arr[5] = {1,2,3,4,5};
// int val = arr[7];
// fmt::print("val: {}\n", val);
// }
// TEST_CASE("trigger asan heap"){
// auto *ptr = new int[5];
// ptr[70] = 5;
// fmt::print("ptr: {}\n", ptr[70]);
// }