Added fitting with lmfit (#128)

- added stand alone fitting using: https://jugit.fz-juelich.de/mlz/lmfit.git - fit_gaus, fit_pol1 with and without errors - multi threaded fitting --------- Co-authored-by: JulianHeymes <julian.heymes@psi.ch>
2025-07-12 20:01:49 +02:00 · 2025-02-12 16:35:48 +01:00
parent c0c5e07ad8
commit 7309cff47c
18 changed files with 893 additions and 160 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -48,6 +48,7 @@ option(AARE_FETCH_PYBIND11 "Use FetchContent to download pybind11" ON)
 option(AARE_FETCH_CATCH "Use FetchContent to download catch2" ON)
 option(AARE_FETCH_JSON "Use FetchContent to download nlohmann::json" ON)
 option(AARE_FETCH_ZMQ "Use FetchContent to download libzmq" ON)
 option(AARE_FETCH_LMFIT "Use FetchContent to download lmfit" ON)
 #Convenience option to use system libraries only (no FetchContent)
@ -76,6 +77,34 @@ endif()
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 if(AARE_FETCH_LMFIT)
    set(lmfit_patch git apply ${CMAKE_CURRENT_SOURCE_DIR}/patches/lmfit.patch)
    FetchContent_Declare(
        lmfit
        GIT_REPOSITORY https://jugit.fz-juelich.de/mlz/lmfit.git
        GIT_TAG        main
        PATCH_COMMAND ${lmfit_patch}
        UPDATE_DISCONNECTED 1
        EXCLUDE_FROM_ALL
    )
    #Disable what we don't need from lmfit
    set(BUILD_TESTING OFF CACHE BOOL "")
    set(LMFIT_CPPTEST OFF CACHE BOOL "")
    set(LIB_MAN OFF CACHE BOOL "")
    set(LMFIT_CPPTEST OFF CACHE BOOL "")
    set(BUILD_SHARED_LIBS OFF CACHE BOOL "")
    FetchContent_MakeAvailable(lmfit)
    set_property(TARGET lmfit PROPERTY POSITION_INDEPENDENT_CODE ON)
    target_include_directories (lmfit PUBLIC "${libzmq_SOURCE_DIR}/lib")
    message(STATUS "lmfit include dir: ${lmfit_SOURCE_DIR}/lib")
 else()
    find_package(lmfit REQUIRED)
 endif()
 if(AARE_FETCH_ZMQ)
    # Fetchcontent_Declare is deprecated need to find a way to update this
    # for now setting the policy to old is enough
@ -146,7 +175,6 @@ if (AARE_FETCH_JSON)
    install(
        TARGETS nlohmann_json
        EXPORT "${TARGETS_EXPORT_NAME}"
    )
    message(STATUS "target: ${NLOHMANN_JSON_TARGET_NAME}")
 else()
@ -287,6 +315,7 @@ set(PUBLICHEADERS
    include/aare/defs.hpp
    include/aare/Dtype.hpp
    include/aare/File.hpp
    include/aare/Fit.hpp
    include/aare/FileInterface.hpp
    include/aare/Frame.hpp
    include/aare/geo_helpers.hpp
@ -300,6 +329,7 @@ set(PUBLICHEADERS
    include/aare/RawMasterFile.hpp
    include/aare/RawSubFile.hpp
    include/aare/VarClusterFinder.hpp
    include/aare/utils/task.hpp
 )
@ -312,6 +342,7 @@ set(SourceFiles
    ${CMAKE_CURRENT_SOURCE_DIR}/src/decode.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/Frame.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/File.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/Fit.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/geo_helpers.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.cpp
@ -319,6 +350,7 @@ set(SourceFiles
    ${CMAKE_CURRENT_SOURCE_DIR}/src/RawFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/RawSubFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/RawMasterFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/task.cpp
 )
@ -338,6 +370,7 @@ target_link_libraries(
    ${STD_FS_LIB} # from helpers.cmake
    PRIVATE 
    aare_compiler_flags 
    lmfit
 )
 set_target_properties(aare_core PROPERTIES
@ -364,6 +397,7 @@ if(AARE_TESTS)
            ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyFile.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/NumpyHelpers.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/RawFile.test.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/task.test.cpp
        )
    target_sources(tests PRIVATE ${TestSources} )
--- a/docs/src/index.rst
+++ b/docs/src/index.rst
@ -35,6 +35,8 @@ AARE
    pyRawMasterFile
    pyVarClusterFinder
    pyFit
 .. toctree::
    :caption: C++ API
--- a/docs/src/pyFit.rst
+++ b/docs/src/pyFit.rst
@ -0,0 +1,19 @@
 Fit
 ========
 .. py:currentmodule:: aare
 **Functions**
 .. autofunction:: gaus
 .. autofunction:: pol1
 **Fitting**
 .. autofunction:: fit_gaus
 .. autofunction:: fit_pol1
--- a/include/aare/Fit.hpp
+++ b/include/aare/Fit.hpp
@ -0,0 +1,76 @@
 #pragma once
 #include <cmath>
 #include <fmt/core.h>
 #include <vector>
 #include "aare/NDArray.hpp"
 namespace aare {
 namespace func {
 double gaus(const double x, const double *par);
 NDArray<double, 1> gaus(NDView<double, 1> x, NDView<double, 1> par);
 double pol1(const double x, const double *par);
 NDArray<double, 1> pol1(NDView<double, 1> x, NDView<double, 1> par);
 } // namespace func
 static constexpr int DEFAULT_NUM_THREADS = 4;
 /**
 * @brief Fit a 1D Gaussian to data.
 * @param data data to fit
 * @param x x values
 */
 NDArray<double, 1> fit_gaus(NDView<double, 1> x, NDView<double, 1> y);
 /**
 * @brief Fit a 1D Gaussian to each pixel. Data layout [row, col, values]
 * @param x x values
 * @param y y vales, layout [row, col, values]
 * @param n_threads number of threads to use
 */
 NDArray<double, 3> fit_gaus(NDView<double, 1> x, NDView<double, 3> y, int n_threads = DEFAULT_NUM_THREADS);
 /**
 * @brief Fit a 1D Gaussian with error estimates
 * @param x x values
 * @param y y vales, layout [row, col, values]
 * @param y_err error in y, layout [row, col, values]
 * @param par_out output parameters
 * @param par_err_out output error parameters
 */
 void fit_gaus(NDView<double, 1> x, NDView<double, 1> y, NDView<double, 1> y_err,
              NDView<double, 1> par_out, NDView<double, 1> par_err_out);
 /**
 * @brief Fit a 1D Gaussian to each pixel with error estimates. Data layout [row, col, values]
 * @param x x values
 * @param y y vales, layout [row, col, values]
 * @param y_err error in y, layout [row, col, values]
 * @param par_out output parameters, layout [row, col, values]
 * @param par_err_out output parameter errors, layout [row, col, values]
 * @param n_threads number of threads to use
 */
 void fit_gaus(NDView<double, 1> x, NDView<double, 3> y, NDView<double, 3> y_err,
              NDView<double, 3> par_out, NDView<double, 3> par_err_out, int n_threads = DEFAULT_NUM_THREADS);
 NDArray<double, 1> fit_pol1(NDView<double, 1> x, NDView<double, 1> y);
 NDArray<double, 3> fit_pol1(NDView<double, 1> x, NDView<double, 3> y, int n_threads = DEFAULT_NUM_THREADS);
 void fit_pol1(NDView<double, 1> x, NDView<double, 1> y,
                NDView<double, 1> y_err, NDView<double, 1> par_out,
                NDView<double, 1> par_err_out);
 //TODO! not sure we need to offer the different version in C++
 void fit_pol1(NDView<double, 1> x, NDView<double, 3> y,
                NDView<double, 3> y_err, NDView<double, 3> par_out,
                NDView<double, 3> par_err_out, int n_threads = DEFAULT_NUM_THREADS);
 } // namespace aare
--- a/include/aare/utils/task.hpp
+++ b/include/aare/utils/task.hpp
@ -0,0 +1,8 @@
 #include <utility>
 #include <vector>
 namespace aare {
 std::vector<std::pair<int, int>> split_task(int first, int last, int n_threads);
 } // namespace aare
--- a/patches/lmfit.patch
+++ b/patches/lmfit.patch
@ -0,0 +1,13 @@
 diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
 index 4efb7ed..6533660 100644
 --- a/lib/CMakeLists.txt
 +++ b/lib/CMakeLists.txt
@@ -11,7 +11,7 @@ target_compile_definitions(${lib} PRIVATE "LMFIT_EXPORT") # for Windows DLL expo
 target_include_directories(${lib}
     PUBLIC
 -    $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/>
 +    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/>
     $<INSTALL_INTERFACE:include/>
     )
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@ -28,6 +28,7 @@ target_link_libraries(_aare PRIVATE aare_core aare_compiler_flags)
 set( PYTHON_FILES
    aare/__init__.py
    aare/CtbRawFile.py
    aare/func.py
    aare/RawFile.py
    aare/transform.py
    aare/ScanParameters.py
@ -43,10 +44,17 @@ set_target_properties(_aare PROPERTIES
    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/aare
 )
 set(PYTHON_EXAMPLES
    examples/play.py
    examples/fits.py
 )
-# Copy the examples/scripts to the build directory
+
-configure_file(examples/play.py ${CMAKE_BINARY_DIR}/play.py)
+# Copy the python examples to the build directory
 foreach(FILE ${PYTHON_EXAMPLES})
    configure_file(${FILE} ${CMAKE_BINARY_DIR}/${FILE}  )
 endforeach(FILE ${PYTHON_EXAMPLES})
 if(AARE_INSTALL_PYTHONEXT)
--- a/python/aare/init.py
+++ b/python/aare/init.py
@ -10,8 +10,14 @@ from ._aare import hitmap
 from ._aare import ClusterFinderMT, ClusterCollector, ClusterFileSink, ClusterVector_i
 from ._aare import fit_gaus, fit_pol1
 from .CtbRawFile import CtbRawFile
 from .RawFile import RawFile
 from .ScanParameters import ScanParameters
-from .utils import random_pixels, random_pixel
+from .utils import random_pixels, random_pixel, flat_list
 #make functions available in the top level API
 from .func import *
--- a/python/aare/func.py
+++ b/python/aare/func.py
@ -0,0 +1 @@
 from ._aare import gaus, pol1
--- a/python/aare/utils.py
+++ b/python/aare/utils.py
@ -21,3 +21,7 @@ def random_pixel(xmin=0, xmax=512, ymin=0, ymax=1024):
        tuple: (row, col)
    """
    return random_pixels(1, xmin, xmax, ymin, ymax)[0]
 def flat_list(xss):
    """Flatten a list of lists."""
    return [x for xs in xss for x in xs]
--- a/python/examples/fits.py
+++ b/python/examples/fits.py
@ -0,0 +1,79 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from aare import fit_gaus, fit_pol1
 from aare import gaus, pol1
 textpm = f"±"  #
 textmu = f"μ"  #
 textsigma = f"σ"  #
 # ================================= Gauss fit =================================
 # Parameters
 mu = np.random.uniform(1, 100)  # Mean of Gaussian
 sigma = np.random.uniform(4, 20)  # Standard deviation
 num_points = 10000  # Number of points for smooth distribution
 noise_sigma = 100
 # Generate Gaussian distribution
 data = np.random.normal(mu, sigma, num_points)
 # Generate errors for each point
 errors = np.abs(np.random.normal(0, sigma, num_points))  # Errors with mean 0, std 0.5
 # Create subplot
 fig0, ax0 = plt.subplots(1, 1, num=0, figsize=(12, 8))
 x = np.histogram(data, bins=30)[1][:-1] + 0.05
 y = np.histogram(data, bins=30)[0]
 yerr = errors[:30]
 # Add the errors as error bars in the step plot
 ax0.errorbar(x, y, yerr=yerr, fmt=". ", capsize=5)
 ax0.grid()
 par, err = fit_gaus(x, y, yerr)
 print(par, err)
 x = np.linspace(x[0], x[-1], 1000)
 ax0.plot(x, gaus(x, par), marker="")
 ax0.set(xlabel="x", ylabel="Counts", title=f"A0 = {par[0]:0.2f}{textpm}{err[0]:0.2f}\n"
                                           f"{textmu} = {par[1]:0.2f}{textpm}{err[1]:0.2f}\n"
                                           f"{textsigma} = {par[2]:0.2f}{textpm}{err[2]:0.2f}\n"
                                           f"(init: {textmu}: {mu:0.2f}, {textsigma}: {sigma:0.2f})")
 fig0.tight_layout()
 # ================================= pol1 fit =================================
 # Parameters
 n_points = 40
 # Generate random slope and intercept (origin)
 slope = np.random.uniform(-10, 10)  # Random slope between 0.5 and 2.0
 intercept = np.random.uniform(-10, 10)  # Random intercept between -10 and 10
 # Generate random x values
 x_values = np.random.uniform(-10, 10, n_points)
 # Calculate y values based on the linear function y = mx + b + error
 errors = np.abs(np.random.normal(0, np.random.uniform(1, 5), n_points))
 var_points = np.random.normal(0, np.random.uniform(0.1, 2), n_points)
 y_values = slope * x_values + intercept + var_points
 fig1, ax1 = plt.subplots(1, 1, num=1, figsize=(12, 8))
 ax1.errorbar(x_values, y_values, yerr=errors, fmt=". ", capsize=5)
 par, err = fit_pol1(x_values, y_values, errors)
 x = np.linspace(np.min(x_values), np.max(x_values), 1000)
 ax1.plot(x, pol1(x, par), marker="")
 ax1.set(xlabel="x", ylabel="y", title=f"a = {par[0]:0.2f}{textpm}{err[0]:0.2f}\n"
                                      f"b = {par[1]:0.2f}{textpm}{err[1]:0.2f}\n"
                                      f"(init: {slope:0.2f}, {intercept:0.2f})")
 fig1.tight_layout()
 plt.show()
--- a/python/examples/play.py
+++ b/python/examples/play.py
@ -8,6 +8,28 @@ import numpy as np
 import boost_histogram as bh
 import time
 <<<<<<< HEAD
 from aare import File, ClusterFinder, VarClusterFinder, ClusterFile, CtbRawFile
 from aare import gaus, fit_gaus
 base = Path('/mnt/sls_det_storage/moench_data/Julian/MOENCH05/20250113_first_xrays_redo/raw_files/')
 cluster_file = Path('/home/l_msdetect/erik/tmp/Cu.clust')
 t0 = time.perf_counter()
 offset= -0.5
 hist3d = bh.Histogram(
    bh.axis.Regular(160, 0+offset, 160+offset),  #x
    bh.axis.Regular(150, 0+offset, 150+offset),  #y
    bh.axis.Regular(200, 0, 6000), #ADU
 )
 total_clusters = 0
 with ClusterFile(cluster_file, chunk_size = 1000) as f:
    for i, clusters in enumerate(f):
        arr = np.array(clusters)
        total_clusters += clusters.size
        hist3d.fill(arr['y'],arr['x'], clusters.sum_2x2()) #python talks [row, col] cluster finder [x,y]
 =======
 from aare import RawFile
 f = RawFile('/mnt/sls_det_storage/jungfrau_data1/vadym_tests/jf12_M431/laser_scan/laserScan_pedestal_G0_master_0.json')
@ -17,104 +39,30 @@ print(f'{f.frame_number(1)}')
 for i in range(10):
    header, img = f.read_frame()
    print(header['frameNumber'], img.shape)
-
+>>>>>>> developer
 # for i, frame in enumerate(f):
 #     print(f'{i}', end='\r')
 # print()
-# from aare._aare import ClusterFinderMT, ClusterCollector, ClusterFileSink
+t_elapsed = time.perf_counter()-t0
 print(f'Histogram filling took: {t_elapsed:.3f}s {total_clusters/t_elapsed/1e6:.3f}M clusters/s')
 histogram_data = hist3d.counts()
 x = hist3d.axes[2].edges[:-1]
-# cf = ClusterFinderMT((400,400), (3,3), n_threads = 3)
+y = histogram_data[100,100,:]
-# # collector = ClusterCollector(cf)
+xx = np.linspace(x[0], x[-1])
-# out_file = ClusterFileSink(cf, "test.clust")
+# fig, ax = plt.subplots()
 # ax.step(x, y, where = 'post')
-# for i in range(1000):
+y_err = np.sqrt(y)
-#     img = f.read_frame()
+y_err = np.zeros(y.size)
-#     cf.push_pedestal_frame(img)
+y_err += 1
 # print('Pedestal done')
 # cf.sync()
-# for i in range(100):
+# par = fit_gaus2(y,x, y_err)
-#     img = f.read_frame()
+# ax.plot(xx, gaus(xx,par))
-#     cf.find_clusters(img)
+# print(par)
 res = fit_gaus(y,x)
 res2 = fit_gaus(y,x, y_err)
 print(res)
 print(res2)
 # # time.sleep(1)
 # cf.stop()  
 # time.sleep(1)
 # print('Second run')
 # cf.start()
 # for i in range(100):
 #     img = f.read_frame()
 #     cf.find_clusters(img)
 # cf.stop()
 # print('Third run')
 # cf.start()
 # for i in range(129):
 #     img = f.read_frame()
 #     cf.find_clusters(img)
 # cf.stop()
 # out_file.stop()
 # print('Done')
 # cfile = ClusterFile("test.clust")
 # i = 0
 # while True:
 #     try:
 #         cv = cfile.read_frame()
 #         i+=1
 #     except RuntimeError:
 #         break
 # print(f'Read {i} frames') 
 # # cf = ClusterFinder((400,400), (3,3))
 # # for i in range(1000):
 # #     cf.push_pedestal_frame(f.read_frame())
 # # fig, ax = plt.subplots()
 # # im = ax.imshow(cf.pedestal())
 # # cf.pedestal()
 # # cf.noise()
 # # N = 500
 # # t0 = time.perf_counter()
 # # hist1 = bh.Histogram(bh.axis.Regular(40, -2, 4000))
 # # f.seek(0)
 # # t0 = time.perf_counter()
 # # data = f.read_n(N)
 # # t_elapsed = time.perf_counter()-t0
 # # n_bytes = data.itemsize*data.size
 # # print(f'Reading {N} frames took {t_elapsed:.3f}s {N/t_elapsed:.0f} FPS, {n_bytes/1024**2:.4f} GB/s')
 # # for frame in data:
 # #     a = cf.find_clusters(frame)
 # # clusters = cf.steal_clusters()
 # # t_elapsed = time.perf_counter()-t0
 # # print(f'Clustering {N} frames took {t_elapsed:.2f}s  {N/t_elapsed:.0f} FPS')
 # # t0 = time.perf_counter()
 # # total_clusters = clusters.size
 # # hist1.fill(clusters.sum())
 # # t_elapsed = time.perf_counter()-t0
 # # print(f'Filling histogram with the sum of {total_clusters} clusters took: {t_elapsed:.3f}s, {total_clusters/t_elapsed:.3g} clust/s')
 # # print(f'Average number of clusters per frame {total_clusters/N:.3f}')
--- a/python/src/fit.hpp
+++ b/python/src/fit.hpp
@ -0,0 +1,223 @@
 #include <cstdint>
 #include <filesystem>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <pybind11/stl_bind.h>
 #include "aare/Fit.hpp"
 namespace py = pybind11;
 void define_fit_bindings(py::module &m) {
    // TODO! Evaluate without converting to double
    m.def(
        "gaus",
        [](py::array_t<double, py::array::c_style | py::array::forcecast> x,
           py::array_t<double, py::array::c_style | py::array::forcecast> par) {
            auto x_view = make_view_1d(x);
            auto par_view = make_view_1d(par);
            auto y = new NDArray<double, 1>{aare::func::gaus(x_view, par_view)};
            return return_image_data(y);
        },
        R"(
        Evaluate a 1D Gaussian function for all points in x using parameters par.
        Parameters
        ----------
        x : array_like
            The points at which to evaluate the Gaussian function.
        par : array_like
            The parameters of the Gaussian function. The first element is the amplitude, the second element is the mean, and the third element is the standard deviation.
        )", py::arg("x"), py::arg("par"));
    m.def(
        "pol1",
        [](py::array_t<double, py::array::c_style | py::array::forcecast> x,
           py::array_t<double, py::array::c_style | py::array::forcecast> par) {
            auto x_view = make_view_1d(x);
            auto par_view = make_view_1d(par);
            auto y = new NDArray<double, 1>{aare::func::pol1(x_view, par_view)};
            return return_image_data(y);
        },
        R"(
        Evaluate a 1D polynomial function for all points in x using parameters par. (p0+p1*x)
        Parameters
        ----------
        x : array_like
            The points at which to evaluate the polynomial function.
        par : array_like
            The parameters of the polynomial function. The first element is the intercept, and the second element is the slope.    
        )", py::arg("x"), py::arg("par"));
    m.def(
        "fit_gaus",
        [](py::array_t<double, py::array::c_style | py::array::forcecast> x,
           py::array_t<double, py::array::c_style | py::array::forcecast> y,
           int n_threads) {
            if (y.ndim() == 3) {
                auto par = new NDArray<double, 3>{};
                auto y_view = make_view_3d(y);
                auto x_view = make_view_1d(x);
                *par = aare::fit_gaus(x_view, y_view, n_threads);
                return return_image_data(par);
            } else if (y.ndim() == 1) {
                auto par = new NDArray<double, 1>{};
                auto y_view = make_view_1d(y);
                auto x_view = make_view_1d(x);
                *par = aare::fit_gaus(x_view, y_view);
                return return_image_data(par);
            } else {
                throw std::runtime_error("Data must be 1D or 3D");
            }
        },
 R"(
 Fit a 1D Gaussian to data.
 Parameters
 ----------
 x : array_like
    The x values.
 y : array_like
    The y values.
 n_threads : int, optional
    The number of threads to use. Default is 4.
 )",
        py::arg("x"), py::arg("y"), py::arg("n_threads") = 4);
    m.def(
        "fit_gaus",
        [](py::array_t<double, py::array::c_style | py::array::forcecast> x,
           py::array_t<double, py::array::c_style | py::array::forcecast> y,
           py::array_t<double, py::array::c_style | py::array::forcecast>
               y_err, int n_threads) {
            if (y.ndim() == 3) {
                // Allocate memory for the output
                // Need to have pointers to allow python to manage
                // the memory
                auto par = new NDArray<double, 3>({y.shape(0), y.shape(1), 3});
                auto par_err =
                    new NDArray<double, 3>({y.shape(0), y.shape(1), 3});
                auto y_view = make_view_3d(y);
                auto y_view_err = make_view_3d(y_err);
                auto x_view = make_view_1d(x);
                aare::fit_gaus(x_view, y_view, y_view_err, par->view(),
                               par_err->view(), n_threads);
                // return return_image_data(par);
                return py::make_tuple(return_image_data(par),
                                      return_image_data(par_err));
            } else if (y.ndim() == 1) {
                // Allocate memory for the output
                // Need to have pointers to allow python to manage
                // the memory
                auto par = new NDArray<double, 1>({3});
                auto par_err = new NDArray<double, 1>({3});
                // Decode the numpy arrays
                auto y_view = make_view_1d(y);
                auto y_view_err = make_view_1d(y_err);
                auto x_view = make_view_1d(x);
                aare::fit_gaus(x_view, y_view, y_view_err, par->view(),
                               par_err->view());
                return py::make_tuple(return_image_data(par),
                                      return_image_data(par_err));
            } else {
                throw std::runtime_error("Data must be 1D or 3D");
            }
        },
 R"(
 Fit a 1D Gaussian to data with error estimates.
 Parameters
 ----------
 x : array_like
    The x values.
 y : array_like
    The y values.
 y_err : array_like
    The error in the y values.
 n_threads : int, optional
    The number of threads to use. Default is 4.
 )",
        py::arg("x"), py::arg("y"), py::arg("y_err"), py::arg("n_threads") = 4);
    m.def(
        "fit_pol1",
        [](py::array_t<double, py::array::c_style | py::array::forcecast> x,
           py::array_t<double, py::array::c_style | py::array::forcecast> y,
           int n_threads) {
            if (y.ndim() == 3) {
                auto par = new NDArray<double, 3>{};
                auto x_view = make_view_1d(x);
                auto y_view = make_view_3d(y);
                *par = aare::fit_pol1(x_view, y_view, n_threads);
                return return_image_data(par);
            } else if (y.ndim() == 1) {
                auto par = new NDArray<double, 1>{};
                auto x_view = make_view_1d(x);
                auto y_view = make_view_1d(y);
                *par = aare::fit_pol1(x_view, y_view);
                return return_image_data(par);
            } else {
                throw std::runtime_error("Data must be 1D or 3D");
            }
        },
        py::arg("x"), py::arg("y"), py::arg("n_threads") = 4);
    m.def(
        "fit_pol1",
        [](py::array_t<double, py::array::c_style | py::array::forcecast> x,
           py::array_t<double, py::array::c_style | py::array::forcecast> y,
           py::array_t<double, py::array::c_style | py::array::forcecast>
               y_err, int n_threads) {
            if (y.ndim() == 3) {
                auto par =
                    new NDArray<double, 3>({y.shape(0), y.shape(1), 2});
                auto par_err =
                    new NDArray<double, 3>({y.shape(0), y.shape(1), 2});
                auto y_view = make_view_3d(y);
                auto y_view_err = make_view_3d(y_err);
                auto x_view = make_view_1d(x);
                aare::fit_pol1(x_view, y_view,y_view_err, par->view(),
                               par_err->view(), n_threads);
                return py::make_tuple(return_image_data(par),
                                      return_image_data(par_err));
            } else if (y.ndim() == 1) {
                auto par = new NDArray<double, 1>({2});
                auto par_err = new NDArray<double, 1>({2});
                auto y_view = make_view_1d(y);
                auto y_view_err = make_view_1d(y_err);
                auto x_view = make_view_1d(x);
                aare::fit_pol1(x_view, y_view, y_view_err, par->view(),
                               par_err->view());
                return py::make_tuple(return_image_data(par),
                                      return_image_data(par_err));
            } else {
                throw std::runtime_error("Data must be 1D or 3D");
            }
        },
 R"(
 Fit a 1D polynomial to data with error estimates.
 Parameters
 ----------
 x : array_like
    The x values.
 y : array_like
    The y values.
 y_err : array_like
    The error in the y values.
 n_threads : int, optional
    The number of threads to use. Default is 4.
 )",
        py::arg("x"), py::arg("y"), py::arg("y_err"), py::arg("n_threads") = 4);
 }
--- a/python/src/module.cpp
+++ b/python/src/module.cpp
@ -8,6 +8,7 @@
 #include "pedestal.hpp"
 #include "cluster.hpp"
 #include "cluster_file.hpp"
 #include "fit.hpp"
 //Pybind stuff
 #include <pybind11/pybind11.h>
@ -29,5 +30,6 @@ PYBIND11_MODULE(_aare, m) {
    define_cluster_file_io_bindings(m);
    define_cluster_collector_bindings(m);
    define_cluster_file_sink_bindings(m);
    define_fit_bindings(m);
 }
--- a/python/src/np_helper.hpp
+++ b/python/src/np_helper.hpp
@ -39,65 +39,6 @@ template <typename T> py::array return_vector(std::vector<T> *vec) {
                          free_when_done); // numpy array references this parent
 }
 // template <typename Reader> py::array do_read(Reader &r, size_t n_frames) {
 //     py::array image;
 //     if (n_frames == 0)
 //         n_frames = r.total_frames();
 //     std::array<ssize_t, 3> shape{static_cast<ssize_t>(n_frames), r.rows(),
 //                                  r.cols()};
 //     const uint8_t item_size = r.bytes_per_pixel();
 //     if (item_size == 1) {
 //         image = py::array_t<uint8_t, py::array::c_style | py::array::forcecast>(
 //             shape);
 //     } else if (item_size == 2) {
 //         image =
 //             py::array_t<uint16_t, py::array::c_style | py::array::forcecast>(
 //                 shape);
 //     } else if (item_size == 4) {
 //         image =
 //             py::array_t<uint32_t, py::array::c_style | py::array::forcecast>(
 //                 shape);
 //     }
 //     r.read_into(reinterpret_cast<std::byte *>(image.mutable_data()), n_frames);
 //     return image;
 // }
 // py::array return_frame(pl::Frame *ptr) {
 //     py::capsule free_when_done(ptr, [](void *f) {
 //         pl::Frame *foo = reinterpret_cast<pl::Frame *>(f);
 //         delete foo;
 //     });
 //     const uint8_t item_size = ptr->bytes_per_pixel();
 //     std::vector<ssize_t> shape;
 //     for (auto val : ptr->shape())
 //         if (val > 1)
 //             shape.push_back(val);
 //     std::vector<ssize_t> strides;
 //     if (shape.size() == 1)
 //         strides.push_back(item_size);
 //     else if (shape.size() == 2) {
 //         strides.push_back(item_size * shape[1]);
 //         strides.push_back(item_size);
 //     }
 //     if (item_size == 1)
 //         return py::array_t<uint8_t>(
 //             shape, strides,
 //             reinterpret_cast<uint8_t *>(ptr->data()), free_when_done);
 //     else if (item_size == 2)
 //         return py::array_t<uint16_t>(shape, strides,
 //                                      reinterpret_cast<uint16_t *>(ptr->data()),
 //                                      free_when_done);
 //     else if (item_size == 4)
 //         return py::array_t<uint32_t>(shape, strides,
 //                                      reinterpret_cast<uint32_t *>(ptr->data()),
 //                                      free_when_done);
 //     return {};
 // }
 // todo rewrite generic
 template <class T, int Flags> auto get_shape_3d(py::array_t<T, Flags> arr) {
    return aare::Shape<3>{arr.shape(0), arr.shape(1), arr.shape(2)};
@ -111,6 +52,13 @@ template <class T, int Flags> auto get_shape_2d(py::array_t<T, Flags> arr) {
    return aare::Shape<2>{arr.shape(0), arr.shape(1)};
 }
 template <class T, int Flags> auto get_shape_1d(py::array_t<T, Flags> arr) {
    return aare::Shape<1>{arr.shape(0)};
 }
 template <class T, int Flags> auto make_view_2d(py::array_t<T, Flags> arr) {
    return aare::NDView<T, 2>(arr.mutable_data(), get_shape_2d<T, Flags>(arr));
 }
 template <class T, int Flags> auto make_view_1d(py::array_t<T, Flags> arr) {
    return aare::NDView<T, 1>(arr.mutable_data(), get_shape_1d<T, Flags>(arr));
 }
--- a/src/Fit.cpp
+++ b/src/Fit.cpp
@ -0,0 +1,300 @@
 #include "aare/Fit.hpp"
 #include "aare/utils/task.hpp"
 #include <lmcurve2.h>
 #include <lmfit.hpp>
 #include <thread>
 namespace aare {
 namespace func {
 double gaus(const double x, const double *par) {
    return par[0] * exp(-pow(x - par[1], 2) / (2 * pow(par[2], 2)));
 }
 NDArray<double, 1> gaus(NDView<double, 1> x, NDView<double, 1> par) {
    NDArray<double, 1> y({x.shape(0)}, 0);
    for (size_t i = 0; i < x.size(); i++) {
        y(i) = gaus(x(i), par.data());
    }
    return y;
 }
 double pol1(const double x, const double *par) { return par[0] * x + par[1]; }
 NDArray<double, 1> pol1(NDView<double, 1> x, NDView<double, 1> par) {
    NDArray<double, 1> y({x.shape()}, 0);
    for (size_t i = 0; i < x.size(); i++) {
        y(i) = pol1(x(i), par.data());
    }
    return y;
 }
 } // namespace func
 NDArray<double, 1> fit_gaus(NDView<double, 1> x, NDView<double, 1> y) {
    NDArray<double, 1> result({3}, 0);
    lm_control_struct control = lm_control_double;
    // Estimate the initial parameters for the fit
    std::vector<double> start_par{0, 0, 0};
    auto e = std::max_element(y.begin(), y.end());
    auto idx = std::distance(y.begin(), e);
    start_par[0] = *e; // For amplitude we use the maximum value
    start_par[1] =
        x[idx]; // For the mean we use the x value of the maximum value
    // For sigma we estimate the fwhm and divide by 2.35
    // assuming equally spaced x values
    auto delta = x[1] - x[0];
    start_par[2] =
        std::count_if(y.begin(), y.end(),
                      [e, delta](double val) { return val > *e / 2; }) *
        delta / 2.35;
    lmfit::result_t res(start_par);
    lmcurve(res.par.size(), res.par.data(), x.size(), x.data(), y.data(),
            aare::func::gaus, &control, &res.status);
    result(0) = res.par[0];
    result(1) = res.par[1];
    result(2) = res.par[2];
    return result;
 }
 NDArray<double, 3> fit_gaus(NDView<double, 1> x, NDView<double, 3> y,
                            int n_threads) {
    NDArray<double, 3> result({y.shape(0), y.shape(1), 3}, 0);
    auto process = [&x, &y, &result](ssize_t first_row, ssize_t last_row) {
        for (ssize_t row = first_row; row < last_row; row++) {
            for (ssize_t col = 0; col < y.shape(1); col++) {
                NDView<double, 1> values(&y(row, col, 0), {y.shape(2)});
                auto res = fit_gaus(x, values);
                result(row, col, 0) = res(0);
                result(row, col, 1) = res(1);
                result(row, col, 2) = res(2);
            }
        }
    };
    auto tasks = split_task(0, y.shape(0), n_threads);
    std::vector<std::thread> threads;
    for (auto &task : tasks) {
        threads.push_back(std::thread(process, task.first, task.second));
    }
    for (auto &thread : threads) {
        thread.join();
    }
    return result;
 }
 void fit_gaus(NDView<double, 1> x, NDView<double, 3> y, NDView<double, 3> y_err,
              NDView<double, 3> par_out, NDView<double, 3> par_err_out,
              int n_threads) {
    auto process = [&](ssize_t first_row, ssize_t last_row) {
        for (ssize_t row = first_row; row < last_row; row++) {
            for (ssize_t col = 0; col < y.shape(1); col++) {
                NDView<double, 1> y_view(&y(row, col, 0), {y.shape(2)});
                NDView<double, 1> y_err_view(&y_err(row, col, 0),
                                             {y_err.shape(2)});
                NDView<double, 1> par_out_view(&par_out(row, col, 0),
                                               {par_out.shape(2)});
                NDView<double, 1> par_err_out_view(&par_err_out(row, col, 0),
                                                   {par_err_out.shape(2)});
                fit_gaus(x, y_view, y_err_view, par_out_view, par_err_out_view);
            }
        }
    };
    auto tasks = split_task(0, y.shape(0), n_threads);
    std::vector<std::thread> threads;
    for (auto &task : tasks) {
        threads.push_back(std::thread(process, task.first, task.second));
    }
    for (auto &thread : threads) {
        thread.join();
    }
 }
 void fit_gaus(NDView<double, 1> x, NDView<double, 1> y, NDView<double, 1> y_err,
              NDView<double, 1> par_out, NDView<double, 1> par_err_out) {
    // Check that we have the correct sizes
    if (y.size() != x.size() || y.size() != y_err.size() ||
        par_out.size() != 3 || par_err_out.size() != 3) {
        throw std::runtime_error("Data, x, data_err must have the same size "
                                 "and par_out, par_err_out must have size 3");
    }
    lm_control_struct control = lm_control_double;
    // Estimate the initial parameters for the fit
    std::vector<double> start_par{0, 0, 0};
    std::vector<double> start_par_err{0, 0, 0};
    std::vector<double> start_cov{0, 0, 0, 0, 0, 0, 0, 0, 0};
    auto e = std::max_element(y.begin(), y.end());
    auto idx = std::distance(y.begin(), e);
    start_par[0] = *e; // For amplitude we use the maximum value
    start_par[1] =
        x[idx]; // For the mean we use the x value of the maximum value
    // For sigma we estimate the fwhm and divide by 2.35
    // assuming equally spaced x values
    auto delta = x[1] - x[0];
    start_par[2] =
        std::count_if(y.begin(), y.end(),
                      [e, delta](double val) { return val > *e / 2; }) *
        delta / 2.35;
    lmfit::result_t res(start_par);
    lmfit::result_t res_err(start_par_err);
    lmfit::result_t cov(start_cov);
    // TODO can we make lmcurve write the result directly where is should be?
    lmcurve2(res.par.size(), res.par.data(), res_err.par.data(), cov.par.data(),
             x.size(), x.data(), y.data(), y_err.data(), aare::func::gaus,
             &control, &res.status);
    par_out(0) = res.par[0];
    par_out(1) = res.par[1];
    par_out(2) = res.par[2];
    par_err_out(0) = res_err.par[0];
    par_err_out(1) = res_err.par[1];
    par_err_out(2) = res_err.par[2];
 }
 void fit_pol1(NDView<double, 1> x, NDView<double, 1> y, NDView<double, 1> y_err,
              NDView<double, 1> par_out, NDView<double, 1> par_err_out) {
    // Check that we have the correct sizes
    if (y.size() != x.size() || y.size() != y_err.size() ||
        par_out.size() != 2 || par_err_out.size() != 2) {
        throw std::runtime_error("Data, x, data_err must have the same size "
                                 "and par_out, par_err_out must have size 2");
    }
    lm_control_struct control = lm_control_double;
    // Estimate the initial parameters for the fit
    std::vector<double> start_par{0, 0};
    std::vector<double> start_par_err{0, 0};
    std::vector<double> start_cov{0, 0, 0, 0};
    auto y2 = std::max_element(y.begin(), y.end());
    auto x2 = x[std::distance(y.begin(), y2)];
    auto y1 = std::min_element(y.begin(), y.end());
    auto x1 = x[std::distance(y.begin(), y1)];
    start_par[0] =
        (*y2 - *y1) / (x2 - x1); // For amplitude we use the maximum value
    start_par[1] =
        *y1 - ((*y2 - *y1) / (x2 - x1)) *
                  x1; // For the mean we use the x value of the maximum value
    lmfit::result_t res(start_par);
    lmfit::result_t res_err(start_par_err);
    lmfit::result_t cov(start_cov);
    lmcurve2(res.par.size(), res.par.data(), res_err.par.data(), cov.par.data(),
             x.size(), x.data(), y.data(), y_err.data(), aare::func::pol1,
             &control, &res.status);
    par_out(0) = res.par[0];
    par_out(1) = res.par[1];
    par_err_out(0) = res_err.par[0];
    par_err_out(1) = res_err.par[1];
 }
 void fit_pol1(NDView<double, 1> x, NDView<double, 3> y, NDView<double, 3> y_err,
              NDView<double, 3> par_out, NDView<double, 3> par_err_out,
              int n_threads) {
    auto process = [&](ssize_t first_row, ssize_t last_row) {
        for (ssize_t row = first_row; row < last_row; row++) {
            for (ssize_t col = 0; col < y.shape(1); col++) {
                NDView<double, 1> y_view(&y(row, col, 0), {y.shape(2)});
                NDView<double, 1> y_err_view(&y_err(row, col, 0),
                                             {y_err.shape(2)});
                NDView<double, 1> par_out_view(&par_out(row, col, 0),
                                               {par_out.shape(2)});
                NDView<double, 1> par_err_out_view(&par_err_out(row, col, 0),
                                                   {par_err_out.shape(2)});
                fit_pol1(x, y_view, y_err_view, par_out_view, par_err_out_view);
            }
        }
    };
    auto tasks = split_task(0, y.shape(0), n_threads);
    std::vector<std::thread> threads;
    for (auto &task : tasks) {
        threads.push_back(std::thread(process, task.first, task.second));
    }
    for (auto &thread : threads) {
        thread.join();
    }
 }
 NDArray<double, 1> fit_pol1(NDView<double, 1> x, NDView<double, 1> y) {
    // // Check that we have the correct sizes
    // if (y.size() != x.size() || y.size() != y_err.size() ||
    // par_out.size() != 2 || par_err_out.size() != 2) {
    // throw std::runtime_error("Data, x, data_err must have the same size "
    //                      "and par_out, par_err_out must have size 2");
    // }
    NDArray<double, 1> par({2}, 0);
    lm_control_struct control = lm_control_double;
    // Estimate the initial parameters for the fit
    std::vector<double> start_par{0, 0};
    auto y2 = std::max_element(y.begin(), y.end());
    auto x2 = x[std::distance(y.begin(), y2)];
    auto y1 = std::min_element(y.begin(), y.end());
    auto x1 = x[std::distance(y.begin(), y1)];
    start_par[0] = (*y2 - *y1) / (x2 - x1);
    start_par[1] = *y1 - ((*y2 - *y1) / (x2 - x1)) * x1;
    lmfit::result_t res(start_par);
    lmcurve(res.par.size(), res.par.data(), x.size(), x.data(), y.data(),
            aare::func::pol1, &control, &res.status);
    par(0) = res.par[0];
    par(1) = res.par[1];
    return par;
 }
 NDArray<double, 3> fit_pol1(NDView<double, 1> x, NDView<double, 3> y,
                            int n_threads) {
    NDArray<double, 3> result({y.shape(0), y.shape(1), 2}, 0);
    auto process = [&](ssize_t first_row, ssize_t last_row) {
        for (ssize_t row = first_row; row < last_row; row++) {
            for (ssize_t col = 0; col < y.shape(1); col++) {
                NDView<double, 1> values(&y(row, col, 0), {y.shape(2)});
                auto res = fit_pol1(x, values);
                result(row, col, 0) = res(0);
                result(row, col, 1) = res(1);
            }
        }
    };
    auto tasks = split_task(0, y.shape(0), n_threads);
    std::vector<std::thread> threads;
    for (auto &task : tasks) {
        threads.push_back(std::thread(process, task.first, task.second));
    }
    for (auto &thread : threads) {
        thread.join();
    }
    return result;
 }
 } // namespace aare
--- a/src/utils/task.cpp
+++ b/src/utils/task.cpp
@ -0,0 +1,30 @@
 #include "aare/utils/task.hpp"
 namespace aare {
 std::vector<std::pair<int, int>> split_task(int first, int last,
                                            int n_threads) {
    std::vector<std::pair<int, int>> vec;
    vec.reserve(n_threads);
    int n_frames = last - first;
    if (n_threads >= n_frames) {
        for (int i = 0; i != n_frames; ++i) {
            vec.push_back({i, i + 1});
        }
        return vec;
    }
    int step = (n_frames) / n_threads;
    for (int i = 0; i != n_threads; ++i) {
        int start = step * i;
        int stop = step * (i + 1);
        if (i == n_threads - 1)
            stop = last;
        vec.push_back({start, stop});
    }
    return vec;
 }
 } // namespace aare
--- a/src/utils/task.test.cpp
+++ b/src/utils/task.test.cpp
@ -0,0 +1,32 @@
 #include "aare/utils/task.hpp"
 #include <catch2/matchers/catch_matchers_floating_point.hpp>
 #include <catch2/catch_test_macros.hpp>
 TEST_CASE("Split a range into multiple tasks"){    
    auto tasks = aare::split_task(0, 10, 3);
    REQUIRE(tasks.size() == 3);
    REQUIRE(tasks[0].first == 0);
    REQUIRE(tasks[0].second == 3);
    REQUIRE(tasks[1].first == 3);
    REQUIRE(tasks[1].second == 6);
    REQUIRE(tasks[2].first == 6);
    REQUIRE(tasks[2].second == 10);
    tasks = aare::split_task(0, 10, 1);
    REQUIRE(tasks.size() == 1);
    REQUIRE(tasks[0].first == 0);
    REQUIRE(tasks[0].second == 10);
    tasks = aare::split_task(0, 10, 10);
    REQUIRE(tasks.size() == 10);
    for (int i = 0; i < 10; i++){
        REQUIRE(tasks[i].first == i);
        REQUIRE(tasks[i].second == i+1);
    }
 }