Apply calibration to Jungfrau raw data (#216)

- Added function to read calibration file - Multi threaded pedestal subtraction and application of the calibration
2025-12-27 15:31:24 +01:00 · 2025-07-18 10:19:14 +02:00
parent 1414d75320
commit abae2674a9
12 changed files with 236 additions and 87 deletions
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -7,6 +7,7 @@ Features:

 - Cluster finder now works with 5x5, 7x7 and 9x9 clusters
 - Added ClusterVector::empty() member
+- Added apply_calibration function for Jungfrau data

 Bugfixes:
 - Fixed reading RawFiles with ROI fully excluding some sub files. 
--- a/docs/src/index.rst
+++ b/docs/src/index.rst
@@ -25,6 +25,7 @@ AARE
    :maxdepth: 1
    
    pyFile
+    pycalibration
    pyCtbRawFile
    pyClusterFile
    pyClusterVector
--- a/docs/src/pycalibration.rst
+++ b/docs/src/pycalibration.rst
@@ -0,0 +1,24 @@
+
+Calibration
+==============
+
+Functions for applying calibration to data.
+
+.. code-block:: python
+
+    import aare
+
+    # Load calibration data for a single JF module (512x1024 pixels)
+    calibration = aare.load_calibration('path/to/calibration/file.bin')
+
+    raw_data = ...  # Load your raw data here
+    pedestal = ...  # Load your pedestal data here
+
+    # Apply calibration to raw data to convert from raw ADC values to keV
+    data = aare.apply_calibration(raw_data, pd=pedestal, cal=calibration)
+
+.. py:currentmodule:: aare
+
+.. autofunction:: apply_calibration
+
+.. autofunction:: load_calibration
--- a/include/aare/calibration.hpp
+++ b/include/aare/calibration.hpp
@@ -0,0 +1,86 @@
+#pragma once
+
+#include "aare/defs.hpp"
+#include "aare/utils/task.hpp"
+#include <cstdint>
+#include <future>
+
+namespace aare {
+
+// Really try to convince the compile to inline this function
+// TODO! Clang?
+#if (defined(_MSC_VER) || defined(__INTEL_COMPILER))
+#define STRONG_INLINE __forceinline
+#else
+#define STRONG_INLINE inline
+#endif
+
+#if defined(__GNUC__)
+#define ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define ALWAYS_INLINE STRONG_INLINE
+#endif
+
+/**
+ * @brief Get the gain from the raw ADC value. In Jungfrau the gain is
+ * encoded in the left most 2 bits of the raw value.
+ * 00 -> gain 0
+ * 01 -> gain 1
+ * 11 -> gain 2
+ * @param raw the raw ADC value
+ * @return the gain as an integer
+ */
+ALWAYS_INLINE int get_gain(uint16_t raw) {
+    switch (raw >> 14) {
+    case 0:
+        return 0;
+    case 1:
+        return 1;
+    case 3:
+        return 2;
+    default:
+        return 0;
+    }
+}
+
+ALWAYS_INLINE uint16_t get_value(uint16_t raw) { return raw & ADC_MASK; }
+
+ALWAYS_INLINE std::pair<uint16_t, int16_t> get_value_and_gain(uint16_t raw) {
+    static_assert(
+        sizeof(std::pair<uint16_t, int16_t>) ==
+            sizeof(uint16_t) + sizeof(int16_t),
+        "Size of pair<uint16_t, int16_t> does not match expected size");
+    return {get_value(raw), get_gain(raw)};
+}
+
+template <class T>
+void apply_calibration_impl(NDView<T, 3> res, NDView<uint16_t, 3> raw_data,
+                       NDView<T, 3> ped, NDView<T, 3> cal, int start,
+                       int stop) {
+
+    for (int frame_nr = start; frame_nr != stop; ++frame_nr) {
+        for (int row = 0; row != raw_data.shape(1); ++row) {
+            for (int col = 0; col != raw_data.shape(2); ++col) {
+                auto [value, gain] = get_value_and_gain(raw_data(frame_nr, row, col));
+                res(frame_nr, row, col) =
+                    (value - ped(gain, row, col)) / cal(gain, row, col); //TODO! use multiplication
+            }
+        }
+    }
+}
+
+template <class T>
+void apply_calibration(NDView<T, 3> res, NDView<uint16_t, 3> raw_data,
+                       NDView<T, 3> ped, NDView<T, 3> cal,
+                       ssize_t n_threads = 4) {
+    std::vector<std::future<void>> futures;
+    futures.reserve(n_threads);
+    auto limits = split_task(0, raw_data.shape(0), n_threads);
+    for (const auto &lim : limits)
+        futures.push_back(std::async(&apply_calibration_impl<T>, res, raw_data, ped, cal,
+                                     lim.first, lim.second));
+    for (auto &f : futures)
+        f.get();
+}
+
+} // namespace aare
--- a/include/aare/defs.hpp
+++ b/include/aare/defs.hpp
@@ -231,4 +231,6 @@ template <> FrameDiscardPolicy StringTo(const std::string & /*mode*/);

 using DataTypeVariants = std::variant<uint16_t, uint32_t>;

+constexpr uint16_t ADC_MASK = 0x3FFF; // used to mask out the gain bits in Jungfrau
+
 } // namespace aare
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -32,6 +32,7 @@ set( PYTHON_FILES
    aare/ClusterFinder.py
    aare/ClusterVector.py

+    aare/calibration.py
    aare/func.py
    aare/RawFile.py
    aare/transform.py
--- a/python/aare/init.py
+++ b/python/aare/init.py
@@ -30,3 +30,8 @@ from .utils import random_pixels, random_pixel, flat_list, add_colorbar

 #make functions available in the top level API
 from .func import *
+
+from .calibration import *
+from ._aare import apply_calibration
+
+from ._aare import VarClusterFinder
--- a/python/aare/calibration.py
+++ b/python/aare/calibration.py
@@ -0,0 +1,21 @@
+#Calibration related functions
+import numpy as np
+def load_calibration(fname, hg0=False):
+    """
+    Load calibration data from a file.
+    
+    Parameters:
+    fname (str): Path to the calibration file.
+    hg0 (bool): If True, load HG0 calibration data instead of G0.
+
+    """
+    gains = 3
+    rows = 512
+    cols = 1024
+    with open(fname, 'rb') as f:
+        cal = np.fromfile(f, count=gains * rows * cols, dtype=np.double).reshape(
+            gains, rows, cols
+        )
+        if hg0:
+            cal[0] = np.fromfile(f, count=rows * cols, dtype=np.double).reshape(rows, cols)
+    return cal
--- a/python/examples/play.py
+++ b/python/examples/play.py
@@ -1,89 +1,8 @@
-import sys
-sys.path.append('/home/l_msdetect/erik/aare/build')

+from aare import apply_calibration
+import numpy as np
+raw = np.zeros((5,10,10), dtype=np.uint16)
+pedestal = np.zeros((3,10,10), dtype=np.float32)
+calibration = np.ones((3,10,10), dtype=np.float32)
+calibrated = apply_calibration(raw, pedestal, calibration,)

-from aare import RawSubFile, DetectorType, RawFile
-
-from pathlib import Path
-path = Path("/home/l_msdetect/erik/data/aare-test-data/raw/jungfrau/")
-f = RawSubFile(path/"jungfrau_single_d0_f0_0.raw", DetectorType.Jungfrau, 512, 1024, 16)
-
-# f = RawFile(path/"jungfrau_single_master_0.json")
-
-
-# from aare._aare import ClusterVector_i, Interpolator
-
-# import pickle 
-# import numpy as np
-# import matplotlib.pyplot as plt
-# import boost_histogram as bh
-# import torch
-# import math
-# import time
-
-
-
-# def gaussian_2d(mx, my, sigma = 1, res=100, grid_size = 2):
-#     """
-#     Generate a 2D gaussian as position mx, my, with sigma=sigma. 
-#     The gaussian is placed on a 2x2 pixel matrix with resolution 
-#     res in one dimesion.
-#     """
-#     x = torch.linspace(0, pixel_size*grid_size, res)
-#     x,y = torch.meshgrid(x,x, indexing="ij")
-#     return 1 / (2*math.pi*sigma**2) * \
-#       torch.exp(-((x - my)**2 / (2*sigma**2) + (y - mx)**2 / (2*sigma**2)))
-
-# scale = 1000 #Scale factor when converting to integer
-# pixel_size = 25 #um
-# grid = 2
-# resolution = 100
-# sigma_um = 10
-# xa = np.linspace(0,grid*pixel_size,resolution)
-# ticks = [0, 25, 50]
-
-# hit = np.array((20,20))
-# etahist_fname = "/home/l_msdetect/erik/tmp/test_hist.pkl"
-
-# local_resolution = 99
-# grid_size = 3
-# xaxis = np.linspace(0,grid_size*pixel_size, local_resolution)
-# t = gaussian_2d(hit[0],hit[1], grid_size = grid_size, sigma = 10, res = local_resolution)
-# pixels = t.reshape(grid_size, t.shape[0] // grid_size, grid_size, t.shape[1] // grid_size).sum(axis = 3).sum(axis = 1)
-# pixels = pixels.numpy()
-# pixels = (pixels*scale).astype(np.int32)
-# v = ClusterVector_i(3,3)
-# v.push_back(1,1, pixels)
-
-# with open(etahist_fname, "rb") as f:
-#         hist = pickle.load(f)
-# eta = hist.view().copy()
-# etabinsx = np.array(hist.axes.edges.T[0].flat)
-# etabinsy = np.array(hist.axes.edges.T[1].flat)
-# ebins = np.array(hist.axes.edges.T[2].flat)
-# p = Interpolator(eta, etabinsx[0:-1], etabinsy[0:-1], ebins[0:-1])
-
-
-
-
-# #Generate the hit
-
-
-
-
-# tmp = p.interpolate(v)
-# print(f'tmp:{tmp}')
-# pos = np.array((tmp['x'], tmp['y']))*25
-
-
-# print(pixels)
-# fig, ax = plt.subplots(figsize = (7,7))
-# ax.pcolormesh(xaxis, xaxis, t)
-# ax.plot(*pos, 'o')
-# ax.set_xticks([0,25,50,75])
-# ax.set_yticks([0,25,50,75])
-# ax.set_xlim(0,75)
-# ax.set_ylim(0,75)
-# ax.grid()
-# print(f'{hit=}')
-# print(f'{pos=}')
--- a/python/src/bind_calibration.hpp
+++ b/python/src/bind_calibration.hpp
@@ -0,0 +1,43 @@
+#include "aare/calibration.hpp"
+
+#include <cstdint>
+#include <filesystem>
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/stl_bind.h>
+
+namespace py = pybind11;
+
+template <typename DataType>
+py::array_t<DataType> pybind_apply_calibration(
+    py::array_t<uint16_t, py::array::c_style | py::array::forcecast> data,
+    py::array_t<DataType, py::array::c_style | py::array::forcecast> pedestal,
+    py::array_t<DataType, py::array::c_style | py::array::forcecast>
+        calibration,
+    int n_threads = 4) {
+
+    auto data_span = make_view_3d(data);
+    auto ped = make_view_3d(pedestal);
+    auto cal = make_view_3d(calibration);
+
+    /* No pointer is passed, so NumPy will allocate the buffer */
+    auto result = py::array_t<DataType>(data_span.shape());
+    auto res = make_view_3d(result);
+
+    aare::apply_calibration<DataType>(res, data_span, ped, cal, n_threads);
+
+    return result;
+}
+
+void bind_calibration(py::module &m) {
+    m.def("apply_calibration", &pybind_apply_calibration<float>,
+          py::arg("raw_data").noconvert(), py::kw_only(),
+          py::arg("pd").noconvert(), py::arg("cal").noconvert(),
+          py::arg("n_threads") = 4);
+
+    m.def("apply_calibration", &pybind_apply_calibration<double>,
+          py::arg("raw_data").noconvert(), py::kw_only(),
+          py::arg("pd").noconvert(), py::arg("cal").noconvert(),
+          py::arg("n_threads") = 4);
+}
--- a/python/src/module.cpp
+++ b/python/src/module.cpp
@@ -8,6 +8,7 @@
 #include "bind_ClusterFinder.hpp"
 #include "bind_ClusterFinderMT.hpp"
 #include "bind_ClusterVector.hpp"
+#include "bind_calibration.hpp"

 // TODO! migrate the other names
 #include "ctb_raw_file.hpp"
@@ -62,6 +63,8 @@ PYBIND11_MODULE(_aare, m) {
    define_interpolation_bindings(m);
    define_jungfrau_data_file_io_bindings(m);

+    bind_calibration(m);
+
    DEFINE_CLUSTER_BINDINGS(int, 3, 3, uint16_t, i);
    DEFINE_CLUSTER_BINDINGS(double, 3, 3, uint16_t, d);
    DEFINE_CLUSTER_BINDINGS(float, 3, 3, uint16_t, f);
--- a/python/tests/test_calibration.py
+++ b/python/tests/test_calibration.py
@@ -0,0 +1,43 @@
+import pytest 
+import numpy as np
+from aare import apply_calibration
+
+def test_apply_calibration_small_data():
+    # The raw data consists of 10 4x5 images
+    raw = np.zeros((10, 4, 5), dtype=np.uint16)
+
+    # We need a pedestal for each gain, so 3 
+    pedestal = np.zeros((3, 4, 5), dtype=np.float32)
+
+    # And the same for calibration
+    calibration = np.ones((3, 4, 5), dtype=np.float32)
+
+    # Set the known values, probing one pixel in each gain
+    raw[0, 0, 0] = 100 #ADC value of 100, gain 0
+    pedestal[0, 0, 0] = 10
+    calibration[0, 0, 0] = 43.7
+
+    raw[2, 3, 3] = (1<<14) + 1000 #ADC value of 1000, gain 1
+    pedestal[1, 3, 3] = 500
+    calibration[1, 3, 3] = 2.0
+
+    raw[1,1,4] = (3<<14) + 857 #ADC value of 857, gain 2
+    pedestal[2,1,4] = 100
+    calibration[2,1,4] = 3.0
+
+    
+
+    data = apply_calibration(raw, pd = pedestal, cal = calibration)
+
+
+    # The formula that is applied is:
+    # calibrated = (raw - pedestal) / calibration
+    assert data.shape == (10, 4, 5)
+    assert data[0, 0, 0] == (100 - 10) / 43.7
+    assert data[2, 3, 3] ==  (1000 - 500) / 2.0
+    assert data[1, 1, 4] == (857 - 100) / 3.0
+
+    # Other pixels should be zero
+    assert data[2,2,2] == 0
+    assert data[0,1,1] == 0
+    assert data[1,3,0] == 0