mirror of
https://github.com/slsdetectorgroup/aare.git
synced 2026-07-01 20:04:50 +02:00
To improve codebase quality and reduce human error, this PR introduces the pre-commit framework. This ensures that all code adheres to project standards before it is even committed, maintaining a consistent style and catching common mistakes early. Key Changes: - Code Formatting: Automated C++ formatting using clang-format (based on the project's .clang-format file). - Syntax Validation: Basic checks for file integrity and syntax. - Spell Check: Automated scanning for typos in source code and comments. - CMake Formatting: Standardization of CMakeLists.txt and .cmake configuration files. - GitHub Workflow: Added a CI action that validates every Pull Request against the pre-commit configuration to ensure compliance. The configuration includes a [ci] block to handle automated fixes within the PR. Currently, this is disabled. If we want the CI to automatically commit formatting fixes back to the PR branch, this can be toggled to true in .pre-commit-config.yaml. ```yaml ci: autofix_commit_msg: [pre-commit] auto fixes from pre-commit hooks autofix_prs: false autoupdate_schedule: monthly ``` The last large commit with the fit functions, for example, was not formatted according to the clang-format rules. This PR would allow to avoid similar mistakes in the future. Python fomat with `ruff` for tests and sanitiser for `.ipynb` notebooks can be added as well.
348 KiB
348 KiB
In [1]:
import time
import random
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import sys
sys.path.insert(0, '/home/ferjao_k/sw/aare/build')
from aare import fit_gaus # lmfit
from aare import Gaussian, fit # minuit2In [2]:
ROWS = 100
COLS = 100
N_SCAN = 100
NOISE_FRAC = 0.05
SEED = 42
N_THREADS = 4
N_REPEATS = 7
N_WARMUP = 3 # untimed iterations (icache + branch predictor warmup)
COOLDOWN = 2.0 # seconds between (method, thread_count) pairsIn [3]:
def generate_3d_data(rows, cols, n_scan, noise_frac, seed):
"""
Generate a synthetic detector image stack where each pixel has a
Gaussian response curve with per-pixel variation in A, mu, sigma.
Returns x (n_scan,), y (rows, cols, n_scan), y_err (rows, cols, n_scan),
and the ground-truth parameter arrays.
"""
rng = np.random.default_rng(seed)
# Per-pixel true params each of shape: [rows, cols, 1]
A_true = rng.uniform(200, 1000, size=(rows, cols))
mu_true = rng.uniform(20, 80, size=(rows, cols))
sig_true = rng.uniform(3, 12, size=(rows, cols))
# One common binned energy array
x = np.linspace(0, 100, n_scan) # shape [1, 1, nscan]
# Build ground truth signals per-pixel
exponent = -0.5 * ((x[None, None, :] - mu_true[:, :, None]) / sig_true[:,:, None])**2 # shape [rows, cols, nscan]
y_clean = A_true[:, :, None] * np.exp(exponent)
# Perturb with noise
noise_sigma = noise_frac * A_true[:, :, None] * np.ones_like(y_clean) # shape [rows, cols, nscan]
noise = rng.normal(0, noise_sigma)
y = y_clean + noise
y_err = noise_sigma.copy()
return x, y, y_err, A_true, mu_true, sig_true In [4]:
def bench(fn, n_warmup=N_WARMUP, n_repeats=N_REPEATS):
"""
Warmup then time `fn` over `n_repeats` calls.
Returns (last_result, list_of_walltimes_in_seconds).
"""
# warmup: primes icache, branch predictor, and lets CPU ramp to boost clock
for _ in range(n_warmup):
res = fn()
times = []
for _ in range(n_repeats):
t0 = time.perf_counter()
res = fn()
t1 = time.perf_counter()
times.append(t1 - t0)
return res, timesIn [5]:
# Generate 2 x 2 dataset of Gaussian-like profiles for each pixel
x2, y2, yerr2, true_A2, true_mu2, true_sig2 = generate_3d_data(
2, 2, N_SCAN, NOISE_FRAC, SEED
)
model_g = Gaussian()
model_g.compute_errors = True
result = model_g.fit(x2, y2, yerr2)
from pprint import pprint
print("== True Gaussian params == ")
print("A_true = \n", true_A2)
print("mu_true = \n", true_mu2)
print("sig_true = \n",true_sig2)
print("\n")
print("== Fit results ==")
par = result['par']
# print(par)
A_fit = par[:, :, 0]
mu_fit = par[:, :, 1]
sig_fit = par[:, :, 2]
print("A_fit = \n", A_fit)
print("mu_fit = \n", mu_fit)
print("sig_fit = \n", sig_fit)== True Gaussian params == A_true = [[819.16483884 551.1027518 ] [886.87833593 757.89442325]] mu_true = [[25.65064087 78.5373411 ] [65.66838212 67.16385832]] sig_true = [[ 4.15302269 7.05347344] [ 6.33718222 11.3408849 ]] == Fit results == A_fit = [[812.09277132 559.04069721] [899.09335849 759.24481682]] mu_fit = [[25.6598209 78.40461782] [65.52261318 66.84540995]] sig_fit = [[ 4.2778026 7.041045 ] [ 6.29190225 11.34233504]]
In [6]:
fig, ax = plt.subplots(2, 2, figsize=(12,8))
# Gaussians in 2x2 frame: True vs Fit
for row in range(2):
for col in range(2):
ax[row, col].plot(x2, y2[row, col,:], label="data")
ax[row, col].plot(x2, model_g(x2, result['par'][row, col,:]), linewidth=1, color="green", label="minuit")
ax[row, col].set_title(f"Gaussian Fit to data in pixel [{row}, {col}]")
ax[row, col].legend()In [7]:
# ===============
# DATA GENERATION
# ===============
print(f"Generating synthetic data: {ROWS}x{COLS} pixels, "
f"{N_SCAN} scan points, noise_frac={NOISE_FRAC}\n")
x, y, yerr, true_A, true_mu, true_sig = generate_3d_data(
ROWS, COLS, N_SCAN, NOISE_FRAC, SEED
)
model = Gaussian()
print(f"model.max_calls = {model.max_calls}")
print(f"model.tolerance = {model.tolerance}")
print("model.compute_errors =", model.compute_errors)
METHOD_DEFS = [
("lmfit (LM)",
lambda nt: lambda: fit_gaus(x, y, n_threads=nt),
"#2196F3", {"linewidth": 3.0, "linestyle": "-"}),
("Minuit2 (obj API)",
lambda nt: lambda: model.fit(x, y, n_threads=nt),
"#FF9800", {"linewidth": 2.5, "linestyle": ":"}),
]
colors = {label: c for label, _, c, _ in METHOD_DEFS}
styles = {label: s for label, _, _, s in METHOD_DEFS}Generating synthetic data: 100x100 pixels, 100 scan points, noise_frac=0.05 model.max_calls = 100 model.tolerance = 0.5 model.compute_errors = False
In [8]:
# ====================================
# SINGLE-CALL BENCHMARK (at N_THREADS)
# ====================================
def extract_result(label, res):
"""Normalize return values across fitters into a common dict."""
if isinstance(res, dict):
out = {"par": res["par"]}
if "par_err" in res:
out["par_err"] = res["par_err"]
if "chi2" in res:
out["chi2"] = res["chi2"]
return out
# fit_gaus without y_err returns a raw array
return {"par": res}
methods = {}
for label, factory, _, _ in METHOD_DEFS:
time.sleep(COOLDOWN)
res, times = bench(factory(N_THREADS))
entry = extract_result(label, res)
entry["times"] = times
methods[label] = entry
# ---- Print summary ----
ndf = N_SCAN - 3
print(f"{'Method':24s} {'time (ms)':>10s} {'med|dA|':>10s} {'med|dMu|':>10s} {'med|dSig|':>10s}")
print("-" * 80)
for name, m in methods.items():
par = m["par"]
med_t = np.median(m["times"]) * 1e3
dA = np.median(np.abs(par[:,:,0] - true_A))
dMu = np.median(np.abs(par[:,:,1] - true_mu))
dSig = np.median(np.abs(par[:,:,2] - true_sig))
chi2_str = ""
if "chi2" in m:
chi2_str = f" chi2/ndf={np.median(m['chi2'] / ndf):.4f}"
print(f"[{name:22s}] {med_t:8.2f} ms "
f"{dA:10.3f} {dMu:10.4f} {dSig:10.4f}{chi2_str}")Method time (ms) med|dA| med|dMu| med|dSig| -------------------------------------------------------------------------------- [lmfit (LM) ] 189.58 ms 6.272 0.0940 0.0949 [Minuit2 (obj API) ] 124.59 ms 6.272 0.0940 0.0949 chi2/ndf=880.9946
In [9]:
# ===============
# THREAD SCALING
# ===============
thread_counts = [1, 2, 4, 8, 16]
thread_times = {label: [] for label, _, _, _ in METHOD_DEFS}
ttimes_stddev = {label: [] for label, _, _, _ in METHOD_DEFS}
for nt in thread_counts:
# shuffle method order per thread count to decorrelate thermal bias
run_order = list(METHOD_DEFS)
random.shuffle(run_order)
for label, factory, _, _ in run_order:
time.sleep(COOLDOWN)
_, times = bench(factory(nt))
med = np.median(times) * 1e3
std = np.std(times) * 1e3
thread_times[label].append(med)
ttimes_stddev[label].append(std)
per_px = med / (ROWS * COLS) * 1e3
per_px_std = std / (ROWS * COLS) * 1e3
print(f" {label:22s} n_threads={nt:2d} "
f"{med:8.2f} ± {std:6.2f} ms "
f"({per_px:.4f} ± {per_px_std:.4f} μs/pixel)")
print("\n")Minuit2 (obj API) n_threads= 1 456.26 ± 10.76 ms (45.6262 ± 1.0762 μs/pixel) lmfit (LM) n_threads= 1 752.77 ± 108.16 ms (75.2771 ± 10.8165 μs/pixel) Minuit2 (obj API) n_threads= 2 238.34 ± 26.57 ms (23.8345 ± 2.6566 μs/pixel) lmfit (LM) n_threads= 2 410.95 ± 84.96 ms (41.0945 ± 8.4959 μs/pixel) lmfit (LM) n_threads= 4 205.44 ± 35.26 ms (20.5445 ± 3.5259 μs/pixel) Minuit2 (obj API) n_threads= 4 139.22 ± 14.06 ms (13.9224 ± 1.4060 μs/pixel) Minuit2 (obj API) n_threads= 8 130.12 ± 3.27 ms (13.0118 ± 0.3269 μs/pixel) lmfit (LM) n_threads= 8 199.97 ± 6.88 ms (19.9968 ± 0.6877 μs/pixel) Minuit2 (obj API) n_threads=16 134.81 ± 10.13 ms (13.4807 ± 1.0130 μs/pixel) lmfit (LM) n_threads=16 188.32 ± 9.86 ms (18.8322 ± 0.9860 μs/pixel)
In [10]:
# =============================
# FIGURE 1: Residual histograms
# =============================
param_names = ["A", "μ", "σ"]
param_truths = [true_A, true_mu, true_sig]
fig1, axes1 = plt.subplots(1, 3, figsize=(15, 5))
fig1.suptitle(f"Parameter Residuals — {ROWS}×{COLS} pixels, {N_SCAN} scan points",
fontsize=14, fontweight="bold")
for col, (pname, truth) in enumerate(zip(param_names, param_truths)):
ax = axes1[col]
# collect residuals across all methods for shared bin edges
res_by_method = {}
all_res = []
for mname, m in methods.items():
residual = (m["par"][:, :, col] - truth).ravel()
res_by_method[mname] = residual
all_res.append(residual)
all_res = np.concatenate(all_res)
lo, hi = np.percentile(all_res, [0.5, 99.5])
edges = np.linspace(lo, hi, 101)
for mname, r in res_by_method.items():
ax.hist(r, bins=edges, histtype="step", label=mname,
color=colors[mname],
linewidth=styles[mname]["linewidth"],
linestyle=styles[mname]["linestyle"])
ax.axvline(0, color="k", linestyle="--", linewidth=1, alpha=0.7)
ax.set_xlabel(f"Fitted {pname} − True {pname}")
ax.set_ylabel("Pixel count")
ax.set_title(f"Δ{pname}")
ax.legend(fontsize=8)
ax.grid(alpha=0.3)
fig1.tight_layout()
# fig1.savefig("fig1_residual_histograms.png", dpi=150, bbox_inches="tight")
# print("\nSaved fig1_residual_histograms.png")
# ====================================================
# FIGURE 2: Performance — bar chart + thread scaling
# ====================================================
fig2 = plt.figure(figsize=(14, 5))
gs = GridSpec(1, 2, figure=fig2, width_ratios=[1, 1.3])
# -- Left: bar chart at N_THREADS --
ax2a = fig2.add_subplot(gs[0])
names = list(methods.keys())
medians = [np.median(methods[n]["times"]) * 1e3 for n in names]
bars = ax2a.barh(names, medians,
color=[colors[n] for n in names],
edgecolor="white", height=0.5)
ax2a.set_xlabel("Median wall time (ms)")
ax2a.set_title(f"Single call — {ROWS}×{COLS} px, {N_THREADS} threads")
for bar, val in zip(bars, medians):
ax2a.text(bar.get_width() + max(medians) * 0.02,
bar.get_y() + bar.get_height() / 2,
f"{val:.1f} ms", va="center", fontsize=10)
ax2a.grid(axis="x", alpha=0.3)
ax2a.set_xlim(0, max(medians) * 1.25)
# -- Right: thread scaling with error bars --
ax2b = fig2.add_subplot(gs[1])
for label, _, _, _ in METHOD_DEFS:
tt = thread_times[label]
sd = ttimes_stddev[label]
speedup = [tt[0] / t for t in tt]
# propagate uncertainty: S = t0/t → δS/S = sqrt((δt0/t0)² + (δt/t)²)
speedup_err = [
s * np.sqrt((sd[0] / tt[0])**2 + (sd[i] / tt[i])**2)
for i, s in enumerate(speedup)
]
ax2b.errorbar(thread_counts, speedup, yerr=speedup_err,
fmt="o-", label=label, color=colors[label],
linewidth=2, markersize=7, capsize=4)
ax2b.plot(thread_counts, thread_counts, "k--", alpha=0.4, label="Ideal linear")
ax2b.set_xlabel("Number of threads")
ax2b.set_ylabel("Speedup vs 1 thread")
ax2b.set_title("Thread scaling")
ax2b.set_xticks(thread_counts)
ax2b.legend(fontsize=9)
ax2b.grid(alpha=0.3)
fig2.tight_layout()
# fig2.savefig("fig2_performance.png", dpi=150, bbox_inches="tight")
# print("Saved fig2_performance.png")
plt.show()In [ ]: