From dfe29400d35352a2aad4a5632eb42580cc6ae63b Mon Sep 17 00:00:00 2001
From: wakonig_k
Date: Fri, 17 Apr 2026 13:33:22 +0200
Subject: [PATCH] wip

---
 .github/scripts/aggregate_benchmarks.py | 63 ++++++++++++++++++++++-
 .github/scripts/run_benchmarks.sh       | 66 ++++++++++++++++---------
 2 files changed, 104 insertions(+), 25 deletions(-)

diff --git a/.github/scripts/aggregate_benchmarks.py b/.github/scripts/aggregate_benchmarks.py
index 02fecd13..22818257 100644
--- a/.github/scripts/aggregate_benchmarks.py
+++ b/.github/scripts/aggregate_benchmarks.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python3
-"""Aggregate benchmark JSON files by taking the median across runner attempts.
+"""Aggregate and merge benchmark JSON files.
 
 The workflow runs the same benchmark suite on multiple independent runners. This
 script reads every JSON file produced by those attempts, normalizes the
 contained benchmark values, and writes a compact mapping JSON where each value is
-the median across attempts.
+the median across attempts. It can also merge independent hyperfine JSON files
+from one runner into a single hyperfine-style JSON file.
 """
 
 from __future__ import annotations
@@ -13,6 +14,7 @@ import argparse
 import json
 import statistics
 from pathlib import Path
+from typing import Any
 
 from compare_benchmarks import Benchmark, extract_benchmarks
 
@@ -63,6 +65,29 @@ def aggregate(collected: dict[str, list[Benchmark]]) -> dict[str, dict[str, obje
     return aggregated
 
 
+def merge_hyperfine_results(paths: list[Path]) -> dict[str, Any]:
+    """Merge hyperfine result files.
+
+    Args:
+        paths (list[Path]): Hyperfine JSON files to merge.
+
+    Returns:
+        dict[str, Any]: Hyperfine-style JSON object containing all result rows.
+
+    Raises:
+        ValueError: If any file has no hyperfine ``results`` list.
+    """
+
+    merged: dict[str, Any] = {"results": []}
+    for path in paths:
+        data = json.loads(path.read_text(encoding="utf-8"))
+        results = data.get("results") if isinstance(data, dict) else None
+        if not isinstance(results, list):
+            raise ValueError(f"{path} has no hyperfine results list")
+        merged["results"].extend(results)
+    return merged
+
+
 def main_from_paths(input_dir: Path, output: Path) -> int:
     """Aggregate all JSON files in a directory and write the result.
 
@@ -89,6 +114,32 @@ def main_from_paths(input_dir: Path, output: Path) -> int:
     return 0
 
 
+def merge_from_paths(input_dir: Path, output: Path) -> int:
+    """Merge all hyperfine JSON files in a directory and write the result.
+
+    Args:
+        input_dir (Path): Directory containing hyperfine JSON files.
+        output (Path): Path where the merged JSON should be written.
+
+    Returns:
+        int: Always ``0`` on success.
+
+    Raises:
+        ValueError: If no JSON files are found in ``input_dir``.
+    """
+
+    paths = sorted(input_dir.glob("*.json"))
+    if not paths:
+        raise ValueError(f"No hyperfine JSON files found in {input_dir}")
+
+    output.parent.mkdir(parents=True, exist_ok=True)
+    output.write_text(
+        json.dumps(merge_hyperfine_results(paths), indent=2, sort_keys=True) + "\n",
+        encoding="utf-8",
+    )
+    return 0
+
+
 def main() -> int:
     """Run the benchmark aggregation command line interface.
 
@@ -97,9 +148,17 @@ def main() -> int:
     """
 
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mode",
+        choices=("aggregate", "merge-hyperfine"),
+        default="aggregate",
+        help="Operation to perform.",
+    )
     parser.add_argument("--input-dir", required=True, type=Path)
     parser.add_argument("--output", required=True, type=Path)
     args = parser.parse_args()
+    if args.mode == "merge-hyperfine":
+        return merge_from_paths(input_dir=args.input_dir, output=args.output)
     return main_from_paths(input_dir=args.input_dir, output=args.output)
 
 
diff --git a/.github/scripts/run_benchmarks.sh b/.github/scripts/run_benchmarks.sh
index 63484b50..e35514e4 100644
--- a/.github/scripts/run_benchmarks.sh
+++ b/.github/scripts/run_benchmarks.sh
@@ -3,37 +3,57 @@ set -euo pipefail
 
 mkdir -p benchmark-results
 benchmark_json="${BENCHMARK_JSON:-benchmark-results/current.json}"
-benchmark_dir="${BENCHMARK_HYPERFINE_DIR:-tests/benchmarks/hyperfine}"
+benchmark_root="$(dirname "$benchmark_json")"
+hyperfine_benchmark_dir="${BENCHMARK_HYPERFINE_DIR:-tests/benchmarks/hyperfine}"
+pytest_benchmark_dir="${BENCHMARK_PYTEST_DIR:-tests/unit_tests/benchmarks}"
+benchmark_work_dir="$benchmark_root/raw-results"
+hyperfine_json_dir="$benchmark_work_dir/hyperfine"
+pytest_json="$benchmark_work_dir/pytest.json"
 
 shopt -s nullglob
-benchmark_scripts=("$benchmark_dir"/benchmark_*.sh)
+benchmark_scripts=()
+benchmark_scripts=("$hyperfine_benchmark_dir"/benchmark_*.sh)
 shopt -u nullglob
-if [ "${#benchmark_scripts[@]}" -eq 0 ]; then
-  echo "No hyperfine benchmark scripts matching benchmark_*.sh found in $benchmark_dir" >&2
+if [ "${#benchmark_scripts[@]}" -eq 0 ] && [ ! -d "$pytest_benchmark_dir" ]; then
+  echo "No benchmark scripts or pytest benchmarks found" >&2
   exit 1
 fi
 
 echo "Benchmark Python: $(command -v python)"
 python -c 'import sys; print(sys.version)'
 
-commands=()
-names=()
-for benchmark_script in "${benchmark_scripts[@]}"; do
-  title="$(sed -n 's/^# BENCHMARK_TITLE:[[:space:]]*//p' "$benchmark_script" | head -n 1)"
-  if [ -z "$title" ]; then
-    title="$(basename "$benchmark_script" .sh)"
-  fi
-  echo "Preflight benchmark script: $benchmark_script"
-  bash "$benchmark_script"
-  names+=(--command-name "$title")
-  commands+=("bash $(printf "%q" "$benchmark_script")")
-done
+rm -rf "$benchmark_work_dir"
+mkdir -p "$hyperfine_json_dir"
 
-hyperfine \
-  --show-output \
-  --warmup 1 \
-  --runs 5 \
-  "${names[@]}" \
-  --export-json "$benchmark_json" \
-  "${commands[@]}"
+if [ "${#benchmark_scripts[@]}" -gt 0 ]; then
+  for benchmark_script in "${benchmark_scripts[@]}"; do
+    title="$(sed -n 's/^# BENCHMARK_TITLE:[[:space:]]*//p' "$benchmark_script" | head -n 1)"
+    if [ -z "$title" ]; then
+      title="$(basename "$benchmark_script" .sh)"
+    fi
+    benchmark_name="$(basename "$benchmark_script" .sh)"
+    benchmark_result_json="$hyperfine_json_dir/$benchmark_name.json"
+    echo "Preflight benchmark script: $benchmark_script"
+    bash "$benchmark_script"
+
+    hyperfine \
+      --show-output \
+      --warmup 1 \
+      --runs 5 \
+      --command-name "$title" \
+      --export-json "$benchmark_result_json" \
+      "bash $(printf "%q" "$benchmark_script")"
+  done
+fi
+
+if [ -d "$pytest_benchmark_dir" ]; then
+  pytest \
+    -q "$pytest_benchmark_dir" \
+    --benchmark-only \
+    --benchmark-json "$pytest_json"
+fi
+
+python .github/scripts/aggregate_benchmarks.py \
+  --input-dir "$benchmark_work_dir" \
+  --output "$benchmark_json"
 