mirror of
https://github.com/bec-project/bec_widgets.git
synced 2026-05-01 12:32:30 +02:00
108 lines
3.3 KiB
Python
108 lines
3.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Aggregate benchmark JSON files by taking the median across runner attempts.
|
|
|
|
The workflow runs the same benchmark suite on multiple independent runners.
|
|
This script reads every JSON file produced by those attempts, normalizes the
|
|
contained benchmark values, and writes a compact mapping JSON where each value is
|
|
the median across attempts.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import statistics
|
|
from pathlib import Path
|
|
|
|
from compare_benchmarks import Benchmark, extract_benchmarks
|
|
|
|
|
|
def collect_benchmarks(paths: list[Path]) -> dict[str, list[Benchmark]]:
    """Collect benchmarks from multiple JSON files.

    Args:
        paths (list[Path]): Paths to hyperfine, pytest-benchmark, or compact
            mapping JSON files.

    Returns:
        dict[str, list[Benchmark]]: Benchmarks grouped by benchmark name.
    """
    grouped: dict[str, list[Benchmark]] = {}
    for source in paths:
        # Each file contributes one attempt per benchmark name; group them
        # so the aggregation step can compute a median per name.
        extracted = extract_benchmarks(source)
        for benchmark_name, benchmark in extracted.items():
            bucket = grouped.setdefault(benchmark_name, [])
            bucket.append(benchmark)
    return grouped
|
|
|
|
|
|
def aggregate(collected: dict[str, list[Benchmark]]) -> dict[str, dict[str, object]]:
    """Aggregate grouped benchmarks using the median value.

    Args:
        collected (dict[str, list[Benchmark]]): Benchmarks grouped by benchmark
            name.

    Returns:
        dict[str, dict[str, object]]: Compact mapping JSON data. Each benchmark
            contains ``value``, ``unit``, ``metric``, ``attempts``, and
            ``attempt_values``.
    """
    result: dict[str, dict[str, object]] = {}
    for name in sorted(collected):
        attempts = collected[name]
        attempt_values = [entry.value for entry in attempts]
        # Pick the first non-empty unit/metric across attempts; fall back to
        # the same defaults the attempts would carry individually.
        unit = ""
        for entry in attempts:
            if entry.unit:
                unit = entry.unit
                break
        metric = "value"
        for entry in attempts:
            if entry.metric:
                metric = entry.metric
                break
        result[name] = {
            "value": statistics.median(attempt_values),
            "unit": unit,
            "metric": f"median-of-attempt-{metric}",
            "attempts": len(attempt_values),
            "attempt_values": attempt_values,
        }
    return result
|
|
|
|
|
|
def main_from_paths(input_dir: Path, output: Path) -> int:
    """Aggregate all JSON files in a directory and write the result.

    Args:
        input_dir (Path): Directory containing benchmark JSON files.
        output (Path): Path where the aggregate JSON should be written.

    Returns:
        int: Always ``0`` on success.

    Raises:
        ValueError: If no JSON files are found in ``input_dir``.
    """
    json_files = sorted(input_dir.rglob("*.json"))
    if not json_files:
        raise ValueError(f"No benchmark JSON files found in {input_dir}")

    aggregated = aggregate(collect_benchmarks(json_files))
    # Trailing newline keeps the artifact friendly to line-oriented tooling.
    payload = json.dumps(aggregated, indent=2, sort_keys=True) + "\n"
    output.parent.mkdir(parents=True, exist_ok=True)
    output.write_text(payload, encoding="utf-8")
    return 0
|
|
|
|
|
|
def main() -> int:
    """Run the benchmark aggregation command line interface.

    Returns:
        int: Always ``0`` on success.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--input-dir", required=True, type=Path)
    cli.add_argument("--output", required=True, type=Path)
    parsed = cli.parse_args()
    return main_from_paths(input_dir=parsed.input_dir, output=parsed.output)
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate the CLI's return code as the process exit status.
    raise SystemExit(main())