mirror of
https://github.com/bec-project/bec_widgets.git
synced 2026-05-06 06:44:23 +02:00
wip
This commit is contained in:
@@ -1,5 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Compare benchmark JSON files and write a GitHub Actions summary."""
|
||||
"""Compare benchmark JSON files and write a GitHub Actions summary.
|
||||
|
||||
The script supports JSON emitted by hyperfine, JSON emitted by pytest-benchmark,
|
||||
and a compact mapping format generated by ``aggregate_benchmarks.py``. Timing
|
||||
formats prefer median values and fall back to mean values when median values are
|
||||
not present.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -13,6 +19,15 @@ from typing import Any
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Benchmark:
|
||||
"""Normalized benchmark result.
|
||||
|
||||
Attributes:
|
||||
name (str): Stable benchmark name used to match baseline and current results.
|
||||
value (float): Numeric benchmark value used for comparison.
|
||||
unit (str): Display unit for the value, for example ``"s"``.
|
||||
metric (str): Source metric name, for example ``"median"`` or ``"mean"``.
|
||||
"""
|
||||
|
||||
name: str
|
||||
value: float
|
||||
unit: str
|
||||
@@ -21,6 +36,18 @@ class Benchmark:
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Comparison:
|
||||
"""Comparison between one baseline benchmark and one current benchmark.
|
||||
|
||||
Attributes:
|
||||
name (str): Benchmark name.
|
||||
baseline (float): Baseline benchmark value.
|
||||
current (float): Current benchmark value.
|
||||
delta_percent (float): Percent change from baseline to current.
|
||||
unit (str): Display unit for both values.
|
||||
metric (str): Current result metric used for comparison.
|
||||
regressed (bool): Whether the change exceeds the configured threshold.
|
||||
"""
|
||||
|
||||
name: str
|
||||
baseline: float
|
||||
current: float
|
||||
@@ -31,11 +58,29 @@ class Comparison:
|
||||
|
||||
|
||||
def _read_json(path: Path) -> Any:
|
||||
"""Read JSON data from a file.
|
||||
|
||||
Args:
|
||||
path (Path): Path to the JSON file.
|
||||
|
||||
Returns:
|
||||
Any: Parsed JSON value.
|
||||
"""
|
||||
|
||||
with path.open("r", encoding="utf-8") as stream:
|
||||
return json.load(stream)
|
||||
|
||||
|
||||
def _as_float(value: Any) -> float | None:
|
||||
"""Convert a value to a finite float.
|
||||
|
||||
Args:
|
||||
value (Any): Value to convert.
|
||||
|
||||
Returns:
|
||||
float | None: Converted finite float, or ``None`` if conversion fails.
|
||||
"""
|
||||
|
||||
try:
|
||||
result = float(value)
|
||||
except (TypeError, ValueError):
|
||||
@@ -46,6 +91,15 @@ def _as_float(value: Any) -> float | None:
|
||||
|
||||
|
||||
def _extract_hyperfine(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
"""Extract normalized benchmarks from hyperfine JSON.
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): Parsed hyperfine JSON object.
|
||||
|
||||
Returns:
|
||||
dict[str, Benchmark]: Benchmarks keyed by command name.
|
||||
"""
|
||||
|
||||
benchmarks: dict[str, Benchmark] = {}
|
||||
for result in data.get("results", []):
|
||||
if not isinstance(result, dict):
|
||||
@@ -62,6 +116,15 @@ def _extract_hyperfine(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
|
||||
|
||||
def _extract_pytest_benchmark(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
"""Extract normalized benchmarks from pytest-benchmark JSON.
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): Parsed pytest-benchmark JSON object.
|
||||
|
||||
Returns:
|
||||
dict[str, Benchmark]: Benchmarks keyed by full benchmark name.
|
||||
"""
|
||||
|
||||
benchmarks: dict[str, Benchmark] = {}
|
||||
for benchmark in data.get("benchmarks", []):
|
||||
if not isinstance(benchmark, dict):
|
||||
@@ -82,6 +145,16 @@ def _extract_pytest_benchmark(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
|
||||
|
||||
def _extract_simple_mapping(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
"""Extract normalized benchmarks from a compact mapping JSON object.
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): Parsed mapping where each benchmark is either a
|
||||
raw number or an object containing ``value``, ``unit``, and ``metric``.
|
||||
|
||||
Returns:
|
||||
dict[str, Benchmark]: Benchmarks keyed by mapping key.
|
||||
"""
|
||||
|
||||
benchmarks: dict[str, Benchmark] = {}
|
||||
|
||||
for name, raw_value in data.items():
|
||||
@@ -103,6 +176,20 @@ def _extract_simple_mapping(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
|
||||
|
||||
def extract_benchmarks(path: Path) -> dict[str, Benchmark]:
|
||||
"""Extract normalized benchmarks from a supported JSON file.
|
||||
|
||||
Args:
|
||||
path (Path): Path to a hyperfine, pytest-benchmark, or compact mapping
|
||||
JSON file.
|
||||
|
||||
Returns:
|
||||
dict[str, Benchmark]: Normalized benchmarks keyed by name.
|
||||
|
||||
Raises:
|
||||
ValueError: If the JSON root is not an object or no supported benchmark
|
||||
entries can be extracted.
|
||||
"""
|
||||
|
||||
data = _read_json(path)
|
||||
if not isinstance(data, dict):
|
||||
raise ValueError(f"{path} must contain a JSON object")
|
||||
@@ -122,6 +209,22 @@ def compare_benchmarks(
|
||||
threshold_percent: float,
|
||||
higher_is_better: bool,
|
||||
) -> tuple[list[Comparison], list[str], list[str]]:
|
||||
"""Compare baseline benchmarks with current benchmarks.
|
||||
|
||||
Args:
|
||||
baseline (dict[str, Benchmark]): Baseline benchmarks keyed by name.
|
||||
current (dict[str, Benchmark]): Current benchmarks keyed by name.
|
||||
threshold_percent (float): Regression threshold in percent.
|
||||
higher_is_better (bool): If ``True``, lower current values are treated as
|
||||
regressions. If ``False``, higher current values are treated as
|
||||
regressions.
|
||||
|
||||
Returns:
|
||||
tuple[list[Comparison], list[str], list[str]]: Comparisons for common
|
||||
benchmark names, names missing from current results, and names newly
|
||||
present in current results.
|
||||
"""
|
||||
|
||||
comparisons: list[Comparison] = []
|
||||
missing_in_current: list[str] = []
|
||||
new_in_current: list[str] = []
|
||||
@@ -165,6 +268,16 @@ def compare_benchmarks(
|
||||
|
||||
|
||||
def _format_value(value: float, unit: str) -> str:
|
||||
"""Format a benchmark value for Markdown output.
|
||||
|
||||
Args:
|
||||
value (float): Numeric benchmark value.
|
||||
unit (str): Display unit.
|
||||
|
||||
Returns:
|
||||
str: Formatted value with optional unit suffix.
|
||||
"""
|
||||
|
||||
suffix = f" {unit}" if unit else ""
|
||||
return f"{value:.6g}{suffix}"
|
||||
|
||||
@@ -177,6 +290,20 @@ def write_summary(
|
||||
threshold_percent: float,
|
||||
higher_is_better: bool,
|
||||
) -> None:
|
||||
"""Write a Markdown benchmark comparison summary.
|
||||
|
||||
Args:
|
||||
path (Path): Path where the summary should be written.
|
||||
comparisons (list[Comparison]): Comparison rows for matching benchmarks.
|
||||
missing_in_current (list[str]): Baseline benchmark names missing from the
|
||||
current result.
|
||||
new_in_current (list[str]): Current benchmark names not present in the
|
||||
baseline result.
|
||||
threshold_percent (float): Regression threshold in percent.
|
||||
higher_is_better (bool): Whether higher benchmark values are considered
|
||||
better.
|
||||
"""
|
||||
|
||||
regressions = [comparison for comparison in comparisons if comparison.regressed]
|
||||
direction = "higher is better" if higher_is_better else "lower is better"
|
||||
sorted_comparisons = sorted(comparisons, key=lambda comparison: comparison.name)
|
||||
@@ -245,6 +372,12 @@ def write_summary(
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Run the benchmark comparison command line interface.
|
||||
|
||||
Returns:
|
||||
int: ``1`` when a regression exceeds the threshold, otherwise ``0``.
|
||||
"""
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--baseline", required=True, type=Path)
|
||||
parser.add_argument("--current", required=True, type=Path)
|
||||
|
||||
Reference in New Issue
Block a user