mirror of
https://github.com/bec-project/bec_widgets.git
synced 2026-04-25 09:40:48 +02:00
ci: add benchmark workflow
This commit is contained in:
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Aggregate and merge benchmark JSON files.
|
||||
|
||||
The workflow runs the same benchmark suite on multiple independent runners.
|
||||
This script reads every JSON file produced by those attempts, normalizes the
|
||||
contained benchmark values, and writes a compact mapping JSON where each value is
|
||||
the median across attempts. It can also merge independent hyperfine JSON files
|
||||
from one runner into a single hyperfine-style JSON file.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import statistics
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from compare_benchmarks import Benchmark, extract_benchmarks
|
||||
|
||||
|
||||
def collect_benchmarks(paths: list[Path]) -> dict[str, list[Benchmark]]:
    """Group the benchmarks found in several JSON files by benchmark name.

    Args:
        paths (list[Path]): Hyperfine, pytest-benchmark, or compact mapping
            JSON files, read in the given order.

    Returns:
        dict[str, list[Benchmark]]: For every benchmark name, the results
            extracted for it, one entry per file that contained that name.
    """

    grouped: dict[str, list[Benchmark]] = {}
    for json_path in paths:
        extracted = extract_benchmarks(json_path)
        for bench_name, result in extracted.items():
            # First time this name is seen: start its list of attempts.
            if bench_name not in grouped:
                grouped[bench_name] = []
            grouped[bench_name].append(result)
    return grouped
|
||||
|
||||
|
||||
def aggregate(collected: dict[str, list[Benchmark]]) -> dict[str, dict[str, object]]:
    """Reduce every group of benchmark attempts to its median value.

    Args:
        collected (dict[str, list[Benchmark]]): Attempts grouped by benchmark
            name. Every group must contain at least one attempt.

    Returns:
        dict[str, dict[str, object]]: Compact mapping JSON data, with names in
            sorted order. Each entry holds ``value`` (the median),
            ``unit``, ``metric``, ``attempts`` (the number of attempts), and
            ``attempt_values`` (the raw values in input order).
    """

    summary: dict[str, dict[str, object]] = {}
    for name in sorted(collected):
        attempts = collected[name]

        # The first attempt that reports a non-empty unit decides the unit.
        unit = ""
        for attempt in attempts:
            if attempt.unit:
                unit = attempt.unit
                break

        # Same rule for the source metric, falling back to "value".
        source_metric = "value"
        for attempt in attempts:
            if attempt.metric:
                source_metric = attempt.metric
                break

        attempt_values = [attempt.value for attempt in attempts]
        summary[name] = {
            "value": statistics.median(attempt_values),
            "unit": unit,
            "metric": f"median-of-attempt-{source_metric}",
            "attempts": len(attempt_values),
            "attempt_values": attempt_values,
        }
    return summary
|
||||
|
||||
|
||||
def merge_hyperfine_results(paths: list[Path]) -> dict[str, Any]:
    """Merge several hyperfine result files into one hyperfine-style object.

    Args:
        paths (list[Path]): Hyperfine JSON files to merge, in output order.

    Returns:
        dict[str, Any]: Object with a single ``results`` key holding the
            result rows of all files, concatenated in the order of ``paths``.

    Raises:
        ValueError: If a file's JSON root is not an object, or if it has no
            ``results`` key, or if ``results`` is not a list.
    """

    merged_rows: list[Any] = []
    for path in paths:
        data = json.loads(path.read_text(encoding="utf-8"))
        # No default on purpose: a missing "results" key must be reported as
        # an invalid hyperfine file instead of being treated as an empty list.
        results = data.get("results") if isinstance(data, dict) else None
        if not isinstance(results, list):
            raise ValueError(f"{path} has no hyperfine results list")
        merged_rows.extend(results)
    return {"results": merged_rows}
|
||||
|
||||
|
||||
def main_from_paths(input_dir: Path, output: Path) -> int:
    """Aggregate every JSON file below a directory and write the result.

    The directory is searched recursively. The parent directory of ``output``
    is created when it does not exist yet.

    Args:
        input_dir (Path): Directory containing benchmark JSON files.
        output (Path): Destination of the aggregated compact mapping JSON.

    Returns:
        int: ``0`` once the output file has been written.

    Raises:
        ValueError: If ``input_dir`` contains no ``*.json`` file.
    """

    json_files = sorted(input_dir.rglob("*.json"))
    if len(json_files) == 0:
        raise ValueError(f"No benchmark JSON files found in {input_dir}")

    output.parent.mkdir(parents=True, exist_ok=True)
    report = aggregate(collect_benchmarks(json_files))
    serialized = json.dumps(report, indent=2, sort_keys=True)
    output.write_text(serialized + "\n", encoding="utf-8")
    return 0
|
||||
|
||||
|
||||
def merge_from_paths(input_dir: Path, output: Path) -> int:
    """Merge the hyperfine JSON files of one directory and write the result.

    Only files directly inside ``input_dir`` are considered; subdirectories
    are not searched. The parent directory of ``output`` is created when it
    does not exist yet.

    Args:
        input_dir (Path): Directory containing hyperfine JSON files.
        output (Path): Destination of the merged hyperfine-style JSON.

    Returns:
        int: ``0`` once the output file has been written.

    Raises:
        ValueError: If ``input_dir`` contains no ``*.json`` file.
    """

    json_files = sorted(input_dir.glob("*.json"))
    if len(json_files) == 0:
        raise ValueError(f"No hyperfine JSON files found in {input_dir}")

    output.parent.mkdir(parents=True, exist_ok=True)
    merged = merge_hyperfine_results(json_files)
    serialized = json.dumps(merged, indent=2, sort_keys=True)
    output.write_text(serialized + "\n", encoding="utf-8")
    return 0
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the command line and run the selected benchmark operation.

    ``--mode aggregate`` (the default) writes the median-aggregated compact
    mapping; ``--mode merge-hyperfine`` writes a merged hyperfine-style file.
    ``--input-dir`` and ``--output`` are required in both modes.

    Returns:
        int: Exit code of the selected operation (``0`` on success).
    """

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--mode",
        choices=("aggregate", "merge-hyperfine"),
        default="aggregate",
        help="Operation to perform.",
    )
    cli.add_argument("--input-dir", required=True, type=Path)
    cli.add_argument("--output", required=True, type=Path)
    options = cli.parse_args()

    # Both operations share the same keyword interface, so pick one and call it.
    operation = merge_from_paths if options.mode == "merge-hyperfine" else main_from_paths
    return operation(input_dir=options.input_dir, output=options.output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate the return value of main() as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
|
||||
@@ -0,0 +1,411 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Compare benchmark JSON files and write a GitHub Actions summary.
|
||||
|
||||
The script supports JSON emitted by hyperfine, JSON emitted by pytest-benchmark,
|
||||
and a compact mapping format generated by ``aggregate_benchmarks.py``. Timing
|
||||
formats prefer median values and fall back to mean values when median values are
|
||||
not present.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Benchmark:
    """One benchmark result in the normalized, format-independent form.

    Instances are immutable.

    Attributes:
        name (str): Stable benchmark name; baseline and current results are
            matched on it.
        value (float): Number that is compared between runs.
        unit (str): Unit shown next to the value, for example ``"s"``. May be
            empty.
        metric (str): Name of the source metric the value was taken from, for
            example ``"median"`` or ``"mean"``. Defaults to ``"value"``.
    """

    name: str
    value: float
    unit: str
    metric: str = "value"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Comparison:
    """Result of comparing one benchmark between the baseline and current run.

    Instances are immutable.

    Attributes:
        name (str): Name of the compared benchmark.
        baseline (float): Value measured in the baseline run.
        current (float): Value measured in the current run.
        delta_percent (float): Change from baseline to current, in percent.
        unit (str): Unit shown next to both values.
        metric (str): Metric of the current result that was compared.
        regressed (bool): ``True`` when the change exceeds the configured
            regression threshold.
    """

    name: str
    baseline: float
    current: float
    delta_percent: float
    unit: str
    metric: str
    regressed: bool
|
||||
|
||||
|
||||
def _read_json(path: Path) -> Any:
|
||||
"""Read JSON data from a file.
|
||||
|
||||
Args:
|
||||
path (Path): Path to the JSON file.
|
||||
|
||||
Returns:
|
||||
Any: Parsed JSON value.
|
||||
"""
|
||||
|
||||
with path.open("r", encoding="utf-8") as stream:
|
||||
return json.load(stream)
|
||||
|
||||
|
||||
def _as_float(value: Any) -> float | None:
|
||||
"""Convert a value to a finite float.
|
||||
|
||||
Args:
|
||||
value (Any): Value to convert.
|
||||
|
||||
Returns:
|
||||
float | None: Converted finite float, or ``None`` if conversion fails.
|
||||
"""
|
||||
|
||||
try:
|
||||
result = float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if math.isfinite(result):
|
||||
return result
|
||||
return None
|
||||
|
||||
|
||||
def _extract_hyperfine(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
"""Extract normalized benchmarks from hyperfine JSON.
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): Parsed hyperfine JSON object.
|
||||
|
||||
Returns:
|
||||
dict[str, Benchmark]: Benchmarks keyed by command name.
|
||||
"""
|
||||
|
||||
benchmarks: dict[str, Benchmark] = {}
|
||||
for result in data.get("results", []):
|
||||
if not isinstance(result, dict):
|
||||
continue
|
||||
name = str(result.get("command") or result.get("name") or "").strip()
|
||||
metric = "median"
|
||||
value = _as_float(result.get(metric))
|
||||
if value is None:
|
||||
metric = "mean"
|
||||
value = _as_float(result.get(metric))
|
||||
if name and value is not None:
|
||||
benchmarks[name] = Benchmark(name=name, value=value, unit="s", metric=metric)
|
||||
return benchmarks
|
||||
|
||||
|
||||
def _extract_pytest_benchmark(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
"""Extract normalized benchmarks from pytest-benchmark JSON.
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): Parsed pytest-benchmark JSON object.
|
||||
|
||||
Returns:
|
||||
dict[str, Benchmark]: Benchmarks keyed by full benchmark name.
|
||||
"""
|
||||
|
||||
benchmarks: dict[str, Benchmark] = {}
|
||||
for benchmark in data.get("benchmarks", []):
|
||||
if not isinstance(benchmark, dict):
|
||||
continue
|
||||
|
||||
name = str(benchmark.get("fullname") or benchmark.get("name") or "").strip()
|
||||
stats = benchmark.get("stats", {})
|
||||
value = None
|
||||
metric = "median"
|
||||
if isinstance(stats, dict):
|
||||
value = _as_float(stats.get(metric))
|
||||
if value is None:
|
||||
metric = "mean"
|
||||
value = _as_float(stats.get(metric))
|
||||
if name and value is not None:
|
||||
benchmarks[name] = Benchmark(name=name, value=value, unit="s", metric=metric)
|
||||
return benchmarks
|
||||
|
||||
|
||||
def _extract_simple_mapping(data: dict[str, Any]) -> dict[str, Benchmark]:
|
||||
"""Extract normalized benchmarks from a compact mapping JSON object.
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): Parsed mapping where each benchmark is either a
|
||||
raw number or an object containing ``value``, ``unit``, and ``metric``.
|
||||
|
||||
Returns:
|
||||
dict[str, Benchmark]: Benchmarks keyed by mapping key.
|
||||
"""
|
||||
|
||||
benchmarks: dict[str, Benchmark] = {}
|
||||
|
||||
for name, raw_value in data.items():
|
||||
if name in {"version", "context", "commit", "timestamp"}:
|
||||
continue
|
||||
|
||||
value = _as_float(raw_value)
|
||||
unit = ""
|
||||
metric = "value"
|
||||
if value is None and isinstance(raw_value, dict):
|
||||
value = _as_float(raw_value.get("value"))
|
||||
unit = str(raw_value.get("unit") or "")
|
||||
metric = str(raw_value.get("metric") or "value")
|
||||
|
||||
if value is not None:
|
||||
benchmarks[str(name)] = Benchmark(name=str(name), value=value, unit=unit, metric=metric)
|
||||
|
||||
return benchmarks
|
||||
|
||||
|
||||
def extract_benchmarks(path: Path) -> dict[str, Benchmark]:
    """Read a benchmark JSON file in any supported format.

    The hyperfine, pytest-benchmark, and compact mapping extractors are tried
    in that order; the first one that yields at least one benchmark wins.

    Args:
        path (Path): Hyperfine, pytest-benchmark, or compact mapping JSON
            file.

    Returns:
        dict[str, Benchmark]: Normalized benchmarks keyed by name.

    Raises:
        ValueError: If the JSON root is not an object, or if none of the
            extractors finds a benchmark entry.
    """

    parsed = _read_json(path)
    if not isinstance(parsed, dict):
        raise ValueError(f"{path} must contain a JSON object")

    for extract in (_extract_hyperfine, _extract_pytest_benchmark, _extract_simple_mapping):
        found = extract(parsed)
        if found:
            return found

    raise ValueError(f"No supported benchmark entries found in {path}")
|
||||
|
||||
|
||||
def compare_benchmarks(
    baseline: dict[str, Benchmark],
    current: dict[str, Benchmark],
    threshold_percent: float,
    higher_is_better: bool,
) -> tuple[list[Comparison], list[str], list[str]]:
    """Compare the current benchmarks against the baseline benchmarks.

    The change is expressed in percent of the absolute baseline value. A
    baseline value of zero yields a change of ``0.0`` regardless of the
    current value.

    Args:
        baseline (dict[str, Benchmark]): Baseline benchmarks keyed by name.
        current (dict[str, Benchmark]): Current benchmarks keyed by name.
        threshold_percent (float): Regression threshold in percent. A change
            that reaches the threshold exactly counts as a regression.
        higher_is_better (bool): If ``True``, a drop of the value is a
            regression; if ``False``, a rise of the value is a regression.

    Returns:
        tuple[list[Comparison], list[str], list[str]]: Comparisons for the
            names present in both inputs, the names missing from ``current``,
            and the names only present in ``current``. All three lists are
            sorted by name.
    """

    comparisons: list[Comparison] = []
    missing_in_current: list[str] = []

    for name in sorted(baseline):
        before = baseline[name]
        after = current.get(name)
        if after is None:
            missing_in_current.append(name)
            continue

        if before.value == 0:
            # A relative change is undefined for a zero baseline.
            change = 0.0
        else:
            change = (after.value - before.value) / abs(before.value) * 100

        regressed = change <= -threshold_percent if higher_is_better else change >= threshold_percent

        comparisons.append(
            Comparison(
                name=name,
                baseline=before.value,
                current=after.value,
                delta_percent=change,
                unit=after.unit or before.unit,
                metric=after.metric,
                regressed=regressed,
            )
        )

    new_in_current = sorted(set(current) - set(baseline))
    return comparisons, missing_in_current, new_in_current
|
||||
|
||||
|
||||
def _format_value(value: float, unit: str) -> str:
|
||||
"""Format a benchmark value for Markdown output.
|
||||
|
||||
Args:
|
||||
value (float): Numeric benchmark value.
|
||||
unit (str): Display unit.
|
||||
|
||||
Returns:
|
||||
str: Formatted value with optional unit suffix.
|
||||
"""
|
||||
|
||||
suffix = f" {unit}" if unit else ""
|
||||
return f"{value:.6g}{suffix}"
|
||||
|
||||
|
||||
def write_summary(
    path: Path,
    comparisons: list[Comparison],
    missing_in_current: list[str],
    new_in_current: list[str],
    threshold_percent: float,
    higher_is_better: bool,
) -> None:
    """Write the benchmark comparison as a Markdown summary file.

    The summary starts with a marker comment and the threshold, followed by a
    table of regressions (or a note that there are none), a collapsible table
    of all results sorted by name, and the lists of missing and new
    benchmarks. The parent directory of ``path`` is created when needed.

    Args:
        path (Path): Destination of the Markdown file.
        comparisons (list[Comparison]): Rows for benchmarks present in both
            runs.
        missing_in_current (list[str]): Baseline benchmark names that the
            current run lacks.
        new_in_current (list[str]): Current benchmark names that the baseline
            lacks.
        threshold_percent (float): Regression threshold in percent.
        higher_is_better (bool): Whether higher benchmark values are better.
    """

    def table_row(item, *extra_cells):
        # One Markdown table row; extra_cells are appended after the change.
        cells = [
            f"{item.name}",
            _format_value(item.baseline, item.unit),
            _format_value(item.current, item.unit),
            f"{item.delta_percent:+.2f}%",
            *extra_cells,
        ]
        return "| " + " | ".join(cells) + " |"

    direction = "higher is better" if higher_is_better else "lower is better"
    report = [
        "<!-- bw-benchmark-comment -->",
        "## Benchmark comparison",
        "",
        f"Threshold: {threshold_percent:g}% ({direction}).",
        "",
    ]

    regressed_items = [item for item in comparisons if item.regressed]
    if not regressed_items:
        report.append("No benchmark regression exceeded the configured threshold.")
    else:
        report += [
            f"{len(regressed_items)} benchmark(s) regressed beyond the configured threshold.",
            "",
            "| Benchmark | Baseline | Current | Change |",
            "| --- | ---: | ---: | ---: |",
        ]
        report += [table_row(item) for item in regressed_items]

    if comparisons:
        report += [
            "",
            "<details>",
            "<summary>All benchmark results</summary>",
            "",
            "| Benchmark | Baseline | Current | Change | Status |",
            "| --- | ---: | ---: | ---: | --- |",
        ]
        for item in sorted(comparisons, key=lambda entry: entry.name):
            report.append(table_row(item, "regressed" if item.regressed else "ok"))
        report += ["", "</details>"]

    name_sections = (
        ("Missing benchmarks in the current run:", missing_in_current),
        ("New benchmarks in the current run:", new_in_current),
    )
    for heading, names in name_sections:
        if names:
            report += ["", heading]
            report += [f"- `{name}`" for name in names]

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(report) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def main() -> int:
    """Compare two benchmark files from the command line and write a summary.

    Required options are ``--baseline``, ``--current``, ``--summary``, and
    ``--threshold-percent``; ``--higher-is-better`` is an optional flag.

    Returns:
        int: ``1`` if at least one benchmark regressed beyond the threshold,
            otherwise ``0``.
    """

    cli = argparse.ArgumentParser()
    for path_option in ("--baseline", "--current", "--summary"):
        cli.add_argument(path_option, required=True, type=Path)
    cli.add_argument("--threshold-percent", required=True, type=float)
    cli.add_argument("--higher-is-better", action="store_true")
    options = cli.parse_args()

    comparisons, missing_in_current, new_in_current = compare_benchmarks(
        baseline=extract_benchmarks(options.baseline),
        current=extract_benchmarks(options.current),
        threshold_percent=options.threshold_percent,
        higher_is_better=options.higher_is_better,
    )

    write_summary(
        path=options.summary,
        comparisons=comparisons,
        missing_in_current=missing_in_current,
        new_in_current=new_in_current,
        threshold_percent=options.threshold_percent,
        higher_is_better=options.higher_is_better,
    )

    # A non-zero exit status signals the regression to the caller.
    return int(any(item.regressed for item in comparisons))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate the return value of main() as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
|
||||
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env bash
# Run all hyperfine and pytest benchmarks, then aggregate the raw results into
# one compact JSON file.
#
# Optional environment variables:
#   BENCHMARK_JSON           Output file (default: benchmark-results/current.json).
#   BENCHMARK_HYPERFINE_DIR  Directory holding benchmark_*.sh scripts
#                            (default: tests/benchmarks/hyperfine).
#   BENCHMARK_PYTEST_DIRS    Whitespace-separated pytest benchmark directories.
#                            BENCHMARK_PYTEST_DIR is used when this one is
#                            unset or empty.
set -euo pipefail

mkdir -p benchmark-results
benchmark_json="${BENCHMARK_JSON:-benchmark-results/current.json}"
benchmark_root="$(dirname "$benchmark_json")"
hyperfine_benchmark_dir="${BENCHMARK_HYPERFINE_DIR:-tests/benchmarks/hyperfine}"
pytest_benchmark_dirs="${BENCHMARK_PYTEST_DIRS:-${BENCHMARK_PYTEST_DIR:-}}"

# Raw per-tool results live next to the output file.
benchmark_work_dir="$benchmark_root/raw-results"
hyperfine_json_dir="$benchmark_work_dir/hyperfine"
pytest_json="$benchmark_work_dir/pytest.json"

# nullglob makes the array empty (instead of holding the literal pattern)
# when no script matches.
shopt -s nullglob
benchmark_scripts=("$hyperfine_benchmark_dir"/benchmark_*.sh)
shopt -u nullglob

# Intentionally unquoted: the variable holds a whitespace-separated list.
pytest_dirs=()
for pytest_benchmark_dir in $pytest_benchmark_dirs; do
  if [[ ! -d "$pytest_benchmark_dir" ]]; then
    echo "Pytest benchmark directory not found: $pytest_benchmark_dir" >&2
    exit 1
  fi
  pytest_dirs+=("$pytest_benchmark_dir")
done

if (( ${#benchmark_scripts[@]} == 0 && ${#pytest_dirs[@]} == 0 )); then
  echo "No benchmark scripts or pytest benchmarks found" >&2
  exit 1
fi

echo "Benchmark Python: $(command -v python)"
python -c 'import sys; print(sys.version)'

# Start from a clean slate so results of earlier runs are not aggregated.
rm -rf "$benchmark_work_dir"
mkdir -p "$hyperfine_json_dir"

if (( ${#benchmark_scripts[@]} > 0 )); then
  for benchmark_script in "${benchmark_scripts[@]}"; do
    benchmark_name="$(basename "$benchmark_script" .sh)"
    benchmark_result_json="$hyperfine_json_dir/$benchmark_name.json"

    # The first "# BENCHMARK_TITLE:" line in the script names the benchmark;
    # without one, the script's base name is used.
    title="$(sed -n 's/^# BENCHMARK_TITLE:[[:space:]]*//p' "$benchmark_script" | head -n 1)"
    title="${title:-$benchmark_name}"

    # Run the script once outside hyperfine; with "set -e" a failing script
    # aborts the run here.
    echo "Preflight benchmark script: $benchmark_script"
    bash "$benchmark_script"

    hyperfine \
      --show-output \
      --warmup 1 \
      --runs 5 \
      --command-name "$title" \
      --export-json "$benchmark_result_json" \
      "bash $(printf "%q" "$benchmark_script")"
  done
fi

if (( ${#pytest_dirs[@]} > 0 )); then
  pytest \
    -q "${pytest_dirs[@]}" \
    --benchmark-only \
    --benchmark-json "$pytest_json"
fi

python .github/scripts/aggregate_benchmarks.py \
  --input-dir "$benchmark_work_dir" \
  --output "$benchmark_json"
|
||||
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Run a command with BEC e2e services available."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import bec_lib
|
||||
from bec_ipython_client import BECIPythonClient
|
||||
from bec_lib.redis_connector import RedisConnector
|
||||
from bec_lib.service_config import ServiceConfig, ServiceConfigModel
|
||||
from redis import Redis
|
||||
|
||||
|
||||
def _wait_for_redis(host: str, port: int) -> None:
    """Block until a Redis server answers a ping on the given address.

    The server is polled for up to 10 seconds, sleeping 0.1 seconds after
    every failed attempt.

    Args:
        host (str): Host name or address of the Redis server.
        port (int): TCP port of the Redis server.

    Raises:
        RuntimeError: If no successful ping happened before the deadline.
    """

    redis_client = Redis(host=host, port=port)
    give_up_at = time.monotonic() + 10
    while time.monotonic() < give_up_at:
        try:
            alive = redis_client.ping()
        except Exception:
            # The server is not accepting connections yet; retry shortly.
            time.sleep(0.1)
            continue
        if alive:
            return
    raise RuntimeError(f"Redis did not start on {host}:{port}")
|
||||
|
||||
|
||||
def _start_redis(files_path: Path, host: str, port: int) -> subprocess.Popen:
    """Launch a ``redis-server`` child process without persistence.

    Snapshots (``--save ""``) and the append-only file are disabled, and the
    server's working directory is set to ``files_path``.

    Args:
        files_path (Path): Directory passed to Redis as ``--dir``.
        host (str): Address the server binds to.
        port (int): TCP port the server listens on.

    Returns:
        subprocess.Popen: Handle of the started server process.

    Raises:
        RuntimeError: If no ``redis-server`` executable is found on ``PATH``.
    """

    executable = shutil.which("redis-server")
    if executable is None:
        raise RuntimeError("redis-server executable not found")

    command = [executable]
    command += ["--bind", host]
    command += ["--port", str(port)]
    command += ["--save", ""]
    command += ["--appendonly", "no"]
    command += ["--dir", str(files_path)]
    return subprocess.Popen(command)
|
||||
|
||||
|
||||
def _write_configs(files_path: Path, host: str, port: int) -> Path:
    """Create the configuration files for the BEC services in ``files_path``.

    Two files are written: ``test_config.yaml``, copied from the ``tests``
    directory of the installed ``bec_lib`` package, and
    ``services_config.yaml``, the JSON dump of a service configuration that
    points at the given Redis address and uses ``files_path`` as the file
    writer base path.

    Args:
        files_path (Path): Existing directory that receives both files.
        host (str): Redis host stored in the service configuration.
        port (int): Redis port stored in the service configuration.

    Returns:
        Path: Location of the written ``services_config.yaml``.
    """

    bec_lib_tests = Path(bec_lib.__file__).resolve().parent / "tests"
    shutil.copyfile(bec_lib_tests / "test_config.yaml", files_path / "test_config.yaml")

    model = ServiceConfigModel(
        redis={"host": host, "port": port}, file_writer={"base_path": str(files_path)}
    )
    services_config_path = files_path / "services_config.yaml"
    services_config_path.write_text(model.model_dump_json(indent=4), encoding="utf-8")
    return services_config_path
|
||||
|
||||
|
||||
def _load_demo_config(services_config: Path) -> None:
    """Load the BEC demo configuration through a temporary client.

    A client is created from ``services_config`` and started. It is shut down
    and its singleton is reset afterwards, whether or not loading succeeded.

    Args:
        services_config (Path): Service configuration file for the client.
    """

    config = ServiceConfig(services_config)
    client = BECIPythonClient(config, RedisConnector, forced=True)
    client.start()
    try:
        client.config.load_demo_config()
    finally:
        client.shutdown()
        # Presumably lets a later client in this process start fresh;
        # confirm against the bec_lib client implementation.
        client._client._reset_singleton()
|
||||
|
||||
|
||||
def main() -> int:
    """Run a command while Redis and the BEC services are available.

    Everything after the script name (optionally introduced by ``--``) is the
    command to run. A Redis server on ``127.0.0.1:6379`` and the BEC services
    are started in a temporary directory, the demo configuration is loaded,
    and the command is executed. The services and Redis are stopped again
    afterwards, including when startup or the command fails.

    Returns:
        int: Exit code of the executed command.

    Raises:
        ValueError: If no command was given.
        RuntimeError: If ``redis-server`` is not installed or does not become
            reachable in time.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("command", nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # argparse.REMAINDER keeps a leading "--" separator; drop it.
    if args.command[:1] == ["--"]:
        args.command = args.command[1:]
    if not args.command:
        raise ValueError("No command provided")

    host = "127.0.0.1"
    port = 6379

    with tempfile.TemporaryDirectory(prefix="bec-benchmark-") as tmp:
        files_path = Path(tmp)
        services_config = _write_configs(files_path, host, port)
        redis_process = _start_redis(files_path, host, port)
        processes = None
        service_handler = None
        try:
            _wait_for_redis(host, port)

            # NOTE(review): imported only once Redis is reachable, as in the
            # original code; confirm whether the deferral is required.
            from bec_server.bec_server_utils.service_handler import ServiceHandler

            service_handler = ServiceHandler(
                bec_path=files_path, config_path=services_config, interface="subprocess"
            )
            processes = service_handler.start()
            _load_demo_config(services_config)

            env = os.environ.copy()
            return subprocess.run(args.command, env=env, check=False).returncode
        finally:
            try:
                if service_handler is not None and processes is not None:
                    service_handler.stop(processes)
            finally:
                # Redis must go down even if stopping the services failed;
                # otherwise the server would outlive this script.
                redis_process.terminate()
                try:
                    redis_process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    redis_process.kill()
                    # Reap the killed process so it is gone before the
                    # temporary directory is removed.
                    redis_process.wait()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate the exit code of the wrapped command as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
|
||||
Reference in New Issue
Block a user