From 42439097e9476b258d7a1f6a928dedd6d8ff4ec6 Mon Sep 17 00:00:00 2001 From: wakonig_k Date: Fri, 17 Apr 2026 10:07:22 +0200 Subject: [PATCH] ci: add benchmark workflow --- .github/scripts/aggregate_benchmarks.py | 166 +++++++ .github/scripts/compare_benchmarks.py | 411 ++++++++++++++++++ .github/scripts/run_benchmarks.sh | 69 +++ .github/scripts/run_with_bec_servers.py | 122 ++++++ .github/workflows/benchmark.yml | 239 ++++++++++ .github/workflows/ci.yml | 24 +- .github/workflows/pytest-matrix.yml | 25 +- .github/workflows/pytest.yml | 22 +- .../hyperfine/benchmark_import_bec_widgets.sh | 5 + .../benchmark_launch_bec_with_companion.sh | 5 + .../benchmark_launch_bec_without_companion.sh | 5 + .../hyperfine/utils/exit_bec_startup.py | 5 + .../benchmarks/test_dock_area_benchmark.py | 27 ++ 13 files changed, 1093 insertions(+), 32 deletions(-) create mode 100644 .github/scripts/aggregate_benchmarks.py create mode 100644 .github/scripts/compare_benchmarks.py create mode 100644 .github/scripts/run_benchmarks.sh create mode 100644 .github/scripts/run_with_bec_servers.py create mode 100644 .github/workflows/benchmark.yml create mode 100755 tests/benchmarks/hyperfine/benchmark_import_bec_widgets.sh create mode 100755 tests/benchmarks/hyperfine/benchmark_launch_bec_with_companion.sh create mode 100755 tests/benchmarks/hyperfine/benchmark_launch_bec_without_companion.sh create mode 100644 tests/benchmarks/hyperfine/utils/exit_bec_startup.py create mode 100644 tests/unit_tests/benchmarks/test_dock_area_benchmark.py diff --git a/.github/scripts/aggregate_benchmarks.py b/.github/scripts/aggregate_benchmarks.py new file mode 100644 index 00000000..22818257 --- /dev/null +++ b/.github/scripts/aggregate_benchmarks.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +"""Aggregate and merge benchmark JSON files. + +The workflow runs the same benchmark suite on multiple independent runners. 
+This script reads every JSON file produced by those attempts, normalizes the +contained benchmark values, and writes a compact mapping JSON where each value is +the median across attempts. It can also merge independent hyperfine JSON files +from one runner into a single hyperfine-style JSON file. +""" + +from __future__ import annotations + +import argparse +import json +import statistics +from pathlib import Path +from typing import Any + +from compare_benchmarks import Benchmark, extract_benchmarks + + +def collect_benchmarks(paths: list[Path]) -> dict[str, list[Benchmark]]: + """Collect benchmarks from multiple JSON files. + + Args: + paths (list[Path]): Paths to hyperfine, pytest-benchmark, or compact + mapping JSON files. + + Returns: + dict[str, list[Benchmark]]: Benchmarks grouped by benchmark name. + """ + + collected: dict[str, list[Benchmark]] = {} + for path in paths: + for name, benchmark in extract_benchmarks(path).items(): + collected.setdefault(name, []).append(benchmark) + return collected + + +def aggregate(collected: dict[str, list[Benchmark]]) -> dict[str, dict[str, object]]: + """Aggregate grouped benchmarks using the median value. + + Args: + collected (dict[str, list[Benchmark]]): Benchmarks grouped by benchmark + name. + + Returns: + dict[str, dict[str, object]]: Compact mapping JSON data. Each benchmark + contains ``value``, ``unit``, ``metric``, ``attempts``, and + ``attempt_values``. 
+ """ + + aggregated: dict[str, dict[str, object]] = {} + for name, benchmarks in sorted(collected.items()): + values = [benchmark.value for benchmark in benchmarks] + unit = next((benchmark.unit for benchmark in benchmarks if benchmark.unit), "") + metric = next((benchmark.metric for benchmark in benchmarks if benchmark.metric), "value") + aggregated[name] = { + "value": statistics.median(values), + "unit": unit, + "metric": f"median-of-attempt-{metric}", + "attempts": len(values), + "attempt_values": values, + } + return aggregated + + +def merge_hyperfine_results(paths: list[Path]) -> dict[str, Any]: + """Merge hyperfine result files. + + Args: + paths (list[Path]): Hyperfine JSON files to merge. + + Returns: + dict[str, Any]: Hyperfine-style JSON object containing all result rows. + + Raises: + ValueError: If any file has no hyperfine ``results`` list. + """ + + merged: dict[str, Any] = {"results": []} + for path in paths: + data = json.loads(path.read_text(encoding="utf-8")) + results = data.get("results", []) if isinstance(data, dict) else None + if not isinstance(results, list): + raise ValueError(f"{path} has no hyperfine results list") + merged["results"].extend(results) + return merged + + +def main_from_paths(input_dir: Path, output: Path) -> int: + """Aggregate all JSON files in a directory and write the result. + + Args: + input_dir (Path): Directory containing benchmark JSON files. + output (Path): Path where the aggregate JSON should be written. + + Returns: + int: Always ``0`` on success. + + Raises: + ValueError: If no JSON files are found in ``input_dir``. 
+ """ + + paths = sorted(input_dir.rglob("*.json")) + if not paths: + raise ValueError(f"No benchmark JSON files found in {input_dir}") + + output.parent.mkdir(parents=True, exist_ok=True) + output.write_text( + json.dumps(aggregate(collect_benchmarks(paths)), indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + return 0 + + +def merge_from_paths(input_dir: Path, output: Path) -> int: + """Merge all hyperfine JSON files in a directory and write the result. + + Args: + input_dir (Path): Directory containing hyperfine JSON files. + output (Path): Path where the merged JSON should be written. + + Returns: + int: Always ``0`` on success. + + Raises: + ValueError: If no JSON files are found in ``input_dir``. + """ + + paths = sorted(input_dir.glob("*.json")) + if not paths: + raise ValueError(f"No hyperfine JSON files found in {input_dir}") + + output.parent.mkdir(parents=True, exist_ok=True) + output.write_text( + json.dumps(merge_hyperfine_results(paths), indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + return 0 + + +def main() -> int: + """Run the benchmark aggregation command line interface. + + Returns: + int: Always ``0`` on success. 
+ """ + + parser = argparse.ArgumentParser() + parser.add_argument( + "--mode", + choices=("aggregate", "merge-hyperfine"), + default="aggregate", + help="Operation to perform.", + ) + parser.add_argument("--input-dir", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + args = parser.parse_args() + if args.mode == "merge-hyperfine": + return merge_from_paths(input_dir=args.input_dir, output=args.output) + return main_from_paths(input_dir=args.input_dir, output=args.output) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/scripts/compare_benchmarks.py b/.github/scripts/compare_benchmarks.py new file mode 100644 index 00000000..2bb2f9bd --- /dev/null +++ b/.github/scripts/compare_benchmarks.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python3 +"""Compare benchmark JSON files and write a GitHub Actions summary. + +The script supports JSON emitted by hyperfine, JSON emitted by pytest-benchmark, +and a compact mapping format generated by ``aggregate_benchmarks.py``. Timing +formats prefer median values and fall back to mean values when median values are +not present. +""" + +from __future__ import annotations + +import argparse +import json +import math +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +@dataclass(frozen=True) +class Benchmark: + """Normalized benchmark result. + + Attributes: + name (str): Stable benchmark name used to match baseline and current results. + value (float): Numeric benchmark value used for comparison. + unit (str): Display unit for the value, for example ``"s"``. + metric (str): Source metric name, for example ``"median"`` or ``"mean"``. + """ + + name: str + value: float + unit: str + metric: str = "value" + + +@dataclass(frozen=True) +class Comparison: + """Comparison between one baseline benchmark and one current benchmark. + + Attributes: + name (str): Benchmark name. + baseline (float): Baseline benchmark value. 
+ current (float): Current benchmark value. + delta_percent (float): Percent change from baseline to current. + unit (str): Display unit for both values. + metric (str): Current result metric used for comparison. + regressed (bool): Whether the change exceeds the configured threshold. + """ + + name: str + baseline: float + current: float + delta_percent: float + unit: str + metric: str + regressed: bool + + +def _read_json(path: Path) -> Any: + """Read JSON data from a file. + + Args: + path (Path): Path to the JSON file. + + Returns: + Any: Parsed JSON value. + """ + + with path.open("r", encoding="utf-8") as stream: + return json.load(stream) + + +def _as_float(value: Any) -> float | None: + """Convert a value to a finite float. + + Args: + value (Any): Value to convert. + + Returns: + float | None: Converted finite float, or ``None`` if conversion fails. + """ + + try: + result = float(value) + except (TypeError, ValueError): + return None + if math.isfinite(result): + return result + return None + + +def _extract_hyperfine(data: dict[str, Any]) -> dict[str, Benchmark]: + """Extract normalized benchmarks from hyperfine JSON. + + Args: + data (dict[str, Any]): Parsed hyperfine JSON object. + + Returns: + dict[str, Benchmark]: Benchmarks keyed by command name. + """ + + benchmarks: dict[str, Benchmark] = {} + for result in data.get("results", []): + if not isinstance(result, dict): + continue + name = str(result.get("command") or result.get("name") or "").strip() + metric = "median" + value = _as_float(result.get(metric)) + if value is None: + metric = "mean" + value = _as_float(result.get(metric)) + if name and value is not None: + benchmarks[name] = Benchmark(name=name, value=value, unit="s", metric=metric) + return benchmarks + + +def _extract_pytest_benchmark(data: dict[str, Any]) -> dict[str, Benchmark]: + """Extract normalized benchmarks from pytest-benchmark JSON. + + Args: + data (dict[str, Any]): Parsed pytest-benchmark JSON object. 
+ + Returns: + dict[str, Benchmark]: Benchmarks keyed by full benchmark name. + """ + + benchmarks: dict[str, Benchmark] = {} + for benchmark in data.get("benchmarks", []): + if not isinstance(benchmark, dict): + continue + + name = str(benchmark.get("fullname") or benchmark.get("name") or "").strip() + stats = benchmark.get("stats", {}) + value = None + metric = "median" + if isinstance(stats, dict): + value = _as_float(stats.get(metric)) + if value is None: + metric = "mean" + value = _as_float(stats.get(metric)) + if name and value is not None: + benchmarks[name] = Benchmark(name=name, value=value, unit="s", metric=metric) + return benchmarks + + +def _extract_simple_mapping(data: dict[str, Any]) -> dict[str, Benchmark]: + """Extract normalized benchmarks from a compact mapping JSON object. + + Args: + data (dict[str, Any]): Parsed mapping where each benchmark is either a + raw number or an object containing ``value``, ``unit``, and ``metric``. + + Returns: + dict[str, Benchmark]: Benchmarks keyed by mapping key. + """ + + benchmarks: dict[str, Benchmark] = {} + + for name, raw_value in data.items(): + if name in {"version", "context", "commit", "timestamp"}: + continue + + value = _as_float(raw_value) + unit = "" + metric = "value" + if value is None and isinstance(raw_value, dict): + value = _as_float(raw_value.get("value")) + unit = str(raw_value.get("unit") or "") + metric = str(raw_value.get("metric") or "value") + + if value is not None: + benchmarks[str(name)] = Benchmark(name=str(name), value=value, unit=unit, metric=metric) + + return benchmarks + + +def extract_benchmarks(path: Path) -> dict[str, Benchmark]: + """Extract normalized benchmarks from a supported JSON file. + + Args: + path (Path): Path to a hyperfine, pytest-benchmark, or compact mapping + JSON file. + + Returns: + dict[str, Benchmark]: Normalized benchmarks keyed by name. + + Raises: + ValueError: If the JSON root is not an object or no supported benchmark + entries can be extracted. 
+ """ + + data = _read_json(path) + if not isinstance(data, dict): + raise ValueError(f"{path} must contain a JSON object") + + extractors = (_extract_hyperfine, _extract_pytest_benchmark, _extract_simple_mapping) + for extractor in extractors: + benchmarks = extractor(data) + if benchmarks: + return benchmarks + + raise ValueError(f"No supported benchmark entries found in {path}") + + +def compare_benchmarks( + baseline: dict[str, Benchmark], + current: dict[str, Benchmark], + threshold_percent: float, + higher_is_better: bool, +) -> tuple[list[Comparison], list[str], list[str]]: + """Compare baseline benchmarks with current benchmarks. + + Args: + baseline (dict[str, Benchmark]): Baseline benchmarks keyed by name. + current (dict[str, Benchmark]): Current benchmarks keyed by name. + threshold_percent (float): Regression threshold in percent. + higher_is_better (bool): If ``True``, lower current values are treated as + regressions. If ``False``, higher current values are treated as + regressions. + + Returns: + tuple[list[Comparison], list[str], list[str]]: Comparisons for common + benchmark names, names missing from current results, and names newly + present in current results. 
+ """ + + comparisons: list[Comparison] = [] + missing_in_current: list[str] = [] + new_in_current: list[str] = [] + + for name, baseline_benchmark in sorted(baseline.items()): + current_benchmark = current.get(name) + if current_benchmark is None: + missing_in_current.append(name) + continue + + if baseline_benchmark.value == 0: + delta_percent = math.inf * current_benchmark.value if current_benchmark.value else 0.0 + else: + delta_percent = ( + (current_benchmark.value - baseline_benchmark.value) + / abs(baseline_benchmark.value) + * 100 + ) + + if higher_is_better: + regressed = delta_percent <= -threshold_percent + else: + regressed = delta_percent >= threshold_percent + + comparisons.append( + Comparison( + name=name, + baseline=baseline_benchmark.value, + current=current_benchmark.value, + delta_percent=delta_percent, + unit=current_benchmark.unit or baseline_benchmark.unit, + metric=current_benchmark.metric, + regressed=regressed, + ) + ) + + for name in sorted(set(current) - set(baseline)): + new_in_current.append(name) + + return comparisons, missing_in_current, new_in_current + + +def _format_value(value: float, unit: str) -> str: + """Format a benchmark value for Markdown output. + + Args: + value (float): Numeric benchmark value. + unit (str): Display unit. + + Returns: + str: Formatted value with optional unit suffix. + """ + + suffix = f" {unit}" if unit else "" + return f"{value:.6g}{suffix}" + + +def write_summary( + path: Path, + comparisons: list[Comparison], + missing_in_current: list[str], + new_in_current: list[str], + threshold_percent: float, + higher_is_better: bool, +) -> None: + """Write a Markdown benchmark comparison summary. + + Args: + path (Path): Path where the summary should be written. + comparisons (list[Comparison]): Comparison rows for matching benchmarks. + missing_in_current (list[str]): Baseline benchmark names missing from the + current result. + new_in_current (list[str]): Current benchmark names not present in the + baseline result. 
+ threshold_percent (float): Regression threshold in percent. + higher_is_better (bool): Whether higher benchmark values are considered + better. + """ + + regressions = [comparison for comparison in comparisons if comparison.regressed] + direction = "higher is better" if higher_is_better else "lower is better" + sorted_comparisons = sorted(comparisons, key=lambda comparison: comparison.name) + + lines = [ + "", + "## Benchmark comparison", + "", + f"Threshold: {threshold_percent:g}% ({direction}).", + ] + lines.append("") + + if regressions: + lines.extend( + [ + f"{len(regressions)} benchmark(s) regressed beyond the configured threshold.", + "", + "| Benchmark | Baseline | Current | Change |", + "| --- | ---: | ---: | ---: |", + ] + ) + for comparison in regressions: + lines.append( + "| " + f"{comparison.name} | " + f"{_format_value(comparison.baseline, comparison.unit)} | " + f"{_format_value(comparison.current, comparison.unit)} | " + f"{comparison.delta_percent:+.2f}% |" + ) + else: + lines.append("No benchmark regression exceeded the configured threshold.") + + if sorted_comparisons: + lines.extend( + [ + "", + "
", + "All benchmark results", + "", + "| Benchmark | Baseline | Current | Change | Status |", + "| --- | ---: | ---: | ---: | --- |", + ] + ) + for comparison in sorted_comparisons: + status = "regressed" if comparison.regressed else "ok" + lines.append( + "| " + f"{comparison.name} | " + f"{_format_value(comparison.baseline, comparison.unit)} | " + f"{_format_value(comparison.current, comparison.unit)} | " + f"{comparison.delta_percent:+.2f}% | " + f"{status} |" + ) + lines.extend(["", "
"]) + + if missing_in_current: + lines.extend(["", "Missing benchmarks in the current run:"]) + lines.extend(f"- `{name}`" for name in missing_in_current) + + if new_in_current: + lines.extend(["", "New benchmarks in the current run:"]) + lines.extend(f"- `{name}`" for name in new_in_current) + + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + + +def main() -> int: + """Run the benchmark comparison command line interface. + + Returns: + int: ``1`` when a regression exceeds the threshold, otherwise ``0``. + """ + + parser = argparse.ArgumentParser() + parser.add_argument("--baseline", required=True, type=Path) + parser.add_argument("--current", required=True, type=Path) + parser.add_argument("--summary", required=True, type=Path) + parser.add_argument("--threshold-percent", required=True, type=float) + parser.add_argument("--higher-is-better", action="store_true") + args = parser.parse_args() + + baseline = extract_benchmarks(args.baseline) + current = extract_benchmarks(args.current) + comparisons, missing_in_current, new_in_current = compare_benchmarks( + baseline=baseline, + current=current, + threshold_percent=args.threshold_percent, + higher_is_better=args.higher_is_better, + ) + + write_summary( + path=args.summary, + comparisons=comparisons, + missing_in_current=missing_in_current, + new_in_current=new_in_current, + threshold_percent=args.threshold_percent, + higher_is_better=args.higher_is_better, + ) + + return 1 if any(comparison.regressed for comparison in comparisons) else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/scripts/run_benchmarks.sh b/.github/scripts/run_benchmarks.sh new file mode 100644 index 00000000..64c23790 --- /dev/null +++ b/.github/scripts/run_benchmarks.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -euo pipefail + +mkdir -p benchmark-results +benchmark_json="${BENCHMARK_JSON:-benchmark-results/current.json}" +benchmark_root="$(dirname 
"$benchmark_json")" +hyperfine_benchmark_dir="${BENCHMARK_HYPERFINE_DIR:-tests/benchmarks/hyperfine}" +pytest_benchmark_dirs="${BENCHMARK_PYTEST_DIRS:-${BENCHMARK_PYTEST_DIR:-}}" +benchmark_work_dir="$benchmark_root/raw-results" +hyperfine_json_dir="$benchmark_work_dir/hyperfine" +pytest_json="$benchmark_work_dir/pytest.json" + +shopt -s nullglob +benchmark_scripts=() +benchmark_scripts=("$hyperfine_benchmark_dir"/benchmark_*.sh) +shopt -u nullglob + +pytest_dirs=() +for pytest_benchmark_dir in $pytest_benchmark_dirs; do + if [ -d "$pytest_benchmark_dir" ]; then + pytest_dirs+=("$pytest_benchmark_dir") + else + echo "Pytest benchmark directory not found: $pytest_benchmark_dir" >&2 + exit 1 + fi +done + +if [ "${#benchmark_scripts[@]}" -eq 0 ] && [ "${#pytest_dirs[@]}" -eq 0 ]; then + echo "No benchmark scripts or pytest benchmarks found" >&2 + exit 1 +fi + +echo "Benchmark Python: $(command -v python)" +python -c 'import sys; print(sys.version)' + +rm -rf "$benchmark_work_dir" +mkdir -p "$hyperfine_json_dir" + +if [ "${#benchmark_scripts[@]}" -gt 0 ]; then + for benchmark_script in "${benchmark_scripts[@]}"; do + title="$(sed -n 's/^# BENCHMARK_TITLE:[[:space:]]*//p' "$benchmark_script" | head -n 1)" + if [ -z "$title" ]; then + title="$(basename "$benchmark_script" .sh)" + fi + benchmark_name="$(basename "$benchmark_script" .sh)" + benchmark_result_json="$hyperfine_json_dir/$benchmark_name.json" + echo "Preflight benchmark script: $benchmark_script" + bash "$benchmark_script" + + hyperfine \ + --show-output \ + --warmup 1 \ + --runs 5 \ + --command-name "$title" \ + --export-json "$benchmark_result_json" \ + "bash $(printf "%q" "$benchmark_script")" + done +fi + +if [ "${#pytest_dirs[@]}" -gt 0 ]; then + pytest \ + -q "${pytest_dirs[@]}" \ + --benchmark-only \ + --benchmark-json "$pytest_json" +fi + +python .github/scripts/aggregate_benchmarks.py \ + --input-dir "$benchmark_work_dir" \ + --output "$benchmark_json" diff --git 
a/.github/scripts/run_with_bec_servers.py b/.github/scripts/run_with_bec_servers.py new file mode 100644 index 00000000..719e25f0 --- /dev/null +++ b/.github/scripts/run_with_bec_servers.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +"""Run a command with BEC e2e services available.""" + +from __future__ import annotations + +import argparse +import os +import shutil +import subprocess +import tempfile +import time +from pathlib import Path + +import bec_lib +from bec_ipython_client import BECIPythonClient +from bec_lib.redis_connector import RedisConnector +from bec_lib.service_config import ServiceConfig, ServiceConfigModel +from redis import Redis + + +def _wait_for_redis(host: str, port: int) -> None: + client = Redis(host=host, port=port) + deadline = time.monotonic() + 10 + while time.monotonic() < deadline: + try: + if client.ping(): + return + except Exception: + time.sleep(0.1) + raise RuntimeError(f"Redis did not start on {host}:{port}") + + +def _start_redis(files_path: Path, host: str, port: int) -> subprocess.Popen: + redis_server = shutil.which("redis-server") + if redis_server is None: + raise RuntimeError("redis-server executable not found") + + return subprocess.Popen( + [ + redis_server, + "--bind", + host, + "--port", + str(port), + "--save", + "", + "--appendonly", + "no", + "--dir", + str(files_path), + ] + ) + + +def _write_configs(files_path: Path, host: str, port: int) -> Path: + test_config = files_path / "test_config.yaml" + services_config = files_path / "services_config.yaml" + + bec_lib_path = Path(bec_lib.__file__).resolve().parent + shutil.copyfile(bec_lib_path / "tests" / "test_config.yaml", test_config) + + service_config = ServiceConfigModel( + redis={"host": host, "port": port}, file_writer={"base_path": str(files_path)} + ) + services_config.write_text(service_config.model_dump_json(indent=4), encoding="utf-8") + return services_config + + +def _load_demo_config(services_config: Path) -> None: + bec = 
BECIPythonClient(ServiceConfig(services_config), RedisConnector, forced=True) + bec.start() + try: + bec.config.load_demo_config() + finally: + bec.shutdown() + bec._client._reset_singleton() + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("command", nargs=argparse.REMAINDER) + args = parser.parse_args() + + if args.command[:1] == ["--"]: + args.command = args.command[1:] + if not args.command: + raise ValueError("No command provided") + + host = "127.0.0.1" + port = 6379 + + with tempfile.TemporaryDirectory(prefix="bec-benchmark-") as tmp: + files_path = Path(tmp) + services_config = _write_configs(files_path, host, port) + redis_process = _start_redis(files_path, host, port) + processes = None + service_handler = None + try: + _wait_for_redis(host, port) + + from bec_server.bec_server_utils.service_handler import ServiceHandler + + service_handler = ServiceHandler( + bec_path=files_path, config_path=services_config, interface="subprocess" + ) + processes = service_handler.start() + _load_demo_config(services_config) + + env = os.environ.copy() + return subprocess.run(args.command, env=env, check=False).returncode + finally: + if service_handler is not None and processes is not None: + service_handler.stop(processes) + redis_process.terminate() + try: + redis_process.wait(timeout=10) + except subprocess.TimeoutExpired: + redis_process.kill() + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 00000000..f8724689 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,239 @@ +name: BW Benchmarks + +on: [workflow_call] + +permissions: + contents: read + +env: + BENCHMARK_JSON: benchmark-results/current.json + BENCHMARK_BASELINE_JSON: gh-pages-benchmark-data/benchmarks/latest.json + BENCHMARK_SUMMARY: benchmark-results/summary.md + BENCHMARK_COMMAND: "bash .github/scripts/run_benchmarks.sh" + 
BENCHMARK_THRESHOLD_PERCENT: 10 + BENCHMARK_HIGHER_IS_BETTER: false + +jobs: + benchmark_attempt: + runs-on: ubuntu-latest + continue-on-error: true + permissions: + contents: read + defaults: + run: + shell: bash -el {0} + strategy: + fail-fast: false + matrix: + attempt: [1, 2, 3] + + env: + BENCHMARK_JSON: benchmark-results/current-${{ matrix.attempt }}.json + BEC_CORE_BRANCH: main + OPHYD_DEVICES_BRANCH: main + PLUGIN_REPO_BRANCH: main + BENCHMARK_PYTEST_DIRS: tests/unit_tests/benchmarks + QTWEBENGINE_DISABLE_SANDBOX: 1 + QT_QPA_PLATFORM: "offscreen" + + steps: + - name: Checkout BEC Widgets + uses: actions/checkout@v4 + with: + repository: bec-project/bec_widgets + ref: ${{ github.event.pull_request.head.sha || github.sha }} + + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + auto-activate-base: true + python-version: "3.11" + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y libgl1 libegl1 x11-utils libxkbcommon-x11-0 libdbus-1-3 xvfb + sudo apt-get -y install libnss3 libxdamage1 libasound2t64 libatomic1 libxcursor1 + sudo apt-get -y install ttyd hyperfine redis-server + + - name: Install full e2e environment + run: | + echo -e "\033[35;1m Using branch $BEC_CORE_BRANCH of BEC CORE \033[0;m"; + git clone --branch "$BEC_CORE_BRANCH" https://github.com/bec-project/bec.git + echo -e "\033[35;1m Using branch $OPHYD_DEVICES_BRANCH of OPHYD_DEVICES \033[0;m"; + git clone --branch "$OPHYD_DEVICES_BRANCH" https://github.com/bec-project/ophyd_devices.git + export OHPYD_DEVICES_PATH=$PWD/ophyd_devices + echo -e "\033[35;1m Using branch $PLUGIN_REPO_BRANCH of bec_testing_plugin \033[0;m"; + git clone --branch "$PLUGIN_REPO_BRANCH" https://github.com/bec-project/bec_testing_plugin.git + cd ./bec + conda create -q -n test-environment python=3.11 + conda activate test-environment + source ./bin/install_bec_dev.sh -t + cd ../ + python -m pip install -e ./ophyd_devices -e 
.[dev,pyside6] -e ./bec_testing_plugin pytest-benchmark + + mkdir -p "$(dirname "$BENCHMARK_JSON")" + python .github/scripts/run_with_bec_servers.py -- bash -lc "$BENCHMARK_COMMAND" + test -s "$BENCHMARK_JSON" + + - name: Upload benchmark artifact + uses: actions/upload-artifact@v4 + with: + name: bw-benchmark-json-${{ matrix.attempt }} + path: ${{ env.BENCHMARK_JSON }} + + benchmark: + needs: [benchmark_attempt] + runs-on: ubuntu-latest + permissions: + contents: read + issues: write + pull-requests: write + + steps: + - name: Checkout BEC Widgets + uses: actions/checkout@v4 + with: + repository: bec-project/bec_widgets + ref: ${{ github.event.pull_request.head.sha || github.sha }} + + - name: Download benchmark attempts + uses: actions/download-artifact@v4 + with: + pattern: bw-benchmark-json-* + path: benchmark-results/attempts + merge-multiple: true + + - name: Aggregate benchmark attempts + run: | + python .github/scripts/aggregate_benchmarks.py \ + --input-dir benchmark-results/attempts \ + --output "$BENCHMARK_JSON" + + - name: Upload aggregate benchmark artifact + uses: actions/upload-artifact@v4 + with: + name: bw-benchmark-json + path: ${{ env.BENCHMARK_JSON }} + + - name: Fetch gh-pages benchmark data + run: | + if git ls-remote --exit-code --heads origin gh-pages; then + git clone --depth=1 --branch gh-pages "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY.git" gh-pages-benchmark-data + else + mkdir -p gh-pages-benchmark-data + fi + + - name: Compare with latest gh-pages benchmark + id: compare + continue-on-error: true + run: | + if [ ! -s "$BENCHMARK_BASELINE_JSON" ]; then + mkdir -p "$(dirname "$BENCHMARK_SUMMARY")" + { + echo "" + echo "## Benchmark comparison" + echo + echo "No benchmark baseline was found on gh-pages." 
+ } > "$BENCHMARK_SUMMARY" + exit 0 + fi + + args=( + --baseline "$BENCHMARK_BASELINE_JSON" + --current "$BENCHMARK_JSON" + --summary "$BENCHMARK_SUMMARY" + --threshold-percent "$BENCHMARK_THRESHOLD_PERCENT" + ) + + if [ "$BENCHMARK_HIGHER_IS_BETTER" = "true" ]; then + args+=(--higher-is-better) + fi + + set +e + python .github/scripts/compare_benchmarks.py "${args[@]}" + status=$? + set -e + + if [ ! -s "$BENCHMARK_SUMMARY" ]; then + mkdir -p "$(dirname "$BENCHMARK_SUMMARY")" + { + echo "" + echo "## Benchmark comparison" + echo + echo "Benchmark comparison failed before writing a summary." + } > "$BENCHMARK_SUMMARY" + fi + + exit "$status" + + - name: Find existing benchmark PR comment + if: github.event_name == 'pull_request' + id: fc + uses: peter-evans/find-comment@v3 + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: github-actions[bot] + body-includes: "" + + - name: Create or update benchmark PR comment + if: github.event_name == 'pull_request' + uses: peter-evans/create-or-update-comment@v5 + with: + issue-number: ${{ github.event.pull_request.number }} + comment-id: ${{ steps.fc.outputs.comment-id }} + body-path: ${{ env.BENCHMARK_SUMMARY }} + edit-mode: replace + + - name: Fail on benchmark regression + if: github.event_name == 'pull_request' && steps.compare.outcome == 'failure' + run: exit 1 + + publish: + needs: [benchmark] + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + permissions: + contents: write + + steps: + - name: Checkout BEC Widgets + uses: actions/checkout@v4 + with: + repository: bec-project/bec_widgets + ref: ${{ github.sha }} + + - name: Download aggregate benchmark artifact + uses: actions/download-artifact@v4 + with: + name: bw-benchmark-json + path: benchmark-results 
+ + - name: Prepare gh-pages for publishing + run: | + # Clean up any existing worktree/directory + if [ -d gh-pages-benchmark-data ]; then + git worktree remove gh-pages-benchmark-data --force || rm -rf gh-pages-benchmark-data + fi + + if git ls-remote --exit-code --heads origin gh-pages; then + git fetch --depth=1 origin gh-pages + git worktree add gh-pages-benchmark-data FETCH_HEAD + else + git worktree add --detach gh-pages-benchmark-data + git -C gh-pages-benchmark-data checkout --orphan gh-pages + git -C gh-pages-benchmark-data rm -rf . + fi + + - name: Publish benchmark data to gh-pages + working-directory: gh-pages-benchmark-data + run: | + mkdir -p benchmarks/history + cp "../$BENCHMARK_JSON" benchmarks/latest.json + cp "../$BENCHMARK_JSON" "benchmarks/history/${GITHUB_SHA}.json" + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add benchmarks/latest.json "benchmarks/history/${GITHUB_SHA}.json" + git commit -m "Update BW benchmark data for ${GITHUB_SHA}" || exit 0 + git push origin HEAD:gh-pages diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5570667a..26cea7cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,19 +1,19 @@ name: Full CI -on: +on: push: pull_request: workflow_dispatch: inputs: BEC_WIDGETS_BRANCH: - description: 'Branch of BEC Widgets to install' + description: "Branch of BEC Widgets to install" required: false type: string BEC_CORE_BRANCH: - description: 'Branch of BEC Core to install' + description: "Branch of BEC Core to install" required: false type: string OPHYD_DEVICES_BRANCH: - description: 'Branch of Ophyd Devices to install' + description: "Branch of Ophyd Devices to install" required: false type: string @@ -23,6 +23,7 @@ concurrency: permissions: pull-requests: write + contents: read jobs: check_pr_status: @@ -33,6 +34,15 @@ jobs: if: needs.check_pr_status.outputs.branch-pr == '' uses: 
./.github/workflows/formatter.yml + benchmark: + needs: [check_pr_status] + if: needs.check_pr_status.outputs.branch-pr == '' + permissions: + contents: write + issues: write + pull-requests: write + uses: ./.github/workflows/benchmark.yml + unit-test: needs: [check_pr_status, formatter] if: needs.check_pr_status.outputs.branch-pr == '' @@ -69,9 +79,9 @@ jobs: uses: ./.github/workflows/child_repos.yml with: BEC_CORE_BRANCH: ${{ inputs.BEC_CORE_BRANCH || 'main' }} - OPHYD_DEVICES_BRANCH: ${{ inputs.OPHYD_DEVICES_BRANCH || 'main'}} + OPHYD_DEVICES_BRANCH: ${{ inputs.OPHYD_DEVICES_BRANCH || 'main'}} BEC_WIDGETS_BRANCH: ${{ inputs.BEC_WIDGETS_BRANCH || github.head_ref || github.sha }} - + plugin_repos: needs: [check_pr_status, formatter] if: needs.check_pr_status.outputs.branch-pr == '' @@ -81,4 +91,4 @@ jobs: BEC_WIDGETS_BRANCH: ${{ inputs.BEC_WIDGETS_BRANCH || github.head_ref || github.sha }} secrets: - GH_READ_TOKEN: ${{ secrets.GH_READ_TOKEN }} \ No newline at end of file + GH_READ_TOKEN: ${{ secrets.GH_READ_TOKEN }} diff --git a/.github/workflows/pytest-matrix.yml b/.github/workflows/pytest-matrix.yml index 9446714c..ff7fb279 100644 --- a/.github/workflows/pytest-matrix.yml +++ b/.github/workflows/pytest-matrix.yml @@ -1,25 +1,25 @@ name: Run Pytest with different Python versions -on: +on: workflow_call: inputs: pr_number: - description: 'Pull request number' + description: "Pull request number" required: false type: number BEC_CORE_BRANCH: - description: 'Branch of BEC Core to install' + description: "Branch of BEC Core to install" required: false - default: 'main' + default: "main" type: string OPHYD_DEVICES_BRANCH: - description: 'Branch of Ophyd Devices to install' + description: "Branch of Ophyd Devices to install" required: false - default: 'main' + default: "main" type: string BEC_WIDGETS_BRANCH: - description: 'Branch of BEC Widgets to install' + description: "Branch of BEC Widgets to install" required: false - default: 'main' + default: "main" type: 
string jobs: @@ -30,15 +30,14 @@ jobs: python-version: ["3.11", "3.12", "3.13"] env: - BEC_WIDGETS_BRANCH: main # Set the branch you want for bec_widgets - BEC_CORE_BRANCH: main # Set the branch you want for bec - OPHYD_DEVICES_BRANCH: main # Set the branch you want for ophyd_devices + BEC_WIDGETS_BRANCH: main # Set the branch you want for bec_widgets + BEC_CORE_BRANCH: main # Set the branch you want for bec + OPHYD_DEVICES_BRANCH: main # Set the branch you want for ophyd_devices PROJECT_PATH: ${{ github.repository }} QTWEBENGINE_DISABLE_SANDBOX: 1 QT_QPA_PLATFORM: "offscreen" steps: - - name: Checkout BEC Widgets uses: actions/checkout@v4 with: @@ -56,4 +55,4 @@ jobs: - name: Run Pytest run: | pip install pytest pytest-random-order - pytest -v --junitxml=report.xml --random-order ./tests/unit_tests + pytest -v --junitxml=report.xml --random-order --ignore=tests/unit_tests/benchmarks ./tests/unit_tests diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 64f9c0c7..3c316da9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -1,32 +1,30 @@ name: Run Pytest with Coverage -on: +on: workflow_call: inputs: pr_number: - description: 'Pull request number' + description: "Pull request number" required: false type: number BEC_CORE_BRANCH: - description: 'Branch of BEC Core to install' + description: "Branch of BEC Core to install" required: false - default: 'main' + default: "main" type: string OPHYD_DEVICES_BRANCH: - description: 'Branch of Ophyd Devices to install' + description: "Branch of Ophyd Devices to install" required: false - default: 'main' + default: "main" type: string BEC_WIDGETS_BRANCH: - description: 'Branch of BEC Widgets to install' + description: "Branch of BEC Widgets to install" required: false - default: 'main' + default: "main" type: string secrets: CODECOV_TOKEN: required: true - - permissions: pull-requests: write @@ -55,7 +53,7 @@ jobs: - name: Run Pytest with Coverage id: coverage - run: pytest 
--random-order --cov=bec_widgets --cov-config=pyproject.toml --cov-branch --cov-report=xml --no-cov-on-fail tests/unit_tests/ + run: pytest --random-order --cov=bec_widgets --cov-config=pyproject.toml --cov-branch --cov-report=xml --no-cov-on-fail --ignore=tests/unit_tests/benchmarks tests/unit_tests/ - name: Upload test artifacts uses: actions/upload-artifact@v4 @@ -69,4 +67,4 @@ jobs: uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - slug: bec-project/bec_widgets \ No newline at end of file + slug: bec-project/bec_widgets diff --git a/tests/benchmarks/hyperfine/benchmark_import_bec_widgets.sh b/tests/benchmarks/hyperfine/benchmark_import_bec_widgets.sh new file mode 100755 index 00000000..4cf9fc0f --- /dev/null +++ b/tests/benchmarks/hyperfine/benchmark_import_bec_widgets.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# BENCHMARK_TITLE: Import bec_widgets +set -euo pipefail + +python -c 'import bec_widgets; print(bec_widgets.__file__)' diff --git a/tests/benchmarks/hyperfine/benchmark_launch_bec_with_companion.sh b/tests/benchmarks/hyperfine/benchmark_launch_bec_with_companion.sh new file mode 100755 index 00000000..23a44625 --- /dev/null +++ b/tests/benchmarks/hyperfine/benchmark_launch_bec_with_companion.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# BENCHMARK_TITLE: BEC IPython client with companion app +set -euo pipefail + +bec --post-startup-file tests/benchmarks/hyperfine/utils/exit_bec_startup.py diff --git a/tests/benchmarks/hyperfine/benchmark_launch_bec_without_companion.sh b/tests/benchmarks/hyperfine/benchmark_launch_bec_without_companion.sh new file mode 100755 index 00000000..a8147dcc --- /dev/null +++ b/tests/benchmarks/hyperfine/benchmark_launch_bec_without_companion.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# BENCHMARK_TITLE: BEC IPython client without companion app +set -euo pipefail + +bec --nogui --post-startup-file tests/benchmarks/hyperfine/utils/exit_bec_startup.py diff --git 
a/tests/benchmarks/hyperfine/utils/exit_bec_startup.py b/tests/benchmarks/hyperfine/utils/exit_bec_startup.py
new file mode 100644
index 00000000..d4ed74ae
--- /dev/null
+++ b/tests/benchmarks/hyperfine/utils/exit_bec_startup.py
@@ -0,0 +1,5 @@
+# Post-startup file used by the hyperfine launch benchmarks: ask the
+# IPython shell to exit as soon as start-up has finished.
+_ip = get_ipython()
+_ip.confirm_exit = False  # do not prompt for confirmation on exit
+_ip.ask_exit()
diff --git a/tests/unit_tests/benchmarks/test_dock_area_benchmark.py b/tests/unit_tests/benchmarks/test_dock_area_benchmark.py
new file mode 100644
index 00000000..c00f1682
--- /dev/null
+++ b/tests/unit_tests/benchmarks/test_dock_area_benchmark.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+import pytest
+
+from bec_widgets.widgets.containers.dock_area.dock_area import BECDockArea
+from bec_widgets.widgets.plots.waveform.waveform import Waveform
+from tests.unit_tests.client_mocks import mocked_client
+
+
+@pytest.fixture
+def dock_area(qtbot, mocked_client):
+    """Provide a shown BECDockArea that is registered with qtbot."""
+    widget = BECDockArea(client=mocked_client)
+    qtbot.addWidget(widget)
+    # waitExposed is a context manager: it only waits when entered.
+    with qtbot.waitExposed(widget):
+        widget.show()
+    yield widget
+
+
+def test_add_waveform_to_dock_area(benchmark, dock_area, qtbot, mocked_client):
+    """Benchmark adding a Waveform widget to an existing dock area."""
+
+    def add_waveform():
+        # Called once per benchmark round, always on the same dock area.
+        dock_area.new("Waveform")
+        return dock_area
+
+    dock = benchmark(add_waveform)
+
+    assert dock is not None