# DataDesigner/scripts/benchmarks/benchmark_cli_startup.py

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Benchmark CLI startup time in an isolated environment.

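Measures actual `data-designer --help` invocation time (what users experience),
`data-designer config list` time, and pure import cost, in both cold-start
(no __pycache__) and warm-start (cached bytecode) scenarios. Unless skipped,
it also compares a cold start against a venv installed with --compile-bytecode.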

Usage:
    # Full isolated benchmark (creates temp venv)
    python scripts/benchmarks/benchmark_cli_startup.py

    # With import trace and JSON output
    python scripts/benchmarks/benchmark_cli_startup.py --verbose --json
"""

from __future__ import annotations

import argparse
import json
import os
import platform
import re
import shutil
import statistics
import subprocess
import tempfile
import time
from dataclasses import asdict, dataclass, field


@dataclass(frozen=True)
class TimingStats:
    """Summary statistics for one batch of timing samples.

    Instances are immutable; the values are whatever the caller computed from
    ``samples`` (in this script, wall-clock durations in seconds).
    """

    # Central tendency of the batch.
    mean: float
    median: float
    # Spread of the batch.
    stdev: float
    min: float
    max: float
    # Raw per-run measurements the statistics above were derived from.
    samples: list[float] = field(default_factory=list)


@dataclass(frozen=True)
class BenchmarkResult:
    """Cold-start and warm-start timings for one named measurement.

    ``name`` identifies the measurement (for example ``cli_help`` or
    ``import_only``).
    """

    name: str
    # Statistics for the run(s) made without cached bytecode.
    cold: TimingStats
    # Statistics for the repeated runs made with cached bytecode.
    warm: TimingStats


@dataclass(frozen=True)
class CompilationResult:
    """Cold-start comparison between a plain and a bytecode-precompiled venv."""

    # Cold-start time (seconds) in the venv installed without precompiled bytecode.
    cold_without_precompile: float
    # Cold-start time (seconds) in the venv installed with precompiled bytecode.
    cold_with_precompile: float
    # Difference between the two, as computed by the caller.
    overhead: float


@dataclass(frozen=True)
class BenchmarkReport:
    """Everything one benchmark invocation produces: run metadata plus results.

    The report is serialised with ``dataclasses.asdict`` for JSON output and
    rendered by ``_print_results`` for human-readable output.
    """

    # --- Run metadata ------------------------------------------------------
    timestamp: str
    python_version: str
    platform_name: str
    arch: str
    git_commit: str
    git_branch: str
    # Wall-clock seconds spent creating the benchmark venv(s).
    venv_setup_sec: float
    # Number of warm runs requested per measurement.
    warm_runs: int

    # --- Measurements ------------------------------------------------------
    results: list[BenchmarkResult]
    # Slowest imports (module / self_sec / cumulative_sec); None when not collected.
    top_imports: list[dict[str, str | float]] | None = None
    # Precompiled-bytecode comparison; None when that check was skipped.
    compilation_overhead: CompilationResult | None = None


def _compute_stats(samples: list[float]) -> TimingStats:
    """Summarise timing samples into a ``TimingStats`` record.

    Args:
        samples: Measured durations. A copy is stored on the result.

    Returns:
        Mean, median, sample standard deviation, min and max of ``samples``.
        A single sample yields that value for every statistic and a standard
        deviation of 0.0.
    """
    if len(samples) != 1:
        # General case: delegate to the statistics module.
        return TimingStats(
            mean=statistics.mean(samples),
            median=statistics.median(samples),
            stdev=statistics.stdev(samples),
            min=min(samples),
            max=max(samples),
            samples=list(samples),
        )

    # statistics.stdev needs at least two data points, so one sample is
    # reported directly with zero spread.
    only = samples[0]
    return TimingStats(
        mean=only,
        median=only,
        stdev=0.0,
        min=only,
        max=only,
        samples=list(samples),
    )


def _time_subprocess(cmd: list[str], env: dict[str, str] | None = None) -> float:
    """Run ``cmd`` to completion and return how long it took.

    Args:
        cmd: Command and arguments, passed to ``subprocess.run`` as a list.
        env: Environment for the child process; ``None`` inherits the current one.

    Returns:
        Wall-clock seconds measured with ``time.perf_counter``.

    Raises:
        RuntimeError: If the command exits with a non-zero status; the message
            includes the return code, the command line and the child's stderr.
    """
    began = time.perf_counter()
    # Output is captured so the child's stdout/stderr do not reach our terminal.
    proc = subprocess.run(cmd, capture_output=True, env=env)
    duration = time.perf_counter() - began

    if proc.returncode == 0:
        return duration

    err_text = proc.stderr.decode(errors="replace").strip()
    raise RuntimeError(f"Command failed (rc={proc.returncode}): {' '.join(cmd)}\n{err_text}")


def _git_info() -> tuple[str, str]:
    """Look up the current git commit and branch.

    Returns:
        ``(short_commit_hash, branch_name)``. Each element falls back to
        ``"unknown"`` when git prints nothing for it, and both stay
        ``"unknown"`` when the ``git`` executable cannot be found.
    """

    def _rev_parse(flag: str) -> str:
        """Return the stripped output of ``git rev-parse <flag> HEAD`` or 'unknown'."""
        proc = subprocess.run(
            ["git", "rev-parse", flag, "HEAD"],
            capture_output=True,
            text=True,
        )
        return proc.stdout.strip() or "unknown"

    commit = branch = "unknown"
    try:
        commit = _rev_parse("--short")
        branch = _rev_parse("--abbrev-ref")
    except FileNotFoundError:
        # git is not installed; keep whatever was resolved so far.
        pass
    return commit, branch


def _setup_isolated_venv(tmp_dir: str, quiet: bool = False, compile_bytecode: bool = False) -> tuple[str, str, float]:
    """Install data-designer into a venv rooted at ``tmp_dir`` using ``uv sync``.

    Args:
        tmp_dir: Directory handed to uv via ``UV_PROJECT_ENVIRONMENT``.
        quiet: Suppress progress messages when true.
        compile_bytecode: Pass ``--compile-bytecode`` to ``uv sync`` when true.

    Returns:
        ``(cli_path, python_path, setup_time)`` where the paths point at
        ``bin/data-designer`` and ``bin/python3`` inside ``tmp_dir`` and
        ``setup_time`` is the wall-clock duration of ``uv sync`` in seconds.

    Raises:
        RuntimeError: If ``uv sync`` exits with a non-zero status.
        FileNotFoundError: If either expected executable is missing afterwards.
    """
    suffix = " (with --compile-bytecode)" if compile_bytecode else ""
    if not quiet:
        print(f"  Setting up isolated venv in {tmp_dir}{suffix}...")

    sync_cmd = ["uv", "sync", "--package", "data-designer"]
    if compile_bytecode:
        sync_cmd += ["--compile-bytecode"]

    # Redirect uv's project environment to the temp dir; everything else is inherited.
    sync_env = dict(os.environ)
    sync_env["UV_PROJECT_ENVIRONMENT"] = tmp_dir

    began = time.perf_counter()
    proc = subprocess.run(sync_cmd, capture_output=True, env=sync_env)
    setup_time = time.perf_counter() - began
    if proc.returncode != 0:
        err_text = proc.stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Failed to set up isolated venv:\n{err_text}")

    bin_dir = os.path.join(tmp_dir, "bin")
    cli_path = os.path.join(bin_dir, "data-designer")
    python_path = os.path.join(bin_dir, "python3")

    # Verify the install produced both entry points before anything is timed.
    for label, path in (("CLI", cli_path), ("Python", python_path)):
        if not os.path.exists(path):
            raise FileNotFoundError(f"{label} not found at {path}")

    if not quiet:
        print(f"  Venv ready in {setup_time:.1f}s")
    return cli_path, python_path, setup_time


def _find_pycache_dirs(base: str) -> list[str]:
    """Return the path of every ``__pycache__`` directory found beneath ``base``.

    The tree is traversed with ``os.walk``; results appear in traversal order.
    """
    cache_name = "__pycache__"
    return [
        os.path.join(parent, cache_name)
        for parent, subdirs, _files in os.walk(base)
        if cache_name in subdirs
    ]


def _clear_pycache(base: str) -> int:
    """Delete every ``__pycache__`` directory beneath ``base``.

    Deletion errors are ignored (``ignore_errors=True``).

    Returns:
        The number of ``__pycache__`` directories that were found.
    """
    removed = 0
    # The full list is collected before anything is deleted.
    for cache_dir in _find_pycache_dirs(base):
        shutil.rmtree(cache_dir, ignore_errors=True)
        removed += 1
    return removed


def _run_measurement(
    name: str,
    cmd: list[str],
    warm_runs: int,
    pycache_base: str | None,
    env: dict[str, str] | None = None,
    quiet: bool = False,
) -> BenchmarkResult:
    """Time ``cmd`` once cold and then ``warm_runs`` times warm.

    Args:
        name: Label stored on the returned result.
        cmd: Command to time.
        warm_runs: Number of timed runs after the cold run.
        pycache_base: If truthy, every ``__pycache__`` directory beneath it is
            removed before the cold run.
        env: Environment for the timed subprocesses.
        quiet: Suppress progress output when true.

    Returns:
        A ``BenchmarkResult`` whose ``cold`` stats hold the single cold sample
        and whose ``warm`` stats aggregate the warm samples.
    """
    verbose = not quiet

    # Cold start: drop cached bytecode so the first run has to recompile.
    if pycache_base:
        removed = _clear_pycache(pycache_base)
        if verbose and removed > 0:
            print(f"    Cleared {removed} __pycache__ dirs for cold start")

    if verbose:
        print("    Cold run...", end="", flush=True)
    cold_elapsed = _time_subprocess(cmd, env=env)
    if verbose:
        print(f" {cold_elapsed:.3f}s")
        print(f"    Warm runs (n={warm_runs})...", end="", flush=True)

    # Warm runs reuse whatever bytecode the cold run left behind.
    warm_stats = _compute_stats([_time_subprocess(cmd, env=env) for _ in range(warm_runs)])
    if verbose:
        print(f" mean={warm_stats.mean:.3f}s, stdev={warm_stats.stdev:.3f}s")

    return BenchmarkResult(name=name, cold=_compute_stats([cold_elapsed]), warm=warm_stats)


def _get_top_imports(python_path: str, top_n: int) -> list[dict[str, str | float]]:
    """Profile the CLI import with ``python -X importtime`` and rank the results.

    Args:
        python_path: Interpreter used to run the import.
        top_n: Maximum number of entries to return.

    Returns:
        Up to ``top_n`` dicts with keys ``module``, ``self_sec`` and
        ``cumulative_sec`` (seconds rounded to 4 decimals), ordered by self
        time, slowest first.
    """
    proc = subprocess.run(
        [python_path, "-X", "importtime", "-c", "from data_designer.cli.main import main"],
        capture_output=True,
        text=True,
    )

    # The import-time report is written to stderr, one "import time: self | cumulative | name" row per module.
    row_re = re.compile(r"import time:\s+(\d+)\s+\|\s+(\d+)\s+\|\s+(.+)")
    matches = (row_re.match(raw) for raw in proc.stderr.strip().splitlines())
    rows = [(int(m.group(1)), int(m.group(2)), m.group(3).strip()) for m in matches if m]

    # Rank by self time (microseconds), largest first.
    ranked = sorted(rows, key=lambda row: row[0], reverse=True)
    return [
        {
            "module": module,
            "self_sec": round(self_us / 1_000_000, 4),
            "cumulative_sec": round(cumulative_us / 1_000_000, 4),
        }
        for self_us, cumulative_us, module in ranked[:top_n]
    ]


def _print_results(report: BenchmarkReport) -> None:
    """Write a human-readable rendering of ``report`` to stdout.

    Sections, in order: run metadata, one entry per measurement, the
    compilation-overhead comparison (if present) and the slowest-imports
    table (if present and non-empty).
    """
    rule = "=" * 70

    # Header and run metadata.
    print(
        "",
        rule,
        "CLI Startup Benchmark Results",
        rule,
        f"  Python:    {report.python_version}",
        f"  Platform:  {report.platform_name} ({report.arch})",
        f"  Git:       {report.git_commit} ({report.git_branch})",
        f"  Venv setup: {report.venv_setup_sec:.1f}s",
        f"  Warm runs: {report.warm_runs}",
        "",
        sep="\n",
    )

    # One entry per measurement.
    for item in report.results:
        cold, warm = item.cold, item.warm
        warm_line = (
            f"    Warm:  {warm.mean:.3f}s mean, {warm.median:.3f}s median, "
            f"{warm.stdev:.3f}s stdev [{warm.min:.3f}s - {warm.max:.3f}s]"
        )
        print(f"  {item.name}", f"    Cold:  {cold.mean:.3f}s", warm_line, "", sep="\n")

    overhead = report.compilation_overhead
    if overhead:
        print(
            "  compilation_overhead",
            f"    Without precompile:  {overhead.cold_without_precompile:.3f}s",
            f"    With precompile:     {overhead.cold_with_precompile:.3f}s",
            f"    Overhead:            {overhead.overhead:.3f}s",
            "",
            sep="\n",
        )

    imports = report.top_imports
    if imports:
        print(f"  Top {len(imports)} slowest imports (by self time):")
        print(f"    {'Self (s)':<12} {'Cumulative (s)':<16} Module")
        print(f"    {'--------':<12} {'--------------':<16} ------")
        for row in imports:
            print(f"    {row['self_sec']:<12.4f} {row['cumulative_sec']:<16.4f} {row['module']}")
        print()

    print(rule)


def _parse_args() -> argparse.Namespace:
    """Parse command-line arguments from ``sys.argv``.

    ``--warm-runs`` must be at least 1 and ``--top-imports`` at least 0.
    Out-of-range values are rejected by argparse up front (usage error, exit
    status 2). Previously ``--warm-runs 0`` was accepted and the run failed
    later, when statistics were computed over an empty sample list, and a
    negative ``--top-imports`` silently sliced entries off the end of the
    import ranking.

    Returns:
        Namespace with ``warm_runs``, ``verbose``, ``output``, ``json_stdout``,
        ``top_imports``, ``skip_compilation_check`` and
        ``skip_config_list_check``.
    """

    def _int_at_least(minimum: int):
        """Build an argparse ``type=`` callable accepting integers >= ``minimum``."""

        def _convert(raw: str) -> int:
            try:
                value = int(raw)
            except ValueError:
                raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
            if value < minimum:
                raise argparse.ArgumentTypeError(f"must be >= {minimum}, got {value}")
            return value

        return _convert

    parser = argparse.ArgumentParser(
        description="Benchmark data-designer CLI startup time.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--warm-runs",
        type=_int_at_least(1),
        default=10,
        help="Number of warm runs per measurement (default: 10).",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Run python -X importtime and report top slowest imports.",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=None,
        help="Write JSON report to this file path.",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_stdout",
        help="Output JSON only to stdout (no human-readable output).",
    )
    parser.add_argument(
        "--top-imports",
        type=_int_at_least(0),
        default=15,
        help="Number of slowest imports to show with --verbose (default: 15).",
    )
    parser.add_argument(
        "--skip-compilation-check",
        action="store_true",
        help="Skip the compilation overhead measurement (faster iteration).",
    )
    parser.add_argument(
        "--skip-config-list-check",
        action="store_true",
        help="Skip the config list measurement and its extra venv (faster iteration).",
    )
    return parser.parse_args()


def main() -> None:
    """Entry point: parse arguments, gather run metadata and run one benchmark flow.

    The flow with the compilation-overhead comparison is used unless
    ``--skip-compilation-check`` was given. Progress output is suppressed
    when ``--json`` was requested.
    """
    args = _parse_args()
    git_commit, git_branch = _git_info()
    timestamp = time.strftime("%Y-%m-%dT%H:%M:%S%z")
    quiet = args.json_stdout

    if not quiet:
        print("CLI Startup Benchmark", "-" * 40, sep="\n")

    # Both flows share the same signature, so pick one and call it.
    flow = _run_without_compilation_check if args.skip_compilation_check else _run_with_compilation_check
    flow(args, git_commit, git_branch, timestamp, quiet)


def _run_without_compilation_check(
    args: argparse.Namespace,
    git_commit: str,
    git_branch: str,
    timestamp: str,
    quiet: bool,
) -> None:
    """Measurement flow without compilation overhead check.

    Without --skip-config-list-check (default):
      Two venvs created in parallel: main + config-list.
      [1/3] import_only — cold + warm in main venv
      [2/3] cli_help — cold + warm in main venv
      [3/3] config_list — cold in config-list venv, warm in main venv

    With --skip-config-list-check:
      Single venv (main only).
      [1/2] import_only — cold + warm in main venv
      [2/2] cli_help — cold + warm in main venv

    Args:
        args: Parsed CLI arguments (reads ``skip_config_list_check`` and
            ``warm_runs``; passed on to the import-trace and report helpers).
        git_commit: Commit hash recorded in the report.
        git_branch: Branch name recorded in the report.
        timestamp: Run timestamp recorded in the report.
        quiet: Suppress progress output when true.

    Raises:
        RuntimeError: If a ``uv sync`` or a timed command exits non-zero.

    The temporary venv directories are removed in a ``finally`` block whether
    or not the measurements succeed.
    """
    include_config_list = not args.skip_config_list_check
    tmp_main = tempfile.mkdtemp(prefix="dd-bench-main-" if include_config_list else "dd-bench-")
    tmp_config_list = tempfile.mkdtemp(prefix="dd-bench-config-list-") if include_config_list else ""

    try:
        if include_config_list:
            # Set up both venvs in parallel
            if not quiet:
                print("\n  Setting up two venvs in parallel...")
            env_main = {**os.environ, "UV_PROJECT_ENVIRONMENT": tmp_main}
            env_config_list = {**os.environ, "UV_PROJECT_ENVIRONMENT": tmp_config_list}

            setup_start = time.perf_counter()
            proc_main = subprocess.Popen(
                ["uv", "sync", "--package", "data-designer"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                env=env_main,
            )
            proc_config_list = subprocess.Popen(
                ["uv", "sync", "--package", "data-designer"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                env=env_config_list,
            )
            # communicate() drains and closes the pipes while waiting. A bare
            # wait() on a process with PIPE'd output can deadlock once the
            # child fills the OS pipe buffer, and leaves the pipes open.
            _, stderr_main = proc_main.communicate()
            _, stderr_config_list = proc_config_list.communicate()
            venv_setup_sec = time.perf_counter() - setup_start

            if proc_main.returncode != 0:
                stderr = (stderr_main or b"").decode(errors="replace").strip()
                raise RuntimeError(f"Failed to set up main venv:\n{stderr}")
            if proc_config_list.returncode != 0:
                stderr = (stderr_config_list or b"").decode(errors="replace").strip()
                raise RuntimeError(f"Failed to set up config-list venv:\n{stderr}")

            cli_main = os.path.join(tmp_main, "bin", "data-designer")
            python_main = os.path.join(tmp_main, "bin", "python3")
            cli_config_list = os.path.join(tmp_config_list, "bin", "data-designer")

            if not quiet:
                print(f"  Both venvs ready in {venv_setup_sec:.1f}s")
        else:
            cli_main, python_main, venv_setup_sec = _setup_isolated_venv(tmp_main, quiet=quiet)
            env_main = {**os.environ, "UV_PROJECT_ENVIRONMENT": tmp_main}

        total = 3 if include_config_list else 2

        # [1/N] import_only cold + warm
        import_cmd = [python_main, "-c", "from data_designer.cli.main import main"]
        if not quiet:
            print(f"\n  [1/{total}] import_only: python -c 'from data_designer.cli.main import main'")
        import_result = _run_measurement(
            name="import_only",
            cmd=import_cmd,
            warm_runs=args.warm_runs,
            pycache_base=tmp_main,
            env=env_main,
            quiet=quiet,
        )

        # [2/N] cli_help cold + warm
        cli_cmd = [cli_main, "--help"]
        if not quiet:
            print(f"\n  [2/{total}] cli_help: {' '.join(cli_cmd)}")
        cli_result = _run_measurement(
            name="cli_help",
            cmd=cli_cmd,
            warm_runs=args.warm_runs,
            pycache_base=tmp_main,
            env=env_main,
            quiet=quiet,
        )

        results: list[BenchmarkResult] = [import_result, cli_result]

        # [3/3] config_list — cold in config-list venv, warm in main venv
        if include_config_list:
            config_list_cmd_cold = [cli_config_list, "config", "list"]
            config_list_cmd_warm = [cli_main, "config", "list"]
            if not quiet:
                print(f"\n  [3/{total}] config_list: data-designer config list")

            # Cold run in isolated config-list venv
            _clear_pycache(tmp_config_list)
            if not quiet:
                print("    Cold run (config-list venv)...", end="", flush=True)
            cold_time = _time_subprocess(config_list_cmd_cold, env=env_config_list)
            if not quiet:
                print(f" {cold_time:.3f}s")

            # Done with config-list venv; clearing the name tells the
            # finally-block there is nothing left to remove.
            shutil.rmtree(tmp_config_list, ignore_errors=True)
            tmp_config_list = ""

            # Warm runs in main venv
            warm_samples: list[float] = []
            if not quiet:
                print(f"    Warm runs (n={args.warm_runs})...", end="", flush=True)
            for _ in range(args.warm_runs):
                warm_samples.append(_time_subprocess(config_list_cmd_warm, env=env_main))
            warm_stats = _compute_stats(warm_samples)
            if not quiet:
                print(f" mean={warm_stats.mean:.3f}s, stdev={warm_stats.stdev:.3f}s")

            config_list_result = BenchmarkResult(
                name="config_list",
                cold=_compute_stats([cold_time]),
                warm=warm_stats,
            )
            results.append(config_list_result)

        top_imports = _collect_top_imports(args, python_main, quiet)
        _emit_report(
            args=args,
            timestamp=timestamp,
            git_commit=git_commit,
            git_branch=git_branch,
            venv_setup_sec=venv_setup_sec,
            results=results,
            top_imports=top_imports,
            compilation_overhead=None,
            quiet=quiet,
        )
    finally:
        # Always remove whatever temporary venvs still exist.
        if os.path.exists(tmp_main):
            if not quiet:
                print(f"\n  Cleaning up {tmp_main}...")
            shutil.rmtree(tmp_main, ignore_errors=True)
        if tmp_config_list and os.path.exists(tmp_config_list):
            shutil.rmtree(tmp_config_list, ignore_errors=True)


def _run_with_compilation_check(
    args: argparse.Namespace,
    git_commit: str,
    git_branch: str,
    timestamp: str,
    quiet: bool,
) -> None:
    """Measurement flow with compilation overhead check.

    Without --skip-config-list-check (default):
      Three venvs created in parallel: main, compile, config-list.
      [1/4] compilation_overhead — cold cli_help in main + compile venvs
      [2/4] config_list — cold in config-list venv, warm in main venv
      Clean up compile + config-list venvs
      [3/4] import_only — cold + warm in main venv
      [4/4] cli_help warm — warm runs in main venv (cold captured in step 1)

    With --skip-config-list-check:
      Two venvs created in parallel: main + compile.
      [1/3] compilation_overhead — cold cli_help in main + compile venvs
      [2/3] import_only — cold + warm in main venv
      [3/3] cli_help warm — warm runs in main venv (cold captured in step 1)

    Args:
        args: Parsed CLI arguments (reads ``skip_config_list_check`` and
            ``warm_runs``; passed on to the import-trace and report helpers).
        git_commit: Commit hash recorded in the report.
        git_branch: Branch name recorded in the report.
        timestamp: Run timestamp recorded in the report.
        quiet: Suppress progress output when true.

    Raises:
        RuntimeError: If a ``uv sync`` or a timed command exits non-zero.

    The temporary venv directories are removed in a ``finally`` block whether
    or not the measurements succeed.
    """
    include_config_list = not args.skip_config_list_check
    tmp_main = tempfile.mkdtemp(prefix="dd-bench-main-")
    tmp_compile = tempfile.mkdtemp(prefix="dd-bench-compile-")
    tmp_config_list = tempfile.mkdtemp(prefix="dd-bench-config-list-") if include_config_list else ""

    try:
        # Set up all venvs in parallel
        venv_count = 3 if include_config_list else 2
        if not quiet:
            print(f"\n  Setting up {venv_count} venvs in parallel...")
        env_main = {**os.environ, "UV_PROJECT_ENVIRONMENT": tmp_main}
        env_compile = {**os.environ, "UV_PROJECT_ENVIRONMENT": tmp_compile}

        setup_start = time.perf_counter()
        proc_main = subprocess.Popen(
            ["uv", "sync", "--package", "data-designer"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=env_main,
        )
        proc_compile = subprocess.Popen(
            ["uv", "sync", "--package", "data-designer", "--compile-bytecode"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=env_compile,
        )
        proc_config_list = None
        env_config_list: dict[str, str] = {}
        if include_config_list:
            env_config_list = {**os.environ, "UV_PROJECT_ENVIRONMENT": tmp_config_list}
            proc_config_list = subprocess.Popen(
                ["uv", "sync", "--package", "data-designer"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                env=env_config_list,
            )

        # communicate() drains and closes the pipes while waiting. A bare
        # wait() on a process with PIPE'd output can deadlock once the child
        # fills the OS pipe buffer, and leaves the pipes open.
        _, stderr_main = proc_main.communicate()
        _, stderr_compile = proc_compile.communicate()
        stderr_config_list = b""
        rc_config_list = 0
        if proc_config_list is not None:
            _, stderr_config_list = proc_config_list.communicate()
            rc_config_list = proc_config_list.returncode
        venv_setup_sec = time.perf_counter() - setup_start

        if proc_main.returncode != 0:
            stderr = (stderr_main or b"").decode(errors="replace").strip()
            raise RuntimeError(f"Failed to set up main venv:\n{stderr}")
        if proc_compile.returncode != 0:
            stderr = (stderr_compile or b"").decode(errors="replace").strip()
            raise RuntimeError(f"Failed to set up compile venv:\n{stderr}")
        if rc_config_list != 0:
            stderr = (stderr_config_list or b"").decode(errors="replace").strip()
            raise RuntimeError(f"Failed to set up config-list venv:\n{stderr}")

        cli_main = os.path.join(tmp_main, "bin", "data-designer")
        python_main = os.path.join(tmp_main, "bin", "python3")
        cli_compile = os.path.join(tmp_compile, "bin", "data-designer")
        cli_config_list = os.path.join(tmp_config_list, "bin", "data-designer") if include_config_list else ""

        if not quiet:
            print(f"  All {venv_count} venvs ready in {venv_setup_sec:.1f}s")

        total = 4 if include_config_list else 3
        step = 0

        # --- [1/N] compilation_overhead: cold cli_help in both venvs ----------
        step += 1
        if not quiet:
            print(f"\n  [{step}/{total}] compilation_overhead: cold cli_help with vs without --compile-bytecode")

        # Only the main venv is cleared: the compile venv is measured with the
        # bytecode that --compile-bytecode produced.
        _clear_pycache(tmp_main)

        if not quiet:
            print("    Cold run (no precompile)...", end="", flush=True)
        cold_no_compile = _time_subprocess([cli_main, "--help"], env=env_main)
        if not quiet:
            print(f" {cold_no_compile:.3f}s")

        if not quiet:
            print("    Cold run (precompiled)...", end="", flush=True)
        cold_compile_time = _time_subprocess([cli_compile, "--help"], env=env_compile)
        if not quiet:
            print(f" {cold_compile_time:.3f}s")

        compilation_overhead = CompilationResult(
            cold_without_precompile=cold_no_compile,
            cold_with_precompile=cold_compile_time,
            overhead=cold_no_compile - cold_compile_time,
        )

        # --- [2/N] config_list (if enabled): cold in config-list venv, warm in main ---
        if include_config_list:
            step += 1
            config_list_cmd_cold = [cli_config_list, "config", "list"]
            config_list_cmd_warm = [cli_main, "config", "list"]
            if not quiet:
                print(f"\n  [{step}/{total}] config_list: data-designer config list")

            # Cold run in isolated config-list venv
            _clear_pycache(tmp_config_list)
            if not quiet:
                print("    Cold run (config-list venv)...", end="", flush=True)
            config_list_cold_time = _time_subprocess(config_list_cmd_cold, env=env_config_list)
            if not quiet:
                print(f" {config_list_cold_time:.3f}s")

            # Done with config-list venv; clearing the name tells the
            # finally-block there is nothing left to remove.
            shutil.rmtree(tmp_config_list, ignore_errors=True)
            tmp_config_list = ""

            # Warm runs in main venv
            config_list_warm_samples: list[float] = []
            if not quiet:
                print(f"    Warm runs (n={args.warm_runs})...", end="", flush=True)
            for _ in range(args.warm_runs):
                config_list_warm_samples.append(_time_subprocess(config_list_cmd_warm, env=env_main))
            config_list_warm_stats = _compute_stats(config_list_warm_samples)
            if not quiet:
                print(f" mean={config_list_warm_stats.mean:.3f}s, stdev={config_list_warm_stats.stdev:.3f}s")

            config_list_result = BenchmarkResult(
                name="config_list",
                cold=_compute_stats([config_list_cold_time]),
                warm=config_list_warm_stats,
            )

        # Done with compile venv
        shutil.rmtree(tmp_compile, ignore_errors=True)
        tmp_compile = ""

        # --- [N-1/N] import_only: cold + warm in main venv ---------------------
        step += 1
        import_cmd = [python_main, "-c", "from data_designer.cli.main import main"]
        if not quiet:
            print(f"\n  [{step}/{total}] import_only: python -c 'from data_designer.cli.main import main'")
        import_result = _run_measurement(
            name="import_only",
            cmd=import_cmd,
            warm_runs=args.warm_runs,
            pycache_base=tmp_main,
            env=env_main,
            quiet=quiet,
        )

        # --- [N/N] cli_help warm (cold was captured in step 1) ---------------
        step += 1
        cli_cmd = [cli_main, "--help"]
        if not quiet:
            print(f"\n  [{step}/{total}] cli_help warm: {' '.join(cli_cmd)}")
            print(f"    Warm runs (n={args.warm_runs})...", end="", flush=True)
        warm_samples: list[float] = []
        for _ in range(args.warm_runs):
            warm_samples.append(_time_subprocess(cli_cmd, env=env_main))
        warm_stats = _compute_stats(warm_samples)
        if not quiet:
            print(f" mean={warm_stats.mean:.3f}s, stdev={warm_stats.stdev:.3f}s")

        # The cold sample for cli_help is the "no precompile" run from step 1.
        cli_result = BenchmarkResult(
            name="cli_help",
            cold=_compute_stats([cold_no_compile]),
            warm=warm_stats,
        )

        results: list[BenchmarkResult] = [import_result, cli_result]
        if include_config_list:
            results.append(config_list_result)

        top_imports = _collect_top_imports(args, python_main, quiet)
        _emit_report(
            args=args,
            timestamp=timestamp,
            git_commit=git_commit,
            git_branch=git_branch,
            venv_setup_sec=venv_setup_sec,
            results=results,
            top_imports=top_imports,
            compilation_overhead=compilation_overhead,
            quiet=quiet,
        )
    finally:
        # Always remove whatever temporary venvs still exist.
        if os.path.exists(tmp_main):
            if not quiet:
                print(f"\n  Cleaning up {tmp_main}...")
            shutil.rmtree(tmp_main, ignore_errors=True)
        if tmp_compile and os.path.exists(tmp_compile):
            shutil.rmtree(tmp_compile, ignore_errors=True)
        if tmp_config_list and os.path.exists(tmp_config_list):
            shutil.rmtree(tmp_config_list, ignore_errors=True)


def _collect_top_imports(
    args: argparse.Namespace, python_path: str, quiet: bool
) -> list[dict[str, str | float]] | None:
    """Return the slowest-imports table when ``--verbose`` was requested.

    Args:
        args: Parsed CLI arguments (reads ``verbose`` and ``top_imports``).
        python_path: Interpreter used to collect the import trace.
        quiet: Suppress the progress message when true.

    Returns:
        The ranked import list, or ``None`` when ``args.verbose`` is false.
    """
    if args.verbose:
        limit = args.top_imports
        if not quiet:
            print(f"\n  Collecting import trace (top {limit})...")
        return _get_top_imports(python_path, limit)
    return None


def _emit_report(
    *,
    args: argparse.Namespace,
    timestamp: str,
    git_commit: str,
    git_branch: str,
    venv_setup_sec: float,
    results: list[BenchmarkResult],
    top_imports: list[dict[str, str | float]] | None,
    compilation_overhead: CompilationResult | None,
    quiet: bool,
) -> None:
    """Assemble the report, show it on stdout and optionally save it as JSON.

    In quiet mode the report is printed as indented JSON; otherwise the
    human-readable rendering is printed. When ``args.output`` is set, the
    JSON form is also written to that path.
    """
    # Interpreter and host details are read from the running process.
    host_info = {
        "python_version": platform.python_version(),
        "platform_name": platform.system(),
        "arch": platform.machine(),
    }
    report = BenchmarkReport(
        timestamp=timestamp,
        git_commit=git_commit,
        git_branch=git_branch,
        venv_setup_sec=venv_setup_sec,
        warm_runs=args.warm_runs,
        results=results,
        top_imports=top_imports,
        compilation_overhead=compilation_overhead,
        **host_info,
    )

    if not quiet:
        _print_results(report)
    else:
        print(json.dumps(asdict(report), indent=2))

    destination = args.output
    if not destination:
        return

    with open(destination, "w") as fh:
        json.dump(asdict(report), fh, indent=2)
    if not quiet:
        print(f"  JSON report written to {destination}")


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()