#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Cross platform llama.cpp prebuilt installer for Unsloth Studio"""

from __future__ import annotations

import argparse
import fnmatch
import hashlib
import json
import os
import platform
import random
import shutil
import site
import socket
import subprocess
import sys
import tarfile
import tempfile
import textwrap
import time
import urllib.error
import urllib.parse
import urllib.request
import zipfile
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable, Iterator

try:
    from filelock import FileLock, Timeout as FileLockTimeout
except ImportError:
    # filelock is optional; callers must cope with FileLock being None.
    FileLock = None
    FileLockTimeout = None


# Process exit codes understood by the calling installer.
EXIT_SUCCESS = 0
EXIT_FALLBACK = 2
EXIT_ERROR = 1

# Release tags / repos, overridable through the environment.
APPROVED_PREBUILT_LLAMA_TAG = "b8508"
DEFAULT_LLAMA_TAG = os.environ.get("UNSLOTH_LLAMA_TAG", APPROVED_PREBUILT_LLAMA_TAG)
DEFAULT_PUBLISHED_REPO = os.environ.get(
    "UNSLOTH_LLAMA_RELEASE_REPO", "unslothai/llama.cpp"
)
DEFAULT_PUBLISHED_TAG = os.environ.get("UNSLOTH_LLAMA_RELEASE_TAG")
DEFAULT_PUBLISHED_MANIFEST_ASSET = os.environ.get(
    "UNSLOTH_LLAMA_RELEASE_MANIFEST_ASSET", "llama-prebuilt-manifest.json"
)
DEFAULT_PUBLISHED_SHA256_ASSET = os.environ.get(
    "UNSLOTH_LLAMA_RELEASE_SHA256_ASSET", "llama-prebuilt-sha256.json"
)
UPSTREAM_REPO = "ggml-org/llama.cpp"
UPSTREAM_RELEASES_API = f"https://api.github.com/repos/{UPSTREAM_REPO}/releases/latest"

# Tiny GGUF model used to smoke-test an installed llama.cpp binary.
TEST_MODEL_URL = (
    "https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf"
)
TEST_MODEL_SHA256 = "270cba1bd5109f42d03350f60406024560464db173c0e387d91f0426d3bd256d"
VALIDATION_MODEL_CACHE_DIRNAME = ".cache"
VALIDATION_MODEL_CACHE_FILENAME = "stories260K.gguf"

# Install / network tuning knobs.
INSTALL_LOCK_TIMEOUT_SECONDS = 300
INSTALL_STAGING_ROOT_NAME = ".staging"
GITHUB_AUTH_HOSTS = {"api.github.com", "github.com"}
RETRYABLE_HTTP_STATUS = {408, 429, 500, 502, 503, 504}
HTTP_FETCH_ATTEMPTS = 4
HTTP_FETCH_BASE_DELAY_SECONDS = 0.75
SERVER_PORT_BIND_ATTEMPTS = 3
SERVER_BIND_RETRY_WINDOW_SECONDS = 5.0
TTY_PROGRESS_START_DELAY_SECONDS = 0.5


@dataclass
class HostInfo:
    """Snapshot of the local machine used to pick a prebuilt asset."""

    system: str
    machine: str
    is_windows: bool
    is_linux: bool
    is_macos: bool
    is_x86_64: bool
    is_arm64: bool
    nvidia_smi: str | None
    driver_cuda_version: tuple[int, int] | None
    compute_caps: list[str]
    visible_cuda_devices: str | None
    has_physical_nvidia: bool
    has_usable_nvidia: bool


@dataclass
class AssetChoice:
    """A concrete downloadable asset plus the metadata behind its selection."""

    repo: str
    tag: str
    name: str
    url: str
    source_label: str
    runtime_name: str | None = None
    runtime_url: str | None = None
    is_ready_bundle: bool = False
    install_kind: str = ""
    bundle_profile: str | None = None
    runtime_line: str | None = None
    coverage_class: str | None = None
    supported_sms: list[str] | None = None
    min_sm: int | None = None
    max_sm: int | None = None
    selection_log: list[str] | None = None
    expected_sha256: str | None = None


@dataclass(frozen=True)
class PublishedLlamaArtifact:
    """One artifact entry from the published release manifest."""

    asset_name: str
    install_kind: str
    runtime_line: str | None
    coverage_class: str | None
    supported_sms: list[str]
    min_sm: int | None
    max_sm: int | None
    bundle_profile: str | None
    rank: int


@dataclass
class PublishedReleaseBundle:
    """A published release together with its parsed manifest artifacts."""

    repo: str
    release_tag: str
    upstream_tag: str
    assets: dict[str, str]
    manifest_asset_name: str
    artifacts: list[PublishedLlamaArtifact]
    selection_log: list[str]


@dataclass
class LinuxCudaSelection:
    """Ordered Linux CUDA install attempts plus the selection trace."""

    attempts: list[AssetChoice]
    selection_log: list[str]

    @property
    def primary(self) -> AssetChoice:
        """First (preferred) attempt; a selection must never be empty."""
        if self.attempts:
            return self.attempts[0]
        raise RuntimeError("linux CUDA selection unexpectedly had no attempts")
@dataclass
class CudaRuntimePreference:
    """Preferred CUDA runtime line plus the log explaining the choice."""

    runtime_line: str | None
    selection_log: list[str]


@dataclass(frozen=True)
class ApprovedArtifactHash:
    """Approved sha256 pin for a single release artifact."""

    asset_name: str
    sha256: str
    repo: str | None
    kind: str | None


@dataclass
class ApprovedReleaseChecksums:
    """Checksums for every artifact of a pinned, approved release."""

    repo: str
    release_tag: str
    upstream_tag: str
    source_commit: str | None
    artifacts: dict[str, ApprovedArtifactHash]


class PrebuiltFallback(RuntimeError):
    """Raised when the prebuilt path cannot be used and a fallback should run."""


def log(message: str) -> None:
    """Print one namespaced status line."""
    print(f"[llama-prebuilt] {message}")


def log_lines(lines: Iterable[str]) -> None:
    """Log every entry of *lines* in order."""
    for entry in lines:
        log(entry)


def parsed_hostname(url: str | None) -> str | None:
    """Return the lowercased hostname of *url*, or None when unparseable."""
    if not url:
        return None
    try:
        host = urllib.parse.urlparse(url).hostname
    except Exception:
        return None
    return host.lower() if host else None


def should_send_github_auth(url: str | None) -> bool:
    """True when *url* targets a host allowed to receive our GitHub token."""
    return parsed_hostname(url) in GITHUB_AUTH_HOSTS


def auth_headers(url: str | None = None) -> dict[str, str]:
    """Base request headers; a bearer token is attached only for GitHub hosts."""
    headers = {
        "User-Agent": "unsloth-studio-llama-prebuilt",
    }
    token = os.environ.get("GH_TOKEN") or os.environ.get("GITHUB_TOKEN")
    if token and should_send_github_auth(url):
        headers["Authorization"] = f"Bearer {token}"
    return headers


def github_api_headers(url: str | None = None) -> dict[str, str]:
    """Headers for the GitHub REST API (JSON accept plus auth)."""
    return {"Accept": "application/vnd.github+json", **auth_headers(url)}


def is_github_api_url(url: str | None) -> bool:
    """True when *url* points at api.github.com."""
    return parsed_hostname(url) == "api.github.com"


def is_retryable_url_error(exc: Exception) -> bool:
    """Whether a fetch failure is transient enough to retry.

    Retryable: HTTP statuses in RETRYABLE_HTTP_STATUS, any other URLError
    (transport-level), and timeouts.
    """
    # HTTPError subclasses URLError, so it must be examined first.
    if isinstance(exc, urllib.error.HTTPError):
        return exc.code in RETRYABLE_HTTP_STATUS
    return isinstance(exc, (urllib.error.URLError, TimeoutError, socket.timeout))
def sleep_backoff(
    attempt: int, *, base_delay: float | None = None
) -> None:
    """Sleep with exponential backoff and a little jitter.

    Args:
        attempt: 1-based attempt number; the delay doubles per attempt.
        base_delay: Base delay override.  Defaults to
            HTTP_FETCH_BASE_DELAY_SECONDS, resolved at call time (the
            original captured the module constant at definition time,
            which froze the value and coupled the helper to import order).
    """
    if base_delay is None:
        base_delay = HTTP_FETCH_BASE_DELAY_SECONDS
    delay = base_delay * (2 ** max(attempt - 1, 0))
    delay += random.uniform(0.0, 0.2)
    time.sleep(delay)


def atomic_write_bytes(destination: Path, data: bytes) -> None:
    """Write *data* to *destination* atomically (temp file + os.replace).

    The temporary file is removed on failure so aborted writes do not
    accumulate next to the destination (the original leaked it when the
    write or the final replace raised).
    """
    destination.parent.mkdir(parents = True, exist_ok = True)
    tmp_path: Path | None = None
    try:
        with tempfile.NamedTemporaryFile(
            prefix = destination.name + ".tmp-",
            dir = destination.parent,
            delete = False,
        ) as handle:
            tmp_path = Path(handle.name)
            handle.write(data)
            handle.flush()
            os.fsync(handle.fileno())
        os.replace(tmp_path, destination)
    except BaseException:
        if tmp_path is not None:
            try:
                tmp_path.unlink(missing_ok = True)
            except OSError:
                pass
        raise


def atomic_replace_from_tempfile(tmp_path: Path, destination: Path) -> None:
    """Atomically move an already-written temp file into place."""
    destination.parent.mkdir(parents = True, exist_ok = True)
    os.replace(tmp_path, destination)


def source_archive_logical_name(upstream_tag: str) -> str:
    """Canonical local filename for the upstream source tarball of a tag."""
    return f"llama.cpp-source-{upstream_tag}.tar.gz"


def sha256_file(path: Path) -> str:
    """Hex sha256 of *path*, streamed in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def normalize_sha256_digest(value: str | None) -> str | None:
    """Normalize a sha256 string ("sha256:<hex>" or bare hex) to lowercase hex.

    Returns None for anything that is not exactly 64 hex characters.
    """
    if not isinstance(value, str) or not value:
        return None
    lowered = value.lower()
    if lowered.startswith("sha256:"):
        lowered = lowered.split(":", 1)[1]
    if len(lowered) != 64 or any(ch not in "0123456789abcdef" for ch in lowered):
        return None
    return lowered


def format_byte_count(num_bytes: float) -> str:
    """Human-readable byte count ("512 B", "1.5 KiB", ...)."""
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    value = float(num_bytes)
    for unit in units:
        if abs(value) < 1024.0 or unit == units[-1]:
            if unit == "B":
                return f"{int(value)} {unit}"
            return f"{value:.1f} {unit}"
        value /= 1024.0
    # Defensive fallback; the loop always returns at the final unit.
    return f"{num_bytes:.1f} B"
class DownloadProgress:
    """Progress reporter for downloads.

    On a TTY it redraws one status line in place (after a short start
    delay so instant downloads stay silent); otherwise it logs coarse
    milestones — 25% steps when the total size is known, 25 MiB steps
    (rate-limited) when it is not.
    """

    def __init__(self, label: str, total_bytes: int | None) -> None:
        self.label = label
        self.total_bytes = total_bytes if total_bytes and total_bytes > 0 else None
        self.start_time = time.monotonic()
        self.last_emit = 0.0
        term_ok = os.environ.get("TERM", "").lower() != "dumb"
        # Prefer stderr when it is a terminal, then stdout, else stderr.
        if sys.stderr.isatty():
            self.stream = sys.stderr
        elif sys.stdout.isatty():
            self.stream = sys.stdout
        else:
            self.stream = sys.stderr
        self.is_tty = term_ok and self.stream.isatty()
        self.completed = False
        self.last_milestone_percent = -1
        self.last_milestone_bytes = 0
        self.has_rendered_tty_progress = False

    def _render(self, downloaded_bytes: int, *, final: bool = False) -> str:
        """Format a single status line for the current byte count."""
        elapsed = max(time.monotonic() - self.start_time, 1e-6)
        speed_text = f"{format_byte_count(downloaded_bytes / elapsed)}/s"
        if self.total_bytes is not None:
            percent = min(100.0, (downloaded_bytes / self.total_bytes) * 100.0)
            return (
                f"{self.label}: {percent:5.1f}% "
                f"({format_byte_count(downloaded_bytes)}/{format_byte_count(self.total_bytes)}) "
                f"at {speed_text}"
            )
        # With an unknown total the final and intermediate lines coincide.
        return f"{self.label}: {format_byte_count(downloaded_bytes)} downloaded at {speed_text}"

    def update(self, downloaded_bytes: int) -> None:
        """Record progress and emit output according to the TTY policy."""
        now = time.monotonic()
        if self.is_tty:
            elapsed = now - self.start_time
            if not self.has_rendered_tty_progress:
                # Stay silent for downloads that complete before the start
                # delay has elapsed.
                if (
                    self.total_bytes is not None
                    and downloaded_bytes >= self.total_bytes
                ):
                    return
                if elapsed < TTY_PROGRESS_START_DELAY_SECONDS:
                    return
            min_interval = 0.2
            if (
                self.has_rendered_tty_progress
                and not self.completed
                and (now - self.last_emit) < min_interval
            ):
                return
            self.last_emit = now
            self.stream.write("\r\033[K" + self._render(downloaded_bytes))
            self.stream.flush()
            self.has_rendered_tty_progress = True
            return

        emit = False
        if self.total_bytes is not None:
            percent = int((downloaded_bytes * 100) / max(self.total_bytes, 1))
            milestone_percent = min((percent // 25) * 25, 100)
            # The 100% line is left to finish().
            if self.last_milestone_percent < milestone_percent < 100:
                self.last_milestone_percent = milestone_percent
                emit = True
        else:
            byte_step = 25 * 1024 * 1024
            if (
                downloaded_bytes - self.last_milestone_bytes >= byte_step
                and (now - self.last_emit) >= 5.0
            ):
                self.last_milestone_bytes = downloaded_bytes
                emit = True

        if not emit:
            return
        self.last_emit = now
        self.stream.write(self._render(downloaded_bytes) + "\n")
        self.stream.flush()

    def finish(self, downloaded_bytes: int) -> None:
        """Emit the final summary line (or clear the in-place TTY line)."""
        self.completed = True
        line = self._render(downloaded_bytes, final = True)
        if self.is_tty:
            if not self.has_rendered_tty_progress:
                return
            self.stream.write("\r\033[K")
        else:
            self.stream.write(line + "\n")
        self.stream.flush()


def download_label_from_url(url: str) -> str:
    """Last path component of *url*, falling back to the URL itself."""
    name = Path(urllib.parse.urlparse(url).path).name
    return name or url
def download_bytes(
    url: str,
    *,
    timeout: int = 120,
    attempts: int = HTTP_FETCH_ATTEMPTS,
    headers: dict[str, str] | None = None,
    progress_label: str | None = None,
) -> bytes:
    """Fetch *url* fully into memory, retrying transient failures."""
    last_exc: Exception | None = None
    for attempt in range(1, attempts + 1):
        try:
            request = urllib.request.Request(url, headers = headers or auth_headers(url))
            with urllib.request.urlopen(request, timeout = timeout) as response:
                content_length = response.headers.get("Content-Length")
                total_bytes: int | None = (
                    int(content_length)
                    if content_length and content_length.isdigit()
                    else None
                )
                progress = (
                    DownloadProgress(progress_label, total_bytes)
                    if progress_label
                    else None
                )
                buffer = bytearray()
                for chunk in iter(lambda: response.read(1024 * 1024), b""):
                    buffer.extend(chunk)
                    if progress is not None:
                        progress.update(len(buffer))
                if progress is not None:
                    progress.finish(len(buffer))
                return bytes(buffer)
        except Exception as exc:
            last_exc = exc
            if attempt >= attempts or not is_retryable_url_error(exc):
                raise
            log(f"fetch failed ({attempt}/{attempts}) for {url}: {exc}; retrying")
            sleep_backoff(attempt)
    assert last_exc is not None
    raise last_exc


def fetch_json(url: str) -> Any:
    """Fetch and parse JSON from *url*; RuntimeError for bad payloads."""
    request_headers = (
        github_api_headers(url) if is_github_api_url(url) else auth_headers(url)
    )
    data = download_bytes(url, timeout = 30, headers = request_headers)
    if not data:
        raise RuntimeError(f"downloaded empty JSON payload from {url}")
    try:
        payload = json.loads(data.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise RuntimeError(f"downloaded invalid JSON from {url}: {exc}") from exc
    if not isinstance(payload, (dict, list)):
        raise RuntimeError(
            f"downloaded unexpected JSON type from {url}: {type(payload).__name__}"
        )
    return payload


def download_file(url: str, destination: Path) -> None:
    """Download *url* to *destination* atomically, retrying transient failures.

    The payload is written to a sibling temp file first and swapped in via
    os.replace only after it was flushed, fsynced and verified non-empty.
    """
    destination.parent.mkdir(parents = True, exist_ok = True)
    last_exc: Exception | None = None
    for attempt in range(1, HTTP_FETCH_ATTEMPTS + 1):
        tmp_path: Path | None = None
        try:
            request = urllib.request.Request(url, headers = auth_headers(url))
            with tempfile.NamedTemporaryFile(
                prefix = destination.name + ".tmp-",
                dir = destination.parent,
                delete = False,
            ) as handle:
                tmp_path = Path(handle.name)
                with urllib.request.urlopen(request, timeout = 120) as response:
                    content_length = response.headers.get("Content-Length")
                    total_bytes: int | None = (
                        int(content_length)
                        if content_length and content_length.isdigit()
                        else None
                    )
                    progress = DownloadProgress(
                        f"Downloading {destination.name}", total_bytes
                    )
                    downloaded_bytes = 0
                    for chunk in iter(lambda: response.read(1024 * 1024), b""):
                        handle.write(chunk)
                        downloaded_bytes += len(chunk)
                        progress.update(downloaded_bytes)
                    progress.finish(downloaded_bytes)
                handle.flush()
                os.fsync(handle.fileno())
            if not tmp_path.exists() or tmp_path.stat().st_size == 0:
                raise RuntimeError(f"downloaded empty file from {url}")
            atomic_replace_from_tempfile(tmp_path, destination)
            return
        except Exception as exc:
            last_exc = exc
            if tmp_path is not None:
                try:
                    tmp_path.unlink(missing_ok = True)
                except Exception:
                    pass
            if attempt >= HTTP_FETCH_ATTEMPTS or not is_retryable_url_error(exc):
                raise
            log(
                f"download failed ({attempt}/{HTTP_FETCH_ATTEMPTS}) for {url}: {exc}; retrying"
            )
            sleep_backoff(attempt)
    assert last_exc is not None
    raise last_exc


def download_file_verified(
    url: str,
    destination: Path,
    *,
    expected_sha256: str,
    label: str,
) -> None:
    """Download *url* and require its sha256 to match, retrying once.

    Raises PrebuiltFallback for a missing/invalid pin or a persistent
    checksum mismatch.
    """
    normalized_expected = normalize_sha256_digest(expected_sha256)
    if not normalized_expected:
        raise PrebuiltFallback(f"{label} did not have a valid approved sha256")

    for attempt in range(1, 3):
        download_file(url, destination)
        digest = sha256_file(destination)
        if digest == normalized_expected:
            log(f"verified {label} sha256={digest}")
            return

        log(
            f"{label} checksum mismatch on attempt {attempt}/2: "
            f"expected={normalized_expected} actual={digest}"
        )
        destination.unlink(missing_ok = True)
        if attempt == 2:
            raise PrebuiltFallback(
                f"{label} checksum mismatch after retry: expected={normalized_expected} actual={digest}"
            )
        log(f"retrying {label} download after checksum mismatch")


def upstream_source_archive_urls(tag: str) -> list[str]:
    """Candidate URLs for the upstream source tarball of *tag*."""
    encoded_tag = urllib.parse.quote(tag, safe = "")
    return [
        f"https://codeload.github.com/{UPSTREAM_REPO}/tar.gz/refs/tags/{encoded_tag}",
        f"https://github.com/{UPSTREAM_REPO}/archive/refs/tags/{encoded_tag}.tar.gz",
    ]


def github_release_assets(repo: str, tag: str) -> dict[str, str]:
    """Asset-name -> download-URL map for the release *repo*@*tag*."""
    payload = fetch_json(
        f"https://api.github.com/repos/{repo}/releases/tags/{urllib.parse.quote(tag, safe = '')}"
    )
    if not isinstance(payload, dict):
        raise RuntimeError(f"unexpected release payload for {repo}@{tag}")
    return release_asset_map(payload)
def github_release(repo: str, tag: str) -> dict[str, Any]:
    """Full release metadata object for *repo*@*tag* from the GitHub API."""
    payload = fetch_json(
        f"https://api.github.com/repos/{repo}/releases/tags/{urllib.parse.quote(tag, safe = '')}"
    )
    if not isinstance(payload, dict):
        raise RuntimeError(f"unexpected release payload for {repo}@{tag}")
    return payload


def github_releases(repo: str, *, per_page: int = 100) -> list[dict[str, Any]]:
    """All releases of *repo*, paging through the GitHub API."""
    releases: list[dict[str, Any]] = []
    page = 1
    while True:
        payload = fetch_json(
            f"https://api.github.com/repos/{repo}/releases?per_page={per_page}&page={page}"
        )
        if not isinstance(payload, list):
            raise RuntimeError(f"unexpected releases payload for {repo}")
        releases.extend(item for item in payload if isinstance(item, dict))
        # A short page means we have reached the end.
        if len(payload) < per_page:
            break
        page += 1
    return releases


def latest_upstream_release_tag() -> str:
    """Tag name of the most recent upstream llama.cpp release."""
    payload = fetch_json(UPSTREAM_RELEASES_API)
    tag = payload.get("tag_name")
    if isinstance(tag, str) and tag:
        return tag
    raise RuntimeError(
        f"latest release tag was missing from {UPSTREAM_RELEASES_API}"
    )


def normalize_compute_cap(value: Any) -> str | None:
    """Normalize a compute capability ("8.6" or "86") to digit form ("86")."""
    raw = str(value).strip()
    if not raw:
        return None
    if "." not in raw:
        return str(int(raw)) if raw.isdigit() else None
    parts = raw.split(".", 1)
    if len(parts) != 2:
        return None
    major, minor = parts
    if major.isdigit() and minor.isdigit():
        return f"{int(major)}{int(minor)}"
    return None


def normalize_compute_caps(compute_caps: Iterable[str]) -> list[str]:
    """Normalized, de-duplicated compute caps, sorted numerically."""
    seen: set[str] = set()
    normalized: list[str] = []
    for raw in compute_caps:
        cap = normalize_compute_cap(raw)
        if cap is None or cap in seen:
            continue
        seen.add(cap)
        normalized.append(cap)
    normalized.sort(key = int)
    return normalized


def parse_cuda_visible_devices(value: str | None) -> list[str] | None:
    """Parse CUDA_VISIBLE_DEVICES.

    Returns None when unset, [] when nothing is visible ("" or "-1"),
    otherwise the stripped comma-separated tokens.
    """
    if value is None:
        return None
    raw = value.strip()
    if not raw or raw == "-1":
        return []
    return [token.strip() for token in raw.split(",") if token.strip()]


def supports_explicit_visible_device_matching(
    visible_devices: list[str] | None,
) -> bool:
    """True when every token is a bare index or a GPU-<uuid> reference."""
    if not visible_devices:
        return False
    return all(
        token.isdigit() or token.lower().startswith("gpu-")
        for token in visible_devices
    )
def select_visible_gpu_rows(
    gpu_rows: Iterable[tuple[str, str, str]],
    visible_devices: list[str] | None,
) -> list[tuple[str, str, str]]:
    """Filter (index, uuid, cap) GPU rows down to CUDA_VISIBLE_DEVICES.

    *visible_devices* follows parse_cuda_visible_devices semantics: None
    means no restriction, [] means nothing visible.  Tokens may be device
    indices, full UUIDs, or UUIDs without the "GPU-" prefix.  Selection
    order and de-duplication follow the visible_devices list.
    """
    rows = list(gpu_rows)
    if visible_devices is None:
        return rows
    if not visible_devices:
        return []

    by_index = {index: (index, uuid, cap) for index, uuid, cap in rows}
    by_uuid = {uuid.lower(): (index, uuid, cap) for index, uuid, cap in rows}
    selected: list[tuple[str, str, str]] = []
    seen_indices: set[str] = set()
    for token in visible_devices:
        row = by_index.get(token)
        if row is None:
            normalized_token = token.lower()
            row = by_uuid.get(normalized_token)
            # NOTE: the original repeated the identical by_uuid lookup for
            # tokens already starting with "gpu-"; that branch was a no-op
            # and has been removed.  Prefix-less tokens are also matched
            # against "gpu-<token>".
            if row is None and not normalized_token.startswith("gpu-"):
                row = by_uuid.get("gpu-" + normalized_token)
        if row is None:
            continue
        index = row[0]
        if index in seen_indices:
            continue
        seen_indices.add(index)
        selected.append(row)
    return selected


def dir_provides_exact_library(directory: str | Path, library: str) -> bool:
    """True when *directory* directly contains *library* (file or symlink)."""
    if not library:
        return False
    candidate = Path(directory) / library
    return candidate.exists() and (candidate.is_file() or candidate.is_symlink())


def linux_runtime_dirs_for_required_libraries(
    required_libraries: Iterable[str],
) -> list[str]:
    """Candidate Linux directories providing the given CUDA runtime libraries.

    Gathers directories from env overrides (CUDA_RUNTIME_LIB_DIR,
    LD_LIBRARY_PATH), CUDA toolkit roots, conventional system library
    paths, Python runtime dirs and ldconfig.  When *required_libraries*
    is non-empty, only directories providing at least one of them are
    returned, ordered by how many they provide.
    """
    required = [library for library in required_libraries if library]
    candidates: list[str | Path] = []

    env_dirs = os.environ.get("CUDA_RUNTIME_LIB_DIR", "")
    if env_dirs:
        candidates.extend(part for part in env_dirs.split(os.pathsep) if part)
    ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
    if ld_library_path:
        candidates.extend(part for part in ld_library_path.split(os.pathsep) if part)

    cuda_roots: list[Path] = []
    for name in ("CUDA_HOME", "CUDA_PATH", "CUDA_ROOT"):
        value = os.environ.get(name)
        if value:
            cuda_roots.append(Path(value))
    cuda_roots.extend(
        Path(path) for path in glob_paths("/usr/local/cuda", "/usr/local/cuda-*")
    )

    for root in cuda_roots:
        candidates.extend(
            [
                root / "lib",
                root / "lib64",
                root / "targets" / "x86_64-linux" / "lib",
            ]
        )

    candidates.extend(
        Path(path)
        for path in glob_paths(
            "/lib",
            "/lib64",
            "/usr/lib",
            "/usr/lib64",
            "/usr/local/lib",
            "/usr/local/lib64",
            "/lib/x86_64-linux-gnu",
            "/usr/lib/x86_64-linux-gnu",
        )
    )
    candidates.extend(
        Path(path)
        for path in glob_paths("/usr/local/lib/ollama/cuda_v*", "/usr/lib/wsl/lib")
    )
    candidates.extend(Path(path) for path in python_runtime_dirs())
    candidates.extend(Path(path) for path in ldconfig_runtime_dirs(required))

    resolved = dedupe_existing_dirs(candidates)
    if not required:
        return resolved

    # Rank surviving directories by how many required libraries each one
    # provides directly (exact filename, not glob).  The original also
    # bound an unused local `base = Path(directory)` here; removed.
    matched: list[tuple[int, str]] = []
    for directory in resolved:
        provided = sum(
            1 for library in required if dir_provides_exact_library(directory, library)
        )
        if provided:
            matched.append((provided, directory))

    matched.sort(key = lambda item: item[0], reverse = True)
    return [directory for _, directory in matched]
def detected_linux_runtime_lines() -> tuple[list[str], dict[str, list[str]]]:
    """Detect which CUDA runtime lines (cuda13/cuda12) exist locally.

    A line counts as detected only when every one of its required
    libraries is found in at least one candidate directory.  Returns the
    detected line names plus the matching directories per line.
    """
    line_requirements = {
        "cuda13": ["libcudart.so.13", "libcublas.so.13"],
        "cuda12": ["libcudart.so.12", "libcublas.so.12"],
    }
    detected: list[str] = []
    runtime_dirs: dict[str, list[str]] = {}
    for runtime_line, needed in line_requirements.items():
        candidate_dirs = linux_runtime_dirs_for_required_libraries(needed)
        per_library: dict[str, list[str]] = {}
        line_dirs: list[str] = []
        for library in needed:
            providers = [
                directory
                for directory in candidate_dirs
                if any(Path(directory).glob(f"{library}*"))
            ]
            if not providers:
                # One missing library disqualifies the whole runtime line.
                per_library = {}
                line_dirs = []
                break
            per_library[library] = providers
            for directory in providers:
                if directory not in line_dirs:
                    line_dirs.append(directory)
        if per_library:
            detected.append(runtime_line)
            runtime_dirs[runtime_line] = line_dirs
    return detected, runtime_dirs


def release_asset_map(release: dict[str, Any]) -> dict[str, str]:
    """Asset-name -> browser_download_url map from a GitHub release object.

    Entries missing a string name or download URL are skipped silently.
    """
    assets = release.get("assets")
    if not isinstance(assets, list):
        return {}
    return {
        asset["name"]: asset.get("browser_download_url", "")
        for asset in assets
        if isinstance(asset, dict)
        and isinstance(asset.get("name"), str)
        and isinstance(asset.get("browser_download_url"), str)
    }
def parse_published_artifact(raw: Any) -> PublishedLlamaArtifact | None:
    """Parse one manifest artifact entry.

    Raises ValueError (with the offending field named) for malformed
    entries; field validation order matches the manifest schema.
    """
    if not isinstance(raw, dict):
        raise ValueError("artifact entry was not an object")
    asset_name = raw.get("asset_name")
    install_kind = raw.get("install_kind")
    if not isinstance(asset_name, str) or not asset_name:
        raise ValueError("artifact.asset_name was missing or not a string")
    if not isinstance(install_kind, str) or not install_kind:
        raise ValueError(
            f"artifact {asset_name} install_kind was missing or not a string"
        )

    supported_sms_raw = raw.get("supported_sms", [])
    if not isinstance(supported_sms_raw, (list, tuple)):
        raise ValueError(f"artifact {asset_name} supported_sms must be a list or tuple")
    if any(not isinstance(value, (int, str)) for value in supported_sms_raw):
        raise ValueError(
            f"artifact {asset_name} supported_sms entries must be ints or strings"
        )
    supported_sms = normalize_compute_caps(supported_sms_raw)

    min_sm_raw = raw.get("min_sm")
    max_sm_raw = raw.get("max_sm")
    try:
        min_sm = int(min_sm_raw) if min_sm_raw is not None else None
        max_sm = int(max_sm_raw) if max_sm_raw is not None else None
    except (TypeError, ValueError) as exc:
        raise ValueError(
            f"artifact {asset_name} min_sm/max_sm were not integers"
        ) from exc
    runtime_line = raw.get("runtime_line")
    coverage_class = raw.get("coverage_class")
    bundle_profile = raw.get("bundle_profile")
    rank_raw = raw.get("rank", 1000)
    if runtime_line is not None and not isinstance(runtime_line, str):
        raise ValueError(f"artifact {asset_name} runtime_line was not a string")
    if coverage_class is not None and not isinstance(coverage_class, str):
        raise ValueError(f"artifact {asset_name} coverage_class was not a string")
    if bundle_profile is not None and not isinstance(bundle_profile, str):
        raise ValueError(f"artifact {asset_name} bundle_profile was not a string")
    try:
        rank = int(rank_raw)
    except (TypeError, ValueError) as exc:
        # Chain the cause, consistent with the min_sm/max_sm branch above
        # (the original dropped the original exception here).
        raise ValueError(f"artifact {asset_name} rank was not an integer") from exc
    return PublishedLlamaArtifact(
        asset_name = asset_name,
        install_kind = install_kind,
        runtime_line = runtime_line
        if isinstance(runtime_line, str) and runtime_line
        else None,
        coverage_class = coverage_class
        if isinstance(coverage_class, str) and coverage_class
        else None,
        supported_sms = supported_sms,
        min_sm = min_sm,
        max_sm = max_sm,
        bundle_profile = bundle_profile
        if isinstance(bundle_profile, str) and bundle_profile
        else None,
        rank = rank,
    )


def parse_published_release_bundle(
    repo: str, release: dict[str, Any]
) -> PublishedReleaseBundle | None:
    """Parse one GitHub release into a PublishedReleaseBundle.

    Returns None when the release does not carry a llama.cpp manifest;
    raises RuntimeError for a manifest that exists but is malformed.
    Individual malformed artifact entries are logged and skipped.
    """
    release_tag = release.get("tag_name")
    if not isinstance(release_tag, str) or not release_tag:
        return None

    assets = release_asset_map(release)
    manifest_url = assets.get(DEFAULT_PUBLISHED_MANIFEST_ASSET)
    if not manifest_url:
        return None

    # Mixed repos are filtered by an explicit release-side manifest rather than
    # by release tag or asset filename conventions.
    manifest_payload = fetch_json(manifest_url)
    if not isinstance(manifest_payload, dict):
        raise RuntimeError(
            f"published manifest {DEFAULT_PUBLISHED_MANIFEST_ASSET} was not a JSON object"
        )
    component = manifest_payload.get("component")
    upstream_tag = manifest_payload.get("upstream_tag")
    if component != "llama.cpp":
        return None
    if not isinstance(upstream_tag, str) or not upstream_tag:
        raise RuntimeError(
            f"published manifest {DEFAULT_PUBLISHED_MANIFEST_ASSET} in {repo}@{release_tag} omitted upstream_tag"
        )

    artifacts_payload = manifest_payload.get("artifacts")
    if not isinstance(artifacts_payload, list):
        raise RuntimeError(
            f"published manifest {DEFAULT_PUBLISHED_MANIFEST_ASSET} in {repo}@{release_tag} omitted artifacts"
        )

    artifacts: list[PublishedLlamaArtifact] = []
    for index, raw_artifact in enumerate(artifacts_payload):
        try:
            artifact = parse_published_artifact(raw_artifact)
        except ValueError as exc:
            log(
                f"published artifact ignored for {repo}@{release_tag} artifact[{index}]: {exc}"
            )
            continue
        if artifact is not None:
            artifacts.append(artifact)
    selection_log = [
        f"published_release: repo={repo}",
        f"published_release: tag={release_tag}",
        f"published_release: manifest={DEFAULT_PUBLISHED_MANIFEST_ASSET}",
        f"published_release: upstream_tag={upstream_tag}",
    ]
    return PublishedReleaseBundle(
        repo = repo,
        release_tag = release_tag,
        upstream_tag = upstream_tag,
        assets = assets,
        manifest_asset_name = DEFAULT_PUBLISHED_MANIFEST_ASSET,
        artifacts = artifacts,
        selection_log = selection_log,
    )
def parse_approved_release_checksums(
    repo: str,
    release_tag: str,
    payload: Any,
) -> ApprovedReleaseChecksums:
    """Validate and parse the pinned-release checksum JSON payload.

    Every structural problem raises RuntimeError naming the offending
    field; validation order matches the schema.
    """
    checksum_asset = DEFAULT_PUBLISHED_SHA256_ASSET
    if not isinstance(payload, dict):
        raise RuntimeError(
            f"published checksum asset {checksum_asset} was not a JSON object"
        )
    if payload.get("component") != "llama.cpp":
        raise RuntimeError(
            f"published checksum asset {checksum_asset} did not describe llama.cpp"
        )
    payload_release_tag = payload.get("release_tag")
    if not isinstance(payload_release_tag, str) or not payload_release_tag:
        raise RuntimeError(
            f"published checksum asset {checksum_asset} omitted release_tag"
        )
    if payload_release_tag != release_tag:
        raise RuntimeError(
            f"published checksum asset {checksum_asset} release_tag={payload_release_tag} "
            f"did not match pinned release tag {release_tag}"
        )
    upstream_tag = payload.get("upstream_tag")
    if not isinstance(upstream_tag, str) or not upstream_tag:
        raise RuntimeError(
            f"published checksum asset {checksum_asset} omitted upstream_tag"
        )
    artifacts_payload = payload.get("artifacts")
    if not isinstance(artifacts_payload, dict):
        raise RuntimeError(
            f"published checksum asset {checksum_asset} omitted artifacts"
        )

    artifacts: dict[str, ApprovedArtifactHash] = {}
    for asset_name, raw_entry in artifacts_payload.items():
        if not isinstance(asset_name, str) or not asset_name:
            raise RuntimeError(
                "published checksum asset used a non-string artifact key"
            )
        if not isinstance(raw_entry, dict):
            raise RuntimeError(
                f"published checksum entry for {asset_name} was not an object"
            )
        digest = normalize_sha256_digest(raw_entry.get("sha256"))
        if not digest:
            raise RuntimeError(
                f"published checksum entry for {asset_name} omitted a valid sha256"
            )
        repo_value = raw_entry.get("repo")
        kind_value = raw_entry.get("kind")
        artifacts[asset_name] = ApprovedArtifactHash(
            asset_name = asset_name,
            sha256 = digest,
            repo = repo_value if isinstance(repo_value, str) and repo_value else None,
            kind = kind_value if isinstance(kind_value, str) and kind_value else None,
        )

    source_commit = payload.get("source_commit")
    return ApprovedReleaseChecksums(
        repo = repo,
        release_tag = release_tag,
        upstream_tag = upstream_tag,
        source_commit = source_commit
        if isinstance(source_commit, str) and source_commit
        else None,
        artifacts = artifacts,
    )


def load_approved_release_checksums(
    repo: str, release_tag: str
) -> ApprovedReleaseChecksums:
    """Fetch and validate the checksum asset of the pinned release.

    Every failure mode is converted into PrebuiltFallback so callers can
    drop to the source-build path.
    """
    try:
        release = github_release(repo, release_tag)
    except Exception as exc:
        raise PrebuiltFallback(
            f"approved prebuilt release {repo}@{release_tag} was not available"
        ) from exc
    checksum_url = release_asset_map(release).get(DEFAULT_PUBLISHED_SHA256_ASSET)
    if not checksum_url:
        raise PrebuiltFallback(
            f"approved prebuilt release {repo}@{release_tag} did not expose {DEFAULT_PUBLISHED_SHA256_ASSET}"
        )
    try:
        payload = fetch_json(checksum_url)
        return parse_approved_release_checksums(repo, release_tag, payload)
    except PrebuiltFallback:
        raise
    except Exception as exc:
        raise PrebuiltFallback(
            f"approved checksum asset {DEFAULT_PUBLISHED_SHA256_ASSET} in {repo}@{release_tag} was invalid"
        ) from exc


def iter_published_release_bundles(
    repo: str, published_release_tag: str = ""
) -> Iterable[PublishedReleaseBundle]:
    """Yield parseable published bundles from *repo*.

    With an explicit tag only that release is considered; otherwise all
    non-draft, non-prerelease releases are scanned.  Releases with broken
    metadata are logged and skipped.
    """
    if published_release_tag:
        releases = [github_release(repo, published_release_tag)]
    else:
        releases = github_releases(repo)
    for release in releases:
        if not published_release_tag and (
            release.get("draft") or release.get("prerelease")
        ):
            continue
        try:
            bundle = parse_published_release_bundle(repo, release)
        except Exception as exc:
            release_tag = release.get("tag_name", "unknown")
            log(f"published release metadata ignored for {repo}@{release_tag}: {exc}")
            continue
        if bundle is not None:
            yield bundle
def iter_published_release_bundles(
    repo: str, published_release_tag: str = ""
) -> Iterable[PublishedReleaseBundle]:
    """Yield usable release bundles for *repo*, newest first.

    With an explicit *published_release_tag*, only that release is fetched
    (draft/prerelease filtering is skipped).  Releases whose metadata fails
    to parse are logged and ignored rather than aborting the scan.
    """
    if published_release_tag:
        releases = [github_release(repo, published_release_tag)]
    else:
        releases = github_releases(repo)
    for release in releases:
        skip_unstable = not published_release_tag
        if skip_unstable and (release.get("draft") or release.get("prerelease")):
            continue
        try:
            bundle = parse_published_release_bundle(repo, release)
        except Exception as exc:
            release_tag = release.get("tag_name", "unknown")
            log(f"published release metadata ignored for {repo}@{release_tag}: {exc}")
            continue
        if bundle is None:
            continue
        yield bundle


def linux_cuda_choice_from_release(
    host: HostInfo,
    release: PublishedReleaseBundle,
    preferred_runtime_line: str | None = None,
    selection_preamble: Iterable[str] = (),
) -> LinuxCudaSelection | None:
    """Pick ordered Linux CUDA download attempts from one published release.

    Intersects the CUDA runtime lines detected on disk with the lines the
    driver supports, optionally reorders them by the Torch preference, then
    for each line accepts artifacts whose SM coverage includes every detected
    compute capability (narrowest coverage first, portable build as a
    fallback).  Returns ``None`` when nothing is compatible; every decision
    is recorded in the selection log for diagnostics.
    """
    host_sms = normalize_compute_caps(host.compute_caps)
    detected_runtime_lines, runtime_dirs = detected_linux_runtime_lines()
    driver_runtime_lines = compatible_linux_runtime_lines(host)
    # A runtime line is usable only when its libraries are present AND the
    # driver reports compatibility with it.
    runtime_lines = [
        runtime_line
        for runtime_line in detected_runtime_lines
        if runtime_line in driver_runtime_lines
    ]
    ordered_runtime_lines = list(runtime_lines)
    selection_log = (
        list(release.selection_log)
        + list(selection_preamble)
        + [
            f"linux_cuda_selection: release={release.release_tag}",
            f"linux_cuda_selection: detected_sms={','.join(host_sms) if host_sms else 'unknown'}",
            "linux_cuda_selection: detected_runtime_lines="
            + (",".join(detected_runtime_lines) if detected_runtime_lines else "none"),
            "linux_cuda_selection: driver_runtime_lines="
            + (",".join(driver_runtime_lines) if driver_runtime_lines else "none"),
            "linux_cuda_selection: compatible_runtime_lines="
            + (",".join(runtime_lines) if runtime_lines else "none"),
        ]
    )
    for runtime_line in ("cuda13", "cuda12"):
        dirs_for_line = runtime_dirs.get(runtime_line)
        selection_log.append(
            "linux_cuda_selection: runtime_dirs "
            f"{runtime_line}="
            + (",".join(runtime_dirs.get(runtime_line, [])) if dirs_for_line else "none")
        )
    published_artifacts = [
        artifact
        for artifact in release.artifacts
        if artifact.install_kind == "linux-cuda"
    ]
    published_asset_names = sorted(
        artifact.asset_name for artifact in published_artifacts
    )
    selection_log.append(
        "linux_cuda_selection: published_assets="
        + (",".join(published_asset_names) if published_asset_names else "none")
    )

    if not host_sms:
        selection_log.append(
            "linux_cuda_selection: compute capability detection unavailable; prefer portable by runtime line"
        )
    if not runtime_lines:
        selection_log.append(
            "linux_cuda_selection: no Linux CUDA runtime line satisfied both runtime libraries and driver compatibility"
        )
        return None

    if preferred_runtime_line:
        if preferred_runtime_line in ordered_runtime_lines:
            # Move the Torch-preferred line to the front without dropping others.
            ordered_runtime_lines = [preferred_runtime_line] + [
                runtime_line
                for runtime_line in ordered_runtime_lines
                if runtime_line != preferred_runtime_line
            ]
            selection_log.append(
                "linux_cuda_selection: torch_preferred_runtime_line="
                f"{preferred_runtime_line} reordered_attempts={','.join(ordered_runtime_lines)}"
            )
        else:
            selection_log.append(
                "linux_cuda_selection: torch_preferred_runtime_line="
                f"{preferred_runtime_line} unavailable_on_host"
            )

    attempts: list[AssetChoice] = []
    seen_attempts: set[str] = set()

    def add_attempt(
        artifact: PublishedLlamaArtifact, asset_url: str, reason: str
    ) -> None:
        # Deduplicate by asset name; the first reason to pick an asset wins.
        asset_name = artifact.asset_name
        if asset_name in seen_attempts:
            return
        seen_attempts.add(asset_name)
        attempts.append(
            AssetChoice(
                repo=release.repo,
                tag=release.release_tag,
                name=asset_name,
                url=asset_url,
                source_label="published",
                is_ready_bundle=True,
                install_kind="linux-cuda",
                bundle_profile=artifact.bundle_profile,
                runtime_line=artifact.runtime_line,
                coverage_class=artifact.coverage_class,
                supported_sms=artifact.supported_sms,
                min_sm=artifact.min_sm,
                max_sm=artifact.max_sm,
                selection_log=list(selection_log)
                + [
                    "linux_cuda_selection: selected "
                    f"{asset_name} runtime_line={artifact.runtime_line} coverage_class={artifact.coverage_class} reason={reason}"
                ],
            )
        )

    for runtime_line in ordered_runtime_lines:
        coverage_candidates: list[tuple[PublishedLlamaArtifact, str]] = []
        portable_candidate: tuple[PublishedLlamaArtifact, str] | None = None
        for artifact in published_artifacts:
            if artifact.runtime_line != runtime_line:
                continue
            asset_name = artifact.asset_name
            asset_url = release.assets.get(asset_name)
            if not asset_url:
                selection_log.append(
                    f"linux_cuda_selection: reject {asset_name} missing asset"
                )
                continue
            if not host_sms and artifact.coverage_class != "portable":
                # Without known compute caps only portable builds are safe.
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    "reason=unknown_compute_caps_prefer_portable"
                )
                continue

            if not artifact.supported_sms:
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    "reason=artifact_missing_supported_sms"
                )
                continue
            if artifact.min_sm is None or artifact.max_sm is None:
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    "reason=artifact_missing_sm_bounds"
                )
                continue

            supported_sms = {str(value) for value in artifact.supported_sms}
            missing_sms = [sm for sm in host_sms if sm not in supported_sms]
            out_of_range_sms = [
                sm
                for sm in host_sms
                if not (artifact.min_sm <= int(sm) <= artifact.max_sm)
            ]
            reasons: list[str] = []
            if missing_sms:
                reasons.append(f"missing_sms={','.join(missing_sms)}")
            if out_of_range_sms:
                reasons.append(f"out_of_range_sms={','.join(out_of_range_sms)}")
            if reasons:
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    f"coverage={artifact.min_sm}-{artifact.max_sm} supported={','.join(artifact.supported_sms)} "
                    f"reasons={' '.join(reasons)}"
                )
                continue

            selection_log.append(
                "linux_cuda_selection: accept "
                f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                f"coverage={artifact.min_sm}-{artifact.max_sm} supported={','.join(artifact.supported_sms)}"
            )
            if artifact.coverage_class == "portable":
                portable_candidate = (artifact, asset_url)
            else:
                coverage_candidates.append((artifact, asset_url))

        if coverage_candidates:
            # Narrowest SM span first, then publisher rank, then lowest max SM.
            artifact, url = sorted(
                coverage_candidates,
                key=lambda item: (
                    (item[0].max_sm or 0) - (item[0].min_sm or 0),
                    item[0].rank,
                    item[0].max_sm or 0,
                ),
            )[0]
            add_attempt(artifact, url, "best coverage for runtime line")
        if portable_candidate:
            artifact, url = portable_candidate
            add_attempt(artifact, url, "portable fallback for runtime line")

    if not attempts:
        return None

    selection_log.append(
        "linux_cuda_selection: attempt_order="
        + ",".join(choice.name for choice in attempts)
    )
    # Each attempt carries the full shared log plus its own trailer line.
    for attempt in attempts:
        attempt.selection_log = list(selection_log) + [
            "linux_cuda_selection: attempt "
            f"{attempt.name} runtime_line={attempt.runtime_line} coverage_class={attempt.coverage_class}"
        ]
    return LinuxCudaSelection(attempts=attempts, selection_log=selection_log)


def latest_published_linux_cuda_tag(host: HostInfo, published_repo: str) -> str | None:
    """Return the upstream tag of the newest release with a compatible Linux CUDA bundle."""
    for release in iter_published_release_bundles(published_repo):
        if linux_cuda_choice_from_release(host, release):
            return release.upstream_tag
    return None


def iter_upstream_releases() -> Iterable[dict[str, Any]]:
    """Yield stable (non-draft, non-prerelease) upstream llama.cpp releases."""
    for release in github_releases(UPSTREAM_REPO):
        if release.get("draft") or release.get("prerelease"):
            continue
        yield release


def pinned_published_release_bundle(
    repo: str, published_release_tag: str
) -> PublishedReleaseBundle:
    """Fetch exactly one published release bundle or raise PrebuiltFallback."""
    bundle = next(iter_published_release_bundles(repo, published_release_tag), None)
    if bundle is None:
        raise PrebuiltFallback(
            f"published release {repo}@{published_release_tag} did not expose a usable llama.cpp manifest"
        )
    return bundle


def resolve_requested_llama_tag(
    requested_tag: str | None,
) -> str:
    """Resolve 'latest'/None to the newest upstream tag; pass others through."""
    if requested_tag and requested_tag != "latest":
        return requested_tag
    return latest_upstream_release_tag()
def resolve_requested_install_tag(
    requested_tag: str | None,
    published_release_tag: str = "",
) -> str:
    """Validate that the requested tags match the approved prebuilt pin.

    Prebuilt installs are pinned to ``APPROVED_PREBUILT_LLAMA_TAG``; anything
    else (other than "latest"/None) triggers a fallback to source builds.
    """
    approved_tag = APPROVED_PREBUILT_LLAMA_TAG
    normalized_requested = requested_tag or "latest"
    if normalized_requested not in {"latest", approved_tag}:
        raise PrebuiltFallback(
            f"prebuilt installs are pinned to approved release {approved_tag}; requested {normalized_requested}"
        )
    if published_release_tag and published_release_tag != approved_tag:
        raise PrebuiltFallback(
            f"prebuilt installs require published release tag {approved_tag}; requested {published_release_tag}"
        )
    return approved_tag


def run_capture(
    command: list[str],
    *,
    timeout: int = 30,
    check: bool = False,
    env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run *command* capturing text output; optionally raise on failure."""
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
        env=env,
    )
    if check and completed.returncode != 0:
        raise subprocess.CalledProcessError(
            completed.returncode, command, completed.stdout, completed.stderr
        )
    return completed


def detect_host() -> HostInfo:
    """Probe the local machine (OS, arch, NVIDIA driver/GPUs) into a HostInfo.

    Uses ``nvidia-smi`` when available to read the driver CUDA version and
    per-GPU compute capabilities, honoring CUDA_VISIBLE_DEVICES filtering.
    All probe failures are swallowed: detection is best-effort.
    """
    system = platform.system()
    machine = platform.machine().lower()
    is_windows = system == "Windows"
    is_linux = system == "Linux"
    is_macos = system == "Darwin"
    is_x86_64 = machine in {"x86_64", "amd64"}
    is_arm64 = machine in {"arm64", "aarch64"}

    nvidia_smi = shutil.which("nvidia-smi")
    driver_cuda_version = None
    compute_caps: list[str] = []
    visible_cuda_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    visible_device_tokens = parse_cuda_visible_devices(visible_cuda_devices)
    has_physical_nvidia = False
    has_usable_nvidia = False
    if nvidia_smi:
        try:
            result = run_capture([nvidia_smi], timeout=20)
            merged = "\n".join(part for part in (result.stdout, result.stderr) if part)
            if "NVIDIA-SMI" in merged:
                has_physical_nvidia = True
                # CUDA_VISIBLE_DEVICES="" hides every GPU.
                has_usable_nvidia = visible_device_tokens != []
            for line in merged.splitlines():
                if "CUDA Version:" in line:
                    raw = line.split("CUDA Version:", 1)[1].strip().split()[0]
                    major, minor = raw.split(".", 1)
                    driver_cuda_version = (int(major), int(minor))
                    break
        except Exception:
            pass

        try:
            caps = run_capture(
                [
                    nvidia_smi,
                    "--query-gpu=index,uuid,compute_cap",
                    "--format=csv,noheader",
                ],
                timeout=20,
            )
            visible_gpu_rows: list[tuple[str, str, str]] = []
            for raw in caps.stdout.splitlines():
                parts = [part.strip() for part in raw.split(",")]
                if len(parts) != 3:
                    continue
                index, uuid, cap = parts
                visible_gpu_row = select_visible_gpu_rows(
                    [(index, uuid, cap)],
                    visible_device_tokens,
                )
                if not visible_gpu_row:
                    continue
                visible_gpu_rows.extend(visible_gpu_row)
                normalized_cap = normalize_compute_cap(cap)
                if normalized_cap is None:
                    continue
                if normalized_cap not in compute_caps:
                    compute_caps.append(normalized_cap)

            if visible_gpu_rows:
                has_usable_nvidia = True
            elif visible_device_tokens == []:
                has_usable_nvidia = False
            elif supports_explicit_visible_device_matching(visible_device_tokens):
                # Explicit tokens matched no GPU: treat as unusable.
                has_usable_nvidia = False
            elif has_physical_nvidia:
                has_usable_nvidia = True
        except Exception:
            pass

    return HostInfo(
        system=system,
        machine=machine,
        is_windows=is_windows,
        is_linux=is_linux,
        is_macos=is_macos,
        is_x86_64=is_x86_64,
        is_arm64=is_arm64,
        nvidia_smi=nvidia_smi,
        driver_cuda_version=driver_cuda_version,
        compute_caps=compute_caps,
        visible_cuda_devices=visible_cuda_devices,
        has_physical_nvidia=has_physical_nvidia,
        has_usable_nvidia=has_usable_nvidia,
    )


def pick_windows_cuda_runtime(host: HostInfo) -> str | None:
    """Map the driver's CUDA version to the newest Windows runtime it supports."""
    if not host.driver_cuda_version:
        return None
    major, minor = host.driver_cuda_version
    if major > 13 or (major == 13 and minor >= 1):
        return "13.1"
    if major > 12 or (major == 12 and minor >= 4):
        return "12.4"
    return None


def compatible_linux_runtime_lines(host: HostInfo) -> list[str]:
    """Return Linux CUDA runtime lines the driver can load, newest first."""
    if not host.driver_cuda_version:
        return []
    major, _minor = host.driver_cuda_version
    if major >= 13:
        return ["cuda13", "cuda12"]
    if major >= 12:
        return ["cuda12"]
    return []


def windows_runtime_line_info() -> dict[str, tuple[str, ...]]:
    """DLL glob patterns that must all be present for each Windows runtime line."""
    return {
        "cuda13": ("cudart64_13*.dll", "cublas64_13*.dll", "cublasLt64_13*.dll"),
        "cuda12": ("cudart64_12*.dll", "cublas64_12*.dll", "cublasLt64_12*.dll"),
    }


def detected_windows_runtime_lines() -> tuple[list[str], dict[str, list[str]]]:
    """Scan candidate directories for CUDA runtime DLL lines present on disk."""
    dirs = windows_runtime_dirs()
    detected: list[str] = []
    runtime_dirs: dict[str, list[str]] = {}
    for runtime_line, required_patterns in windows_runtime_line_info().items():
        matching_dirs = windows_runtime_dirs_for_patterns(required_patterns, dirs)
        if matching_dirs:
            detected.append(runtime_line)
            runtime_dirs[runtime_line] = matching_dirs
    return detected, runtime_dirs


def compatible_windows_runtime_lines(host: HostInfo) -> list[str]:
    """Runtime lines allowed by the Windows driver, newest first."""
    driver_runtime = pick_windows_cuda_runtime(host)
    if driver_runtime == "13.1":
        return ["cuda13", "cuda12"]
    if driver_runtime == "12.4":
        return ["cuda12"]
    return []


def runtime_line_from_cuda_version(cuda_version: str | None) -> str | None:
    """Translate a CUDA version string like '12.8' into a runtime line name."""
    if not cuda_version:
        return None
    raw = str(cuda_version).strip()
    if not raw:
        return None
    major, _, _ = raw.partition(".")
    if major == "12":
        return "cuda12"
    if major == "13":
        return "cuda13"
    return None


def detect_torch_cuda_runtime_preference(host: HostInfo) -> CudaRuntimePreference:
    """Derive a preferred CUDA runtime line from an importable, working Torch.

    Every bail-out path records its reason in the selection log and returns
    no preference so normal selection proceeds.
    """
    selection_log: list[str] = []
    if host.is_macos:
        selection_log.append("torch_cuda_preference: skipped on macOS")
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)
    if not (host.has_usable_nvidia and (host.is_linux or host.is_windows)):
        selection_log.append(
            "torch_cuda_preference: skipped because CUDA host prerequisites were not met"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    try:
        import torch
    except Exception as exc:
        selection_log.append(f"torch_cuda_preference: import failed: {exc}")
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    cuda_version = getattr(getattr(torch, "version", None), "cuda", None)
    if not isinstance(cuda_version, str) or not cuda_version.strip():
        selection_log.append(
            "torch_cuda_preference: torch.version.cuda missing; skipping Torch shortcut"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    try:
        cuda_available = bool(torch.cuda.is_available())
    except Exception as exc:
        selection_log.append(
            f"torch_cuda_preference: torch.cuda.is_available() failed: {exc}"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    if not cuda_available:
        selection_log.append(
            "torch_cuda_preference: torch.cuda.is_available() returned False; falling back to normal selection"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    runtime_line = runtime_line_from_cuda_version(cuda_version)
    if runtime_line is None:
        selection_log.append(
            f"torch_cuda_preference: unsupported torch.version.cuda={cuda_version}; falling back to normal selection"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    selection_log.append(
        "torch_cuda_preference: selected runtime_line="
        f"{runtime_line} from torch.version.cuda={cuda_version}"
    )
    return CudaRuntimePreference(runtime_line=runtime_line, selection_log=selection_log)
def windows_cuda_attempts(
    host: HostInfo,
    llama_tag: str,
    upstream_assets: dict[str, str],
    preferred_runtime_line: str | None,
    selection_preamble: Iterable[str] = (),
) -> list[AssetChoice]:
    """Build the ordered list of Windows CUDA zip downloads to try.

    Order is: Torch-preferred line first (when compatible), then runtime
    lines whose DLLs were found on disk intersected with driver support,
    finally a driver-only fallback when no DLL line was detected.
    """
    selection_log = list(selection_preamble)
    runtime_by_line = {"cuda12": "12.4", "cuda13": "13.1"}
    driver_runtime = pick_windows_cuda_runtime(host)
    detected_runtime_lines, runtime_dirs = detected_windows_runtime_lines()
    compatible_runtime_lines = compatible_windows_runtime_lines(host)
    normal_runtime_lines: list[str]
    if detected_runtime_lines:
        # Keep driver ordering but only lines whose DLLs exist on disk.
        normal_runtime_lines = [
            line for line in compatible_runtime_lines if line in detected_runtime_lines
        ]
    else:
        normal_runtime_lines = compatible_runtime_lines
    selection_log.append(
        "windows_cuda_selection: driver_runtime="
        + (driver_runtime if driver_runtime else "unknown")
    )
    selection_log.append(
        "windows_cuda_selection: detected_runtime_lines="
        + (",".join(detected_runtime_lines) if detected_runtime_lines else "none")
    )
    for runtime_line in ("cuda13", "cuda12"):
        dirs_for_line = runtime_dirs.get(runtime_line)
        selection_log.append(
            "windows_cuda_selection: runtime_dirs "
            f"{runtime_line}="
            + (",".join(runtime_dirs.get(runtime_line, [])) if dirs_for_line else "none")
        )
    if detected_runtime_lines:
        selection_log.append(
            "windows_cuda_selection: host_runtime_order="
            + (",".join(normal_runtime_lines) if normal_runtime_lines else "none")
        )
    else:
        selection_log.append(
            "windows_cuda_selection: no CUDA runtime DLL line detected; falling back to driver order"
        )
    if not normal_runtime_lines:
        if detected_runtime_lines:
            selection_log.append(
                "windows_cuda_selection: detected CUDA runtime DLLs were incompatible with the reported driver"
            )
        if driver_runtime == "13.1":
            fallback_runtime_lines = ["cuda13", "cuda12"]
        elif driver_runtime == "12.4":
            fallback_runtime_lines = ["cuda12"]
        else:
            fallback_runtime_lines = []
        normal_runtime_lines = fallback_runtime_lines

    runtime_order: list[str] = []
    if preferred_runtime_line and preferred_runtime_line in normal_runtime_lines:
        runtime_order.append(preferred_runtime_line)
        selection_log.append(
            "windows_cuda_selection: torch_preferred_runtime_line="
            f"{preferred_runtime_line} reordered_attempts"
        )
    elif preferred_runtime_line:
        selection_log.append(
            "windows_cuda_selection: torch_preferred_runtime_line="
            f"{preferred_runtime_line} unavailable_or_incompatible"
        )
    else:
        selection_log.append(
            "windows_cuda_selection: no Torch runtime preference available"
        )

    runtime_order.extend(
        runtime_line
        for runtime_line in normal_runtime_lines
        if runtime_line not in runtime_order
    )
    selection_log.append(
        "windows_cuda_selection: normal_runtime_order="
        + (",".join(normal_runtime_lines) if normal_runtime_lines else "none")
    )
    selection_log.append(
        "windows_cuda_selection: attempt_runtime_order="
        + (",".join(runtime_order) if runtime_order else "none")
    )

    attempts: list[AssetChoice] = []
    for runtime_line in runtime_order:
        runtime = runtime_by_line[runtime_line]
        upstream_name = f"llama-{llama_tag}-bin-win-cuda-{runtime}-x64.zip"
        asset_url = upstream_assets.get(upstream_name)
        if not asset_url:
            selection_log.append(
                f"windows_cuda_selection: skip missing asset {upstream_name}"
            )
            continue
        attempts.append(
            AssetChoice(
                repo=UPSTREAM_REPO,
                tag=llama_tag,
                name=upstream_name,
                url=asset_url,
                source_label="upstream",
                install_kind="windows-cuda",
                runtime_line=runtime_line,
                selection_log=list(selection_log)
                + [
                    f"windows_cuda_selection: selected {upstream_name} runtime={runtime}"
                ],
            )
        )
    return attempts


def resolve_windows_cuda_choices(
    host: HostInfo, llama_tag: str, upstream_assets: dict[str, str]
) -> list[AssetChoice]:
    """Windows CUDA attempts seeded with the Torch runtime preference."""
    torch_preference = detect_torch_cuda_runtime_preference(host)
    return windows_cuda_attempts(
        host,
        llama_tag,
        upstream_assets,
        torch_preference.runtime_line,
        torch_preference.selection_log,
    )


def resolve_linux_cuda_choice(
    host: HostInfo, llama_tag: str, published_repo: str, published_release_tag: str
) -> LinuxCudaSelection:
    """Find a compatible published Linux CUDA bundle for *llama_tag*.

    Releases built from a different upstream tag are skipped (and counted
    for logging).  Raises PrebuiltFallback when nothing compatible exists.
    """
    torch_preference = detect_torch_cuda_runtime_preference(host)
    skipped_tag_mismatches = 0
    for release in iter_published_release_bundles(
        published_repo, published_release_tag
    ):
        if release.upstream_tag != llama_tag:
            skipped_tag_mismatches += 1
            continue
        selection = linux_cuda_choice_from_release(
            host,
            release,
            preferred_runtime_line=torch_preference.runtime_line,
            selection_preamble=torch_preference.selection_log,
        )
        if selection is not None:
            return selection
    if skipped_tag_mismatches:
        log(
            "published Linux CUDA selection skipped "
            f"{skipped_tag_mismatches} release(s) with upstream_tag != {llama_tag}"
        )
    raise PrebuiltFallback("no compatible published Linux CUDA bundle was found")


def resolve_upstream_asset_choice(host: HostInfo, llama_tag: str) -> AssetChoice:
    """Select the upstream llama.cpp release asset matching this host.

    Covers Linux x86-64 (CPU), Windows x86-64 (CUDA when usable, else CPU)
    and macOS (arm64/x64).  Raises PrebuiltFallback when no policy applies
    or the expected asset is absent from the release.
    """
    upstream_assets = github_release_assets(UPSTREAM_REPO, llama_tag)
    if host.is_linux and host.is_x86_64:
        upstream_name = f"llama-{llama_tag}-bin-ubuntu-x64.tar.gz"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream Linux CPU asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="linux-cpu",
        )

    if host.is_windows and host.is_x86_64:
        if host.has_usable_nvidia:
            attempts = resolve_windows_cuda_choices(host, llama_tag, upstream_assets)
            if attempts:
                return attempts[0]
            raise PrebuiltFallback("no compatible Windows CUDA asset was found")

        upstream_name = f"llama-{llama_tag}-bin-win-cpu-x64.zip"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream Windows CPU asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="windows-cpu",
        )

    if host.is_macos and host.is_arm64:
        upstream_name = f"llama-{llama_tag}-bin-macos-arm64.tar.gz"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream macOS arm64 asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="macos-arm64",
        )

    if host.is_macos and host.is_x86_64:
        upstream_name = f"llama-{llama_tag}-bin-macos-x64.tar.gz"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream macOS x64 asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="macos-x64",
        )

    raise PrebuiltFallback(
        f"no prebuilt policy exists for {host.system} {host.machine}"
    )
def resolve_asset_choice(
    host: HostInfo, llama_tag: str, published_repo: str, published_release_tag: str
) -> AssetChoice:
    """Top-level asset selection: published Linux CUDA bundles first, else upstream."""
    if host.is_linux and host.is_x86_64 and host.has_usable_nvidia:
        return resolve_linux_cuda_choice(
            host, llama_tag, published_repo, published_release_tag
        ).primary
    return resolve_upstream_asset_choice(host, llama_tag)


def extract_archive(archive_path: Path, destination: Path) -> None:
    """Safely extract a .zip or .tar.gz into *destination*.

    Rejects absolute paths, path traversal, zip symlink entries and tar
    entries that are neither files, directories nor links.  Tar symlinks
    are re-created (targets must stay inside the destination); tar hard
    links are materialized as file copies.

    Bug fix: hard-link targets (``member.islnk()``) are now resolved
    against the extraction root — per the tar format a hard link's
    ``linkname`` is an archive-root-relative path — while symlink targets
    remain relative to the link's own directory.  Previously both were
    resolved against the link's directory, so hard links inside
    subdirectories failed to resolve and aborted the extraction.
    """

    def safe_extract_path(base: Path, member_name: str) -> Path:
        # Normalize separators and refuse absolute or escaping members.
        normalized = member_name.replace("\\", "/")
        member_path = Path(normalized)
        if member_path.is_absolute():
            raise PrebuiltFallback(
                f"archive member used an absolute path: {member_name}"
            )

        target = (base / member_path).resolve()
        base_resolved = base.resolve()
        try:
            target.relative_to(base_resolved)
        except ValueError as exc:
            raise PrebuiltFallback(
                f"archive member escaped destination: {member_name}"
            ) from exc
        return target

    def safe_link_target(
        base: Path,
        member_name: str,
        link_name: str,
        target: Path,
        *,
        is_symlink: bool,
    ) -> tuple[str, Path]:
        # Validate a link target and return (normalized name, resolved path).
        normalized = link_name.replace("\\", "/")
        link_path = Path(normalized)
        if link_path.is_absolute():
            raise PrebuiltFallback(
                f"archive link used an absolute target: {member_name} -> {link_name}"
            )
        if not normalized:
            raise PrebuiltFallback(f"archive link used an empty target: {member_name}")

        # Symlink targets are relative to the link's directory; hard-link
        # targets are relative to the archive root.
        anchor = target.parent if is_symlink else base
        resolved = (anchor / link_path).resolve()
        base_resolved = base.resolve()
        try:
            resolved.relative_to(base_resolved)
        except ValueError as exc:
            raise PrebuiltFallback(
                f"archive link escaped destination: {member_name} -> {link_name}"
            ) from exc
        return normalized, resolved

    def extract_zip_safely(source: Path, base: Path) -> None:
        with zipfile.ZipFile(source) as archive:
            for member in archive.infolist():
                target = safe_extract_path(base, member.filename)
                # High bits of external_attr hold the POSIX file type.
                mode = (member.external_attr >> 16) & 0o170000
                if mode == 0o120000:
                    raise PrebuiltFallback(
                        f"zip archive contained a symlink entry: {member.filename}"
                    )
                if member.is_dir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                with archive.open(member, "r") as src, target.open("wb") as dst:
                    shutil.copyfileobj(src, dst)

    def extract_tar_safely(source: Path, base: Path) -> None:
        pending_links: list[tuple[tarfile.TarInfo, Path]] = []
        with tarfile.open(source, "r:gz") as archive:
            for member in archive.getmembers():
                target = safe_extract_path(base, member.name)
                if member.isdir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                if member.islnk() or member.issym():
                    # Defer links until their targets exist on disk.
                    pending_links.append((member, target))
                    continue
                if not member.isfile():
                    raise PrebuiltFallback(
                        f"tar archive contained an unsupported entry: {member.name}"
                    )
                target.parent.mkdir(parents=True, exist_ok=True)
                extracted = archive.extractfile(member)
                if extracted is None:
                    raise PrebuiltFallback(
                        f"tar archive entry could not be read: {member.name}"
                    )
                with extracted, target.open("wb") as dst:
                    shutil.copyfileobj(extracted, dst)

        # Resolve links iteratively so chains (link -> link -> file) settle.
        unresolved = list(pending_links)
        while unresolved:
            next_round: list[tuple[tarfile.TarInfo, Path]] = []
            progressed = False
            for member, target in unresolved:
                normalized_link, resolved_target = safe_link_target(
                    base,
                    member.name,
                    member.linkname,
                    target,
                    is_symlink=member.issym(),
                )
                if not resolved_target.exists() and not resolved_target.is_symlink():
                    next_round.append((member, target))
                    continue
                if resolved_target.is_dir():
                    raise PrebuiltFallback(
                        f"archive link targeted a directory: {member.name} -> {member.linkname}"
                    )

                target.parent.mkdir(parents=True, exist_ok=True)
                if target.exists() or target.is_symlink():
                    target.unlink()

                if member.issym():
                    target.symlink_to(normalized_link)
                else:
                    # Hard links become plain copies for portability.
                    shutil.copy2(resolved_target, target)
                progressed = True

            if not progressed:
                details = ", ".join(
                    f"{member.name} -> {member.linkname}" for member, _ in next_round
                )
                raise PrebuiltFallback(
                    f"tar archive contained unresolved link entries: {details}"
                )
            unresolved = next_round

    destination.mkdir(parents=True, exist_ok=True)
    if archive_path.name.endswith(".zip"):
        extract_zip_safely(archive_path, destination)
        return
    if archive_path.name.endswith(".tar.gz"):
        extract_tar_safely(archive_path, destination)
        return
    raise PrebuiltFallback(f"unsupported archive format: {archive_path.name}")


def copy_globs(
    source_dir: Path, destination: Path, patterns: list[str], *, required: bool = True
) -> None:
    """Copy files matching any of *patterns* from *source_dir* into *destination*.

    Matching is by basename, searched recursively, shallowest path first;
    two distinct files with the same basename are rejected as ambiguous.
    Raises PrebuiltFallback when *required* and nothing matched.
    """
    destination.mkdir(parents=True, exist_ok=True)
    matched_sources: dict[str, Path] = {}
    for path in sorted(
        (candidate for candidate in source_dir.rglob("*") if candidate.is_file()),
        key=lambda candidate: (
            len(candidate.relative_to(source_dir).parts),
            str(candidate),
        ),
    ):
        for pattern in patterns:
            if fnmatch.fnmatch(path.name, pattern):
                previous = matched_sources.get(path.name)
                if previous is not None and previous != path:
                    raise PrebuiltFallback(
                        f"ambiguous archive layout for {path.name}: "
                        f"{previous.relative_to(source_dir)} and {path.relative_to(source_dir)}"
                    )
                matched_sources[path.name] = path
                break

    if required and not matched_sources:
        raise PrebuiltFallback(f"required files missing from {source_dir}: {patterns}")

    for name, path in matched_sources.items():
        shutil.copy2(path, destination / name)


def ensure_converter_scripts(install_dir: Path, llama_tag: str) -> None:
    """Ensure convert_hf_to_gguf.py exists, fetching it from GitHub if needed,
    and keep the legacy dashed filename as a symlink (or copy) alias."""
    canonical = install_dir / "convert_hf_to_gguf.py"
    if not canonical.exists():
        # Hydrated source tree should have placed this file already.
        # Fall back to a network fetch so the install is not blocked.
        raw_base = f"https://raw.githubusercontent.com/ggml-org/llama.cpp/{llama_tag}"
        source_url = f"{raw_base}/convert_hf_to_gguf.py"
        data = download_bytes(
            source_url,
            progress_label=f"Downloading {download_label_from_url(source_url)}",
        )
        if not data:
            raise RuntimeError(f"downloaded empty converter script from {source_url}")
        # Cheap sanity check that we fetched Python, not an HTML error page.
        if b"import " not in data and b"def " not in data and b"#!/" not in data:
            raise RuntimeError(
                f"downloaded converter script did not look like Python source: {source_url}"
            )
        atomic_write_bytes(canonical, data)
    legacy = install_dir / "convert-hf-to-gguf.py"
    if legacy.exists() or legacy.is_symlink():
        legacy.unlink()
    try:
        legacy.symlink_to("convert_hf_to_gguf.py")
    except OSError:
        shutil.copy2(canonical, legacy)


def extracted_archive_root(extract_dir: Path) -> Path:
    """Return the single top-level directory of an extraction, else the dir itself."""
    children = [path for path in extract_dir.iterdir()]
    if len(children) == 1 and children[0].is_dir():
        return children[0]
    return extract_dir


def copy_directory_contents(source_dir: Path, destination: Path) -> None:
    """Merge-copy everything inside *source_dir* into *destination*."""
    destination.mkdir(parents=True, exist_ok=True)
    for item in source_dir.iterdir():
        target = destination / item.name
        if item.is_dir():
            shutil.copytree(item, target, dirs_exist_ok=True)
        else:
            shutil.copy2(item, target)


def hydrate_source_tree(
    upstream_tag: str,
    install_dir: Path,
    work_dir: Path,
    *,
    expected_sha256: str,
) -> None:
    """Download, verify and unpack the upstream source tree into *install_dir*.

    Tries each candidate URL in order, verifies the archive checksum,
    validates that key repo files are present, then merges the tree into
    the install directory.  Any failure is surfaced as PrebuiltFallback.
    """
    archive_path = work_dir / f"llama.cpp-source-{upstream_tag}.tar.gz"
    source_urls = upstream_source_archive_urls(upstream_tag)
    extract_dir = Path(tempfile.mkdtemp(prefix="source-extract-", dir=work_dir))

    try:
        log(f"downloading llama.cpp source tree for upstream tag {upstream_tag}")
        last_exc: Exception | None = None
        downloaded = False
        for index, source_url in enumerate(source_urls):
            try:
                if index > 0:
                    log(
                        f"retrying source tree download from fallback URL: {source_url}"
                    )
                download_file_verified(
                    source_url,
                    archive_path,
                    expected_sha256=expected_sha256,
                    label=f"llama.cpp source tree for {upstream_tag}",
                )
                downloaded = True
                break
            except Exception as exc:
                last_exc = exc
                # Re-raise only after the final candidate URL failed.
                if index == len(source_urls) - 1:
                    raise
                log(f"source tree download failed from {source_url}: {exc}")
        if not downloaded:
            assert last_exc is not None
            raise last_exc
        extract_archive(archive_path, extract_dir)
        source_root = extracted_archive_root(extract_dir)
        required_paths = [
            source_root / "CMakeLists.txt",
            source_root / "convert_hf_to_gguf.py",
            source_root / "gguf-py",
        ]
        missing = [
            str(path.relative_to(source_root))
            for path in required_paths
            if not path.exists()
        ]
        if missing:
            raise PrebuiltFallback(
                "upstream source archive was missing required repo files: "
                + ", ".join(missing)
            )
        copy_directory_contents(source_root, install_dir)
    except PrebuiltFallback:
        raise
    except Exception as exc:
        raise PrebuiltFallback(
            f"failed to hydrate upstream llama.cpp source tree for {upstream_tag}: {exc}"
        ) from exc
    finally:
        remove_tree(extract_dir)
def normalize_install_layout(install_dir: Path, host: HostInfo) -> tuple[Path, Path]:
    """Create the platform build layout and return (server, quantize) paths.

    Windows binaries live under build/bin/Release; POSIX entrypoints sit at
    the install root with build/bin alongside.
    """
    build_bin = install_dir / "build" / "bin"
    if host.is_windows:
        exec_dir = build_bin / "Release"
        exec_dir.mkdir(parents=True, exist_ok=True)
        return exec_dir / "llama-server.exe", exec_dir / "llama-quantize.exe"

    install_dir.mkdir(parents=True, exist_ok=True)
    build_bin.mkdir(parents=True, exist_ok=True)
    return install_dir / "llama-server", install_dir / "llama-quantize"


def discover_installed_executable(install_dir: Path, executable_name: str) -> Path:
    """Locate *executable_name* at the install root or anywhere beneath it."""
    direct = install_dir / executable_name
    if direct.exists() and direct.is_file():
        return direct
    candidate = next(
        (path for path in install_dir.rglob(executable_name) if path.is_file()), None
    )
    if candidate is None:
        raise PrebuiltFallback(f"{executable_name} was not installed")
    return candidate


def write_exec_wrapper(entrypoint: Path, target: Path) -> None:
    """Write an executable POSIX shell shim that execs *target* relative to itself."""
    relative_target = os.path.relpath(target, entrypoint.parent)
    script = "\n".join(
        [
            "#!/bin/sh",
            f'exec "$(dirname "$0")/{relative_target}" "$@"',
            "",
        ]
    )
    atomic_write_bytes(entrypoint, script.encode("utf-8"))
    os.chmod(entrypoint, 0o755)


def create_exec_entrypoint(entrypoint: Path, target: Path) -> None:
    """Expose *target* at *entrypoint* via relative symlink, else a shell shim."""
    if entrypoint == target:
        return
    if entrypoint.exists() or entrypoint.is_symlink():
        entrypoint.unlink()
    try:
        entrypoint.symlink_to(os.path.relpath(target, entrypoint.parent))
    except Exception:
        # Symlinks can fail (e.g. restricted Windows); fall back to a wrapper.
        write_exec_wrapper(entrypoint, target)


def overlay_directory_for_choice(
    install_dir: Path, choice: AssetChoice, host: HostInfo
) -> Path:
    """Return (and create) the directory where this asset's binaries overlay."""
    if host.is_windows or choice.install_kind.startswith("windows"):
        path = install_dir / "build" / "bin" / "Release"
    else:
        path = install_dir / "build" / "bin"
    path.mkdir(parents=True, exist_ok=True)
    return path


def runtime_patterns_for_choice(choice: AssetChoice) -> list[str]:
    """Glob patterns for the runtime files to copy for this install kind."""
    if choice.install_kind in {"linux-cpu", "linux-cuda"}:
        return [
            "llama-server",
            "llama-quantize",
            "libllama.so*",
            "libggml.so*",
            "libggml-base.so*",
            "libmtmd.so*",
            "libggml-cpu-*.so*",
            "libggml-cuda.so*",
            "libggml-rpc.so*",
        ]
    if choice.install_kind in {"macos-arm64", "macos-x64"}:
        return ["llama-server", "llama-quantize", "lib*.dylib"]
    if choice.install_kind in {"windows-cpu", "windows-cuda"}:
        return ["*.exe", "*.dll"]
    raise PrebuiltFallback(
        f"unsupported install kind for runtime overlay: {choice.install_kind}"
    )


def metadata_patterns_for_choice(choice: AssetChoice) -> list[str]:
    """License/build-info filenames to copy; Windows ships LICENSE.txt."""
    patterns = ["BUILD_INFO.txt", "THIRD_PARTY_LICENSES.txt"]
    if choice.install_kind.startswith("windows"):
        patterns.append("LICENSE.txt")
    else:
        patterns.append("LICENSE")
    return patterns


@contextmanager
def install_lock(lock_path: Path) -> Iterator[None]:
    """Serialize concurrent installs via filelock, or an O_EXCL fallback.

    The fallback writes our PID into the lock file so locks left by crashed
    processes can be detected and reclaimed.

    Bug fix: the "lock file exists but PID not yet written" wait branch now
    honors the overall deadline.  Previously it looped with a bare
    ``sleep(0.1); continue``, so a holder that crashed between creating the
    lock file and writing its PID left an empty file that made every waiter
    spin forever instead of timing out.
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    if FileLock is None:
        # Fallback: exclusive file creation as a simple lock.
        # Write our PID so stale locks from crashed processes can be detected.
        fd: int | None = None
        deadline = time.monotonic() + INSTALL_LOCK_TIMEOUT_SECONDS
        while True:
            try:
                fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_RDWR)
                os.write(fd, f"{os.getpid()}\n".encode())
                os.fsync(fd)
                break
            except FileExistsError:
                # Check if the holder process is still alive
                stale = False
                try:
                    raw = lock_path.read_text().strip()
                except FileNotFoundError:
                    # Lock vanished between our open attempt and read -- retry
                    continue
                if not raw:
                    # File exists but PID not yet written -- another process
                    # just created it. Wait briefly for the write to land,
                    # but never past the overall deadline (the creator may
                    # have crashed before writing its PID).
                    if time.monotonic() >= deadline:
                        raise RuntimeError(
                            f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}"
                        )
                    time.sleep(0.1)
                    continue
                try:
                    holder_pid = int(raw)
                    os.kill(holder_pid, 0)  # signal 0 = existence check
                except ValueError:
                    # PID unreadable (corrupted file)
                    stale = True
                except ProcessLookupError:
                    # Process is dead
                    stale = True
                except PermissionError:
                    # Process is alive but owned by another user -- not stale
                    pass
                if stale:
                    lock_path.unlink(missing_ok=True)
                    continue
                if time.monotonic() >= deadline:
                    raise RuntimeError(
                        f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}"
                    )
                time.sleep(0.5)
        try:
            yield
        finally:
            if fd is not None:
                os.close(fd)
            lock_path.unlink(missing_ok=True)
        return

    try:
        with FileLock(lock_path, timeout=INSTALL_LOCK_TIMEOUT_SECONDS):
            yield
    except FileLockTimeout as exc:
        raise RuntimeError(
            f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}"
        ) from exc
+ time.sleep(0.1) + continue + try: + holder_pid = int(raw) + os.kill(holder_pid, 0) # signal 0 = existence check + except ValueError: + # PID unreadable (corrupted file) + stale = True + except ProcessLookupError: + # Process is dead + stale = True + except PermissionError: + # Process is alive but owned by another user -- not stale + pass + if stale: + lock_path.unlink(missing_ok = True) + continue + if time.monotonic() >= deadline: + raise RuntimeError( + f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}" + ) + time.sleep(0.5) + try: + yield + finally: + if fd is not None: + os.close(fd) + lock_path.unlink(missing_ok = True) + return + + try: + with FileLock(lock_path, timeout = INSTALL_LOCK_TIMEOUT_SECONDS): + yield + except FileLockTimeout as exc: + raise RuntimeError( + f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}" + ) from exc + + +def install_lock_path(install_dir: Path) -> Path: + return install_dir.parent / f".{install_dir.name}.install.lock" + + +def install_staging_root(install_dir: Path) -> Path: + root = install_dir.parent / INSTALL_STAGING_ROOT_NAME + root.mkdir(parents = True, exist_ok = True) + return root + + +def prune_install_staging_root(install_dir: Path) -> None: + root = install_dir.parent / INSTALL_STAGING_ROOT_NAME + try: + root.rmdir() + except OSError: + pass + + +def create_install_staging_dir(install_dir: Path) -> Path: + staging_dir = Path( + tempfile.mkdtemp( + prefix = f"{install_dir.name}.staging-", dir = install_staging_root(install_dir) + ) + ) + log(f"created install staging dir {staging_dir}") + return staging_dir + + +def unique_install_side_path(install_dir: Path, label: str) -> Path: + root = install_staging_root(install_dir) + timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime()) + prefix = f"{install_dir.name}.{label}-{timestamp}-{os.getpid()}" + candidate = root / prefix + counter = 0 + while candidate.exists(): + 
counter += 1
        candidate = root / f"{prefix}-{counter}"
    return candidate


def remove_tree(path: Path | None) -> None:
    """Silently delete a directory tree if it exists (errors ignored)."""
    if path and path.exists():
        shutil.rmtree(path, ignore_errors = True)


def remove_tree_logged(path: Path | None, label: str) -> None:
    """Delete a tree with logging; unlike remove_tree, re-raises on failure."""
    if not path:
        return
    if not path.exists():
        log(f"{label} already absent at {path}")
        return
    log(f"removing {label} at {path}")
    try:
        shutil.rmtree(path)
    except Exception as exc:
        log(f"failed to remove {label} at {path}: {exc}")
        raise


def cleanup_install_side_paths(
    install_dir: Path,
    *,
    staging_dir: Path | None = None,
    rollback_dir: Path | None = None,
    failed_dir: Path | None = None,
    active_dir: Path | None = None,
) -> None:
    """Remove every leftover side tree; collect failures and raise them together at the end."""
    cleanup_failures: list[str] = []
    for label, path in (
        ("failed install path", failed_dir),
        ("rollback path", rollback_dir),
        ("active install path", active_dir),
        ("staging dir", staging_dir),
    ):
        if not path:
            continue
        try:
            remove_tree_logged(path, label)
        except Exception as exc:
            # Keep going so one failed removal does not strand the others.
            cleanup_failures.append(f"{label} ({path}): {exc}")
    prune_install_staging_root(install_dir)
    if cleanup_failures:
        raise RuntimeError("cleanup failed for " + "; ".join(cleanup_failures))


def confirm_install_tree(install_dir: Path, host: HostInfo) -> None:
    """Verify the activated tree contains the expected executables and converter files."""
    if host.is_windows:
        expected = [
            install_dir / "build" / "bin" / "Release" / "llama-server.exe",
            install_dir / "build" / "bin" / "Release" / "llama-quantize.exe",
            install_dir / "convert_hf_to_gguf.py",
            install_dir / "gguf-py",
        ]
    else:
        expected = [
            install_dir / "llama-server",
            install_dir / "llama-quantize",
            install_dir / "build" / "bin" / "llama-server",
            install_dir / "build" / "bin" / "llama-quantize",
            install_dir / "convert_hf_to_gguf.py",
            install_dir / "gguf-py",
        ]

    expected.append(install_dir / "UNSLOTH_PREBUILT_INFO.json")
    missing = [str(path) for path in expected if not path.exists()]
    if missing:
        raise RuntimeError(
            "activated install was 
missing expected files: " + ", ".join(missing)
        )


def activate_install_tree(staging_dir: Path, install_dir: Path, host: HostInfo) -> None:
    """Atomically swap the validated staging tree into place.

    Existing installs are moved aside to a rollback path first; on activation
    failure the rollback is restored, and if even that fails, all side trees
    are cleaned so the caller can fall back to a source build.
    """
    rollback_dir: Path | None = None
    failed_dir: Path | None = None
    try:
        if install_dir.exists():
            rollback_dir = unique_install_side_path(install_dir, "rollback")
            log(f"moving existing install to rollback path {rollback_dir}")
            os.replace(install_dir, rollback_dir)
            log(f"moved existing install to rollback path {rollback_dir.name}")

        log(f"activating staged install {staging_dir} -> {install_dir}")
        os.replace(staging_dir, install_dir)
        log(f"activated staged install at {install_dir}")
        log(f"confirming activated install tree at {install_dir}")
        confirm_install_tree(install_dir, host)
        log(f"activated install tree confirmed at {install_dir}")
    except Exception as exc:
        log(f"activation failed for staged install: {exc}")
        try:
            if install_dir.exists():
                failed_dir = unique_install_side_path(install_dir, "failed")
                log(f"moving failed active install to {failed_dir}")
                os.replace(install_dir, failed_dir)
            elif staging_dir.exists():
                # Staging never made it into place; retain it as the failed tree
                # and clear staging_dir so the finally block does not delete it twice.
                failed_dir = staging_dir
                staging_dir = None
                log(f"retaining failed staging tree at {failed_dir}")

            if rollback_dir and rollback_dir.exists():
                log(f"restoring rollback path {rollback_dir} -> {install_dir}")
                os.replace(rollback_dir, install_dir)
                log(f"restored previous install from rollback path {rollback_dir.name}")
            raise PrebuiltFallback(
                "staged prebuilt validation passed but activation failed; restored previous install "
                f"({textwrap.shorten(str(exc), width = 200, placeholder = '...')})"
            ) from exc
        except PrebuiltFallback:
            raise
        except Exception as rollback_exc:
            log(f"rollback after failed activation also failed: {rollback_exc}")

            log(
                "rollback restoration failed; cleaning staging, install, and rollback paths before source build fallback"
            )
            cleanup_error: Exception | None = None
            try:
                cleanup_install_side_paths(
                    install_dir,
                    staging_dir = staging_dir,
                    rollback_dir = rollback_dir,
                    failed_dir = failed_dir,
                    active_dir = install_dir,
                )
            except Exception as cleanup_exc:
                cleanup_error = cleanup_exc
                log(f"cleanup after rollback failure also failed: {cleanup_exc}")
            details = textwrap.shorten(str(exc), width = 200, placeholder = "...")
            if cleanup_error is not None:
                raise PrebuiltFallback(
                    "staged prebuilt validation passed but activation and rollback failed; "
                    f"cleanup also reported errors ({details}; cleanup={cleanup_error})"
                ) from exc
            raise PrebuiltFallback(
                "staged prebuilt validation passed but activation and rollback failed; "
                f"cleaned install state for fresh source build ({details})"
            ) from exc
    else:
        if rollback_dir:
            remove_tree_logged(rollback_dir, "rollback path")
    finally:
        remove_tree(failed_dir)
        remove_tree(staging_dir)
        prune_install_staging_root(install_dir)


def install_from_archives(
    choice: AssetChoice, host: HostInfo, install_dir: Path, work_dir: Path
) -> tuple[Path, Path]:
    """Download, checksum-verify, extract, and overlay the chosen prebuilt archive.

    Returns the (llama-server, llama-quantize) paths inside *install_dir*.
    """
    main_archive = work_dir / choice.name
    log(f"downloading {choice.name} from {choice.source_label} release")
    if not choice.expected_sha256:
        raise PrebuiltFallback(
            f"approved checksum was missing for selected asset {choice.name}"
        )
    download_file_verified(
        choice.url,
        main_archive,
        expected_sha256 = choice.expected_sha256,
        label = f"prebuilt archive {choice.name}",
    )

    install_dir.mkdir(parents = True, exist_ok = True)
    extract_dir = Path(tempfile.mkdtemp(prefix = "extract-", dir = work_dir))

    try:
        extract_archive(main_archive, extract_dir)
        source_dir = extract_dir
        overlay_dir = overlay_directory_for_choice(install_dir, choice, host)
        copy_globs(
            source_dir, overlay_dir, runtime_patterns_for_choice(choice), required = True
        )
        copy_globs(
            source_dir,
            install_dir,
            metadata_patterns_for_choice(choice),
            required = False,
        )
    finally:
        remove_tree(extract_dir)

    if host.is_windows:
        exec_dir = install_dir / 
"build" / "bin" / "Release" + server_src = next(exec_dir.glob("llama-server.exe"), None) + quantize_src = next(exec_dir.glob("llama-quantize.exe"), None) + if server_src is None or quantize_src is None: + raise PrebuiltFallback("windows executables were not installed correctly") + return server_src, quantize_src + + build_bin = install_dir / "build" / "bin" + source_server = build_bin / "llama-server" + source_quantize = build_bin / "llama-quantize" + if not source_server.exists() or not source_quantize.exists(): + raise PrebuiltFallback( + "unix executables were not installed correctly into build/bin" + ) + os.chmod(source_server, 0o755) + os.chmod(source_quantize, 0o755) + + root_server = install_dir / "llama-server" + root_quantize = install_dir / "llama-quantize" + if source_server != root_server: + create_exec_entrypoint(root_server, source_server) + if source_quantize != root_quantize: + create_exec_entrypoint(root_quantize, source_quantize) + build_server = build_bin / "llama-server" + build_quantize = build_bin / "llama-quantize" + if source_server != build_server: + create_exec_entrypoint(build_server, source_server) + if source_quantize != build_quantize: + create_exec_entrypoint(build_quantize, source_quantize) + + return source_server, source_quantize + + +def ensure_repo_shape(install_dir: Path) -> None: + required = [ + install_dir / "CMakeLists.txt", + install_dir / "convert_hf_to_gguf.py", + install_dir / "gguf-py", + ] + missing = [ + str(path.relative_to(install_dir)) for path in required if not path.exists() + ] + if missing: + raise PrebuiltFallback( + "hydrated llama.cpp source tree was missing: " + ", ".join(missing) + ) + + +def validation_model_cache_path(install_dir: Path) -> Path: + cache_dir = install_dir.parent / VALIDATION_MODEL_CACHE_DIRNAME + cache_dir.mkdir(parents = True, exist_ok = True) + return cache_dir / VALIDATION_MODEL_CACHE_FILENAME + + +def validated_validation_model_bytes(data: bytes) -> bytes: + if not data: + raise 
RuntimeError(f"downloaded empty validation model from {TEST_MODEL_URL}") + digest = hashlib.sha256(data).hexdigest() + if digest != TEST_MODEL_SHA256: + raise RuntimeError( + "validation model checksum mismatch: " + f"expected={TEST_MODEL_SHA256} actual={digest}" + ) + return data + + +def download_validation_model(path: Path, cache_path: Path | None = None) -> None: + try: + data: bytes | None = None + if cache_path and cache_path.exists(): + try: + data = validated_validation_model_bytes(cache_path.read_bytes()) + log(f"using cached tiny GGUF validation model from {cache_path}") + except Exception as exc: + log( + f"cached tiny GGUF validation model was invalid; refreshing cache ({exc})" + ) + data = None + if data is None: + log("downloading tiny GGUF validation model") + data = validated_validation_model_bytes( + download_bytes( + TEST_MODEL_URL, + progress_label = f"Downloading {download_label_from_url(TEST_MODEL_URL)}", + ) + ) + if cache_path is not None: + atomic_write_bytes(cache_path, data) + atomic_write_bytes(path, data) + except Exception as exc: + raise PrebuiltFallback(f"validation model unavailable: {exc}") from exc + + +def free_local_port() -> int: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind(("127.0.0.1", 0)) + _, port = sock.getsockname() + sock.close() + return int(port) + + +def read_log_excerpt(log_path: Path, *, max_lines: int = 60) -> str: + try: + content = log_path.read_text(encoding = "utf-8", errors = "replace") + except FileNotFoundError: + return "" + return "\n".join(content.splitlines()[-max_lines:]) + + +def is_retryable_server_bind_error( + exc: Exception | None, + output: str = "", + *, + exited_quickly: bool = False, +) -> bool: + haystack = output.lower() + bind_markers = ( + "address already in use", + "only one usage of each socket address", + "failed to bind", + "bind failed", + "failed to listen", + "errno 98", + "errno 10048", + ) + if any(marker in haystack for marker in bind_markers): + return 
True

    if isinstance(exc, urllib.error.URLError):
        reason = exc.reason
        if exited_quickly and isinstance(reason, ConnectionRefusedError):
            return True
        if isinstance(reason, OSError) and reason.errno in {
            98,
            99,
            111,
            10048,
            10049,
            10061,
        }:
            return exited_quickly
    if exited_quickly and isinstance(exc, ConnectionRefusedError):
        return True
    if isinstance(exc, OSError) and exc.errno in {98, 99, 111, 10048, 10049, 10061}:
        return exited_quickly
    return False


def dedupe_existing_dirs(paths: Iterable[str | Path]) -> list[str]:
    """Resolve paths, keep only existing directories, and dedupe preserving order."""
    unique: list[str] = []
    seen: set[str] = set()
    for raw in paths:
        if not raw:
            continue
        path = Path(raw).expanduser()
        if not path.is_dir():
            continue
        resolved = str(path.resolve())
        if resolved in seen:
            continue
        seen.add(resolved)
        unique.append(resolved)
    return unique


def linux_missing_libraries(
    binary_path: Path, *, env: dict[str, str] | None = None
) -> list[str]:
    """Return shared library names ldd reports as '=> not found' for *binary_path*."""
    try:
        result = run_capture(["ldd", str(binary_path)], timeout = 20, env = env)
    except Exception:
        # ldd unavailable or failed; treat as "nothing detectably missing".
        return []

    missing: list[str] = []
    for line in (result.stdout + result.stderr).splitlines():
        line = line.strip()
        if "=> not found" not in line:
            continue
        library = line.split("=>", 1)[0].strip()
        if library and library not in missing:
            missing.append(library)
    return missing


def python_runtime_dirs() -> list[str]:
    """Collect nvidia/torch library dirs shipped inside Python site-packages."""
    candidates: list[Path] = []
    search_roots = [Path(entry) for entry in sys.path if entry]
    try:
        search_roots.extend(Path(path) for path in site.getsitepackages())
    except Exception:
        pass
    try:
        user_site = site.getusersitepackages()
        if user_site:
            search_roots.append(Path(user_site))
    except Exception:
        pass

    for root in search_roots:
        if not root.is_dir():
            continue
        candidates.extend(root.glob("nvidia/*/lib"))
        candidates.extend(root.glob("nvidia/*/bin"))
        candidates.extend(root.glob("torch/lib"))
    return dedupe_existing_dirs(candidates)


def 
ldconfig_runtime_dirs(required_libraries: Iterable[str]) -> list[str]:
    """Map required library names to their directories via 'ldconfig -p' output."""
    try:
        result = run_capture(["ldconfig", "-p"], timeout = 20)
    except Exception:
        return []

    required = set(required_libraries)
    candidates: list[str] = []
    for line in result.stdout.splitlines():
        if "=>" not in line:
            continue
        library, _, location = line.partition("=>")
        # ldconfig lines look like "libfoo.so.1 (libc6,x86-64) => /lib/libfoo.so.1".
        library = library.strip().split()[0]
        if required and library not in required:
            continue
        path = Path(location.strip()).parent
        candidates.append(str(path))
    return dedupe_existing_dirs(candidates)


def linux_runtime_dirs(binary_path: Path) -> list[str]:
    """Runtime dirs needed to satisfy libraries ldd reports missing, if any."""
    missing = linux_missing_libraries(binary_path)
    if not missing:
        return []
    return linux_runtime_dirs_for_required_libraries(missing)


def preflight_linux_installed_binaries(
    binaries: Iterable[Path],
    install_dir: Path,
    host: HostInfo,
) -> None:
    """On Linux, raise PrebuiltFallback if any installed binary still has unresolved libraries."""
    if not host.is_linux:
        return

    issues: list[str] = []
    for binary_path in binaries:
        env = binary_env(binary_path, install_dir, host)
        missing = linux_missing_libraries(binary_path, env = env)
        if not missing:
            continue
        runtime_dirs = [
            part for part in env.get("LD_LIBRARY_PATH", "").split(os.pathsep) if part
        ]
        issues.append(
            f"{binary_path.name}: missing={','.join(missing)} "
            f"ld_library_path={','.join(runtime_dirs) if runtime_dirs else 'none'}"
        )

    if issues:
        raise PrebuiltFallback(
            "linux extracted binary preflight failed:\n" + "\n".join(issues)
        )


def glob_paths(*patterns: str) -> list[str]:
    """Expand absolute glob patterns against /; pass literal paths through unchanged."""
    matches: list[str] = []
    for pattern in patterns:
        if any(char in pattern for char in "*?[]"):
            matches.extend(str(path) for path in Path("/").glob(pattern.lstrip("/")))
        else:
            matches.append(pattern)
    return matches


def windows_runtime_dirs() -> list[str]:
    """Candidate directories that may hold CUDA/runtime DLLs on Windows."""
    candidates: list[str | Path] = []

    env_dirs = os.environ.get("CUDA_RUNTIME_DLL_DIR", "")
    if env_dirs:
        candidates.extend(part for part in env_dirs.split(os.pathsep) if part)

    path_dirs = 
os.environ.get("PATH", "") + if path_dirs: + candidates.extend(part for part in path_dirs.split(os.pathsep) if part) + + cuda_roots: list[Path] = [] + for name in ("CUDA_PATH", "CUDA_HOME", "CUDA_ROOT"): + value = os.environ.get(name) + if value: + cuda_roots.append(Path(value)) + + for root in cuda_roots: + candidates.extend([root / "bin", root / "lib" / "x64"]) + + program_files = os.environ.get("ProgramFiles", r"C:\Program Files") + toolkit_base = Path(program_files) / "NVIDIA GPU Computing Toolkit" / "CUDA" + if toolkit_base.is_dir(): + candidates.extend(toolkit_base.glob("v*/bin")) + candidates.extend(toolkit_base.glob("v*/lib/x64")) + + candidates.extend(Path(path) for path in python_runtime_dirs()) + return dedupe_existing_dirs(candidates) + + +def windows_runtime_dirs_for_patterns( + required_patterns: Iterable[str], + candidate_dirs: Iterable[str] | None = None, +) -> list[str]: + directories = ( + list(candidate_dirs) if candidate_dirs is not None else windows_runtime_dirs() + ) + matching_dirs: list[str] = [] + for pattern in required_patterns: + matched_dirs = [ + directory for directory in directories if any(Path(directory).glob(pattern)) + ] + if not matched_dirs: + return [] + for directory in matched_dirs: + if directory not in matching_dirs: + matching_dirs.append(directory) + return matching_dirs + + +def windows_runtime_dirs_for_runtime_line(runtime_line: str | None) -> list[str]: + if not runtime_line: + return [] + patterns = windows_runtime_line_info().get(runtime_line) + if not patterns: + return [] + return windows_runtime_dirs_for_patterns(patterns) + + +def binary_env( + binary_path: Path, + install_dir: Path, + host: HostInfo, + *, + runtime_line: str | None = None, +) -> dict[str, str]: + env = os.environ.copy() + if host.is_windows: + path_dirs = [ + str(binary_path.parent), + *windows_runtime_dirs_for_runtime_line(runtime_line), + ] + existing = [part for part in env.get("PATH", "").split(os.pathsep) if part] + env["PATH"] = 
os.pathsep.join(dedupe_existing_dirs([*path_dirs, *existing]))
    elif host.is_linux:
        ld_dirs = [
            str(binary_path.parent),
            str(install_dir),
            *linux_runtime_dirs(binary_path),
        ]
        existing = [
            part for part in env.get("LD_LIBRARY_PATH", "").split(os.pathsep) if part
        ]
        env["LD_LIBRARY_PATH"] = os.pathsep.join(
            dedupe_existing_dirs([*ld_dirs, *existing])
        )
    elif host.is_macos:
        dyld_dirs = [str(binary_path.parent), str(install_dir)]
        existing = [
            part for part in env.get("DYLD_LIBRARY_PATH", "").split(os.pathsep) if part
        ]
        env["DYLD_LIBRARY_PATH"] = os.pathsep.join(
            dedupe_existing_dirs([*dyld_dirs, *existing])
        )
    return env


def validate_quantize(
    quantize_path: Path,
    probe_path: Path,
    quantized_path: Path,
    install_dir: Path,
    host: HostInfo,
    *,
    runtime_line: str | None = None,
) -> None:
    """Run llama-quantize on the probe model; raise PrebuiltFallback on any failure."""
    command = [str(quantize_path), str(probe_path), str(quantized_path), "Q6_K", "2"]
    result = subprocess.run(
        command,
        capture_output = True,
        text = True,
        timeout = 120,
        env = binary_env(quantize_path, install_dir, host, runtime_line = runtime_line),
    )
    # Also treat a missing/empty output file as failure even on exit code 0.
    if (
        result.returncode != 0
        or not quantized_path.exists()
        or quantized_path.stat().st_size == 0
    ):
        raise PrebuiltFallback(
            "llama-quantize validation failed:\n"
            + result.stdout
            + ("\n" + result.stderr if result.stderr else "")
        )


def validate_server(
    server_path: Path,
    probe_path: Path,
    host: HostInfo,
    install_dir: Path,
    *,
    runtime_line: str | None = None,
) -> None:
    """Boot llama-server on the probe model and require one successful /completion call.

    Retries with a fresh port when startup failure looks like a port-bind race.
    """
    last_failure: PrebuiltFallback | None = None
    for port_attempt in range(1, SERVER_PORT_BIND_ATTEMPTS + 1):
        port = free_local_port()
        command = [
            str(server_path),
            "-m",
            str(probe_path),
            "--host",
            "127.0.0.1",
            "--port",
            str(port),
            "-c",
            "32",
            "--parallel",
            "1",
            "--threads",
            "1",
            "--ubatch-size",
            "32",
            "--batch-size",
            "32",
        ]
        if host.has_usable_nvidia or (host.is_macos and host.is_arm64):
            command.extend(["--n-gpu-layers", "1"])

        log_fd, log_name = tempfile.mkstemp(prefix = "llama-server-", suffix = ".log")
        os.close(log_fd)
        log_path = Path(log_name)
        process: subprocess.Popen[str] | None = None
        try:
            with log_path.open("w", encoding = "utf-8", errors = "replace") as log_handle:
                process = subprocess.Popen(
                    command,
                    stdout = log_handle,
                    stderr = subprocess.STDOUT,
                    text = True,
                    env = binary_env(
                        server_path, install_dir, host, runtime_line = runtime_line
                    ),
                )
                deadline = time.time() + 20
                startup_started = time.time()
                response_body = ""
                last_error: Exception | None = None
                while time.time() < deadline:
                    # Server died: decide between a port-race retry and a hard failure.
                    if process.poll() is not None:
                        process.wait(timeout = 5)
                        log_handle.flush()
                        output = read_log_excerpt(log_path)
                        exited_quickly = (
                            time.time() - startup_started
                        ) <= SERVER_BIND_RETRY_WINDOW_SECONDS
                        failure = PrebuiltFallback(
                            "llama-server exited during startup:\n" + output
                        )
                        if (
                            port_attempt < SERVER_PORT_BIND_ATTEMPTS
                            and is_retryable_server_bind_error(
                                last_error,
                                output,
                                exited_quickly = exited_quickly,
                            )
                        ):
                            log(
                                f"llama-server startup hit a port race on {port}; retrying with a fresh port "
                                f"({port_attempt}/{SERVER_PORT_BIND_ATTEMPTS})"
                            )
                            last_failure = failure
                            break
                        raise failure

                    payload = json.dumps({"prompt": "a", "n_predict": 1}).encode(
                        "utf-8"
                    )
                    request = urllib.request.Request(
                        f"http://127.0.0.1:{port}/completion",
                        data = payload,
                        headers = {"Content-Type": "application/json"},
                    )
                    try:
                        with urllib.request.urlopen(request, timeout = 5) as response:
                            status_code = response.status
                            response_body = response.read().decode("utf-8", "replace")
                            if status_code == 200:
                                # One successful completion request is the pass criterion.
                                return
                            last_error = RuntimeError(
                                f"unexpected HTTP status {status_code}"
                            )
                    except urllib.error.HTTPError as exc:
                        response_body = exc.read().decode("utf-8", "replace")
                        last_error = exc
                    except Exception as exc:
                        last_error = exc
                    time.sleep(0.5)
                else:
                    # while/else: loop ran out of time without a success or break.
                    log_handle.flush()
                    output = read_log_excerpt(log_path)
                    raise PrebuiltFallback(
                        "llama-server completion validation timed out"
                        + (f" ({last_error})" if last_error else "")
                        + ":\n"
                        + output
                        + ("\n" + response_body if response_body else "")
                    )
        finally:
            if process is not None and process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout = 5)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait(timeout = 5)
            try:
                log_path.unlink(missing_ok = True)
            except Exception:
                pass
    if last_failure is not None:
        raise last_failure
    raise PrebuiltFallback("llama-server validation failed unexpectedly")


def collect_system_report(
    host: HostInfo, choice: AssetChoice | None, install_dir: Path
) -> str:
    """Assemble a multi-line diagnostic report printed when falling back to source build."""
    lines = [
        f"platform={host.system} machine={host.machine}",
        f"driver_cuda_version={host.driver_cuda_version}",
        f"compute_caps={','.join(host.compute_caps) if host.compute_caps else 'unknown'}",
        f"cuda_visible_devices={host.visible_cuda_devices if host.visible_cuda_devices is not None else 'unset'}",
        f"has_physical_nvidia={host.has_physical_nvidia}",
        f"has_usable_nvidia={host.has_usable_nvidia}",
        f"chosen_asset={(choice.name if choice else 'none')}",
        f"asset_source={(choice.source_label if choice else 'none')}",
    ]
    if host.is_linux and host.has_physical_nvidia:
        runtime_lines, runtime_dirs = detected_linux_runtime_lines()
        lines.append(
            "linux_runtime_lines="
            + (",".join(runtime_lines) if runtime_lines else "none")
        )
        for runtime_line in ("cuda13", "cuda12"):
            lines.append(
                f"linux_runtime_dirs_{runtime_line}="
                + (
                    ",".join(runtime_dirs.get(runtime_line, []))
                    if runtime_dirs.get(runtime_line)
                    else "none"
                )
            )
    if choice and choice.selection_log:
        lines.append("selection_log:")
        lines.extend(choice.selection_log)
    if host.nvidia_smi:
        try:
            smi = run_capture([host.nvidia_smi], timeout = 20)
            excerpt = "\n".join((smi.stdout + smi.stderr).splitlines()[:20])
            lines.append("nvidia-smi:")
lines.append(excerpt)
        except Exception as exc:
            lines.append(f"nvidia-smi error: {exc}")

    if host.is_linux:
        server_binary = install_dir / "llama-server"
        if server_binary.exists():
            server_env = binary_env(server_binary, install_dir, host)
            lines.append(
                "linux_missing_libs="
                + (
                    ",".join(linux_missing_libraries(server_binary, env = server_env))
                    or "none"
                )
            )
            lines.append(
                "linux_runtime_dirs="
                + (
                    ",".join(
                        [
                            part
                            for part in server_env.get("LD_LIBRARY_PATH", "").split(
                                os.pathsep
                            )
                            if part
                        ]
                    )
                    or "none"
                )
            )
            try:
                ldd = run_capture(
                    ["ldd", str(server_binary)], timeout = 20, env = server_env
                )
                lines.append("ldd llama-server:")
                lines.append((ldd.stdout + ldd.stderr).strip())
            except Exception as exc:
                lines.append(f"ldd error: {exc}")
    elif host.is_windows:
        lines.append(
            "windows_runtime_dirs=" + (",".join(windows_runtime_dirs()) or "none")
        )
        runtime_lines, runtime_dirs = detected_windows_runtime_lines()
        lines.append(
            "windows_runtime_lines="
            + (",".join(runtime_lines) if runtime_lines else "none")
        )
        for runtime_line in ("cuda13", "cuda12"):
            lines.append(
                f"windows_runtime_dirs_{runtime_line}="
                + (
                    ",".join(runtime_dirs.get(runtime_line, []))
                    if runtime_dirs.get(runtime_line)
                    else "none"
                )
            )
    elif host.is_macos:
        server_binary = install_dir / "llama-server"
        if server_binary.exists():
            try:
                otool = run_capture(["otool", "-L", str(server_binary)], timeout = 20)
                lines.append("otool -L llama-server:")
                lines.append((otool.stdout + otool.stderr).strip())
            except Exception as exc:
                lines.append(f"otool error: {exc}")

    return "\n".join(lines)


def apply_approved_hashes(
    attempts: Iterable[AssetChoice],
    checksums: ApprovedReleaseChecksums,
) -> list[AssetChoice]:
    """Attach approved SHA-256 digests to attempts; drop (and record) unapproved assets."""
    approved_attempts: list[AssetChoice] = []
    missing_assets: list[str] = []
    for attempt in attempts:
        approved = checksums.artifacts.get(attempt.name)
        if approved is None:
missing_assets.append(attempt.name)
            continue
        attempt.expected_sha256 = approved.sha256
        approved_attempts.append(attempt)
    if not approved_attempts:
        missing_text = ", ".join(missing_assets) if missing_assets else "none"
        raise PrebuiltFallback(
            "approved checksum asset did not contain the selected prebuilt archive(s): "
            f"{missing_text}"
        )
    return approved_attempts


def require_approved_source_hash(
    checksums: ApprovedReleaseChecksums, llama_tag: str
) -> ApprovedArtifactHash:
    """Return the approved hash for the source archive, or raise PrebuiltFallback."""
    source_asset_name = source_archive_logical_name(llama_tag)
    approved_source = checksums.artifacts.get(source_asset_name)
    if approved_source is None:
        raise PrebuiltFallback(
            f"approved checksum asset did not contain source archive {source_asset_name}"
        )
    return approved_source


def resolve_install_attempts(
    llama_tag: str,
    host: HostInfo,
    published_repo: str,
    published_release_tag: str,
) -> tuple[str, str, list[AssetChoice], ApprovedReleaseChecksums]:
    """Resolve the ordered list of prebuilt attempts (with approved hashes) for this host.

    Returns (requested_tag, resolved_tag, attempts, checksums).
    """
    requested_tag = llama_tag
    resolved_tag = resolve_requested_install_tag(llama_tag, published_release_tag)
    checksums = load_approved_release_checksums(published_repo, resolved_tag)
    # Fail early if the source archive itself has no approved hash.
    require_approved_source_hash(checksums, resolved_tag)

    if host.is_linux and host.is_x86_64 and host.has_usable_nvidia:
        linux_cuda_selection = resolve_linux_cuda_choice(
            host, resolved_tag, published_repo, published_release_tag
        )
        attempts = apply_approved_hashes(linux_cuda_selection.attempts, checksums)
        if not attempts:
            raise PrebuiltFallback("no compatible Linux CUDA asset was found")
        log_lines(linux_cuda_selection.selection_log)
        return requested_tag, resolved_tag, attempts, checksums

    if host.is_windows and host.is_x86_64 and host.has_usable_nvidia:
        upstream_assets = github_release_assets(UPSTREAM_REPO, resolved_tag)
        attempts = apply_approved_hashes(
            resolve_windows_cuda_choices(host, resolved_tag, upstream_assets), checksums
        )
        if not attempts:
            raise PrebuiltFallback("no 
compatible Windows CUDA asset was found")
        if attempts[0].selection_log:
            log_lines(attempts[0].selection_log)
        return requested_tag, resolved_tag, attempts, checksums

    # CPU / macOS / non-CUDA path: a single asset choice.
    choice = resolve_asset_choice(
        host, resolved_tag, published_repo, published_release_tag
    )
    approved_attempts = apply_approved_hashes([choice], checksums)
    if choice.selection_log:
        log_lines(choice.selection_log)
    return requested_tag, resolved_tag, approved_attempts, checksums


def write_prebuilt_metadata(
    install_dir: Path,
    *,
    requested_tag: str,
    llama_tag: str,
    choice: AssetChoice,
    prebuilt_fallback_used: bool,
) -> None:
    """Record what was installed (tag, asset, provenance) for later inspection."""
    metadata = {
        "requested_tag": requested_tag,
        "tag": llama_tag,
        "asset": choice.name,
        "source": choice.source_label,
        "bundle_profile": choice.bundle_profile,
        "runtime_line": choice.runtime_line,
        "coverage_class": choice.coverage_class,
        "prebuilt_fallback_used": prebuilt_fallback_used,
        "installed_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    (install_dir / "UNSLOTH_PREBUILT_INFO.json").write_text(
        json.dumps(metadata, indent = 2) + "\n"
    )


def validate_prebuilt_choice(
    choice: AssetChoice,
    host: HostInfo,
    install_dir: Path,
    work_dir: Path,
    probe_path: Path,
    *,
    requested_tag: str,
    llama_tag: str,
    approved_checksums: ApprovedReleaseChecksums,
    prebuilt_fallback_used: bool,
    quantized_path: Path,
) -> tuple[Path, Path]:
    """Hydrate source, overlay the bundle, and run quantize+server validation in *install_dir*."""
    source_archive = approved_checksums.artifacts.get(
        source_archive_logical_name(llama_tag)
    )
    if source_archive is None:
        raise PrebuiltFallback(
            f"approved checksum asset did not contain source archive {source_archive_logical_name(llama_tag)}"
        )
    log(f"hydrating upstream llama.cpp source for {llama_tag} into {install_dir}")
    hydrate_source_tree(
        llama_tag,
        install_dir,
        work_dir,
        expected_sha256 = source_archive.sha256,
    )
    log(f"overlaying prebuilt bundle {choice.name} into {install_dir}")
    server_path, quantize_path = install_from_archives(
choice, host, install_dir, work_dir
    )
    preflight_linux_installed_binaries((server_path, quantize_path), install_dir, host)
    ensure_repo_shape(install_dir)
    write_prebuilt_metadata(
        install_dir,
        requested_tag = requested_tag,
        llama_tag = llama_tag,
        choice = choice,
        prebuilt_fallback_used = prebuilt_fallback_used,
    )
    validate_quantize(
        quantize_path,
        probe_path,
        quantized_path,
        install_dir,
        host,
        runtime_line = choice.runtime_line,
    )
    validate_server(
        server_path,
        probe_path,
        host,
        install_dir,
        runtime_line = choice.runtime_line,
    )
    log(f"staged prebuilt validation succeeded for {choice.name}")
    return server_path, quantize_path


def validate_prebuilt_attempts(
    attempts: Iterable[AssetChoice],
    host: HostInfo,
    install_dir: Path,
    work_dir: Path,
    probe_path: Path,
    *,
    requested_tag: str,
    llama_tag: str,
    approved_checksums: ApprovedReleaseChecksums,
) -> tuple[AssetChoice, Path, bool]:
    """Try each candidate bundle in order; return (choice, staging_dir, used_fallback) for the first that validates."""
    attempt_list = list(attempts)
    if not attempt_list:
        raise PrebuiltFallback("no prebuilt bundle attempts were available")

    tried_fallback = False
    for index, attempt in enumerate(attempt_list):
        if index > 0:
            tried_fallback = True
            log(
                "retrying CUDA prebuilt "
                f"{attempt.name} install_kind={attempt.install_kind} "
                f"runtime_line={attempt.runtime_line} coverage_class={attempt.coverage_class}"
            )

        staging_dir = create_install_staging_dir(install_dir)
        # Separate quantized output per attempt so a stale file cannot mask failure.
        quantized_path = work_dir / f"stories260K-q4-{index}.gguf"
        if quantized_path.exists():
            quantized_path.unlink()
        try:
            validate_prebuilt_choice(
                attempt,
                host,
                staging_dir,
                work_dir,
                probe_path,
                requested_tag = requested_tag,
                llama_tag = llama_tag,
                approved_checksums = approved_checksums,
                prebuilt_fallback_used = tried_fallback,
                quantized_path = quantized_path,
            )
        except Exception as exc:
            remove_tree(staging_dir)
            prune_install_staging_root(install_dir)
            if isinstance(exc, PrebuiltFallback):
                attempt_error = exc
            else:
attempt_error = PrebuiltFallback(
                    f"candidate attempt failed before activation for {attempt.name}: {exc}"
                )
            if index == len(attempt_list) - 1:
                # No more fallbacks; surface the last failure.
                raise attempt_error from exc
            log(
                "selected CUDA bundle failed before activation; trying next prebuilt fallback "
                f"({textwrap.shorten(str(attempt_error), width = 200, placeholder = '...')})"
            )
            continue

        return attempt, staging_dir, tried_fallback

    raise PrebuiltFallback("no prebuilt bundle passed validation")


def install_prebuilt(
    install_dir: Path, llama_tag: str, published_repo: str, published_release_tag: str
) -> None:
    """Top-level prebuilt install: lock, resolve, validate in staging, activate, report.

    On PrebuiltFallback, prints a diagnostic report and exits with EXIT_FALLBACK
    so the caller can fall back to a source build.
    """
    host = detect_host()
    choice: AssetChoice | None = None
    try:
        with install_lock(install_lock_path(install_dir)):
            if install_dir.exists():
                log(
                    f"existing llama.cpp install detected at {install_dir}; validating staged prebuilt update before replacement"
                )
            else:
                log(
                    f"no existing llama.cpp install detected at {install_dir}; performing fresh prebuilt install"
                )
            requested_tag, llama_tag, attempts, approved_checksums = (
                resolve_install_attempts(
                    llama_tag,
                    host,
                    published_repo,
                    published_release_tag,
                )
            )
            choice = attempts[0]
            log(
                f"selected {choice.name} ({choice.source_label}) for {host.system} {host.machine}"
            )
            with tempfile.TemporaryDirectory(prefix = "unsloth-llama-prebuilt-") as tmp:
                work_dir = Path(tmp)
                probe_path = work_dir / "stories260K.gguf"
                download_validation_model(
                    probe_path, validation_model_cache_path(install_dir)
                )
                choice, selected_staging_dir, _ = validate_prebuilt_attempts(
                    attempts,
                    host,
                    install_dir,
                    work_dir,
                    probe_path,
                    requested_tag = requested_tag,
                    llama_tag = llama_tag,
                    approved_checksums = approved_checksums,
                )
                activate_install_tree(selected_staging_dir, install_dir, host)
                try:
                    ensure_converter_scripts(install_dir, llama_tag)
                except Exception as exc:
                    # Converter scripts are a nice-to-have; the activated
                    # install stays valid without them.
                    log(
                        "converter script fetch failed after activation; install remains valid "
                        f"({textwrap.shorten(str(exc), 
width = 200, placeholder = '...')})" + ) + except PrebuiltFallback as exc: + log("prebuilt install path failed; falling back to source build") + log(f"prebuilt fallback reason: {exc}") + report = collect_system_report(host, choice, install_dir) + print(report) + raise SystemExit(EXIT_FALLBACK) from exc + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description = "Install and validate a prebuilt llama.cpp bundle for Unsloth Studio." + ) + parser.add_argument("--install-dir", help = "Target ~/.unsloth/llama.cpp directory") + parser.add_argument( + "--llama-tag", + default = DEFAULT_LLAMA_TAG, + help = f"llama.cpp release tag. Prebuilt installs are pinned to the approved tag {APPROVED_PREBUILT_LLAMA_TAG}.", + ) + parser.add_argument( + "--published-repo", + default = DEFAULT_PUBLISHED_REPO, + help = "Published bundle repository", + ) + parser.add_argument( + "--published-release-tag", + default = DEFAULT_PUBLISHED_TAG, + help = "Published GitHub release tag to pin. 
By default, scan releases until a compatible llama.cpp bundle is found.", + ) + resolve_group = parser.add_mutually_exclusive_group() + resolve_group.add_argument( + "--resolve-llama-tag", + nargs = "?", + const = "latest", + help = "Resolve a llama.cpp tag such as 'latest' to the logical upstream release tag.", + ) + resolve_group.add_argument( + "--resolve-install-tag", + nargs = "?", + const = "latest", + help = "Resolve a llama.cpp tag such as 'latest' to the concrete tag installable on the current host.", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + if args.resolve_llama_tag is not None: + print(resolve_requested_llama_tag(args.resolve_llama_tag)) + return EXIT_SUCCESS + + if args.resolve_install_tag is not None: + print( + resolve_requested_install_tag( + args.resolve_install_tag, args.published_release_tag or "" + ) + ) + return EXIT_SUCCESS + + if not args.install_dir: + raise SystemExit( + "install_llama_prebuilt.py: --install-dir is required unless --resolve-llama-tag or --resolve-install-tag is used" + ) + install_prebuilt( + install_dir = Path(args.install_dir).expanduser().resolve(), + llama_tag = args.llama_tag, + published_repo = args.published_repo, + published_release_tag = args.published_release_tag or "", + ) + return EXIT_SUCCESS + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except SystemExit: + raise + except Exception as exc: + message = textwrap.shorten(str(exc), width = 400, placeholder = "...") + log(f"fatal helper error: {message}") + raise SystemExit(EXIT_ERROR) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index c58bcd5c8..d8465fd03 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -503,7 +503,6 @@ if ($DriverMaxCuda) { $isCompat = ($tkMaj -lt $drMajorCuda) -or ($tkMaj -eq $drMajorCuda -and $tkMin -le $drMinorCuda) if ($isCompat) { # Also verify the toolkit supports our GPU architecture - Write-Host " [DEBUG] Checking CUDA compatibility: toolkit=$tkMaj.$tkMin 
arch=sm_$CudaArch" -ForegroundColor Magenta $archOk = $true if ($CudaArch) { $archOk = Test-NvccArchSupport -NvccExe $candidateNvcc -Arch $CudaArch @@ -1296,6 +1295,93 @@ if ($LASTEXITCODE -ne 0) { $ErrorActionPreference = $prevEAP_t5 Write-Host "[OK] Transformers 5.x pre-installed to .venv_t5/" -ForegroundColor Green +# ========================================================================== +# PHASE 3.4: Prefer prebuilt llama.cpp bundles before source build +# ========================================================================== +$UnslothHome = Join-Path $env:USERPROFILE ".unsloth" +if (-not (Test-Path $UnslothHome)) { New-Item -ItemType Directory -Force $UnslothHome | Out-Null } +$LlamaCppDir = Join-Path $UnslothHome "llama.cpp" +$NeedLlamaSourceBuild = $false +$SkipPrebuiltInstall = $false +$RequestedLlamaTag = if ($env:UNSLOTH_LLAMA_TAG) { $env:UNSLOTH_LLAMA_TAG } else { "latest" } +$HelperReleaseRepo = if ($env:UNSLOTH_LLAMA_RELEASE_REPO) { $env:UNSLOTH_LLAMA_RELEASE_REPO } else { "unslothai/llama.cpp" } +$resolveOutput = & python "$PSScriptRoot\install_llama_prebuilt.py" --resolve-install-tag $RequestedLlamaTag --published-repo $HelperReleaseRepo 2>&1 +$resolveExit = $LASTEXITCODE +$ResolvedLlamaTag = if ($resolveOutput) { ($resolveOutput | Select-Object -Last 1).ToString().Trim() } else { "" } +if ($resolveExit -ne 0 -or [string]::IsNullOrWhiteSpace($ResolvedLlamaTag)) { + Write-Host "" + Write-Host "[WARN] Failed to resolve an installable prebuilt llama.cpp tag via $HelperReleaseRepo" -ForegroundColor Yellow + if ($resolveOutput) { + $resolveOutput | ForEach-Object { Write-Host $_ } + } + $fallbackOutput = & python "$PSScriptRoot\install_llama_prebuilt.py" --resolve-llama-tag $RequestedLlamaTag 2>$null + $fallbackExit = $LASTEXITCODE + $ResolvedLlamaTag = if ($fallbackExit -eq 0 -and $fallbackOutput) { + ($fallbackOutput | Select-Object -Last 1).ToString().Trim() + } elseif ($RequestedLlamaTag -eq "latest") { + # Try Unsloth release repo first, then 
fall back to ggml-org upstream + $resolvedLatest = $null + try { + $latestRelease = Invoke-RestMethod -Uri "https://api.github.com/repos/$HelperReleaseRepo/releases/latest" -ErrorAction Stop + $resolvedLatest = $latestRelease.tag_name + } catch {} + if (-not $resolvedLatest) { + try { + $latestRelease = Invoke-RestMethod -Uri "https://api.github.com/repos/ggml-org/llama.cpp/releases/latest" -ErrorAction Stop + $resolvedLatest = $latestRelease.tag_name + } catch {} + } + if ($resolvedLatest) { $resolvedLatest } else { $RequestedLlamaTag } + } else { + $RequestedLlamaTag + } + $NeedLlamaSourceBuild = $true + $SkipPrebuiltInstall = $true +} + +Write-Host "" +Write-Host "Resolved llama.cpp release tag: $ResolvedLlamaTag" -ForegroundColor Gray + +if ($env:UNSLOTH_LLAMA_FORCE_COMPILE -eq "1") { + Write-Host "" + Write-Host "[WARN] UNSLOTH_LLAMA_FORCE_COMPILE=1 -- skipping prebuilt llama.cpp install" -ForegroundColor Yellow + $NeedLlamaSourceBuild = $true +} else { + Write-Host "" + Write-Host "Installing prebuilt llama.cpp bundle (preferred path)..." 
-ForegroundColor Cyan + if (Test-Path $LlamaCppDir) { + Write-Host "Existing llama.cpp install detected -- validating staged prebuilt update before replacement" -ForegroundColor Gray + } + if ($SkipPrebuiltInstall) { + Write-Host "[WARN] Skipping prebuilt install because prebuilt tag resolution failed -- falling back to source build" -ForegroundColor Yellow + } else { + $prebuiltArgs = @( + "$PSScriptRoot\install_llama_prebuilt.py", + "--install-dir", $LlamaCppDir, + "--llama-tag", $ResolvedLlamaTag, + "--published-repo", $HelperReleaseRepo + ) + if ($env:UNSLOTH_LLAMA_RELEASE_TAG) { + $prebuiltArgs += @("--published-release-tag", $env:UNSLOTH_LLAMA_RELEASE_TAG) + } + $prevEAPPrebuilt = $ErrorActionPreference + $ErrorActionPreference = "Continue" + & python @prebuiltArgs + $prebuiltExit = $LASTEXITCODE + $ErrorActionPreference = $prevEAPPrebuilt + + if ($prebuiltExit -eq 0) { + Write-Host "[OK] Prebuilt llama.cpp installed and validated" -ForegroundColor Green + } else { + if (Test-Path $LlamaCppDir) { + Write-Host "[WARN] Prebuilt update failed; existing install was restored or cleaned before source build fallback" -ForegroundColor Yellow + } + Write-Host "[WARN] Prebuilt llama.cpp path unavailable or failed validation -- falling back to source build" -ForegroundColor Yellow + $NeedLlamaSourceBuild = $true + } + } +} + # ========================================================================== # PHASE 3.5: Install OpenSSL dev (for HTTPS support in llama-server) # ========================================================================== @@ -1303,42 +1389,46 @@ Write-Host "[OK] Transformers 5.x pre-installed to .venv_t5/" -ForegroundColor G # ShiningLight.OpenSSL.Dev includes headers + libs that cmake can find. 
$OpenSslAvailable = $false -# Check if OpenSSL dev is already installed (look for include dir) -$OpenSslRoots = @( - 'C:\Program Files\OpenSSL-Win64', - 'C:\Program Files\OpenSSL', - 'C:\OpenSSL-Win64' -) -$OpenSslRoot = $null -foreach ($root in $OpenSslRoots) { - if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { - $OpenSslRoot = $root - break - } -} - -if ($OpenSslRoot) { - $OpenSslAvailable = $true - Write-Host "[OK] OpenSSL dev found at $OpenSslRoot" -ForegroundColor Green -} else { - Write-Host "" - Write-Host "Installing OpenSSL dev (for HTTPS in llama-server)..." -ForegroundColor Cyan - $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue) - if ($HasWinget) { - winget install -e --id ShiningLight.OpenSSL.Dev --accept-package-agreements --accept-source-agreements - # Re-check after install - foreach ($root in $OpenSslRoots) { - if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { - $OpenSslRoot = $root - $OpenSslAvailable = $true - Write-Host "[OK] OpenSSL dev installed at $OpenSslRoot" -ForegroundColor Green - break - } +if ($NeedLlamaSourceBuild) { + # Check if OpenSSL dev is already installed (look for include dir) + $OpenSslRoots = @( + 'C:\Program Files\OpenSSL-Win64', + 'C:\Program Files\OpenSSL', + 'C:\OpenSSL-Win64' + ) + $OpenSslRoot = $null + foreach ($root in $OpenSslRoots) { + if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { + $OpenSslRoot = $root + break } } - if (-not $OpenSslAvailable) { - Write-Host "[WARN] OpenSSL dev not available -- llama-server will be built without HTTPS" -ForegroundColor Yellow + + if ($OpenSslRoot) { + $OpenSslAvailable = $true + Write-Host "[OK] OpenSSL dev found at $OpenSslRoot" -ForegroundColor Green + } else { + Write-Host "" + Write-Host "Installing OpenSSL dev (for HTTPS in llama-server)..." 
-ForegroundColor Cyan + $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue) + if ($HasWinget) { + winget install -e --id ShiningLight.OpenSSL.Dev --accept-package-agreements --accept-source-agreements + # Re-check after install + foreach ($root in $OpenSslRoots) { + if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { + $OpenSslRoot = $root + $OpenSslAvailable = $true + Write-Host "[OK] OpenSSL dev installed at $OpenSslRoot" -ForegroundColor Green + break + } + } + } + if (-not $OpenSslAvailable) { + Write-Host "[WARN] OpenSSL dev not available -- llama-server will be built without HTTPS" -ForegroundColor Yellow + } } +} else { + Write-Host "[SKIP] OpenSSL dev install -- prebuilt llama.cpp already validated" -ForegroundColor Yellow } # ========================================================================== @@ -1351,9 +1441,7 @@ if ($OpenSslRoot) { # - llama-server: for GGUF model inference (with HTTPS if OpenSSL available) # - llama-quantize: for GGUF export quantization # Prerequisites (git, cmake, VS Build Tools, CUDA Toolkit) already installed in Phase 1. 
-$UnslothHome = Join-Path $env:USERPROFILE ".unsloth" -if (-not (Test-Path $UnslothHome)) { New-Item -ItemType Directory -Force $UnslothHome | Out-Null } -$LlamaCppDir = Join-Path $UnslothHome "llama.cpp" +$OriginalLlamaCppDir = $LlamaCppDir $BuildDir = Join-Path $LlamaCppDir "build" $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" @@ -1376,7 +1464,10 @@ if (Test-Path $LlamaServerBin) { } } -if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { +if (-not $NeedLlamaSourceBuild) { + Write-Host "" + Write-Host "[OK] Using validated prebuilt llama.cpp install at $LlamaCppDir" -ForegroundColor Green +} elseif ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { Write-Host "" Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green } elseif (-not $HasCmakeForBuild) { @@ -1432,29 +1523,49 @@ if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { # -- Step A: Clone or pull llama.cpp -- + $UseConcreteRef = ($ResolvedLlamaTag -ne "latest" -and -not [string]::IsNullOrWhiteSpace($ResolvedLlamaTag)) + if (Test-Path (Join-Path $LlamaCppDir ".git")) { - Write-Host " llama.cpp repo already cloned, pulling latest..." -ForegroundColor Gray - git -C $LlamaCppDir pull 2>&1 | Out-Null + Write-Host " Syncing llama.cpp to $ResolvedLlamaTag..." -ForegroundColor Gray + if ($UseConcreteRef) { + git -C $LlamaCppDir fetch --depth 1 origin $ResolvedLlamaTag 2>&1 | Out-Null + } else { + git -C $LlamaCppDir fetch --depth 1 origin 2>&1 | Out-Null + } if ($LASTEXITCODE -ne 0) { - Write-Host " [WARN] git pull failed -- using existing source" -ForegroundColor Yellow + Write-Host " [WARN] git fetch failed -- using existing source" -ForegroundColor Yellow + } else { + git -C $LlamaCppDir checkout -B unsloth-llama-build FETCH_HEAD 2>&1 | Out-Null + if ($LASTEXITCODE -ne 0) { + $BuildOk = $false + $FailedStep = "git checkout" + } else { + git -C $LlamaCppDir clean -fdx 2>&1 | Out-Null + } } } else { - Write-Host " Cloning llama.cpp..." 
-ForegroundColor Gray - if (Test-Path $LlamaCppDir) { Remove-Item -Recurse -Force $LlamaCppDir } - git clone --depth 1 https://github.com/ggml-org/llama.cpp.git $LlamaCppDir 2>&1 | Out-Null + Write-Host " Cloning llama.cpp @ $ResolvedLlamaTag..." -ForegroundColor Gray + $buildTmp = "$LlamaCppDir.build.$PID" + if (Test-Path $buildTmp) { Remove-Item -Recurse -Force $buildTmp } + $cloneArgs = @("clone", "--depth", "1") + if ($UseConcreteRef) { + $cloneArgs += @("--branch", $ResolvedLlamaTag) + } + $cloneArgs += @("https://github.com/ggml-org/llama.cpp.git", $buildTmp) + git @cloneArgs 2>&1 | Out-Null if ($LASTEXITCODE -ne 0) { $BuildOk = $false $FailedStep = "git clone" + if (Test-Path $buildTmp) { Remove-Item -Recurse -Force $buildTmp } + } + # Use temp dir for build; swap into $LlamaCppDir only after build succeeds + if ($BuildOk) { + $LlamaCppDir = $buildTmp + $BuildDir = Join-Path $LlamaCppDir "build" } } # -- Step B: cmake configure -- - # Clean stale CMake cache to prevent previous CUDA settings from leaking - # into a CPU-only rebuild (or vice versa). 
- $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt" - if (Test-Path $CmakeCacheFile) { - Remove-Item -Recurse -Force $BuildDir - } if ($BuildOk) { Write-Host "" @@ -1555,6 +1666,21 @@ if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { } } + # Swap temp build dir into final location (only if we built in a temp dir) + if ($BuildOk -and $LlamaCppDir -ne $OriginalLlamaCppDir) { + if (Test-Path $OriginalLlamaCppDir) { Remove-Item -Recurse -Force $OriginalLlamaCppDir } + Move-Item $LlamaCppDir $OriginalLlamaCppDir + $LlamaCppDir = $OriginalLlamaCppDir + $BuildDir = Join-Path $LlamaCppDir "build" + $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" + } elseif (-not $BuildOk -and $LlamaCppDir -ne $OriginalLlamaCppDir) { + # Build failed -- clean up temp dir, preserve existing install + if (Test-Path $LlamaCppDir) { Remove-Item -Recurse -Force $LlamaCppDir } + $LlamaCppDir = $OriginalLlamaCppDir + $BuildDir = Join-Path $LlamaCppDir "build" + $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" + } + # Restore ErrorActionPreference $ErrorActionPreference = $prevEAP diff --git a/studio/setup.sh b/studio/setup.sh index 0e9917375..4cfabec95 100755 --- a/studio/setup.sh +++ b/studio/setup.sh @@ -341,10 +341,98 @@ else echo "✅ Python dependencies up to date — skipping" fi -# ── 7. WSL: pre-install GGUF build dependencies ── +# ── 7. 
Prefer prebuilt llama.cpp bundles before any source build path ── +UNSLOTH_HOME="$HOME/.unsloth" +mkdir -p "$UNSLOTH_HOME" +LLAMA_CPP_DIR="$UNSLOTH_HOME/llama.cpp" +LLAMA_SERVER_BIN="$LLAMA_CPP_DIR/build/bin/llama-server" +_NEED_LLAMA_SOURCE_BUILD=false +_LLAMA_FORCE_COMPILE="${UNSLOTH_LLAMA_FORCE_COMPILE:-0}" +_REQUESTED_LLAMA_TAG="${UNSLOTH_LLAMA_TAG:-latest}" +_HELPER_RELEASE_REPO="${UNSLOTH_LLAMA_RELEASE_REPO:-unslothai/llama.cpp}" +_RESOLVE_LLAMA_LOG="$(mktemp)" +set +e +python "$SCRIPT_DIR/install_llama_prebuilt.py" \ + --resolve-install-tag "$_REQUESTED_LLAMA_TAG" \ + --published-repo "$_HELPER_RELEASE_REPO" >"$_RESOLVE_LLAMA_LOG" 2>&1 +_RESOLVE_LLAMA_STATUS=$? +set -e +if [ "$_RESOLVE_LLAMA_STATUS" -eq 0 ]; then + _RESOLVED_LLAMA_TAG="$(tail -n 1 "$_RESOLVE_LLAMA_LOG" | tr -d '\r')" +else + _RESOLVED_LLAMA_TAG="" +fi +if [ -z "$_RESOLVED_LLAMA_TAG" ]; then + echo "" + echo "⚠️ Failed to resolve an installable prebuilt llama.cpp tag via $_HELPER_RELEASE_REPO" + cat "$_RESOLVE_LLAMA_LOG" >&2 || true + set +e + _RESOLVED_LLAMA_TAG="$(python "$SCRIPT_DIR/install_llama_prebuilt.py" --resolve-llama-tag "$_REQUESTED_LLAMA_TAG" 2>/dev/null)" + _RESOLVE_UPSTREAM_STATUS=$? 
+ set -e + if [ "$_RESOLVE_UPSTREAM_STATUS" -ne 0 ] || [ -z "$_RESOLVED_LLAMA_TAG" ]; then + if [ "$_REQUESTED_LLAMA_TAG" = "latest" ]; then + # Try Unsloth release repo first, then fall back to ggml-org upstream + _RESOLVED_LLAMA_TAG="$(curl -fsSL "https://api.github.com/repos/${_HELPER_RELEASE_REPO}/releases/latest" 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG="" + if [ -z "$_RESOLVED_LLAMA_TAG" ]; then + _RESOLVED_LLAMA_TAG="$(curl -fsSL https://api.github.com/repos/ggml-org/llama.cpp/releases/latest 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG="" + fi + fi + if [ -z "$_RESOLVED_LLAMA_TAG" ]; then + _RESOLVED_LLAMA_TAG="$_REQUESTED_LLAMA_TAG" + fi + fi + _NEED_LLAMA_SOURCE_BUILD=true + _SKIP_PREBUILT_INSTALL=true +fi +rm -f "$_RESOLVE_LLAMA_LOG" + +echo "" +echo "Resolved llama.cpp release tag: $_RESOLVED_LLAMA_TAG" + +if [ "$_LLAMA_FORCE_COMPILE" = "1" ]; then + echo "" + echo "⚠️ UNSLOTH_LLAMA_FORCE_COMPILE=1 -- skipping prebuilt llama.cpp install" + _NEED_LLAMA_SOURCE_BUILD=true +else + echo "" + echo "Installing prebuilt llama.cpp bundle (preferred path)..." + if [ -d "$LLAMA_CPP_DIR" ]; then + echo "Existing llama.cpp install detected -- validating staged prebuilt update before replacement" + fi + if [ "${_SKIP_PREBUILT_INSTALL:-false}" = true ]; then + echo "⚠️ Skipping prebuilt install because prebuilt tag resolution failed -- falling back to source build" + else + _PREBUILT_CMD=( + python "$SCRIPT_DIR/install_llama_prebuilt.py" + --install-dir "$LLAMA_CPP_DIR" + --llama-tag "$_RESOLVED_LLAMA_TAG" + --published-repo "$_HELPER_RELEASE_REPO" + ) + if [ -n "${UNSLOTH_LLAMA_RELEASE_TAG:-}" ]; then + _PREBUILT_CMD+=(--published-release-tag "$UNSLOTH_LLAMA_RELEASE_TAG") + fi + set +e + "${_PREBUILT_CMD[@]}" + _PREBUILT_STATUS=$? 
+ set -e + + if [ "$_PREBUILT_STATUS" -eq 0 ]; then + echo "✅ Prebuilt llama.cpp installed and validated" + else + if [ -d "$LLAMA_CPP_DIR" ]; then + echo "⚠️ Prebuilt update failed; existing install was restored or cleaned before source build fallback" + fi + echo "⚠️ Prebuilt llama.cpp path unavailable or failed validation -- falling back to source build" + _NEED_LLAMA_SOURCE_BUILD=true + fi + fi +fi + +# ── 8. WSL: pre-install GGUF build dependencies for fallback source builds ── # On WSL, sudo requires a password and can't be entered during GGUF export # (runs in a non-interactive subprocess). Install build deps here instead. -if grep -qi microsoft /proc/version 2>/dev/null; then +if [ "$_NEED_LLAMA_SOURCE_BUILD" = true ] && grep -qi microsoft /proc/version 2>/dev/null; then echo "" echo "⚠️ WSL detected -- installing build dependencies for GGUF export..." _GGUF_DEPS="pciutils build-essential cmake curl git libcurl4-openssl-dev" @@ -402,22 +490,19 @@ if grep -qi microsoft /proc/version 2>/dev/null; then fi fi -# ── 8. Build llama.cpp binaries for GGUF inference + export ── +# ── 9. Build llama.cpp binaries for GGUF inference + export when prebuilt install fails ── # Builds at ~/.unsloth/llama.cpp — a single shared location under the user's # home directory. This is used by both the inference server and the GGUF # export pipeline (unsloth-zoo). # - llama-server: for GGUF model inference # - llama-quantize: for GGUF export quantization (symlinked to root for check_llama_cpp()) -UNSLOTH_HOME="$HOME/.unsloth" -mkdir -p "$UNSLOTH_HOME" -LLAMA_CPP_DIR="$UNSLOTH_HOME/llama.cpp" -LLAMA_SERVER_BIN="$LLAMA_CPP_DIR/build/bin/llama-server" -if [ "${_SKIP_GGUF_BUILD:-}" = true ]; then +if [ "$_NEED_LLAMA_SOURCE_BUILD" = false ]; then + : +elif [ "${_SKIP_GGUF_BUILD:-}" = true ]; then echo "" echo "Skipping llama-server build (missing dependencies)" echo " Install the missing packages and re-run setup to enable GGUF inference." 
else -rm -rf "$LLAMA_CPP_DIR" { # Check prerequisites if ! command -v cmake &>/dev/null; then @@ -432,7 +517,13 @@ rm -rf "$LLAMA_CPP_DIR" echo "Building llama-server for GGUF inference..." BUILD_OK=true - run_quiet_no_exit "clone llama.cpp" git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "$LLAMA_CPP_DIR" || BUILD_OK=false + _CLONE_BRANCH_ARGS=() + if [ "$_RESOLVED_LLAMA_TAG" != "latest" ] && [ -n "$_RESOLVED_LLAMA_TAG" ]; then + _CLONE_BRANCH_ARGS=(--branch "$_RESOLVED_LLAMA_TAG") + fi + _BUILD_TMP="${LLAMA_CPP_DIR}.build.$$" + rm -rf "$_BUILD_TMP" + run_quiet_no_exit "clone llama.cpp" git clone --depth 1 "${_CLONE_BRANCH_ARGS[@]}" https://github.com/ggml-org/llama.cpp.git "$_BUILD_TMP" || BUILD_OK=false if [ "$BUILD_OK" = true ]; then # Skip tests/examples we don't need (faster build) @@ -571,21 +662,29 @@ rm -rf "$LLAMA_CPP_DIR" CMAKE_GENERATOR_ARGS="-G Ninja" fi - run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$LLAMA_CPP_DIR" -B "$LLAMA_CPP_DIR/build" $CMAKE_ARGS || BUILD_OK=false + run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS || BUILD_OK=false fi if [ "$BUILD_OK" = true ]; then - run_quiet_no_exit "build llama-server" cmake --build "$LLAMA_CPP_DIR/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false + run_quiet_no_exit "build llama-server" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false fi # Also build llama-quantize (needed by unsloth-zoo's GGUF export pipeline) if [ "$BUILD_OK" = true ]; then - run_quiet_no_exit "build llama-quantize" cmake --build "$LLAMA_CPP_DIR/build" --config Release --target llama-quantize -j"$NCPU" || true - # Symlink to llama.cpp root — check_llama_cpp() looks for the binary there + run_quiet_no_exit "build llama-quantize" cmake --build "$_BUILD_TMP/build" --config Release --target llama-quantize -j"$NCPU" || true + fi + + # Swap only after build succeeds -- 
preserves existing install on failure + if [ "$BUILD_OK" = true ]; then + rm -rf "$LLAMA_CPP_DIR" + mv "$_BUILD_TMP" "$LLAMA_CPP_DIR" + # Symlink to llama.cpp root -- check_llama_cpp() looks for the binary there QUANTIZE_BIN="$LLAMA_CPP_DIR/build/bin/llama-quantize" if [ -f "$QUANTIZE_BIN" ]; then ln -sf build/bin/llama-quantize "$LLAMA_CPP_DIR/llama-quantize" fi + else + rm -rf "$_BUILD_TMP" fi if [ "$BUILD_OK" = true ]; then diff --git a/tests/studio/install/smoke_test_llama_prebuilt.py b/tests/studio/install/smoke_test_llama_prebuilt.py new file mode 100644 index 000000000..994757d2e --- /dev/null +++ b/tests/studio/install/smoke_test_llama_prebuilt.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import importlib.util +import shutil +import sys +import tempfile +import time +from pathlib import Path + + +PACKAGE_ROOT = Path(__file__).resolve().parents[3] +INSTALLER_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py" + + +def load_installer_module(): + spec = importlib.util.spec_from_file_location( + "studio_install_llama_prebuilt", INSTALLER_PATH + ) + if spec is None or spec.loader is None: + raise RuntimeError(f"unable to load installer module from {INSTALLER_PATH}") + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +installer = load_installer_module() + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description = ( + "Run a real end-to-end prebuilt llama.cpp install into an isolated temporary " + "directory on the current machine." + ) + ) + parser.add_argument( + "--llama-tag", + default = "latest", + help = "llama.cpp tag to resolve. 
Defaults to the approved prebuilt tag for this host.", + ) + parser.add_argument( + "--published-repo", + default = installer.DEFAULT_PUBLISHED_REPO, + help = "Published bundle repository used for Linux CUDA selection.", + ) + parser.add_argument( + "--published-release-tag", + default = installer.DEFAULT_PUBLISHED_TAG or "", + help = "Optional published GitHub release tag to pin.", + ) + parser.add_argument( + "--work-dir", + default = "", + help = ( + "Optional directory under which the smoke install temp dir will be created. " + "If omitted, defaults to ./.tmp/llama-prebuilt-smoke under the current directory." + ), + ) + parser.add_argument( + "--keep-temp", + action = "store_true", + help = "Keep the temporary smoke install directory after success.", + ) + return parser.parse_args() + + +def smoke_root_base(work_dir: str) -> Path: + if work_dir: + return Path(work_dir).expanduser().resolve() + return (Path.cwd() / ".tmp" / "llama-prebuilt-smoke").resolve() + + +def make_smoke_root(base_dir: Path) -> Path: + base_dir.mkdir(parents = True, exist_ok = True) + timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime()) + return Path(tempfile.mkdtemp(prefix = f"run-{timestamp}-", dir = base_dir)) + + +def main() -> int: + args = parse_args() + host = installer.detect_host() + smoke_base = smoke_root_base(args.work_dir) + smoke_root = make_smoke_root(smoke_base) + install_dir = smoke_root / "install" / "llama.cpp" + choice = None + + print(f"[smoke] host={host.system} machine={host.machine}") + print(f"[smoke] temp_root={smoke_root}") + + try: + requested_tag, resolved_tag, attempts, _approved_checksums = ( + installer.resolve_install_attempts( + args.llama_tag, + host, + args.published_repo, + args.published_release_tag, + ) + ) + choice = attempts[0] + print(f"[smoke] requested_tag={requested_tag}") + print(f"[smoke] resolved_tag={resolved_tag}") + print(f"[smoke] selected_asset={choice.name}") + print(f"[smoke] selected_source={choice.source_label}") + 
print(f"[smoke] install_dir={install_dir}") + installer.install_prebuilt( + install_dir = install_dir, + llama_tag = args.llama_tag, + published_repo = args.published_repo, + published_release_tag = args.published_release_tag, + ) + print(f"[smoke] PASS install_dir={install_dir}") + print( + "[smoke] note=This was a real prebuilt install into an isolated temp directory." + ) + return installer.EXIT_SUCCESS + except SystemExit as exc: + code = int(exc.code) if isinstance(exc.code, int) else installer.EXIT_ERROR + if code == installer.EXIT_FALLBACK: + print(f"[smoke] FALLBACK install_dir={install_dir}") + print( + "[smoke] note=Prebuilt path failed and would fall back to source build in setup." + ) + print(installer.collect_system_report(host, choice, install_dir)) + else: + print(f"[smoke] ERROR exit_code={code} install_dir={install_dir}") + return code + except Exception as exc: + print(f"[smoke] ERROR {exc}") + print(installer.collect_system_report(host, choice, install_dir)) + return installer.EXIT_ERROR + finally: + if args.keep_temp: + print(f"[smoke] keeping_temp_root={smoke_root}") + elif smoke_root.exists(): + shutil.rmtree(smoke_root, ignore_errors = True) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/studio/install/test_install_llama_prebuilt_logic.py b/tests/studio/install/test_install_llama_prebuilt_logic.py new file mode 100644 index 000000000..eb30ac274 --- /dev/null +++ b/tests/studio/install/test_install_llama_prebuilt_logic.py @@ -0,0 +1,630 @@ +import importlib.util +import io +import json +import os +import sys +import tarfile +import zipfile +from pathlib import Path + +import pytest + + +PACKAGE_ROOT = Path(__file__).resolve().parents[3] +MODULE_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py" +SPEC = importlib.util.spec_from_file_location( + "studio_install_llama_prebuilt", MODULE_PATH +) +assert SPEC is not None and SPEC.loader is not None +INSTALL_LLAMA_PREBUILT = 
importlib.util.module_from_spec(SPEC) +sys.modules[SPEC.name] = INSTALL_LLAMA_PREBUILT +SPEC.loader.exec_module(INSTALL_LLAMA_PREBUILT) + +PrebuiltFallback = INSTALL_LLAMA_PREBUILT.PrebuiltFallback +extract_archive = INSTALL_LLAMA_PREBUILT.extract_archive +binary_env = INSTALL_LLAMA_PREBUILT.binary_env +HostInfo = INSTALL_LLAMA_PREBUILT.HostInfo +AssetChoice = INSTALL_LLAMA_PREBUILT.AssetChoice +ApprovedArtifactHash = INSTALL_LLAMA_PREBUILT.ApprovedArtifactHash +ApprovedReleaseChecksums = INSTALL_LLAMA_PREBUILT.ApprovedReleaseChecksums +hydrate_source_tree = INSTALL_LLAMA_PREBUILT.hydrate_source_tree +validate_prebuilt_choice = INSTALL_LLAMA_PREBUILT.validate_prebuilt_choice +activate_install_tree = INSTALL_LLAMA_PREBUILT.activate_install_tree +create_install_staging_dir = INSTALL_LLAMA_PREBUILT.create_install_staging_dir +sha256_file = INSTALL_LLAMA_PREBUILT.sha256_file +source_archive_logical_name = INSTALL_LLAMA_PREBUILT.source_archive_logical_name + + +def approved_checksums_for( + upstream_tag: str, *, source_archive: Path, bundle_archive: Path, bundle_name: str +) -> ApprovedReleaseChecksums: + return ApprovedReleaseChecksums( + repo = "local", + release_tag = upstream_tag, + upstream_tag = upstream_tag, + source_commit = None, + artifacts = { + source_archive_logical_name(upstream_tag): ApprovedArtifactHash( + asset_name = source_archive_logical_name(upstream_tag), + sha256 = sha256_file(source_archive), + repo = "ggml-org/llama.cpp", + kind = "upstream-source", + ), + bundle_name: ApprovedArtifactHash( + asset_name = bundle_name, + sha256 = sha256_file(bundle_archive), + repo = "local", + kind = "local-test-bundle", + ), + }, + ) + + +def test_extract_archive_allows_safe_tar_symlink_chain(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + payload = b"shared-object" + + with tarfile.open(archive_path, "w:gz") as archive: + versioned = tarfile.TarInfo("libllama.so.0.0.1") + versioned.size = len(payload) + archive.addfile(versioned, 
io_bytes(payload)) + + soname = tarfile.TarInfo("libllama.so.0") + soname.type = tarfile.SYMTYPE + soname.linkname = "libllama.so.0.0.1" + archive.addfile(soname) + + linker_name = tarfile.TarInfo("libllama.so") + linker_name.type = tarfile.SYMTYPE + linker_name.linkname = "libllama.so.0" + archive.addfile(linker_name) + + destination = tmp_path / "extract" + extract_archive(archive_path, destination) + + assert (destination / "libllama.so.0.0.1").read_bytes() == payload + assert (destination / "libllama.so.0").is_symlink() + assert (destination / "libllama.so").is_symlink() + assert (destination / "libllama.so").resolve().read_bytes() == payload + + +def test_extract_archive_allows_safe_tar_hardlink(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + payload = b"quantize" + + with tarfile.open(archive_path, "w:gz") as archive: + target = tarfile.TarInfo("llama-quantize") + target.size = len(payload) + archive.addfile(target, io_bytes(payload)) + + hardlink = tarfile.TarInfo("llama-quantize-copy") + hardlink.type = tarfile.LNKTYPE + hardlink.linkname = "llama-quantize" + archive.addfile(hardlink) + + destination = tmp_path / "extract" + extract_archive(archive_path, destination) + + assert (destination / "llama-quantize-copy").read_bytes() == payload + assert not (destination / "llama-quantize-copy").is_symlink() + + +def test_extract_archive_rejects_absolute_tar_symlink_target(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + + with tarfile.open(archive_path, "w:gz") as archive: + entry = tarfile.TarInfo("libllama.so") + entry.type = tarfile.SYMTYPE + entry.linkname = "/tmp/libllama.so.0" + archive.addfile(entry) + + with pytest.raises(PrebuiltFallback, match = "archive link used an absolute target"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_extract_archive_rejects_escaping_tar_symlink_target(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + + with tarfile.open(archive_path, "w:gz") as archive: + 
entry = tarfile.TarInfo("libllama.so") + entry.type = tarfile.SYMTYPE + entry.linkname = "../outside/libllama.so.0" + archive.addfile(entry) + + with pytest.raises(PrebuiltFallback, match = "archive link escaped destination"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_extract_archive_rejects_unresolved_tar_symlink_target(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + + with tarfile.open(archive_path, "w:gz") as archive: + entry = tarfile.TarInfo("libllama.so") + entry.type = tarfile.SYMTYPE + entry.linkname = "libllama.so.0" + archive.addfile(entry) + + with pytest.raises(PrebuiltFallback, match = "unresolved link entries"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_extract_archive_rejects_zip_symlink_entry(tmp_path: Path): + archive_path = tmp_path / "bundle.zip" + + with zipfile.ZipFile(archive_path, "w") as archive: + info = zipfile.ZipInfo("libllama.so") + info.create_system = 3 + info.external_attr = 0o120777 << 16 + archive.writestr(info, "libllama.so.0") + + with pytest.raises(PrebuiltFallback, match = "zip archive contained a symlink entry"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_hydrate_source_tree_extracts_upstream_archive_contents( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + upstream_tag = "b9999" + archive_path = tmp_path / "llama.cpp-source.tar.gz" + with tarfile.open(archive_path, "w:gz") as archive: + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/CMakeLists.txt", + b"cmake_minimum_required(VERSION 3.14)\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/convert_hf_to_gguf.py", + b"#!/usr/bin/env python3\nimport gguf\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/gguf-py/gguf/__init__.py", + b"__all__ = []\n", + ) + + source_urls = set(INSTALL_LLAMA_PREBUILT.upstream_source_archive_urls(upstream_tag)) + + def fake_download_file(url: str, destination: Path) -> None: + assert url in source_urls + 
destination.write_bytes(archive_path.read_bytes()) + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "download_file", fake_download_file) + + install_dir = tmp_path / "install" + work_dir = tmp_path / "work" + work_dir.mkdir() + hydrate_source_tree( + upstream_tag, install_dir, work_dir, expected_sha256 = sha256_file(archive_path) + ) + + assert (install_dir / "CMakeLists.txt").exists() + assert (install_dir / "convert_hf_to_gguf.py").exists() + assert (install_dir / "gguf-py" / "gguf" / "__init__.py").exists() + assert not (install_dir / f"llama.cpp-{upstream_tag}").exists() + + +def test_validate_prebuilt_choice_creates_repo_shaped_linux_install( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + upstream_tag = "b9998" + bundle_name = "app-b9998-linux-x64-cuda13-newer.tar.gz" + source_archive = tmp_path / "source.tar.gz" + bundle_archive = tmp_path / "bundle.tar.gz" + with tarfile.open(source_archive, "w:gz") as archive: + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/CMakeLists.txt", + b"cmake_minimum_required(VERSION 3.14)\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/convert_hf_to_gguf.py", + b"#!/usr/bin/env python3\nimport gguf\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/gguf-py/gguf/__init__.py", + b"__all__ = []\n", + ) + with tarfile.open(bundle_archive, "w:gz") as archive: + add_bytes_to_tar(archive, "llama-server", b"#!/bin/sh\nexit 0\n", mode = 0o755) + add_bytes_to_tar(archive, "llama-quantize", b"#!/bin/sh\nexit 0\n", mode = 0o755) + add_bytes_to_tar(archive, "libllama.so.0.0.1", b"libllama") + add_symlink_to_tar(archive, "libllama.so.0", "libllama.so.0.0.1") + add_symlink_to_tar(archive, "libllama.so", "libllama.so.0") + add_bytes_to_tar(archive, "libggml.so.0.9.8", b"libggml") + add_symlink_to_tar(archive, "libggml.so.0", "libggml.so.0.9.8") + add_symlink_to_tar(archive, "libggml.so", "libggml.so.0") + add_bytes_to_tar(archive, "libggml-base.so.0.9.8", b"libggml-base") + 
add_symlink_to_tar(archive, "libggml-base.so.0", "libggml-base.so.0.9.8") + add_symlink_to_tar(archive, "libggml-base.so", "libggml-base.so.0") + add_bytes_to_tar(archive, "libggml-cpu-x64.so.0.9.8", b"libggml-cpu") + add_symlink_to_tar(archive, "libggml-cpu-x64.so.0", "libggml-cpu-x64.so.0.9.8") + add_symlink_to_tar(archive, "libggml-cpu-x64.so", "libggml-cpu-x64.so.0") + add_bytes_to_tar(archive, "libmtmd.so.0.0.1", b"libmtmd") + add_symlink_to_tar(archive, "libmtmd.so.0", "libmtmd.so.0.0.1") + add_symlink_to_tar(archive, "libmtmd.so", "libmtmd.so.0") + add_bytes_to_tar(archive, "BUILD_INFO.txt", b"bundle metadata\n") + add_bytes_to_tar(archive, "THIRD_PARTY_LICENSES.txt", b"licenses\n") + + source_urls = set(INSTALL_LLAMA_PREBUILT.upstream_source_archive_urls(upstream_tag)) + + def fake_download_file(url: str, destination: Path) -> None: + if url in source_urls: + destination.write_bytes(source_archive.read_bytes()) + return + if url == "file://bundle": + destination.write_bytes(bundle_archive.read_bytes()) + return + raise AssertionError(f"unexpected download url: {url}") + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "download_file", fake_download_file) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "download_bytes", + lambda url, **_: b"#!/usr/bin/env python3\nimport gguf\n", + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "preflight_linux_installed_binaries", + lambda *args, **kwargs: None, + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_quantize", lambda *args, **kwargs: None + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_server", lambda *args, **kwargs: None + ) + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + choice = AssetChoice( + 
repo = "local", + tag = upstream_tag, + name = bundle_name, + url = "file://bundle", + source_label = "local", + is_ready_bundle = True, + install_kind = "linux-cuda", + bundle_profile = "cuda13-newer", + runtime_line = "cuda13", + expected_sha256 = sha256_file(bundle_archive), + ) + + install_dir = tmp_path / "install" + work_dir = tmp_path / "work" + work_dir.mkdir() + probe_path = tmp_path / "stories260K.gguf" + quantized_path = tmp_path / "stories260K-q4.gguf" + validate_prebuilt_choice( + choice, + host, + install_dir, + work_dir, + probe_path, + requested_tag = upstream_tag, + llama_tag = upstream_tag, + approved_checksums = approved_checksums_for( + upstream_tag, + source_archive = source_archive, + bundle_archive = bundle_archive, + bundle_name = bundle_name, + ), + prebuilt_fallback_used = False, + quantized_path = quantized_path, + ) + + assert (install_dir / "gguf-py" / "gguf" / "__init__.py").exists() + assert (install_dir / "convert_hf_to_gguf.py").exists() + assert (install_dir / "build" / "bin" / "llama-server").exists() + assert (install_dir / "build" / "bin" / "llama-quantize").exists() + assert (install_dir / "build" / "bin" / "libllama.so").exists() + assert (install_dir / "llama-server").exists() + assert (install_dir / "llama-quantize").exists() + assert (install_dir / "UNSLOTH_PREBUILT_INFO.json").exists() + assert (install_dir / "BUILD_INFO.txt").exists() + + +def test_validate_prebuilt_choice_creates_repo_shaped_windows_install( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + upstream_tag = "b9997" + bundle_name = "app-b9997-windows-x64-cpu.zip" + source_archive = tmp_path / "source.tar.gz" + bundle_archive = tmp_path / "bundle.zip" + with tarfile.open(source_archive, "w:gz") as archive: + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/CMakeLists.txt", + b"cmake_minimum_required(VERSION 3.14)\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/convert_hf_to_gguf.py", + b"#!/usr/bin/env python3\nimport 
gguf\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/gguf-py/gguf/__init__.py", + b"__all__ = []\n", + ) + with zipfile.ZipFile(bundle_archive, "w") as archive: + archive.writestr("llama-server.exe", b"MZ") + archive.writestr("llama-quantize.exe", b"MZ") + archive.writestr("llama.dll", b"DLL") + archive.writestr("BUILD_INFO.txt", b"bundle metadata\n") + + source_urls = set(INSTALL_LLAMA_PREBUILT.upstream_source_archive_urls(upstream_tag)) + + def fake_download_file(url: str, destination: Path) -> None: + if url in source_urls: + destination.write_bytes(source_archive.read_bytes()) + return + if url == "file://bundle.zip": + destination.write_bytes(bundle_archive.read_bytes()) + return + raise AssertionError(f"unexpected download url: {url}") + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "download_file", fake_download_file) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "download_bytes", + lambda url, **_: b"#!/usr/bin/env python3\nimport gguf\n", + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "preflight_linux_installed_binaries", + lambda *args, **kwargs: None, + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_quantize", lambda *args, **kwargs: None + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_server", lambda *args, **kwargs: None + ) + + host = HostInfo( + system = "Windows", + machine = "AMD64", + is_windows = True, + is_linux = False, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + choice = AssetChoice( + repo = "local", + tag = upstream_tag, + name = bundle_name, + url = "file://bundle.zip", + source_label = "local", + is_ready_bundle = True, + install_kind = "windows-cpu", + expected_sha256 = sha256_file(bundle_archive), + ) + + install_dir = tmp_path / "install" + work_dir = tmp_path / "work" + work_dir.mkdir() + 
probe_path = tmp_path / "stories260K.gguf" + quantized_path = tmp_path / "stories260K-q4.gguf" + validate_prebuilt_choice( + choice, + host, + install_dir, + work_dir, + probe_path, + requested_tag = upstream_tag, + llama_tag = upstream_tag, + approved_checksums = approved_checksums_for( + upstream_tag, + source_archive = source_archive, + bundle_archive = bundle_archive, + bundle_name = bundle_name, + ), + prebuilt_fallback_used = False, + quantized_path = quantized_path, + ) + + assert (install_dir / "gguf-py" / "gguf" / "__init__.py").exists() + assert (install_dir / "convert_hf_to_gguf.py").exists() + assert (install_dir / "build" / "bin" / "Release" / "llama-server.exe").exists() + assert (install_dir / "build" / "bin" / "Release" / "llama-quantize.exe").exists() + assert (install_dir / "build" / "bin" / "Release" / "llama.dll").exists() + assert not (install_dir / "llama-server.exe").exists() + assert (install_dir / "UNSLOTH_PREBUILT_INFO.json").exists() + assert (install_dir / "BUILD_INFO.txt").exists() + + +def test_activate_install_tree_restores_existing_install_after_activation_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +): + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir() + (install_dir / "old.txt").write_text("old install\n") + + staging_dir = create_install_staging_dir(install_dir) + (staging_dir / "new.txt").write_text("new install\n") + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "confirm_install_tree", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + RuntimeError("activation confirm failed") + ), + ) + + with pytest.raises( + PrebuiltFallback, + 
match = "activation failed; restored previous install", + ): + activate_install_tree(staging_dir, install_dir, host) + + assert (install_dir / "old.txt").read_text() == "old install\n" + assert not (install_dir / "new.txt").exists() + assert not staging_dir.exists() + assert not (tmp_path / ".staging").exists() + + output = capsys.readouterr().out + assert "moving existing install to rollback path" in output + assert "restored previous install from rollback path" in output + + +def test_activate_install_tree_cleans_all_paths_when_rollback_restore_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +): + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir() + (install_dir / "old.txt").write_text("old install\n") + + staging_dir = create_install_staging_dir(install_dir) + (staging_dir / "new.txt").write_text("new install\n") + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "confirm_install_tree", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + RuntimeError("activation confirm failed") + ), + ) + + original_replace = INSTALL_LLAMA_PREBUILT.os.replace + + def flaky_replace(src, dst): + src_path = Path(src) + dst_path = Path(dst) + if "rollback-" in src_path.name and dst_path == install_dir: + raise OSError("restore failed") + return original_replace(src, dst) + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT.os, "replace", flaky_replace) + + with pytest.raises( + PrebuiltFallback, + match = "activation and rollback failed; cleaned install state for fresh source build", + ): + activate_install_tree(staging_dir, install_dir, host) + + assert not install_dir.exists() + assert not 
staging_dir.exists() + assert not (tmp_path / ".staging").exists() + + output = capsys.readouterr().out + assert "rollback after failed activation also failed: restore failed" in output + assert ( + "cleaning staging, install, and rollback paths before source build fallback" + in output + ) + assert "removing failed install path" in output + assert "removing rollback path" in output + + +def test_binary_env_linux_includes_binary_parent_in_ld_library_path( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + assert ( + str(bin_dir) in ld_dirs + ), f"binary_path.parent ({bin_dir}) must be in LD_LIBRARY_PATH, got: {ld_dirs}" + assert str(install_dir) in ld_dirs + + +def io_bytes(data: bytes): + return io.BytesIO(data) + + +def add_bytes_to_tar( + archive: tarfile.TarFile, name: str, data: bytes, *, mode: int = 0o644 +) -> None: + info = tarfile.TarInfo(name) + info.size = len(data) + info.mode = mode + archive.addfile(info, io_bytes(data)) + + +def add_symlink_to_tar(archive: tarfile.TarFile, name: str, target: str) -> None: + info = tarfile.TarInfo(name) + info.type = tarfile.SYMTYPE + info.linkname = target + archive.addfile(info) diff --git a/tests/studio/install/test_pr4562_bugfixes.py b/tests/studio/install/test_pr4562_bugfixes.py new file mode 100644 index 
000000000..9b8c6219d --- /dev/null +++ b/tests/studio/install/test_pr4562_bugfixes.py @@ -0,0 +1,687 @@ +""" +Comprehensive tests for PR #4562 bug fixes. + +Tests cover: + - Bug 1: PS1 detached HEAD on re-run (fetch + checkout -B pattern) + - Bug 2: Source-build fallback ignores pinned tag (both .sh and .ps1) + - Bug 3: Unix fallback deletes install before checking prerequisites + - Bug 4: Linux LD_LIBRARY_PATH missing build/bin + - "latest" tag resolution fallback chain (Unsloth -> ggml-org -> raw) + - Cross-platform binary_env (Linux, macOS, Windows) + - Edge cases: malformed JSON, empty responses, env overrides + +Run: pytest tests/studio/install/test_pr4562_bugfixes.py -v +""" + +import importlib.util +import json +import os +import subprocess +import sys +import textwrap +from pathlib import Path +from unittest.mock import patch + +import pytest + +# --------------------------------------------------------------------------- +# Load the module under test (same pattern as existing test files) +# --------------------------------------------------------------------------- +PACKAGE_ROOT = Path(__file__).resolve().parents[3] +MODULE_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py" +SPEC = importlib.util.spec_from_file_location( + "studio_install_llama_prebuilt", MODULE_PATH +) +assert SPEC is not None and SPEC.loader is not None +MOD = importlib.util.module_from_spec(SPEC) +sys.modules[SPEC.name] = MOD +SPEC.loader.exec_module(MOD) + +binary_env = MOD.binary_env +HostInfo = MOD.HostInfo +resolve_requested_llama_tag = MOD.resolve_requested_llama_tag + +SETUP_SH = PACKAGE_ROOT / "studio" / "setup.sh" +SETUP_PS1 = PACKAGE_ROOT / "studio" / "setup.ps1" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- +def make_host(*, system: str) -> HostInfo: + """Create a HostInfo for the given OS.""" + return HostInfo( + system = system, + machine = 
"x86_64" if system != "Darwin" else "arm64", + is_windows = (system == "Windows"), + is_linux = (system == "Linux"), + is_macos = (system == "Darwin"), + is_x86_64 = (system != "Darwin"), + is_arm64 = (system == "Darwin"), + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + +BASH = "/bin/bash" + + +def run_bash(script: str, *, timeout: int = 10, env: dict | None = None) -> str: + """Run a bash script fragment and return its stdout.""" + run_env = os.environ.copy() + if env: + run_env.update(env) + result = subprocess.run( + [BASH, "-c", script], + capture_output = True, + text = True, + timeout = timeout, + env = run_env, + ) + return result.stdout.strip() + + +# ========================================================================= +# TEST GROUP A: binary_env across all platforms (Bug 4 + cross-platform) +# ========================================================================= +class TestBinaryEnvCrossPlatform: + """Test that binary_env returns correct library paths for all OSes.""" + + def test_linux_includes_binary_parent_in_ld_library_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + assert str(bin_dir) in ld_dirs, f"build/bin not in LD_LIBRARY_PATH: {ld_dirs}" + assert ( + str(install_dir) in ld_dirs + ), f"install_dir not in LD_LIBRARY_PATH: {ld_dirs}" + + def test_linux_binary_parent_comes_before_install_dir( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """build/bin should be searched before install_dir for 
.so files.""" + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + bin_idx = ld_dirs.index(str(bin_dir)) + install_idx = ld_dirs.index(str(install_dir)) + assert ( + bin_idx < install_idx + ), "binary_path.parent should come before install_dir" + + def test_linux_deduplicates_when_binary_parent_equals_install_dir( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """When binary is directly in install_dir, no duplicate entries.""" + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir(parents = True) + binary_path = install_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = [d for d in env["LD_LIBRARY_PATH"].split(os.pathsep) if d] + count = ld_dirs.count(str(install_dir)) + assert count == 1, f"install_dir appears {count} times in LD_LIBRARY_PATH" + + def test_linux_preserves_existing_ld_library_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + # Create real directories so dedupe_existing_dirs keeps them + custom_lib = tmp_path / "custom_lib" + other_lib = tmp_path / "other_lib" + custom_lib.mkdir() + other_lib.mkdir() + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + original = os.environ.get("LD_LIBRARY_PATH", "") + os.environ["LD_LIBRARY_PATH"] = 
f"{custom_lib}:{other_lib}" + try: + env = binary_env(binary_path, install_dir, host) + finally: + if original: + os.environ["LD_LIBRARY_PATH"] = original + else: + os.environ.pop("LD_LIBRARY_PATH", None) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + assert str(custom_lib.resolve()) in ld_dirs + assert str(other_lib.resolve()) in ld_dirs + + def test_windows_includes_binary_parent_in_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" / "Release" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server.exe" + binary_path.write_bytes(b"MZ") + + host = make_host(system = "Windows") + monkeypatch.setattr( + MOD, "windows_runtime_dirs_for_runtime_line", lambda _rt: [] + ) + + env = binary_env(binary_path, install_dir, host) + path_dirs = env["PATH"].split(os.pathsep) + assert str(bin_dir) in path_dirs, f"build/bin/Release not in PATH: {path_dirs}" + + def test_macos_sets_dyld_library_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir(parents = True) + bin_dir = install_dir / "build" / "bin" + binary_path = bin_dir / "llama-server" + binary_path.parent.mkdir(parents = True) + binary_path.write_bytes(b"fake") + + host = make_host(system = "Darwin") + monkeypatch.delenv("DYLD_LIBRARY_PATH", raising = False) + + env = binary_env(binary_path, install_dir, host) + dyld_parts = [p for p in env["DYLD_LIBRARY_PATH"].split(os.pathsep) if p] + assert ( + str(bin_dir) in dyld_parts + ), f"build/bin not in DYLD_LIBRARY_PATH: {dyld_parts}" + assert ( + str(install_dir) in dyld_parts + ), f"install_dir not in DYLD_LIBRARY_PATH: {dyld_parts}" + # binary_path.parent (build/bin) should come before install_dir + assert dyld_parts.index(str(bin_dir)) < dyld_parts.index(str(install_dir)) + + +# ========================================================================= +# TEST GROUP B: 
resolve_requested_llama_tag (Python function) +# ========================================================================= +class TestResolveRequestedLlamaTag: + def test_concrete_tag_passes_through(self): + assert resolve_requested_llama_tag("b8508") == "b8508" + + def test_none_resolves_to_latest(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(MOD, "latest_upstream_release_tag", lambda: "b9999") + assert resolve_requested_llama_tag(None) == "b9999" + + def test_latest_resolves_to_upstream(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(MOD, "latest_upstream_release_tag", lambda: "b1234") + assert resolve_requested_llama_tag("latest") == "b1234" + + def test_empty_string_resolves_to_latest(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(MOD, "latest_upstream_release_tag", lambda: "b5555") + assert resolve_requested_llama_tag("") == "b5555" + + +# ========================================================================= +# TEST GROUP C: setup.sh logic (bash subprocess tests) +# ========================================================================= +class TestSetupShLogic: + """Test setup.sh fragments via bash subprocess with controlled PATH.""" + + def test_cmake_missing_preserves_install(self, tmp_path: Path): + """Bug 3: When cmake is missing, rm -rf should NOT run.""" + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + marker = llama_dir / "marker.txt" + marker.write_text("existing") + + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + # Create mock git but NOT cmake + (mock_bin / "git").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "git").chmod(0o755) + + # Build PATH: mock_bin first, then system dirs WITHOUT cmake + safe_dirs = [str(mock_bin)] + for d in os.environ.get("PATH", "").split(":"): + if d and not os.path.isfile(os.path.join(d, "cmake")): + safe_dirs.append(d) + + script = textwrap.dedent(f"""\ + export LLAMA_CPP_DIR="{llama_dir}" + if ! 
command -v cmake &>/dev/null; then + echo "cmake_missing" + elif ! command -v git &>/dev/null; then + echo "git_missing" + else + rm -rf "$LLAMA_CPP_DIR" + echo "would_clone" + fi + """) + output = run_bash(script, env = {"PATH": ":".join(safe_dirs)}) + assert "cmake_missing" in output + assert marker.exists(), "Install dir was deleted despite cmake missing!" + + def test_git_missing_preserves_install(self, tmp_path: Path): + """Bug 3: When git is missing, rm -rf should NOT run.""" + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + marker = llama_dir / "marker.txt" + marker.write_text("existing") + + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + # Create mock cmake but NOT git + (mock_bin / "cmake").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "cmake").chmod(0o755) + + # Build PATH: mock_bin first, then system dirs WITHOUT git + safe_dirs = [str(mock_bin)] + for d in os.environ.get("PATH", "").split(":"): + if d and not os.path.isfile(os.path.join(d, "git")): + safe_dirs.append(d) + + script = textwrap.dedent(f"""\ + export LLAMA_CPP_DIR="{llama_dir}" + if ! command -v cmake &>/dev/null; then + echo "cmake_missing" + elif ! command -v git &>/dev/null; then + echo "git_missing" + else + rm -rf "$LLAMA_CPP_DIR" + echo "would_clone" + fi + """) + output = run_bash(script, env = {"PATH": ":".join(safe_dirs)}) + assert "git_missing" in output + assert marker.exists(), "Install dir was deleted despite git missing!" 
+ + def test_both_present_runs_rm_and_clone(self, tmp_path: Path): + """Bug 3: When both present, rm -rf runs before clone.""" + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + marker = llama_dir / "marker.txt" + marker.write_text("existing") + + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + (mock_bin / "cmake").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "cmake").chmod(0o755) + (mock_bin / "git").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "git").chmod(0o755) + + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + export LLAMA_CPP_DIR="{llama_dir}" + if ! command -v cmake &>/dev/null; then + echo "cmake_missing" + elif ! command -v git &>/dev/null; then + echo "git_missing" + else + rm -rf "$LLAMA_CPP_DIR" + echo "would_clone" + fi + """) + output = run_bash(script) + assert "would_clone" in output + assert not marker.exists(), "Install dir should have been deleted" + + def test_clone_uses_pinned_tag(self, tmp_path: Path): + """Bug 2: git clone should use --branch with the resolved tag.""" + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + log_file = tmp_path / "git_calls.log" + (mock_bin / "git").write_text(f'#!/bin/bash\necho "$*" >> {log_file}\nexit 0\n') + (mock_bin / "git").chmod(0o755) + + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + git clone --depth 1 --branch "b8508" https://github.com/ggml-org/llama.cpp.git /tmp/llama_test + """) + run_bash(script) + log = log_file.read_text() + assert "--branch b8508" in log, f"Expected --branch b8508 in: {log}" + + def test_fetch_checkout_b_pattern(self, tmp_path: Path): + """Bug 1: Re-run should use fetch + checkout -B, not pull + checkout FETCH_HEAD.""" + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + log_file = tmp_path / "git_calls.log" + (mock_bin / "git").write_text(f'#!/bin/bash\necho "$*" >> {log_file}\nexit 0\n') + (mock_bin / "git").chmod(0o755) + + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + (llama_dir / ".git").mkdir() 
+ + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + LlamaCppDir="{llama_dir}" + ResolvedLlamaTag="b8508" + if [ -d "$LlamaCppDir/.git" ]; then + git -C "$LlamaCppDir" fetch --depth 1 origin "$ResolvedLlamaTag" + if [ $? -ne 0 ]; then + echo "WARN: fetch failed" + else + git -C "$LlamaCppDir" checkout -B unsloth-llama-build FETCH_HEAD + fi + fi + """) + run_bash(script) + log = log_file.read_text() + assert "fetch --depth 1 origin b8508" in log + assert "checkout -B unsloth-llama-build FETCH_HEAD" in log + assert "pull" not in log, "Should use fetch, not pull" + + def test_fetch_failure_warns_not_aborts(self, tmp_path: Path): + """Bug 1: fetch failure should warn and continue, not set BuildOk=false.""" + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + (mock_bin / "git").write_text( + '#!/bin/bash\nif echo "$*" | grep -q fetch; then exit 1; fi\nexit 0\n' + ) + (mock_bin / "git").chmod(0o755) + + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + (llama_dir / ".git").mkdir() + + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + LlamaCppDir="{llama_dir}" + ResolvedLlamaTag="b8508" + BuildOk=true + if [ -d "$LlamaCppDir/.git" ]; then + git -C "$LlamaCppDir" fetch --depth 1 origin "$ResolvedLlamaTag" + if [ $? 
# =========================================================================
# TEST GROUP D: "latest" tag resolution (bash subprocess)
# =========================================================================
class TestLatestTagResolution:
    """Exercise the tag-resolution fallback chain.

    Order under test: Unsloth release API first, then the ggml-org release
    API, and finally the raw requested tag when both lookups fail.
    """

    RESOLVE_TEMPLATE = textwrap.dedent("""\
        export PATH="{mock_bin}:$PATH"
        _REQUESTED_LLAMA_TAG="{requested_tag}"
        _RESOLVED_LLAMA_TAG=""
        _RESOLVE_UPSTREAM_STATUS=1
        _HELPER_RELEASE_REPO="unslothai/llama.cpp"
        if [ "$_RESOLVE_UPSTREAM_STATUS" -ne 0 ] || [ -z "$_RESOLVED_LLAMA_TAG" ]; then
            if [ "$_REQUESTED_LLAMA_TAG" = "latest" ]; then
                _RESOLVED_LLAMA_TAG="$(curl -fsSL "https://api.github.com/repos/${{_HELPER_RELEASE_REPO}}/releases/latest" 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG=""
                if [ -z "$_RESOLVED_LLAMA_TAG" ]; then
                    _RESOLVED_LLAMA_TAG="$(curl -fsSL https://api.github.com/repos/ggml-org/llama.cpp/releases/latest 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG=""
                fi
            fi
            if [ -z "$_RESOLVED_LLAMA_TAG" ]; then
                _RESOLVED_LLAMA_TAG="$_REQUESTED_LLAMA_TAG"
            fi
        fi
        echo "$_RESOLVED_LLAMA_TAG"
        """)

    @staticmethod
    def _make_curl_mock(
        mock_bin: Path, unsloth_response: str | None, ggml_response: str | None
    ):
        """Write a fake ``curl`` answering per-repo with canned JSON or failure.

        ``None`` for a response makes the mock exit non-zero for that repo.
        """
        script_lines = ["#!/bin/bash"]
        if unsloth_response is None:
            script_lines.append(
                'if echo "$*" | grep -q "unslothai/llama.cpp"; then exit 1; fi'
            )
        else:
            script_lines.append(
                f'if echo "$*" | grep -q "unslothai/llama.cpp"; then echo \'{unsloth_response}\'; exit 0; fi'
            )
        if ggml_response is None:
            script_lines.append('if echo "$*" | grep -q "ggml-org/llama.cpp"; then exit 1; fi')
        else:
            script_lines.append(
                f'if echo "$*" | grep -q "ggml-org/llama.cpp"; then echo \'{ggml_response}\'; exit 0; fi'
            )
        script_lines.append("exit 1")
        mock = mock_bin / "curl"
        mock.write_text("\n".join(script_lines) + "\n")
        mock.chmod(0o755)

    def _run_resolve(
        self,
        tmp_path: Path,
        requested_tag: str,
        unsloth_resp: str | None,
        ggml_resp: str | None,
    ) -> str:
        """Run the resolution snippet under the mocked curl and return its stdout."""
        sandbox_bin = tmp_path / "mock_bin"
        sandbox_bin.mkdir(exist_ok = True)
        self._make_curl_mock(sandbox_bin, unsloth_resp, ggml_resp)
        return run_bash(
            self.RESOLVE_TEMPLATE.format(
                mock_bin = sandbox_bin, requested_tag = requested_tag
            )
        )

    def test_unsloth_succeeds(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"tag_name":"b8508"}',
            ggml_resp = '{"tag_name":"b9000"}',
        )
        assert resolved == "b8508"

    def test_unsloth_fails_ggml_succeeds(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = None,
            ggml_resp = '{"tag_name":"b9000"}',
        )
        assert resolved == "b9000"

    def test_both_fail_raw_fallback(self, tmp_path: Path):
        # With neither API answering, the literal requested tag is echoed back.
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = None,
            ggml_resp = None,
        )
        assert resolved == "latest"

    def test_concrete_tag_passes_through(self, tmp_path: Path):
        # A non-"latest" tag bypasses both API lookups entirely.
        resolved = self._run_resolve(
            tmp_path,
            "b7777",
            unsloth_resp = '{"tag_name":"b8508"}',
            ggml_resp = '{"tag_name":"b9000"}',
        )
        assert resolved == "b7777"

    def test_unsloth_malformed_json_falls_through(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"bad_key":"no_tag"}',
            ggml_resp = '{"tag_name":"b9001"}',
        )
        assert resolved == "b9001"

    def test_both_malformed_json_raw_fallback(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"bad":"data"}',
            ggml_resp = '{"also":"bad"}',
        )
        assert resolved == "latest"

    def test_unsloth_empty_body_falls_through(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = "",
            ggml_resp = '{"tag_name":"b7000"}',
        )
        assert resolved == "b7000"

    def test_unsloth_empty_tag_name_falls_through(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"tag_name":""}',
            ggml_resp = '{"tag_name":"b6000"}',
        )
        assert resolved == "b6000"

    def test_env_override_unsloth_llama_tag(self):
        observed = run_bash(
            'echo "${UNSLOTH_LLAMA_TAG:-latest}"',
            env = {"UNSLOTH_LLAMA_TAG": "b1234"},
        )
        assert observed == "b1234"

    def test_env_unset_defaults_to_latest(self):
        clean_env = os.environ.copy()
        clean_env.pop("UNSLOTH_LLAMA_TAG", None)
        assert run_bash('echo "${UNSLOTH_LLAMA_TAG:-latest}"', env = clean_env) == "latest"

    def test_env_empty_defaults_to_latest(self):
        # ${VAR:-default} treats empty-but-set the same as unset.
        observed = run_bash(
            'echo "${UNSLOTH_LLAMA_TAG:-latest}"',
            env = {"UNSLOTH_LLAMA_TAG": ""},
        )
        assert observed == "latest"
# =========================================================================
# TEST GROUP E: Source file verification
# =========================================================================
class TestSourceCodePatterns:
    """Verify the actual source files contain the expected fix patterns."""

    def test_setup_sh_no_rm_before_prereq_check(self):
        """rm -rf must appear AFTER cmake/git checks, not before."""
        content = SETUP_SH.read_text()
        # Find the source-build block
        idx_else = content.find("# Check prerequisites")
        assert idx_else != -1
        block = content[idx_else:]
        # rm -rf should appear after the cmake/git checks
        idx_cmake = block.find("command -v cmake")
        idx_git = block.find("command -v git")
        idx_rm = block.find("rm -rf")
        # str.find returns -1 when a marker is missing; without these guards a
        # missing cmake/git check would let the ordering asserts pass vacuously
        # (any idx_rm >= 0 is greater than -1).
        assert idx_cmake != -1, "cmake prerequisite check not found"
        assert idx_git != -1, "git prerequisite check not found"
        assert idx_rm != -1, "rm -rf not found in source-build block"
        assert idx_rm > idx_cmake, "rm -rf should come after cmake check"
        assert idx_rm > idx_git, "rm -rf should come after git check"

    def test_setup_sh_clone_uses_branch_tag(self):
        """git clone in source-build should use --branch via _CLONE_BRANCH_ARGS."""
        content = SETUP_SH.read_text()
        # The clone line should use _CLONE_BRANCH_ARGS (which conditionally includes --branch)
        assert (
            "_CLONE_BRANCH_ARGS" in content
        ), "Clone should use _CLONE_BRANCH_ARGS array"
        assert (
            '--branch "$_RESOLVED_LLAMA_TAG"' in content
        ), "_CLONE_BRANCH_ARGS should be set to --branch $_RESOLVED_LLAMA_TAG"
        # Verify the guard: --branch is only used when tag is not "latest"
        assert (
            '_RESOLVED_LLAMA_TAG" != "latest"' in content
        ), "Should guard against literal 'latest' tag"

    def test_setup_sh_latest_resolution_queries_unsloth_first(self):
        """The Unsloth repo should be queried before ggml-org."""
        content = SETUP_SH.read_text()
        idx_unsloth = content.find("_HELPER_RELEASE_REPO}/releases/latest")
        idx_ggml = content.find("ggml-org/llama.cpp/releases/latest")
        assert idx_unsloth != -1, "Unsloth API query not found"
        assert idx_ggml != -1, "ggml-org API query not found"
        assert idx_unsloth < idx_ggml, "Unsloth should be queried before ggml-org"

    def test_setup_ps1_uses_checkout_b(self):
        """PS1 should use checkout -B, not checkout --force FETCH_HEAD."""
        content = SETUP_PS1.read_text()
        assert "checkout -B unsloth-llama-build" in content
        assert "checkout --force FETCH_HEAD" not in content

    def test_setup_ps1_clone_uses_branch_tag(self):
        """PS1 clone should use --branch with the resolved tag."""
        content = SETUP_PS1.read_text()
        assert "--branch" in content and "$ResolvedLlamaTag" in content
        # The old commented-out line should be gone
        assert "# git clone --depth 1 --branch" not in content

    def test_setup_ps1_no_git_pull(self):
        """PS1 should use fetch, not pull (which fails in detached HEAD)."""
        content = SETUP_PS1.read_text()
        # In the source-build section, there should be no "git pull"
        # (git pull is only valid on a branch)
        lines = content.splitlines()
        for i, line in enumerate(lines):
            stripped = line.strip()
            if "git pull" in stripped and not stripped.startswith("#"):
                # Check context -- should not be in the llama.cpp build section
                # Allow git pull in other contexts
                context = "\n".join(lines[max(0, i - 5) : i + 5])
                if "LlamaCppDir" in context:
                    pytest.fail(
                        f"Found 'git pull' in llama.cpp build section at line {i+1}"
                    )

    def test_setup_ps1_latest_resolution_queries_unsloth_first(self):
        """PS1 should query Unsloth repo before ggml-org."""
        content = SETUP_PS1.read_text()
        idx_unsloth = content.find("$HelperReleaseRepo/releases/latest")
        idx_ggml = content.find("ggml-org/llama.cpp/releases/latest")
        assert idx_unsloth != -1, "Unsloth API query not found in PS1"
        assert idx_ggml != -1, "ggml-org API query not found in PS1"
        assert idx_unsloth < idx_ggml, "Unsloth should be queried before ggml-org"

    def test_binary_env_linux_has_binary_parent(self):
        """The Linux branch of binary_env should include binary_path.parent."""
        content = MODULE_PATH.read_text()
        # Find the binary_env function; scope ends at the next top-level def.
        in_func = False
        in_linux = False
        found = False
        for line in content.splitlines():
            if "def binary_env(" in line:
                in_func = True
            elif in_func and line and not line[0].isspace() and "def " in line:
                break
            if in_func and "host.is_linux" in line:
                in_linux = True
            if in_linux and "binary_path.parent" in line:
                found = True
                break
        assert found, "binary_path.parent not found in Linux branch of binary_env"
"""Tests for binary selection logic in install_llama_prebuilt.py.

Covers: normalize_compute_cap, normalize_compute_caps, parse_cuda_visible_devices,
supports_explicit_visible_device_matching, select_visible_gpu_rows,
compatible_linux_runtime_lines, pick_windows_cuda_runtime,
compatible_windows_runtime_lines, runtime_line_from_cuda_version,
apply_approved_hashes, linux_cuda_choice_from_release, windows_cuda_attempts,
resolve_upstream_asset_choice.

No GPU, no network, no torch required -- all I/O is monkeypatched.
"""

import importlib.util
import sys
from pathlib import Path

import pytest


# Load the installer script as a module by path; it lives under studio/ and is
# not an importable package member.
PACKAGE_ROOT = Path(__file__).resolve().parents[3]
MODULE_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py"
SPEC = importlib.util.spec_from_file_location(
    "studio_install_llama_prebuilt", MODULE_PATH
)
assert SPEC is not None and SPEC.loader is not None
INSTALL_LLAMA_PREBUILT = importlib.util.module_from_spec(SPEC)
sys.modules[SPEC.name] = INSTALL_LLAMA_PREBUILT
SPEC.loader.exec_module(INSTALL_LLAMA_PREBUILT)

HostInfo = INSTALL_LLAMA_PREBUILT.HostInfo
AssetChoice = INSTALL_LLAMA_PREBUILT.AssetChoice
PublishedLlamaArtifact = INSTALL_LLAMA_PREBUILT.PublishedLlamaArtifact
PublishedReleaseBundle = INSTALL_LLAMA_PREBUILT.PublishedReleaseBundle
ApprovedArtifactHash = INSTALL_LLAMA_PREBUILT.ApprovedArtifactHash
ApprovedReleaseChecksums = INSTALL_LLAMA_PREBUILT.ApprovedReleaseChecksums
PrebuiltFallback = INSTALL_LLAMA_PREBUILT.PrebuiltFallback
LinuxCudaSelection = INSTALL_LLAMA_PREBUILT.LinuxCudaSelection
UPSTREAM_REPO = INSTALL_LLAMA_PREBUILT.UPSTREAM_REPO

normalize_compute_cap = INSTALL_LLAMA_PREBUILT.normalize_compute_cap
normalize_compute_caps = INSTALL_LLAMA_PREBUILT.normalize_compute_caps
parse_cuda_visible_devices = INSTALL_LLAMA_PREBUILT.parse_cuda_visible_devices
supports_explicit_visible_device_matching = (
    INSTALL_LLAMA_PREBUILT.supports_explicit_visible_device_matching
)
select_visible_gpu_rows = INSTALL_LLAMA_PREBUILT.select_visible_gpu_rows
compatible_linux_runtime_lines = (
    INSTALL_LLAMA_PREBUILT.compatible_linux_runtime_lines
)
pick_windows_cuda_runtime = INSTALL_LLAMA_PREBUILT.pick_windows_cuda_runtime
compatible_windows_runtime_lines = (
    INSTALL_LLAMA_PREBUILT.compatible_windows_runtime_lines
)
runtime_line_from_cuda_version = INSTALL_LLAMA_PREBUILT.runtime_line_from_cuda_version
apply_approved_hashes = INSTALL_LLAMA_PREBUILT.apply_approved_hashes
linux_cuda_choice_from_release = INSTALL_LLAMA_PREBUILT.linux_cuda_choice_from_release
windows_cuda_attempts = INSTALL_LLAMA_PREBUILT.windows_cuda_attempts
resolve_upstream_asset_choice = INSTALL_LLAMA_PREBUILT.resolve_upstream_asset_choice


# ---------------------------------------------------------------------------
# Helper factories
# ---------------------------------------------------------------------------


def make_host(**overrides):
    """Return a HostInfo for a CUDA-capable Linux/x86_64 box, overridable per test."""
    system = overrides.pop("system", "Linux")
    machine = overrides.pop("machine", "x86_64")
    machine_key = machine.lower()
    fields = {
        "system": system,
        "machine": machine,
        "is_linux": system == "Linux",
        "is_windows": system == "Windows",
        "is_macos": system == "Darwin",
        "is_x86_64": machine_key in {"x86_64", "amd64"},
        "is_arm64": machine_key in {"arm64", "aarch64"},
        "nvidia_smi": "/usr/bin/nvidia-smi",
        "driver_cuda_version": (12, 8),
        "compute_caps": ["86"],
        "visible_cuda_devices": None,
        "has_physical_nvidia": True,
        "has_usable_nvidia": True,
    }
    fields.update(overrides)
    return HostInfo(**fields)


def make_artifact(asset_name, **overrides):
    """Return a targeted cuda12 Linux PublishedLlamaArtifact, overridable per test."""
    fields = {
        "asset_name": asset_name,
        "install_kind": "linux-cuda",
        "runtime_line": "cuda12",
        "coverage_class": "targeted",
        "supported_sms": ["75", "80", "86", "89", "90"],
        "min_sm": 75,
        "max_sm": 90,
        "bundle_profile": "cuda12-newer",
        "rank": 100,
    }
    fields.update(overrides)
    return PublishedLlamaArtifact(**fields)


def make_release(artifacts, **overrides):
    """Return a PublishedReleaseBundle whose asset map covers *artifacts*."""
    fields = {
        "repo": "unslothai/llama.cpp",
        "release_tag": "v1.0",
        "upstream_tag": "b8508",
        "assets": {
            a.asset_name: f"https://example.com/{a.asset_name}" for a in artifacts
        },
        "manifest_asset_name": "llama-prebuilt-manifest.json",
        "artifacts": artifacts,
        "selection_log": [],
    }
    fields.update(overrides)
    return PublishedReleaseBundle(**fields)


def make_checksums(asset_names):
    """Approve every asset in *asset_names* with a dummy all-'a' sha256."""
    approved = {}
    for name in asset_names:
        approved[name] = ApprovedArtifactHash(
            asset_name = name,
            sha256 = "a" * 64,
            repo = "unslothai/llama.cpp",
            kind = "prebuilt",
        )
    return ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "v1.0",
        upstream_tag = "b8508",
        source_commit = None,
        artifacts = approved,
    )


def mock_linux_runtime(monkeypatch, lines):
    """Force detected_linux_runtime_lines() to report *lines* with stub dirs."""
    stub_dirs = {line: ["/usr/lib/stub"] for line in lines}
    monkeypatch.setattr(
        INSTALL_LLAMA_PREBUILT,
        "detected_linux_runtime_lines",
        lambda: (list(lines), dict(stub_dirs)),
    )


def mock_windows_runtime(monkeypatch, lines):
    """Force detected_windows_runtime_lines() to report *lines* with stub dirs."""
    stub_dirs = {line: ["C:\\Windows\\System32"] for line in lines}
    monkeypatch.setattr(
        INSTALL_LLAMA_PREBUILT,
        "detected_windows_runtime_lines",
        lambda: (list(lines), dict(stub_dirs)),
    )
# ===========================================================================
# A. normalize_compute_cap
# ===========================================================================


class TestNormalizeComputeCap:
    """normalize_compute_cap collapses 'major.minor' strings into SM codes."""

    def test_dotted_86(self):
        assert "86" == normalize_compute_cap("8.6")

    def test_dotted_leading_zero(self):
        # Leading zeros in either part are dropped.
        assert "75" == normalize_compute_cap("07.05")

    def test_already_normalized(self):
        assert "75" == normalize_compute_cap("75")

    def test_int_input(self):
        assert "86" == normalize_compute_cap(86)

    def test_empty_string(self):
        assert normalize_compute_cap("") is None

    def test_whitespace(self):
        assert normalize_compute_cap(" ") is None

    def test_non_numeric(self):
        assert normalize_compute_cap("x.y") is None

    def test_triple_part(self):
        # Three dotted components is not a valid compute capability.
        assert normalize_compute_cap("8.6.0") is None

    def test_zero_minor(self):
        assert "90" == normalize_compute_cap("9.0")


# ===========================================================================
# B. normalize_compute_caps
# ===========================================================================


class TestNormalizeComputeCaps:
    """normalize_compute_caps dedupes, sorts numerically, and drops junk."""

    def test_deduplication(self):
        normalized = normalize_compute_caps(["8.6", "86", "8.6"])
        assert normalized == ["86"]

    def test_numeric_sort(self):
        normalized = normalize_compute_caps(["9.0", "7.5", "8.6"])
        assert normalized == ["75", "86", "90"]

    def test_drops_invalid(self):
        normalized = normalize_compute_caps(["8.6", "bad", "", "7.5"])
        assert normalized == ["75", "86"]

    def test_empty_input(self):
        assert normalize_compute_caps([]) == []
# ===========================================================================
# C. parse_cuda_visible_devices
# ===========================================================================


class TestParseCudaVisibleDevices:
    """Parsing of the CUDA_VISIBLE_DEVICES environment value."""

    def test_none(self):
        # Unset variable: no filtering requested at all.
        assert parse_cuda_visible_devices(None) is None

    def test_empty(self):
        assert [] == parse_cuda_visible_devices("")

    def test_minus_one(self):
        # "-1" is the conventional "hide all GPUs" value.
        assert [] == parse_cuda_visible_devices("-1")

    def test_single(self):
        assert ["0"] == parse_cuda_visible_devices("0")

    def test_multi(self):
        assert ["0", "1", "2"] == parse_cuda_visible_devices("0,1,2")

    def test_whitespace_stripped(self):
        assert ["0", "1"] == parse_cuda_visible_devices(" 0 , 1 ")


# ===========================================================================
# D. supports_explicit_visible_device_matching
# ===========================================================================


class TestSupportsExplicitVisibleDeviceMatching:
    """Only plain indices or GPU-UUID tokens allow explicit matching."""

    def test_all_digits(self):
        assert supports_explicit_visible_device_matching(["0", "1", "2"]) is True

    def test_gpu_prefix(self):
        assert supports_explicit_visible_device_matching(["GPU-abc123"]) is True

    def test_none(self):
        assert supports_explicit_visible_device_matching(None) is False

    def test_empty(self):
        assert supports_explicit_visible_device_matching([]) is False

    def test_mixed_invalid(self):
        # One unmatchable token (MIG-...) disables explicit matching entirely.
        assert supports_explicit_visible_device_matching(["0", "MIG-device"]) is False
# ===========================================================================
# E. select_visible_gpu_rows
# ===========================================================================


class TestSelectVisibleGpuRows:
    """Filtering of nvidia-smi rows by CUDA_VISIBLE_DEVICES tokens."""

    # (index, uuid, compute capability) as reported by nvidia-smi.
    ROWS = [
        ("0", "GPU-aaa", "8.6"),
        ("1", "GPU-bbb", "7.5"),
        ("2", "GPU-ccc", "8.9"),
    ]

    def test_none_returns_all(self):
        # No filter -> every row survives.
        assert select_visible_gpu_rows(self.ROWS, None) == list(self.ROWS)

    def test_empty_returns_empty(self):
        assert select_visible_gpu_rows(self.ROWS, []) == []

    def test_filter_by_index(self):
        selected = select_visible_gpu_rows(self.ROWS, ["0", "2"])
        assert selected == [self.ROWS[0], self.ROWS[2]]

    def test_filter_by_uuid_case_insensitive(self):
        selected = select_visible_gpu_rows(self.ROWS, ["gpu-bbb"])
        assert selected == [self.ROWS[1]]

    def test_dedup_same_device(self):
        # Repeating a token must not duplicate the matching row.
        selected = select_visible_gpu_rows(self.ROWS, ["0", "0"])
        assert selected == [self.ROWS[0]]

    def test_missing_token(self):
        assert select_visible_gpu_rows(self.ROWS, ["99"]) == []


# ===========================================================================
# F. compatible_linux_runtime_lines
# ===========================================================================


class TestCompatibleLinuxRuntimeLines:
    """Driver version gates which CUDA runtime lines are usable on Linux."""

    def _lines_for(self, driver):
        return compatible_linux_runtime_lines(
            make_host(driver_cuda_version = driver)
        )

    def test_no_driver(self):
        assert self._lines_for(None) == []

    def test_driver_11_8(self):
        # CUDA 11 drivers cannot host any supported runtime line.
        assert self._lines_for((11, 8)) == []

    def test_driver_12_4(self):
        assert self._lines_for((12, 4)) == ["cuda12"]

    def test_driver_13_0(self):
        # A CUDA 13 driver can run both lines, newest first.
        assert self._lines_for((13, 0)) == ["cuda13", "cuda12"]
# ===========================================================================
# G. pick_windows_cuda_runtime + compatible_windows_runtime_lines
# ===========================================================================


class TestPickWindowsCudaRuntime:
    """Driver-version to win-cuda runtime mapping."""

    def _pick(self, driver):
        return pick_windows_cuda_runtime(make_host(driver_cuda_version = driver))

    def test_no_driver(self):
        assert self._pick(None) is None

    def test_below_threshold(self):
        # 12.4 is the minimum supported driver CUDA version.
        assert self._pick((12, 3)) is None

    def test_driver_12_4(self):
        assert self._pick((12, 4)) == "12.4"

    def test_driver_13_1(self):
        assert self._pick((13, 1)) == "13.1"


class TestCompatibleWindowsRuntimeLines:
    """Driver version gates runtime lines on Windows as well."""

    def _lines_for(self, driver):
        return compatible_windows_runtime_lines(
            make_host(driver_cuda_version = driver)
        )

    def test_no_driver(self):
        assert self._lines_for(None) == []

    def test_driver_12_4(self):
        assert self._lines_for((12, 4)) == ["cuda12"]

    def test_driver_13_1(self):
        assert self._lines_for((13, 1)) == ["cuda13", "cuda12"]


# ===========================================================================
# H. runtime_line_from_cuda_version
# ===========================================================================


class TestRuntimeLineFromCudaVersion:
    """String CUDA versions map onto cudaNN runtime-line labels."""

    def test_cuda_12(self):
        assert "cuda12" == runtime_line_from_cuda_version("12.6")

    def test_cuda_13(self):
        assert "cuda13" == runtime_line_from_cuda_version("13.0")

    def test_cuda_11(self):
        # CUDA 11 has no supported runtime line.
        assert runtime_line_from_cuda_version("11.8") is None

    def test_none(self):
        assert runtime_line_from_cuda_version(None) is None

    def test_empty(self):
        assert runtime_line_from_cuda_version("") is None
# ===========================================================================
# I. apply_approved_hashes
# ===========================================================================


class TestApplyApprovedHashes:
    """apply_approved_hashes keeps only choices that have an approved sha256."""

    def _choice(self, name):
        """Build a minimal AssetChoice for *name*."""
        url = f"https://x/{name}"
        return AssetChoice(
            repo = "test",
            tag = "v1",
            name = name,
            url = url,
            source_label = "test",
        )

    def test_both_approved(self):
        choices = [self._choice("a.tar.gz"), self._choice("b.tar.gz")]
        checksums = make_checksums(["a.tar.gz", "b.tar.gz"])
        approved = apply_approved_hashes(choices, checksums)
        assert len(approved) == 2
        # Every surviving choice carries the approved digest.
        assert all(choice.expected_sha256 == "a" * 64 for choice in approved)

    def test_one_approved(self):
        choices = [self._choice("a.tar.gz"), self._choice("missing.tar.gz")]
        approved = apply_approved_hashes(choices, make_checksums(["a.tar.gz"]))
        assert len(approved) == 1
        assert approved[0].name == "a.tar.gz"

    def test_none_approved(self):
        checksums = make_checksums(["other.tar.gz"])
        with pytest.raises(PrebuiltFallback, match = "approved checksum"):
            apply_approved_hashes([self._choice("missing.tar.gz")], checksums)

    def test_empty_input(self):
        # No candidates at all is treated the same as none approved.
        with pytest.raises(PrebuiltFallback, match = "approved checksum"):
            apply_approved_hashes([], make_checksums(["a.tar.gz"]))
# ===========================================================================
# J. linux_cuda_choice_from_release -- core selection
# ===========================================================================


class TestLinuxCudaChoiceFromRelease:
    """End-to-end artifact selection behavior of linux_cuda_choice_from_release."""

    # --- Runtime line resolution ---

    def test_no_runtime_lines_detected(self, monkeypatch):
        mock_linux_runtime(monkeypatch, [])
        host = make_host(driver_cuda_version = (12, 8))
        release = make_release([make_artifact("bundle-cuda12.tar.gz")])
        assert linux_cuda_choice_from_release(host, release) is None

    def test_detected_lines_incompatible_with_driver(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda13"])
        host = make_host(driver_cuda_version = (12, 4))
        release = make_release(
            [make_artifact("bundle-cuda13.tar.gz", runtime_line = "cuda13")]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_driver_13_only_cuda12_detected(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(driver_cuda_version = (13, 0))
        release = make_release(
            [make_artifact("bundle-cuda12.tar.gz", runtime_line = "cuda12")]
        )
        selection = linux_cuda_choice_from_release(host, release)
        assert selection is not None
        assert selection.primary.runtime_line == "cuda12"

    def test_preferred_runtime_line_reorders(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda13", "cuda12"])
        host = make_host(driver_cuda_version = (13, 0))
        bundle12 = make_artifact("bundle-cuda12.tar.gz", runtime_line = "cuda12")
        bundle13 = make_artifact("bundle-cuda13.tar.gz", runtime_line = "cuda13")
        selection = linux_cuda_choice_from_release(
            host, make_release([bundle12, bundle13]), preferred_runtime_line = "cuda12"
        )
        assert selection is not None
        assert selection.primary.runtime_line == "cuda12"

    def test_preferred_runtime_line_unavailable(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(driver_cuda_version = (12, 8))
        release = make_release(
            [make_artifact("bundle-cuda12.tar.gz", runtime_line = "cuda12")]
        )
        selection = linux_cuda_choice_from_release(
            host, release, preferred_runtime_line = "cuda13"
        )
        assert selection is not None
        assert selection.primary.runtime_line == "cuda12"
        # The miss on the preferred line must be recorded in the log.
        assert any(
            "unavailable_on_host" in entry for entry in selection.selection_log
        )

    # --- SM matching ---

    def test_exact_sm_match(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["75", "86", "89"], min_sm = 75, max_sm = 89
        )
        selection = linux_cuda_choice_from_release(host, make_release([artifact]))
        assert selection is not None
        assert selection.primary.name == "bundle.tar.gz"

    def test_sm_not_in_supported_sms(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["75", "80", "89"], min_sm = 75, max_sm = 89
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    def test_sm_outside_min_range(self, monkeypatch):
        # SM listed but below min_sm: the range bound wins.
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["50"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["50", "75", "86"], min_sm = 75, max_sm = 90
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    def test_sm_outside_max_range(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["100"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["100", "75", "86"], min_sm = 75, max_sm = 90
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    def test_very_old_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["50"])
        release = make_release([make_artifact("bundle.tar.gz", min_sm = 75, max_sm = 90)])
        assert linux_cuda_choice_from_release(host, release) is None

    def test_very_new_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["100"])
        release = make_release([make_artifact("bundle.tar.gz", min_sm = 75, max_sm = 90)])
        assert linux_cuda_choice_from_release(host, release) is None

    # --- Unknown compute caps (empty list) ---

    def test_unknown_caps_only_portable(self, monkeypatch):
        # Without known caps, only a portable bundle is safe to pick.
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = [])
        targeted = make_artifact("targeted.tar.gz", coverage_class = "targeted")
        portable = make_artifact("portable.tar.gz", coverage_class = "portable")
        selection = linux_cuda_choice_from_release(
            host, make_release([targeted, portable])
        )
        assert selection is not None
        assert selection.primary.name == "portable.tar.gz"

    def test_unknown_caps_no_portable(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = [])
        release = make_release(
            [make_artifact("targeted.tar.gz", coverage_class = "targeted")]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    # --- Multi-GPU ---

    def test_multi_gpu_all_covered(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["75", "89"])
        artifact = make_artifact(
            "bundle.tar.gz",
            supported_sms = ["75", "80", "86", "89", "90"],
            min_sm = 75,
            max_sm = 90,
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is not None

    def test_multi_gpu_not_all_covered(self, monkeypatch):
        # Every GPU must be covered; one uncovered SM rejects the bundle.
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["50", "89"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["75", "89"], min_sm = 75, max_sm = 89
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    # --- Artifact selection priority ---

    def test_narrowest_sm_range_wins(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        wide = make_artifact(
            "wide.tar.gz",
            supported_sms = ["75", "86", "90"],
            min_sm = 75,
            max_sm = 90,
            rank = 100,
        )
        narrow = make_artifact(
            "narrow.tar.gz",
            supported_sms = ["80", "86", "89"],
            min_sm = 80,
            max_sm = 89,
            rank = 100,
        )
        selection = linux_cuda_choice_from_release(host, make_release([wide, narrow]))
        assert selection is not None
        assert selection.primary.name == "narrow.tar.gz"

    def test_range_tie_lower_rank_wins(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        high = make_artifact(
            "high.tar.gz",
            supported_sms = ["75", "86", "90"],
            min_sm = 75,
            max_sm = 90,
            rank = 200,
        )
        low = make_artifact(
            "low.tar.gz",
            supported_sms = ["75", "86", "90"],
            min_sm = 75,
            max_sm = 90,
            rank = 50,
        )
        selection = linux_cuda_choice_from_release(host, make_release([high, low]))
        assert selection is not None
        assert selection.primary.name == "low.tar.gz"

    def test_targeted_preferred_portable_fallback(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        targeted = make_artifact("targeted.tar.gz", coverage_class = "targeted", rank = 100)
        portable = make_artifact("portable.tar.gz", coverage_class = "portable", rank = 100)
        selection = linux_cuda_choice_from_release(
            host, make_release([targeted, portable])
        )
        assert selection is not None
        assert selection.primary.name == "targeted.tar.gz"
        # The portable bundle is kept as the second attempt.
        assert len(selection.attempts) == 2
        assert selection.attempts[1].name == "portable.tar.gz"

    # --- Edge cases ---

    def test_asset_missing_from_release_assets(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release([make_artifact("bundle.tar.gz")], assets = {})
        assert linux_cuda_choice_from_release(host, release) is None

    def test_artifact_empty_supported_sms(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release([make_artifact("bundle.tar.gz", supported_sms = [])])
        assert linux_cuda_choice_from_release(host, release) is None

    def test_artifact_missing_min_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release(
            [make_artifact("bundle.tar.gz", min_sm = None, max_sm = 90)]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_artifact_missing_max_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release(
            [make_artifact("bundle.tar.gz", min_sm = 75, max_sm = None)]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_no_linux_cuda_artifacts(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release(
            [make_artifact("bundle.tar.gz", install_kind = "windows-cuda")]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_empty_artifacts_list(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        assert linux_cuda_choice_from_release(host, make_release([])) is None
# ===========================================================================
# K. windows_cuda_attempts
# ===========================================================================


class TestWindowsCudaAttempts:
    """Attempt ordering produced by windows_cuda_attempts."""

    TAG = "b8508"

    def _upstream(self, *runtime_versions):
        """Build an upstream asset map with one win-cuda zip per runtime version."""
        asset_map = {}
        for version in runtime_versions:
            asset = f"llama-{self.TAG}-bin-win-cuda-{version}-x64.zip"
            asset_map[asset] = f"https://example.com/{asset}"
        return asset_map

    @staticmethod
    def _host(driver):
        """Windows/AMD64 host with the given driver CUDA version."""
        return make_host(
            system = "Windows", machine = "AMD64", driver_cuda_version = driver
        )

    def test_driver_12_4_no_dlls_fallback(self, monkeypatch):
        mock_windows_runtime(monkeypatch, [])
        attempts = windows_cuda_attempts(
            self._host((12, 4)), self.TAG, self._upstream("12.4"), None
        )
        assert len(attempts) == 1
        assert attempts[0].runtime_line == "cuda12"

    def test_driver_13_1_both_dlls(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda13", "cuda12"])
        attempts = windows_cuda_attempts(
            self._host((13, 1)), self.TAG, self._upstream("13.1", "12.4"), None
        )
        # Newest runtime line is attempted first.
        assert len(attempts) == 2
        assert attempts[0].runtime_line == "cuda13"
        assert attempts[1].runtime_line == "cuda12"

    def test_preferred_reorders(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda13", "cuda12"])
        attempts = windows_cuda_attempts(
            self._host((13, 1)), self.TAG, self._upstream("13.1", "12.4"), "cuda12"
        )
        assert len(attempts) == 2
        assert attempts[0].runtime_line == "cuda12"

    def test_preferred_unavailable(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda12"])
        attempts = windows_cuda_attempts(
            self._host((12, 4)), self.TAG, self._upstream("12.4"), "cuda13"
        )
        assert len(attempts) == 1
        assert attempts[0].runtime_line == "cuda12"

    def test_detected_incompatible_with_driver(self, monkeypatch):
        # Detected cuda13 DLLs are useless on a 12.4 driver; fall back to cuda12.
        mock_windows_runtime(monkeypatch, ["cuda13"])
        attempts = windows_cuda_attempts(
            self._host((12, 4)), self.TAG, self._upstream("12.4"), None
        )
        assert len(attempts) == 1
        assert attempts[0].runtime_line == "cuda12"

    def test_driver_too_old(self, monkeypatch):
        mock_windows_runtime(monkeypatch, [])
        attempts = windows_cuda_attempts(
            self._host((11, 8)), self.TAG, self._upstream("12.4"), None
        )
        assert attempts == []

    def test_asset_missing_from_upstream(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda12"])
        assert windows_cuda_attempts(self._host((12, 4)), self.TAG, {}, None) == []

    def test_both_assets_present(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda13", "cuda12"])
        attempts = windows_cuda_attempts(
            self._host((13, 1)), self.TAG, self._upstream("13.1", "12.4"), None
        )
        assert len(attempts) == 2
resolve_upstream_asset_choice
# ===========================================================================


class TestResolveUpstreamAssetChoice:
    """Platform routing performed by resolve_upstream_asset_choice()."""

    TAG = "b8508"

    def _mock_github_assets(self, monkeypatch, assets):
        # Serve a canned asset map instead of hitting the GitHub releases API.
        monkeypatch.setattr(
            INSTALL_LLAMA_PREBUILT,
            "github_release_assets",
            lambda repo, tag: assets,
        )

    @staticmethod
    def _gpu_less(**overrides):
        # Host with every NVIDIA signal switched off; platform fields may be
        # overridden per test (extra overrides are forwarded to make_host).
        kwargs = {
            "has_usable_nvidia": False,
            "nvidia_smi": None,
            "has_physical_nvidia": False,
        }
        kwargs.update(overrides)
        return make_host(**kwargs)

    def test_linux_x86_64_cpu(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-ubuntu-x64.tar.gz"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        choice = resolve_upstream_asset_choice(self._gpu_less(), self.TAG)
        assert (choice.install_kind, choice.name) == ("linux-cpu", name)

    def test_linux_cpu_missing(self, monkeypatch):
        self._mock_github_assets(monkeypatch, {})
        with pytest.raises(PrebuiltFallback, match="Linux CPU"):
            resolve_upstream_asset_choice(self._gpu_less(), self.TAG)

    def test_windows_x86_64_cpu(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-win-cpu-x64.zip"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        host = self._gpu_less(system="Windows", machine="AMD64")
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("windows-cpu", name)

    def test_windows_cpu_missing(self, monkeypatch):
        self._mock_github_assets(monkeypatch, {})
        host = self._gpu_less(system="Windows", machine="AMD64")
        with pytest.raises(PrebuiltFallback, match="Windows CPU"):
            resolve_upstream_asset_choice(host, self.TAG)

    def test_macos_arm64(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-macos-arm64.tar.gz"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        host = self._gpu_less(
            system="Darwin",
            machine="arm64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("macos-arm64", name)

    def test_macos_arm64_missing(self, monkeypatch):
        self._mock_github_assets(monkeypatch, {})
        host = self._gpu_less(
            system="Darwin",
            machine="arm64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        with pytest.raises(PrebuiltFallback, match="macOS arm64"):
            resolve_upstream_asset_choice(host, self.TAG)

    def test_macos_x86_64(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-macos-x64.tar.gz"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        host = self._gpu_less(
            system="Darwin",
            machine="x86_64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("macos-x64", name)

    def test_linux_aarch64(self, monkeypatch):
        # No prebuilt policy exists for this platform at all.
        self._mock_github_assets(monkeypatch, {})
        host = self._gpu_less(
            system="Linux",
            machine="aarch64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        with pytest.raises(
            PrebuiltFallback, match="no prebuilt policy exists for Linux aarch64"
        ):
            resolve_upstream_asset_choice(host, self.TAG)

    def test_windows_usable_nvidia_delegates(self, monkeypatch):
        # A usable NVIDIA GPU on Windows routes through the CUDA chooser.
        cuda_name = f"llama-{self.TAG}-bin-win-cuda-12.4-x64.zip"
        self._mock_github_assets(monkeypatch, {cuda_name: f"https://x/{cuda_name}"})
        mock_windows_runtime(monkeypatch, ["cuda12"])
        delegated = [
            AssetChoice(
                repo=UPSTREAM_REPO,
                tag=self.TAG,
                name=cuda_name,
                url=f"https://x/{cuda_name}",
                source_label="upstream",
                install_kind="windows-cuda",
                runtime_line="cuda12",
            )
        ]
        monkeypatch.setattr(
            INSTALL_LLAMA_PREBUILT,
            "resolve_windows_cuda_choices",
            lambda host, tag, assets: delegated,
        )
        host = make_host(
            system="Windows",
            machine="AMD64",
            driver_cuda_version=(12, 4),
            has_usable_nvidia=True,
        )
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("windows-cuda", cuda_name)