#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Cross platform llama.cpp prebuilt installer for Unsloth Studio"""

from __future__ import annotations

import argparse
import fnmatch
import hashlib
import json
import os
import platform
import random
import shutil
import site
import socket
import subprocess
import sys
import tarfile
import tempfile
import textwrap
import time
import urllib.error
import urllib.parse
import urllib.request
import zipfile
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable, Iterator

try:
    from filelock import FileLock, Timeout as FileLockTimeout
except ImportError:
    # filelock is optional; callers must cope with FileLock being None.
    FileLock = None
    FileLockTimeout = None


# Process exit codes understood by the calling installer.
EXIT_SUCCESS = 0
EXIT_FALLBACK = 2
EXIT_ERROR = 1

# Release tags / repos, overridable through the environment.
APPROVED_PREBUILT_LLAMA_TAG = "b8508"
DEFAULT_LLAMA_TAG = os.environ.get("UNSLOTH_LLAMA_TAG", APPROVED_PREBUILT_LLAMA_TAG)
DEFAULT_PUBLISHED_REPO = os.environ.get(
    "UNSLOTH_LLAMA_RELEASE_REPO", "unslothai/llama.cpp"
)
DEFAULT_PUBLISHED_TAG = os.environ.get("UNSLOTH_LLAMA_RELEASE_TAG")
DEFAULT_PUBLISHED_MANIFEST_ASSET = os.environ.get(
    "UNSLOTH_LLAMA_RELEASE_MANIFEST_ASSET", "llama-prebuilt-manifest.json"
)
DEFAULT_PUBLISHED_SHA256_ASSET = os.environ.get(
    "UNSLOTH_LLAMA_RELEASE_SHA256_ASSET", "llama-prebuilt-sha256.json"
)
UPSTREAM_REPO = "ggml-org/llama.cpp"
UPSTREAM_RELEASES_API = f"https://api.github.com/repos/{UPSTREAM_REPO}/releases/latest"

# Tiny GGUF model used to smoke-test an installed llama.cpp binary.
TEST_MODEL_URL = (
    "https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf"
)
TEST_MODEL_SHA256 = "270cba1bd5109f42d03350f60406024560464db173c0e387d91f0426d3bd256d"
VALIDATION_MODEL_CACHE_DIRNAME = ".cache"
VALIDATION_MODEL_CACHE_FILENAME = "stories260K.gguf"

# Install / network tuning knobs.
INSTALL_LOCK_TIMEOUT_SECONDS = 300
INSTALL_STAGING_ROOT_NAME = ".staging"
GITHUB_AUTH_HOSTS = {"api.github.com", "github.com"}
RETRYABLE_HTTP_STATUS = {408, 429, 500, 502, 503, 504}
HTTP_FETCH_ATTEMPTS = 4
HTTP_FETCH_BASE_DELAY_SECONDS = 0.75
SERVER_PORT_BIND_ATTEMPTS = 3
SERVER_BIND_RETRY_WINDOW_SECONDS = 5.0
TTY_PROGRESS_START_DELAY_SECONDS = 0.5


@dataclass
class HostInfo:
    """Snapshot of the local machine used to pick a prebuilt asset."""

    system: str
    machine: str
    is_windows: bool
    is_linux: bool
    is_macos: bool
    is_x86_64: bool
    is_arm64: bool
    nvidia_smi: str | None
    driver_cuda_version: tuple[int, int] | None
    compute_caps: list[str]
    visible_cuda_devices: str | None
    has_physical_nvidia: bool
    has_usable_nvidia: bool


@dataclass
class AssetChoice:
    """A concrete downloadable asset plus the metadata behind its selection."""

    repo: str
    tag: str
    name: str
    url: str
    source_label: str
    runtime_name: str | None = None
    runtime_url: str | None = None
    is_ready_bundle: bool = False
    install_kind: str = ""
    bundle_profile: str | None = None
    runtime_line: str | None = None
    coverage_class: str | None = None
    supported_sms: list[str] | None = None
    min_sm: int | None = None
    max_sm: int | None = None
    selection_log: list[str] | None = None
    expected_sha256: str | None = None


@dataclass(frozen=True)
class PublishedLlamaArtifact:
    """One artifact entry from the published release manifest."""

    asset_name: str
    install_kind: str
    runtime_line: str | None
    coverage_class: str | None
    supported_sms: list[str]
    min_sm: int | None
    max_sm: int | None
    bundle_profile: str | None
    rank: int


@dataclass
class PublishedReleaseBundle:
    """A published release together with its parsed manifest artifacts."""

    repo: str
    release_tag: str
    upstream_tag: str
    assets: dict[str, str]
    manifest_asset_name: str
    artifacts: list[PublishedLlamaArtifact]
    selection_log: list[str]


@dataclass
class LinuxCudaSelection:
    """Ordered Linux CUDA install attempts plus the selection trace."""

    attempts: list[AssetChoice]
    selection_log: list[str]

    @property
    def primary(self) -> AssetChoice:
        """First (preferred) attempt; a selection must never be empty."""
        if self.attempts:
            return self.attempts[0]
        raise RuntimeError("linux CUDA selection unexpectedly had no attempts")
@dataclass
class CudaRuntimePreference:
    """Preferred CUDA runtime line plus the log explaining the choice."""

    runtime_line: str | None
    selection_log: list[str]


@dataclass(frozen=True)
class ApprovedArtifactHash:
    """Approved sha256 pin for a single release artifact."""

    asset_name: str
    sha256: str
    repo: str | None
    kind: str | None


@dataclass
class ApprovedReleaseChecksums:
    """Checksums for every artifact of a pinned, approved release."""

    repo: str
    release_tag: str
    upstream_tag: str
    source_commit: str | None
    artifacts: dict[str, ApprovedArtifactHash]


class PrebuiltFallback(RuntimeError):
    """Raised when the prebuilt path cannot be used and a fallback should run."""


def log(message: str) -> None:
    """Print one namespaced status line."""
    print(f"[llama-prebuilt] {message}")


def log_lines(lines: Iterable[str]) -> None:
    """Log every entry of *lines* in order."""
    for entry in lines:
        log(entry)


def parsed_hostname(url: str | None) -> str | None:
    """Return the lowercased hostname of *url*, or None when unparseable."""
    if not url:
        return None
    try:
        host = urllib.parse.urlparse(url).hostname
    except Exception:
        return None
    return host.lower() if host else None


def should_send_github_auth(url: str | None) -> bool:
    """True when *url* targets a host allowed to receive our GitHub token."""
    return parsed_hostname(url) in GITHUB_AUTH_HOSTS


def auth_headers(url: str | None = None) -> dict[str, str]:
    """Base request headers; a bearer token is attached only for GitHub hosts."""
    headers = {
        "User-Agent": "unsloth-studio-llama-prebuilt",
    }
    token = os.environ.get("GH_TOKEN") or os.environ.get("GITHUB_TOKEN")
    if token and should_send_github_auth(url):
        headers["Authorization"] = f"Bearer {token}"
    return headers


def github_api_headers(url: str | None = None) -> dict[str, str]:
    """Headers for the GitHub REST API (JSON accept plus auth)."""
    return {"Accept": "application/vnd.github+json", **auth_headers(url)}


def is_github_api_url(url: str | None) -> bool:
    """True when *url* points at api.github.com."""
    return parsed_hostname(url) == "api.github.com"


def is_retryable_url_error(exc: Exception) -> bool:
    """Whether a fetch failure is transient enough to retry.

    Retryable: HTTP statuses in RETRYABLE_HTTP_STATUS, any other URLError
    (transport-level), and timeouts.
    """
    # HTTPError subclasses URLError, so it must be examined first.
    if isinstance(exc, urllib.error.HTTPError):
        return exc.code in RETRYABLE_HTTP_STATUS
    return isinstance(exc, (urllib.error.URLError, TimeoutError, socket.timeout))
def sleep_backoff(
    attempt: int, *, base_delay: float | None = None
) -> None:
    """Sleep with exponential backoff and a little jitter.

    Args:
        attempt: 1-based attempt number; the delay doubles per attempt.
        base_delay: Base delay override.  Defaults to
            HTTP_FETCH_BASE_DELAY_SECONDS, resolved at call time (the
            original captured the module constant at definition time,
            which froze the value and coupled the helper to import order).
    """
    if base_delay is None:
        base_delay = HTTP_FETCH_BASE_DELAY_SECONDS
    delay = base_delay * (2 ** max(attempt - 1, 0))
    delay += random.uniform(0.0, 0.2)
    time.sleep(delay)


def atomic_write_bytes(destination: Path, data: bytes) -> None:
    """Write *data* to *destination* atomically (temp file + os.replace).

    The temporary file is removed on failure so aborted writes do not
    accumulate next to the destination (the original leaked it when the
    write or the final replace raised).
    """
    destination.parent.mkdir(parents = True, exist_ok = True)
    tmp_path: Path | None = None
    try:
        with tempfile.NamedTemporaryFile(
            prefix = destination.name + ".tmp-",
            dir = destination.parent,
            delete = False,
        ) as handle:
            tmp_path = Path(handle.name)
            handle.write(data)
            handle.flush()
            os.fsync(handle.fileno())
        os.replace(tmp_path, destination)
    except BaseException:
        if tmp_path is not None:
            try:
                tmp_path.unlink(missing_ok = True)
            except OSError:
                pass
        raise


def atomic_replace_from_tempfile(tmp_path: Path, destination: Path) -> None:
    """Atomically move an already-written temp file into place."""
    destination.parent.mkdir(parents = True, exist_ok = True)
    os.replace(tmp_path, destination)


def source_archive_logical_name(upstream_tag: str) -> str:
    """Canonical local filename for the upstream source tarball of a tag."""
    return f"llama.cpp-source-{upstream_tag}.tar.gz"


def sha256_file(path: Path) -> str:
    """Hex sha256 of *path*, streamed in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def normalize_sha256_digest(value: str | None) -> str | None:
    """Normalize a sha256 string ("sha256:<hex>" or bare hex) to lowercase hex.

    Returns None for anything that is not exactly 64 hex characters.
    """
    if not isinstance(value, str) or not value:
        return None
    lowered = value.lower()
    if lowered.startswith("sha256:"):
        lowered = lowered.split(":", 1)[1]
    if len(lowered) != 64 or any(ch not in "0123456789abcdef" for ch in lowered):
        return None
    return lowered


def format_byte_count(num_bytes: float) -> str:
    """Human-readable byte count ("512 B", "1.5 KiB", ...)."""
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    value = float(num_bytes)
    for unit in units:
        if abs(value) < 1024.0 or unit == units[-1]:
            if unit == "B":
                return f"{int(value)} {unit}"
            return f"{value:.1f} {unit}"
        value /= 1024.0
    # Defensive fallback; the loop always returns at the final unit.
    return f"{num_bytes:.1f} B"
class DownloadProgress:
    """Progress reporter for downloads.

    On a TTY it redraws one status line in place (after a short start
    delay so instant downloads stay silent); otherwise it logs coarse
    milestones — 25% steps when the total size is known, 25 MiB steps
    (rate-limited) when it is not.
    """

    def __init__(self, label: str, total_bytes: int | None) -> None:
        self.label = label
        self.total_bytes = total_bytes if total_bytes and total_bytes > 0 else None
        self.start_time = time.monotonic()
        self.last_emit = 0.0
        term_ok = os.environ.get("TERM", "").lower() != "dumb"
        # Prefer stderr when it is a terminal, then stdout, else stderr.
        if sys.stderr.isatty():
            self.stream = sys.stderr
        elif sys.stdout.isatty():
            self.stream = sys.stdout
        else:
            self.stream = sys.stderr
        self.is_tty = term_ok and self.stream.isatty()
        self.completed = False
        self.last_milestone_percent = -1
        self.last_milestone_bytes = 0
        self.has_rendered_tty_progress = False

    def _render(self, downloaded_bytes: int, *, final: bool = False) -> str:
        """Format a single status line for the current byte count."""
        elapsed = max(time.monotonic() - self.start_time, 1e-6)
        speed_text = f"{format_byte_count(downloaded_bytes / elapsed)}/s"
        if self.total_bytes is not None:
            percent = min(100.0, (downloaded_bytes / self.total_bytes) * 100.0)
            return (
                f"{self.label}: {percent:5.1f}% "
                f"({format_byte_count(downloaded_bytes)}/{format_byte_count(self.total_bytes)}) "
                f"at {speed_text}"
            )
        # With an unknown total the final and intermediate lines coincide.
        return f"{self.label}: {format_byte_count(downloaded_bytes)} downloaded at {speed_text}"

    def update(self, downloaded_bytes: int) -> None:
        """Record progress and emit output according to the TTY policy."""
        now = time.monotonic()
        if self.is_tty:
            elapsed = now - self.start_time
            if not self.has_rendered_tty_progress:
                # Stay silent for downloads that complete before the start
                # delay has elapsed.
                if (
                    self.total_bytes is not None
                    and downloaded_bytes >= self.total_bytes
                ):
                    return
                if elapsed < TTY_PROGRESS_START_DELAY_SECONDS:
                    return
            min_interval = 0.2
            if (
                self.has_rendered_tty_progress
                and not self.completed
                and (now - self.last_emit) < min_interval
            ):
                return
            self.last_emit = now
            self.stream.write("\r\033[K" + self._render(downloaded_bytes))
            self.stream.flush()
            self.has_rendered_tty_progress = True
            return

        emit = False
        if self.total_bytes is not None:
            percent = int((downloaded_bytes * 100) / max(self.total_bytes, 1))
            milestone_percent = min((percent // 25) * 25, 100)
            # The 100% line is left to finish().
            if self.last_milestone_percent < milestone_percent < 100:
                self.last_milestone_percent = milestone_percent
                emit = True
        else:
            byte_step = 25 * 1024 * 1024
            if (
                downloaded_bytes - self.last_milestone_bytes >= byte_step
                and (now - self.last_emit) >= 5.0
            ):
                self.last_milestone_bytes = downloaded_bytes
                emit = True

        if not emit:
            return
        self.last_emit = now
        self.stream.write(self._render(downloaded_bytes) + "\n")
        self.stream.flush()

    def finish(self, downloaded_bytes: int) -> None:
        """Emit the final summary line (or clear the in-place TTY line)."""
        self.completed = True
        line = self._render(downloaded_bytes, final = True)
        if self.is_tty:
            if not self.has_rendered_tty_progress:
                return
            self.stream.write("\r\033[K")
        else:
            self.stream.write(line + "\n")
        self.stream.flush()


def download_label_from_url(url: str) -> str:
    """Last path component of *url*, falling back to the URL itself."""
    name = Path(urllib.parse.urlparse(url).path).name
    return name or url
def download_bytes(
    url: str,
    *,
    timeout: int = 120,
    attempts: int = HTTP_FETCH_ATTEMPTS,
    headers: dict[str, str] | None = None,
    progress_label: str | None = None,
) -> bytes:
    """Fetch *url* fully into memory, retrying transient failures."""
    last_exc: Exception | None = None
    for attempt in range(1, attempts + 1):
        try:
            request = urllib.request.Request(url, headers = headers or auth_headers(url))
            with urllib.request.urlopen(request, timeout = timeout) as response:
                content_length = response.headers.get("Content-Length")
                total_bytes: int | None = (
                    int(content_length)
                    if content_length and content_length.isdigit()
                    else None
                )
                progress = (
                    DownloadProgress(progress_label, total_bytes)
                    if progress_label
                    else None
                )
                buffer = bytearray()
                for chunk in iter(lambda: response.read(1024 * 1024), b""):
                    buffer.extend(chunk)
                    if progress is not None:
                        progress.update(len(buffer))
                if progress is not None:
                    progress.finish(len(buffer))
                return bytes(buffer)
        except Exception as exc:
            last_exc = exc
            if attempt >= attempts or not is_retryable_url_error(exc):
                raise
            log(f"fetch failed ({attempt}/{attempts}) for {url}: {exc}; retrying")
            sleep_backoff(attempt)
    assert last_exc is not None
    raise last_exc


def fetch_json(url: str) -> Any:
    """Fetch and parse JSON from *url*; RuntimeError for bad payloads."""
    request_headers = (
        github_api_headers(url) if is_github_api_url(url) else auth_headers(url)
    )
    data = download_bytes(url, timeout = 30, headers = request_headers)
    if not data:
        raise RuntimeError(f"downloaded empty JSON payload from {url}")
    try:
        payload = json.loads(data.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise RuntimeError(f"downloaded invalid JSON from {url}: {exc}") from exc
    if not isinstance(payload, (dict, list)):
        raise RuntimeError(
            f"downloaded unexpected JSON type from {url}: {type(payload).__name__}"
        )
    return payload


def download_file(url: str, destination: Path) -> None:
    """Download *url* to *destination* atomically, retrying transient failures.

    The payload is written to a sibling temp file first and swapped in via
    os.replace only after it was flushed, fsynced and verified non-empty.
    """
    destination.parent.mkdir(parents = True, exist_ok = True)
    last_exc: Exception | None = None
    for attempt in range(1, HTTP_FETCH_ATTEMPTS + 1):
        tmp_path: Path | None = None
        try:
            request = urllib.request.Request(url, headers = auth_headers(url))
            with tempfile.NamedTemporaryFile(
                prefix = destination.name + ".tmp-",
                dir = destination.parent,
                delete = False,
            ) as handle:
                tmp_path = Path(handle.name)
                with urllib.request.urlopen(request, timeout = 120) as response:
                    content_length = response.headers.get("Content-Length")
                    total_bytes: int | None = (
                        int(content_length)
                        if content_length and content_length.isdigit()
                        else None
                    )
                    progress = DownloadProgress(
                        f"Downloading {destination.name}", total_bytes
                    )
                    downloaded_bytes = 0
                    for chunk in iter(lambda: response.read(1024 * 1024), b""):
                        handle.write(chunk)
                        downloaded_bytes += len(chunk)
                        progress.update(downloaded_bytes)
                    progress.finish(downloaded_bytes)
                handle.flush()
                os.fsync(handle.fileno())
            if not tmp_path.exists() or tmp_path.stat().st_size == 0:
                raise RuntimeError(f"downloaded empty file from {url}")
            atomic_replace_from_tempfile(tmp_path, destination)
            return
        except Exception as exc:
            last_exc = exc
            if tmp_path is not None:
                try:
                    tmp_path.unlink(missing_ok = True)
                except Exception:
                    pass
            if attempt >= HTTP_FETCH_ATTEMPTS or not is_retryable_url_error(exc):
                raise
            log(
                f"download failed ({attempt}/{HTTP_FETCH_ATTEMPTS}) for {url}: {exc}; retrying"
            )
            sleep_backoff(attempt)
    assert last_exc is not None
    raise last_exc


def download_file_verified(
    url: str,
    destination: Path,
    *,
    expected_sha256: str,
    label: str,
) -> None:
    """Download *url* and require its sha256 to match, retrying once.

    Raises PrebuiltFallback for a missing/invalid pin or a persistent
    checksum mismatch.
    """
    normalized_expected = normalize_sha256_digest(expected_sha256)
    if not normalized_expected:
        raise PrebuiltFallback(f"{label} did not have a valid approved sha256")

    for attempt in range(1, 3):
        download_file(url, destination)
        digest = sha256_file(destination)
        if digest == normalized_expected:
            log(f"verified {label} sha256={digest}")
            return

        log(
            f"{label} checksum mismatch on attempt {attempt}/2: "
            f"expected={normalized_expected} actual={digest}"
        )
        destination.unlink(missing_ok = True)
        if attempt == 2:
            raise PrebuiltFallback(
                f"{label} checksum mismatch after retry: expected={normalized_expected} actual={digest}"
            )
        log(f"retrying {label} download after checksum mismatch")


def upstream_source_archive_urls(tag: str) -> list[str]:
    """Candidate URLs for the upstream source tarball of *tag*."""
    encoded_tag = urllib.parse.quote(tag, safe = "")
    return [
        f"https://codeload.github.com/{UPSTREAM_REPO}/tar.gz/refs/tags/{encoded_tag}",
        f"https://github.com/{UPSTREAM_REPO}/archive/refs/tags/{encoded_tag}.tar.gz",
    ]


def github_release_assets(repo: str, tag: str) -> dict[str, str]:
    """Asset-name -> download-URL map for the release *repo*@*tag*."""
    payload = fetch_json(
        f"https://api.github.com/repos/{repo}/releases/tags/{urllib.parse.quote(tag, safe = '')}"
    )
    if not isinstance(payload, dict):
        raise RuntimeError(f"unexpected release payload for {repo}@{tag}")
    return release_asset_map(payload)
def github_release(repo: str, tag: str) -> dict[str, Any]:
    """Full release metadata object for *repo*@*tag* from the GitHub API."""
    payload = fetch_json(
        f"https://api.github.com/repos/{repo}/releases/tags/{urllib.parse.quote(tag, safe = '')}"
    )
    if not isinstance(payload, dict):
        raise RuntimeError(f"unexpected release payload for {repo}@{tag}")
    return payload


def github_releases(repo: str, *, per_page: int = 100) -> list[dict[str, Any]]:
    """All releases of *repo*, paging through the GitHub API."""
    releases: list[dict[str, Any]] = []
    page = 1
    while True:
        payload = fetch_json(
            f"https://api.github.com/repos/{repo}/releases?per_page={per_page}&page={page}"
        )
        if not isinstance(payload, list):
            raise RuntimeError(f"unexpected releases payload for {repo}")
        releases.extend(item for item in payload if isinstance(item, dict))
        # A short page means we have reached the end.
        if len(payload) < per_page:
            break
        page += 1
    return releases


def latest_upstream_release_tag() -> str:
    """Tag name of the most recent upstream llama.cpp release."""
    payload = fetch_json(UPSTREAM_RELEASES_API)
    tag = payload.get("tag_name")
    if isinstance(tag, str) and tag:
        return tag
    raise RuntimeError(
        f"latest release tag was missing from {UPSTREAM_RELEASES_API}"
    )


def normalize_compute_cap(value: Any) -> str | None:
    """Normalize a compute capability ("8.6" or "86") to digit form ("86")."""
    raw = str(value).strip()
    if not raw:
        return None
    if "." not in raw:
        return str(int(raw)) if raw.isdigit() else None
    parts = raw.split(".", 1)
    if len(parts) != 2:
        return None
    major, minor = parts
    if major.isdigit() and minor.isdigit():
        return f"{int(major)}{int(minor)}"
    return None


def normalize_compute_caps(compute_caps: Iterable[str]) -> list[str]:
    """Normalized, de-duplicated compute caps, sorted numerically."""
    seen: set[str] = set()
    normalized: list[str] = []
    for raw in compute_caps:
        cap = normalize_compute_cap(raw)
        if cap is None or cap in seen:
            continue
        seen.add(cap)
        normalized.append(cap)
    normalized.sort(key = int)
    return normalized


def parse_cuda_visible_devices(value: str | None) -> list[str] | None:
    """Parse CUDA_VISIBLE_DEVICES.

    Returns None when unset, [] when nothing is visible ("" or "-1"),
    otherwise the stripped comma-separated tokens.
    """
    if value is None:
        return None
    raw = value.strip()
    if not raw or raw == "-1":
        return []
    return [token.strip() for token in raw.split(",") if token.strip()]


def supports_explicit_visible_device_matching(
    visible_devices: list[str] | None,
) -> bool:
    """True when every token is a bare index or a GPU-<uuid> reference."""
    if not visible_devices:
        return False
    return all(
        token.isdigit() or token.lower().startswith("gpu-")
        for token in visible_devices
    )
def select_visible_gpu_rows(
    gpu_rows: Iterable[tuple[str, str, str]],
    visible_devices: list[str] | None,
) -> list[tuple[str, str, str]]:
    """Filter (index, uuid, cap) GPU rows down to CUDA_VISIBLE_DEVICES.

    *visible_devices* follows parse_cuda_visible_devices semantics: None
    means no restriction, [] means nothing visible.  Tokens may be device
    indices, full UUIDs, or UUIDs without the "GPU-" prefix.  Selection
    order and de-duplication follow the visible_devices list.
    """
    rows = list(gpu_rows)
    if visible_devices is None:
        return rows
    if not visible_devices:
        return []

    by_index = {index: (index, uuid, cap) for index, uuid, cap in rows}
    by_uuid = {uuid.lower(): (index, uuid, cap) for index, uuid, cap in rows}
    selected: list[tuple[str, str, str]] = []
    seen_indices: set[str] = set()
    for token in visible_devices:
        row = by_index.get(token)
        if row is None:
            normalized_token = token.lower()
            row = by_uuid.get(normalized_token)
            # NOTE: the original repeated the identical by_uuid lookup for
            # tokens already starting with "gpu-"; that branch was a no-op
            # and has been removed.  Prefix-less tokens are also matched
            # against "gpu-<token>".
            if row is None and not normalized_token.startswith("gpu-"):
                row = by_uuid.get("gpu-" + normalized_token)
        if row is None:
            continue
        index = row[0]
        if index in seen_indices:
            continue
        seen_indices.add(index)
        selected.append(row)
    return selected


def dir_provides_exact_library(directory: str | Path, library: str) -> bool:
    """True when *directory* directly contains *library* (file or symlink)."""
    if not library:
        return False
    candidate = Path(directory) / library
    return candidate.exists() and (candidate.is_file() or candidate.is_symlink())


def linux_runtime_dirs_for_required_libraries(
    required_libraries: Iterable[str],
) -> list[str]:
    """Candidate Linux directories providing the given CUDA runtime libraries.

    Gathers directories from env overrides (CUDA_RUNTIME_LIB_DIR,
    LD_LIBRARY_PATH), CUDA toolkit roots, conventional system library
    paths, Python runtime dirs and ldconfig.  When *required_libraries*
    is non-empty, only directories providing at least one of them are
    returned, ordered by how many they provide.
    """
    required = [library for library in required_libraries if library]
    candidates: list[str | Path] = []

    env_dirs = os.environ.get("CUDA_RUNTIME_LIB_DIR", "")
    if env_dirs:
        candidates.extend(part for part in env_dirs.split(os.pathsep) if part)
    ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
    if ld_library_path:
        candidates.extend(part for part in ld_library_path.split(os.pathsep) if part)

    cuda_roots: list[Path] = []
    for name in ("CUDA_HOME", "CUDA_PATH", "CUDA_ROOT"):
        value = os.environ.get(name)
        if value:
            cuda_roots.append(Path(value))
    cuda_roots.extend(
        Path(path) for path in glob_paths("/usr/local/cuda", "/usr/local/cuda-*")
    )

    for root in cuda_roots:
        candidates.extend(
            [
                root / "lib",
                root / "lib64",
                root / "targets" / "x86_64-linux" / "lib",
            ]
        )

    candidates.extend(
        Path(path)
        for path in glob_paths(
            "/lib",
            "/lib64",
            "/usr/lib",
            "/usr/lib64",
            "/usr/local/lib",
            "/usr/local/lib64",
            "/lib/x86_64-linux-gnu",
            "/usr/lib/x86_64-linux-gnu",
        )
    )
    candidates.extend(
        Path(path)
        for path in glob_paths("/usr/local/lib/ollama/cuda_v*", "/usr/lib/wsl/lib")
    )
    candidates.extend(Path(path) for path in python_runtime_dirs())
    candidates.extend(Path(path) for path in ldconfig_runtime_dirs(required))

    resolved = dedupe_existing_dirs(candidates)
    if not required:
        return resolved

    # Rank surviving directories by how many required libraries each one
    # provides directly (exact filename, not glob).  The original also
    # bound an unused local `base = Path(directory)` here; removed.
    matched: list[tuple[int, str]] = []
    for directory in resolved:
        provided = sum(
            1 for library in required if dir_provides_exact_library(directory, library)
        )
        if provided:
            matched.append((provided, directory))

    matched.sort(key = lambda item: item[0], reverse = True)
    return [directory for _, directory in matched]
def detected_linux_runtime_lines() -> tuple[list[str], dict[str, list[str]]]:
    """Detect which CUDA runtime lines (cuda13/cuda12) exist locally.

    A line counts as detected only when every one of its required
    libraries is found in at least one candidate directory.  Returns the
    detected line names plus the matching directories per line.
    """
    line_requirements = {
        "cuda13": ["libcudart.so.13", "libcublas.so.13"],
        "cuda12": ["libcudart.so.12", "libcublas.so.12"],
    }
    detected: list[str] = []
    runtime_dirs: dict[str, list[str]] = {}
    for runtime_line, needed in line_requirements.items():
        candidate_dirs = linux_runtime_dirs_for_required_libraries(needed)
        per_library: dict[str, list[str]] = {}
        line_dirs: list[str] = []
        for library in needed:
            providers = [
                directory
                for directory in candidate_dirs
                if any(Path(directory).glob(f"{library}*"))
            ]
            if not providers:
                # One missing library disqualifies the whole runtime line.
                per_library = {}
                line_dirs = []
                break
            per_library[library] = providers
            for directory in providers:
                if directory not in line_dirs:
                    line_dirs.append(directory)
        if per_library:
            detected.append(runtime_line)
            runtime_dirs[runtime_line] = line_dirs
    return detected, runtime_dirs


def release_asset_map(release: dict[str, Any]) -> dict[str, str]:
    """Asset-name -> browser_download_url map from a GitHub release object.

    Entries missing a string name or download URL are skipped silently.
    """
    assets = release.get("assets")
    if not isinstance(assets, list):
        return {}
    return {
        asset["name"]: asset.get("browser_download_url", "")
        for asset in assets
        if isinstance(asset, dict)
        and isinstance(asset.get("name"), str)
        and isinstance(asset.get("browser_download_url"), str)
    }
def parse_published_artifact(raw: Any) -> PublishedLlamaArtifact | None:
    """Parse one manifest artifact entry.

    Raises ValueError (with the offending field named) for malformed
    entries; field validation order matches the manifest schema.
    """
    if not isinstance(raw, dict):
        raise ValueError("artifact entry was not an object")
    asset_name = raw.get("asset_name")
    install_kind = raw.get("install_kind")
    if not isinstance(asset_name, str) or not asset_name:
        raise ValueError("artifact.asset_name was missing or not a string")
    if not isinstance(install_kind, str) or not install_kind:
        raise ValueError(
            f"artifact {asset_name} install_kind was missing or not a string"
        )

    supported_sms_raw = raw.get("supported_sms", [])
    if not isinstance(supported_sms_raw, (list, tuple)):
        raise ValueError(f"artifact {asset_name} supported_sms must be a list or tuple")
    if any(not isinstance(value, (int, str)) for value in supported_sms_raw):
        raise ValueError(
            f"artifact {asset_name} supported_sms entries must be ints or strings"
        )
    supported_sms = normalize_compute_caps(supported_sms_raw)

    min_sm_raw = raw.get("min_sm")
    max_sm_raw = raw.get("max_sm")
    try:
        min_sm = int(min_sm_raw) if min_sm_raw is not None else None
        max_sm = int(max_sm_raw) if max_sm_raw is not None else None
    except (TypeError, ValueError) as exc:
        raise ValueError(
            f"artifact {asset_name} min_sm/max_sm were not integers"
        ) from exc
    runtime_line = raw.get("runtime_line")
    coverage_class = raw.get("coverage_class")
    bundle_profile = raw.get("bundle_profile")
    rank_raw = raw.get("rank", 1000)
    if runtime_line is not None and not isinstance(runtime_line, str):
        raise ValueError(f"artifact {asset_name} runtime_line was not a string")
    if coverage_class is not None and not isinstance(coverage_class, str):
        raise ValueError(f"artifact {asset_name} coverage_class was not a string")
    if bundle_profile is not None and not isinstance(bundle_profile, str):
        raise ValueError(f"artifact {asset_name} bundle_profile was not a string")
    try:
        rank = int(rank_raw)
    except (TypeError, ValueError) as exc:
        # Chain the cause, consistent with the min_sm/max_sm branch above
        # (the original dropped the original exception here).
        raise ValueError(f"artifact {asset_name} rank was not an integer") from exc
    return PublishedLlamaArtifact(
        asset_name = asset_name,
        install_kind = install_kind,
        runtime_line = runtime_line
        if isinstance(runtime_line, str) and runtime_line
        else None,
        coverage_class = coverage_class
        if isinstance(coverage_class, str) and coverage_class
        else None,
        supported_sms = supported_sms,
        min_sm = min_sm,
        max_sm = max_sm,
        bundle_profile = bundle_profile
        if isinstance(bundle_profile, str) and bundle_profile
        else None,
        rank = rank,
    )


def parse_published_release_bundle(
    repo: str, release: dict[str, Any]
) -> PublishedReleaseBundle | None:
    """Parse one GitHub release into a PublishedReleaseBundle.

    Returns None when the release does not carry a llama.cpp manifest;
    raises RuntimeError for a manifest that exists but is malformed.
    Individual malformed artifact entries are logged and skipped.
    """
    release_tag = release.get("tag_name")
    if not isinstance(release_tag, str) or not release_tag:
        return None

    assets = release_asset_map(release)
    manifest_url = assets.get(DEFAULT_PUBLISHED_MANIFEST_ASSET)
    if not manifest_url:
        return None

    # Mixed repos are filtered by an explicit release-side manifest rather than
    # by release tag or asset filename conventions.
    manifest_payload = fetch_json(manifest_url)
    if not isinstance(manifest_payload, dict):
        raise RuntimeError(
            f"published manifest {DEFAULT_PUBLISHED_MANIFEST_ASSET} was not a JSON object"
        )
    component = manifest_payload.get("component")
    upstream_tag = manifest_payload.get("upstream_tag")
    if component != "llama.cpp":
        return None
    if not isinstance(upstream_tag, str) or not upstream_tag:
        raise RuntimeError(
            f"published manifest {DEFAULT_PUBLISHED_MANIFEST_ASSET} in {repo}@{release_tag} omitted upstream_tag"
        )

    artifacts_payload = manifest_payload.get("artifacts")
    if not isinstance(artifacts_payload, list):
        raise RuntimeError(
            f"published manifest {DEFAULT_PUBLISHED_MANIFEST_ASSET} in {repo}@{release_tag} omitted artifacts"
        )

    artifacts: list[PublishedLlamaArtifact] = []
    for index, raw_artifact in enumerate(artifacts_payload):
        try:
            artifact = parse_published_artifact(raw_artifact)
        except ValueError as exc:
            log(
                f"published artifact ignored for {repo}@{release_tag} artifact[{index}]: {exc}"
            )
            continue
        if artifact is not None:
            artifacts.append(artifact)
    selection_log = [
        f"published_release: repo={repo}",
        f"published_release: tag={release_tag}",
        f"published_release: manifest={DEFAULT_PUBLISHED_MANIFEST_ASSET}",
        f"published_release: upstream_tag={upstream_tag}",
    ]
    return PublishedReleaseBundle(
        repo = repo,
        release_tag = release_tag,
        upstream_tag = upstream_tag,
        assets = assets,
        manifest_asset_name = DEFAULT_PUBLISHED_MANIFEST_ASSET,
        artifacts = artifacts,
        selection_log = selection_log,
    )
def parse_approved_release_checksums(
    repo: str,
    release_tag: str,
    payload: Any,
) -> ApprovedReleaseChecksums:
    """Validate and parse the pinned-release checksum JSON payload.

    Every structural problem raises RuntimeError naming the offending
    field; validation order matches the schema.
    """
    checksum_asset = DEFAULT_PUBLISHED_SHA256_ASSET
    if not isinstance(payload, dict):
        raise RuntimeError(
            f"published checksum asset {checksum_asset} was not a JSON object"
        )
    if payload.get("component") != "llama.cpp":
        raise RuntimeError(
            f"published checksum asset {checksum_asset} did not describe llama.cpp"
        )
    payload_release_tag = payload.get("release_tag")
    if not isinstance(payload_release_tag, str) or not payload_release_tag:
        raise RuntimeError(
            f"published checksum asset {checksum_asset} omitted release_tag"
        )
    if payload_release_tag != release_tag:
        raise RuntimeError(
            f"published checksum asset {checksum_asset} release_tag={payload_release_tag} "
            f"did not match pinned release tag {release_tag}"
        )
    upstream_tag = payload.get("upstream_tag")
    if not isinstance(upstream_tag, str) or not upstream_tag:
        raise RuntimeError(
            f"published checksum asset {checksum_asset} omitted upstream_tag"
        )
    artifacts_payload = payload.get("artifacts")
    if not isinstance(artifacts_payload, dict):
        raise RuntimeError(
            f"published checksum asset {checksum_asset} omitted artifacts"
        )

    artifacts: dict[str, ApprovedArtifactHash] = {}
    for asset_name, raw_entry in artifacts_payload.items():
        if not isinstance(asset_name, str) or not asset_name:
            raise RuntimeError(
                "published checksum asset used a non-string artifact key"
            )
        if not isinstance(raw_entry, dict):
            raise RuntimeError(
                f"published checksum entry for {asset_name} was not an object"
            )
        digest = normalize_sha256_digest(raw_entry.get("sha256"))
        if not digest:
            raise RuntimeError(
                f"published checksum entry for {asset_name} omitted a valid sha256"
            )
        repo_value = raw_entry.get("repo")
        kind_value = raw_entry.get("kind")
        artifacts[asset_name] = ApprovedArtifactHash(
            asset_name = asset_name,
            sha256 = digest,
            repo = repo_value if isinstance(repo_value, str) and repo_value else None,
            kind = kind_value if isinstance(kind_value, str) and kind_value else None,
        )

    source_commit = payload.get("source_commit")
    return ApprovedReleaseChecksums(
        repo = repo,
        release_tag = release_tag,
        upstream_tag = upstream_tag,
        source_commit = source_commit
        if isinstance(source_commit, str) and source_commit
        else None,
        artifacts = artifacts,
    )


def load_approved_release_checksums(
    repo: str, release_tag: str
) -> ApprovedReleaseChecksums:
    """Fetch and validate the checksum asset of the pinned release.

    Every failure mode is converted into PrebuiltFallback so callers can
    drop to the source-build path.
    """
    try:
        release = github_release(repo, release_tag)
    except Exception as exc:
        raise PrebuiltFallback(
            f"approved prebuilt release {repo}@{release_tag} was not available"
        ) from exc
    checksum_url = release_asset_map(release).get(DEFAULT_PUBLISHED_SHA256_ASSET)
    if not checksum_url:
        raise PrebuiltFallback(
            f"approved prebuilt release {repo}@{release_tag} did not expose {DEFAULT_PUBLISHED_SHA256_ASSET}"
        )
    try:
        payload = fetch_json(checksum_url)
        return parse_approved_release_checksums(repo, release_tag, payload)
    except PrebuiltFallback:
        raise
    except Exception as exc:
        raise PrebuiltFallback(
            f"approved checksum asset {DEFAULT_PUBLISHED_SHA256_ASSET} in {repo}@{release_tag} was invalid"
        ) from exc


def iter_published_release_bundles(
    repo: str, published_release_tag: str = ""
) -> Iterable[PublishedReleaseBundle]:
    """Yield parseable published bundles from *repo*.

    With an explicit tag only that release is considered; otherwise all
    non-draft, non-prerelease releases are scanned.  Releases with broken
    metadata are logged and skipped.
    """
    if published_release_tag:
        releases = [github_release(repo, published_release_tag)]
    else:
        releases = github_releases(repo)
    for release in releases:
        if not published_release_tag and (
            release.get("draft") or release.get("prerelease")
        ):
            continue
        try:
            bundle = parse_published_release_bundle(repo, release)
        except Exception as exc:
            release_tag = release.get("tag_name", "unknown")
            log(f"published release metadata ignored for {repo}@{release_tag}: {exc}")
            continue
        if bundle is not None:
            yield bundle
def iter_published_release_bundles(
    repo: str, published_release_tag: str = ""
) -> Iterable[PublishedReleaseBundle]:
    """Yield usable release bundles for *repo*, newest first.

    With an explicit *published_release_tag*, only that release is fetched
    (draft/prerelease filtering is skipped).  Releases whose metadata fails
    to parse are logged and ignored rather than aborting the scan.
    """
    if published_release_tag:
        releases = [github_release(repo, published_release_tag)]
    else:
        releases = github_releases(repo)
    for release in releases:
        skip_unstable = not published_release_tag
        if skip_unstable and (release.get("draft") or release.get("prerelease")):
            continue
        try:
            bundle = parse_published_release_bundle(repo, release)
        except Exception as exc:
            release_tag = release.get("tag_name", "unknown")
            log(f"published release metadata ignored for {repo}@{release_tag}: {exc}")
            continue
        if bundle is None:
            continue
        yield bundle


def linux_cuda_choice_from_release(
    host: HostInfo,
    release: PublishedReleaseBundle,
    preferred_runtime_line: str | None = None,
    selection_preamble: Iterable[str] = (),
) -> LinuxCudaSelection | None:
    """Pick ordered Linux CUDA download attempts from one published release.

    Intersects the CUDA runtime lines detected on disk with the lines the
    driver supports, optionally reorders them by the Torch preference, then
    for each line accepts artifacts whose SM coverage includes every detected
    compute capability (narrowest coverage first, portable build as a
    fallback).  Returns ``None`` when nothing is compatible; every decision
    is recorded in the selection log for diagnostics.
    """
    host_sms = normalize_compute_caps(host.compute_caps)
    detected_runtime_lines, runtime_dirs = detected_linux_runtime_lines()
    driver_runtime_lines = compatible_linux_runtime_lines(host)
    # A runtime line is usable only when its libraries are present AND the
    # driver reports compatibility with it.
    runtime_lines = [
        runtime_line
        for runtime_line in detected_runtime_lines
        if runtime_line in driver_runtime_lines
    ]
    ordered_runtime_lines = list(runtime_lines)
    selection_log = (
        list(release.selection_log)
        + list(selection_preamble)
        + [
            f"linux_cuda_selection: release={release.release_tag}",
            f"linux_cuda_selection: detected_sms={','.join(host_sms) if host_sms else 'unknown'}",
            "linux_cuda_selection: detected_runtime_lines="
            + (",".join(detected_runtime_lines) if detected_runtime_lines else "none"),
            "linux_cuda_selection: driver_runtime_lines="
            + (",".join(driver_runtime_lines) if driver_runtime_lines else "none"),
            "linux_cuda_selection: compatible_runtime_lines="
            + (",".join(runtime_lines) if runtime_lines else "none"),
        ]
    )
    for runtime_line in ("cuda13", "cuda12"):
        dirs_for_line = runtime_dirs.get(runtime_line)
        selection_log.append(
            "linux_cuda_selection: runtime_dirs "
            f"{runtime_line}="
            + (",".join(runtime_dirs.get(runtime_line, [])) if dirs_for_line else "none")
        )
    published_artifacts = [
        artifact
        for artifact in release.artifacts
        if artifact.install_kind == "linux-cuda"
    ]
    published_asset_names = sorted(
        artifact.asset_name for artifact in published_artifacts
    )
    selection_log.append(
        "linux_cuda_selection: published_assets="
        + (",".join(published_asset_names) if published_asset_names else "none")
    )

    if not host_sms:
        selection_log.append(
            "linux_cuda_selection: compute capability detection unavailable; prefer portable by runtime line"
        )
    if not runtime_lines:
        selection_log.append(
            "linux_cuda_selection: no Linux CUDA runtime line satisfied both runtime libraries and driver compatibility"
        )
        return None

    if preferred_runtime_line:
        if preferred_runtime_line in ordered_runtime_lines:
            # Move the Torch-preferred line to the front without dropping others.
            ordered_runtime_lines = [preferred_runtime_line] + [
                runtime_line
                for runtime_line in ordered_runtime_lines
                if runtime_line != preferred_runtime_line
            ]
            selection_log.append(
                "linux_cuda_selection: torch_preferred_runtime_line="
                f"{preferred_runtime_line} reordered_attempts={','.join(ordered_runtime_lines)}"
            )
        else:
            selection_log.append(
                "linux_cuda_selection: torch_preferred_runtime_line="
                f"{preferred_runtime_line} unavailable_on_host"
            )

    attempts: list[AssetChoice] = []
    seen_attempts: set[str] = set()

    def add_attempt(
        artifact: PublishedLlamaArtifact, asset_url: str, reason: str
    ) -> None:
        # Deduplicate by asset name; the first reason to pick an asset wins.
        asset_name = artifact.asset_name
        if asset_name in seen_attempts:
            return
        seen_attempts.add(asset_name)
        attempts.append(
            AssetChoice(
                repo=release.repo,
                tag=release.release_tag,
                name=asset_name,
                url=asset_url,
                source_label="published",
                is_ready_bundle=True,
                install_kind="linux-cuda",
                bundle_profile=artifact.bundle_profile,
                runtime_line=artifact.runtime_line,
                coverage_class=artifact.coverage_class,
                supported_sms=artifact.supported_sms,
                min_sm=artifact.min_sm,
                max_sm=artifact.max_sm,
                selection_log=list(selection_log)
                + [
                    "linux_cuda_selection: selected "
                    f"{asset_name} runtime_line={artifact.runtime_line} coverage_class={artifact.coverage_class} reason={reason}"
                ],
            )
        )

    for runtime_line in ordered_runtime_lines:
        coverage_candidates: list[tuple[PublishedLlamaArtifact, str]] = []
        portable_candidate: tuple[PublishedLlamaArtifact, str] | None = None
        for artifact in published_artifacts:
            if artifact.runtime_line != runtime_line:
                continue
            asset_name = artifact.asset_name
            asset_url = release.assets.get(asset_name)
            if not asset_url:
                selection_log.append(
                    f"linux_cuda_selection: reject {asset_name} missing asset"
                )
                continue
            if not host_sms and artifact.coverage_class != "portable":
                # Without known compute caps only portable builds are safe.
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    "reason=unknown_compute_caps_prefer_portable"
                )
                continue

            if not artifact.supported_sms:
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    "reason=artifact_missing_supported_sms"
                )
                continue
            if artifact.min_sm is None or artifact.max_sm is None:
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    "reason=artifact_missing_sm_bounds"
                )
                continue

            supported_sms = {str(value) for value in artifact.supported_sms}
            missing_sms = [sm for sm in host_sms if sm not in supported_sms]
            out_of_range_sms = [
                sm
                for sm in host_sms
                if not (artifact.min_sm <= int(sm) <= artifact.max_sm)
            ]
            reasons: list[str] = []
            if missing_sms:
                reasons.append(f"missing_sms={','.join(missing_sms)}")
            if out_of_range_sms:
                reasons.append(f"out_of_range_sms={','.join(out_of_range_sms)}")
            if reasons:
                selection_log.append(
                    "linux_cuda_selection: reject "
                    f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                    f"coverage={artifact.min_sm}-{artifact.max_sm} supported={','.join(artifact.supported_sms)} "
                    f"reasons={' '.join(reasons)}"
                )
                continue

            selection_log.append(
                "linux_cuda_selection: accept "
                f"{asset_name} runtime_line={runtime_line} coverage_class={artifact.coverage_class} "
                f"coverage={artifact.min_sm}-{artifact.max_sm} supported={','.join(artifact.supported_sms)}"
            )
            if artifact.coverage_class == "portable":
                portable_candidate = (artifact, asset_url)
            else:
                coverage_candidates.append((artifact, asset_url))

        if coverage_candidates:
            # Narrowest SM span first, then publisher rank, then lowest max SM.
            artifact, url = sorted(
                coverage_candidates,
                key=lambda item: (
                    (item[0].max_sm or 0) - (item[0].min_sm or 0),
                    item[0].rank,
                    item[0].max_sm or 0,
                ),
            )[0]
            add_attempt(artifact, url, "best coverage for runtime line")
        if portable_candidate:
            artifact, url = portable_candidate
            add_attempt(artifact, url, "portable fallback for runtime line")

    if not attempts:
        return None

    selection_log.append(
        "linux_cuda_selection: attempt_order="
        + ",".join(choice.name for choice in attempts)
    )
    # Each attempt carries the full shared log plus its own trailer line.
    for attempt in attempts:
        attempt.selection_log = list(selection_log) + [
            "linux_cuda_selection: attempt "
            f"{attempt.name} runtime_line={attempt.runtime_line} coverage_class={attempt.coverage_class}"
        ]
    return LinuxCudaSelection(attempts=attempts, selection_log=selection_log)


def latest_published_linux_cuda_tag(host: HostInfo, published_repo: str) -> str | None:
    """Return the upstream tag of the newest release with a compatible Linux CUDA bundle."""
    for release in iter_published_release_bundles(published_repo):
        if linux_cuda_choice_from_release(host, release):
            return release.upstream_tag
    return None


def iter_upstream_releases() -> Iterable[dict[str, Any]]:
    """Yield stable (non-draft, non-prerelease) upstream llama.cpp releases."""
    for release in github_releases(UPSTREAM_REPO):
        if release.get("draft") or release.get("prerelease"):
            continue
        yield release


def pinned_published_release_bundle(
    repo: str, published_release_tag: str
) -> PublishedReleaseBundle:
    """Fetch exactly one published release bundle or raise PrebuiltFallback."""
    bundle = next(iter_published_release_bundles(repo, published_release_tag), None)
    if bundle is None:
        raise PrebuiltFallback(
            f"published release {repo}@{published_release_tag} did not expose a usable llama.cpp manifest"
        )
    return bundle


def resolve_requested_llama_tag(
    requested_tag: str | None,
) -> str:
    """Resolve 'latest'/None to the newest upstream tag; pass others through."""
    if requested_tag and requested_tag != "latest":
        return requested_tag
    return latest_upstream_release_tag()
def resolve_requested_install_tag(
    requested_tag: str | None,
    published_release_tag: str = "",
) -> str:
    """Validate that the requested tags match the approved prebuilt pin.

    Prebuilt installs are pinned to ``APPROVED_PREBUILT_LLAMA_TAG``; anything
    else (other than "latest"/None) triggers a fallback to source builds.
    """
    approved_tag = APPROVED_PREBUILT_LLAMA_TAG
    normalized_requested = requested_tag or "latest"
    if normalized_requested not in {"latest", approved_tag}:
        raise PrebuiltFallback(
            f"prebuilt installs are pinned to approved release {approved_tag}; requested {normalized_requested}"
        )
    if published_release_tag and published_release_tag != approved_tag:
        raise PrebuiltFallback(
            f"prebuilt installs require published release tag {approved_tag}; requested {published_release_tag}"
        )
    return approved_tag


def run_capture(
    command: list[str],
    *,
    timeout: int = 30,
    check: bool = False,
    env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run *command* capturing text output; optionally raise on failure."""
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
        env=env,
    )
    if check and completed.returncode != 0:
        raise subprocess.CalledProcessError(
            completed.returncode, command, completed.stdout, completed.stderr
        )
    return completed


def detect_host() -> HostInfo:
    """Probe the local machine (OS, arch, NVIDIA driver/GPUs) into a HostInfo.

    Uses ``nvidia-smi`` when available to read the driver CUDA version and
    per-GPU compute capabilities, honoring CUDA_VISIBLE_DEVICES filtering.
    All probe failures are swallowed: detection is best-effort.
    """
    system = platform.system()
    machine = platform.machine().lower()
    is_windows = system == "Windows"
    is_linux = system == "Linux"
    is_macos = system == "Darwin"
    is_x86_64 = machine in {"x86_64", "amd64"}
    is_arm64 = machine in {"arm64", "aarch64"}

    nvidia_smi = shutil.which("nvidia-smi")
    driver_cuda_version = None
    compute_caps: list[str] = []
    visible_cuda_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    visible_device_tokens = parse_cuda_visible_devices(visible_cuda_devices)
    has_physical_nvidia = False
    has_usable_nvidia = False
    if nvidia_smi:
        try:
            result = run_capture([nvidia_smi], timeout=20)
            merged = "\n".join(part for part in (result.stdout, result.stderr) if part)
            if "NVIDIA-SMI" in merged:
                has_physical_nvidia = True
                # CUDA_VISIBLE_DEVICES="" hides every GPU.
                has_usable_nvidia = visible_device_tokens != []
            for line in merged.splitlines():
                if "CUDA Version:" in line:
                    raw = line.split("CUDA Version:", 1)[1].strip().split()[0]
                    major, minor = raw.split(".", 1)
                    driver_cuda_version = (int(major), int(minor))
                    break
        except Exception:
            pass

        try:
            caps = run_capture(
                [
                    nvidia_smi,
                    "--query-gpu=index,uuid,compute_cap",
                    "--format=csv,noheader",
                ],
                timeout=20,
            )
            visible_gpu_rows: list[tuple[str, str, str]] = []
            for raw in caps.stdout.splitlines():
                parts = [part.strip() for part in raw.split(",")]
                if len(parts) != 3:
                    continue
                index, uuid, cap = parts
                visible_gpu_row = select_visible_gpu_rows(
                    [(index, uuid, cap)],
                    visible_device_tokens,
                )
                if not visible_gpu_row:
                    continue
                visible_gpu_rows.extend(visible_gpu_row)
                normalized_cap = normalize_compute_cap(cap)
                if normalized_cap is None:
                    continue
                if normalized_cap not in compute_caps:
                    compute_caps.append(normalized_cap)

            if visible_gpu_rows:
                has_usable_nvidia = True
            elif visible_device_tokens == []:
                has_usable_nvidia = False
            elif supports_explicit_visible_device_matching(visible_device_tokens):
                # Explicit tokens matched no GPU: treat as unusable.
                has_usable_nvidia = False
            elif has_physical_nvidia:
                has_usable_nvidia = True
        except Exception:
            pass

    return HostInfo(
        system=system,
        machine=machine,
        is_windows=is_windows,
        is_linux=is_linux,
        is_macos=is_macos,
        is_x86_64=is_x86_64,
        is_arm64=is_arm64,
        nvidia_smi=nvidia_smi,
        driver_cuda_version=driver_cuda_version,
        compute_caps=compute_caps,
        visible_cuda_devices=visible_cuda_devices,
        has_physical_nvidia=has_physical_nvidia,
        has_usable_nvidia=has_usable_nvidia,
    )


def pick_windows_cuda_runtime(host: HostInfo) -> str | None:
    """Map the driver's CUDA version to the newest Windows runtime it supports."""
    if not host.driver_cuda_version:
        return None
    major, minor = host.driver_cuda_version
    if major > 13 or (major == 13 and minor >= 1):
        return "13.1"
    if major > 12 or (major == 12 and minor >= 4):
        return "12.4"
    return None


def compatible_linux_runtime_lines(host: HostInfo) -> list[str]:
    """Return Linux CUDA runtime lines the driver can load, newest first."""
    if not host.driver_cuda_version:
        return []
    major, _minor = host.driver_cuda_version
    if major >= 13:
        return ["cuda13", "cuda12"]
    if major >= 12:
        return ["cuda12"]
    return []


def windows_runtime_line_info() -> dict[str, tuple[str, ...]]:
    """DLL glob patterns that must all be present for each Windows runtime line."""
    return {
        "cuda13": ("cudart64_13*.dll", "cublas64_13*.dll", "cublasLt64_13*.dll"),
        "cuda12": ("cudart64_12*.dll", "cublas64_12*.dll", "cublasLt64_12*.dll"),
    }


def detected_windows_runtime_lines() -> tuple[list[str], dict[str, list[str]]]:
    """Scan candidate directories for CUDA runtime DLL lines present on disk."""
    dirs = windows_runtime_dirs()
    detected: list[str] = []
    runtime_dirs: dict[str, list[str]] = {}
    for runtime_line, required_patterns in windows_runtime_line_info().items():
        matching_dirs = windows_runtime_dirs_for_patterns(required_patterns, dirs)
        if matching_dirs:
            detected.append(runtime_line)
            runtime_dirs[runtime_line] = matching_dirs
    return detected, runtime_dirs


def compatible_windows_runtime_lines(host: HostInfo) -> list[str]:
    """Runtime lines allowed by the Windows driver, newest first."""
    driver_runtime = pick_windows_cuda_runtime(host)
    if driver_runtime == "13.1":
        return ["cuda13", "cuda12"]
    if driver_runtime == "12.4":
        return ["cuda12"]
    return []


def runtime_line_from_cuda_version(cuda_version: str | None) -> str | None:
    """Translate a CUDA version string like '12.8' into a runtime line name."""
    if not cuda_version:
        return None
    raw = str(cuda_version).strip()
    if not raw:
        return None
    major, _, _ = raw.partition(".")
    if major == "12":
        return "cuda12"
    if major == "13":
        return "cuda13"
    return None


def detect_torch_cuda_runtime_preference(host: HostInfo) -> CudaRuntimePreference:
    """Derive a preferred CUDA runtime line from an importable, working Torch.

    Every bail-out path records its reason in the selection log and returns
    no preference so normal selection proceeds.
    """
    selection_log: list[str] = []
    if host.is_macos:
        selection_log.append("torch_cuda_preference: skipped on macOS")
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)
    if not (host.has_usable_nvidia and (host.is_linux or host.is_windows)):
        selection_log.append(
            "torch_cuda_preference: skipped because CUDA host prerequisites were not met"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    try:
        import torch
    except Exception as exc:
        selection_log.append(f"torch_cuda_preference: import failed: {exc}")
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    cuda_version = getattr(getattr(torch, "version", None), "cuda", None)
    if not isinstance(cuda_version, str) or not cuda_version.strip():
        selection_log.append(
            "torch_cuda_preference: torch.version.cuda missing; skipping Torch shortcut"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    try:
        cuda_available = bool(torch.cuda.is_available())
    except Exception as exc:
        selection_log.append(
            f"torch_cuda_preference: torch.cuda.is_available() failed: {exc}"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    if not cuda_available:
        selection_log.append(
            "torch_cuda_preference: torch.cuda.is_available() returned False; falling back to normal selection"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    runtime_line = runtime_line_from_cuda_version(cuda_version)
    if runtime_line is None:
        selection_log.append(
            f"torch_cuda_preference: unsupported torch.version.cuda={cuda_version}; falling back to normal selection"
        )
        return CudaRuntimePreference(runtime_line=None, selection_log=selection_log)

    selection_log.append(
        "torch_cuda_preference: selected runtime_line="
        f"{runtime_line} from torch.version.cuda={cuda_version}"
    )
    return CudaRuntimePreference(runtime_line=runtime_line, selection_log=selection_log)
def windows_cuda_attempts(
    host: HostInfo,
    llama_tag: str,
    upstream_assets: dict[str, str],
    preferred_runtime_line: str | None,
    selection_preamble: Iterable[str] = (),
) -> list[AssetChoice]:
    """Build the ordered list of Windows CUDA zip downloads to try.

    Order is: Torch-preferred line first (when compatible), then runtime
    lines whose DLLs were found on disk intersected with driver support,
    finally a driver-only fallback when no DLL line was detected.
    """
    selection_log = list(selection_preamble)
    runtime_by_line = {"cuda12": "12.4", "cuda13": "13.1"}
    driver_runtime = pick_windows_cuda_runtime(host)
    detected_runtime_lines, runtime_dirs = detected_windows_runtime_lines()
    compatible_runtime_lines = compatible_windows_runtime_lines(host)
    normal_runtime_lines: list[str]
    if detected_runtime_lines:
        # Keep driver ordering but only lines whose DLLs exist on disk.
        normal_runtime_lines = [
            line for line in compatible_runtime_lines if line in detected_runtime_lines
        ]
    else:
        normal_runtime_lines = compatible_runtime_lines
    selection_log.append(
        "windows_cuda_selection: driver_runtime="
        + (driver_runtime if driver_runtime else "unknown")
    )
    selection_log.append(
        "windows_cuda_selection: detected_runtime_lines="
        + (",".join(detected_runtime_lines) if detected_runtime_lines else "none")
    )
    for runtime_line in ("cuda13", "cuda12"):
        dirs_for_line = runtime_dirs.get(runtime_line)
        selection_log.append(
            "windows_cuda_selection: runtime_dirs "
            f"{runtime_line}="
            + (",".join(runtime_dirs.get(runtime_line, [])) if dirs_for_line else "none")
        )
    if detected_runtime_lines:
        selection_log.append(
            "windows_cuda_selection: host_runtime_order="
            + (",".join(normal_runtime_lines) if normal_runtime_lines else "none")
        )
    else:
        selection_log.append(
            "windows_cuda_selection: no CUDA runtime DLL line detected; falling back to driver order"
        )
    if not normal_runtime_lines:
        if detected_runtime_lines:
            selection_log.append(
                "windows_cuda_selection: detected CUDA runtime DLLs were incompatible with the reported driver"
            )
        if driver_runtime == "13.1":
            fallback_runtime_lines = ["cuda13", "cuda12"]
        elif driver_runtime == "12.4":
            fallback_runtime_lines = ["cuda12"]
        else:
            fallback_runtime_lines = []
        normal_runtime_lines = fallback_runtime_lines

    runtime_order: list[str] = []
    if preferred_runtime_line and preferred_runtime_line in normal_runtime_lines:
        runtime_order.append(preferred_runtime_line)
        selection_log.append(
            "windows_cuda_selection: torch_preferred_runtime_line="
            f"{preferred_runtime_line} reordered_attempts"
        )
    elif preferred_runtime_line:
        selection_log.append(
            "windows_cuda_selection: torch_preferred_runtime_line="
            f"{preferred_runtime_line} unavailable_or_incompatible"
        )
    else:
        selection_log.append(
            "windows_cuda_selection: no Torch runtime preference available"
        )

    runtime_order.extend(
        runtime_line
        for runtime_line in normal_runtime_lines
        if runtime_line not in runtime_order
    )
    selection_log.append(
        "windows_cuda_selection: normal_runtime_order="
        + (",".join(normal_runtime_lines) if normal_runtime_lines else "none")
    )
    selection_log.append(
        "windows_cuda_selection: attempt_runtime_order="
        + (",".join(runtime_order) if runtime_order else "none")
    )

    attempts: list[AssetChoice] = []
    for runtime_line in runtime_order:
        runtime = runtime_by_line[runtime_line]
        upstream_name = f"llama-{llama_tag}-bin-win-cuda-{runtime}-x64.zip"
        asset_url = upstream_assets.get(upstream_name)
        if not asset_url:
            selection_log.append(
                f"windows_cuda_selection: skip missing asset {upstream_name}"
            )
            continue
        attempts.append(
            AssetChoice(
                repo=UPSTREAM_REPO,
                tag=llama_tag,
                name=upstream_name,
                url=asset_url,
                source_label="upstream",
                install_kind="windows-cuda",
                runtime_line=runtime_line,
                selection_log=list(selection_log)
                + [
                    f"windows_cuda_selection: selected {upstream_name} runtime={runtime}"
                ],
            )
        )
    return attempts


def resolve_windows_cuda_choices(
    host: HostInfo, llama_tag: str, upstream_assets: dict[str, str]
) -> list[AssetChoice]:
    """Windows CUDA attempts seeded with the Torch runtime preference."""
    torch_preference = detect_torch_cuda_runtime_preference(host)
    return windows_cuda_attempts(
        host,
        llama_tag,
        upstream_assets,
        torch_preference.runtime_line,
        torch_preference.selection_log,
    )


def resolve_linux_cuda_choice(
    host: HostInfo, llama_tag: str, published_repo: str, published_release_tag: str
) -> LinuxCudaSelection:
    """Find a compatible published Linux CUDA bundle for *llama_tag*.

    Releases built from a different upstream tag are skipped (and counted
    for logging).  Raises PrebuiltFallback when nothing compatible exists.
    """
    torch_preference = detect_torch_cuda_runtime_preference(host)
    skipped_tag_mismatches = 0
    for release in iter_published_release_bundles(
        published_repo, published_release_tag
    ):
        if release.upstream_tag != llama_tag:
            skipped_tag_mismatches += 1
            continue
        selection = linux_cuda_choice_from_release(
            host,
            release,
            preferred_runtime_line=torch_preference.runtime_line,
            selection_preamble=torch_preference.selection_log,
        )
        if selection is not None:
            return selection
    if skipped_tag_mismatches:
        log(
            "published Linux CUDA selection skipped "
            f"{skipped_tag_mismatches} release(s) with upstream_tag != {llama_tag}"
        )
    raise PrebuiltFallback("no compatible published Linux CUDA bundle was found")


def resolve_upstream_asset_choice(host: HostInfo, llama_tag: str) -> AssetChoice:
    """Select the upstream llama.cpp release asset matching this host.

    Covers Linux x86-64 (CPU), Windows x86-64 (CUDA when usable, else CPU)
    and macOS (arm64/x64).  Raises PrebuiltFallback when no policy applies
    or the expected asset is absent from the release.
    """
    upstream_assets = github_release_assets(UPSTREAM_REPO, llama_tag)
    if host.is_linux and host.is_x86_64:
        upstream_name = f"llama-{llama_tag}-bin-ubuntu-x64.tar.gz"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream Linux CPU asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="linux-cpu",
        )

    if host.is_windows and host.is_x86_64:
        if host.has_usable_nvidia:
            attempts = resolve_windows_cuda_choices(host, llama_tag, upstream_assets)
            if attempts:
                return attempts[0]
            raise PrebuiltFallback("no compatible Windows CUDA asset was found")

        upstream_name = f"llama-{llama_tag}-bin-win-cpu-x64.zip"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream Windows CPU asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="windows-cpu",
        )

    if host.is_macos and host.is_arm64:
        upstream_name = f"llama-{llama_tag}-bin-macos-arm64.tar.gz"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream macOS arm64 asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="macos-arm64",
        )

    if host.is_macos and host.is_x86_64:
        upstream_name = f"llama-{llama_tag}-bin-macos-x64.tar.gz"
        if upstream_name not in upstream_assets:
            raise PrebuiltFallback("upstream macOS x64 asset was not found")
        return AssetChoice(
            repo=UPSTREAM_REPO,
            tag=llama_tag,
            name=upstream_name,
            url=upstream_assets[upstream_name],
            source_label="upstream",
            install_kind="macos-x64",
        )

    raise PrebuiltFallback(
        f"no prebuilt policy exists for {host.system} {host.machine}"
    )
def resolve_asset_choice(
    host: HostInfo, llama_tag: str, published_repo: str, published_release_tag: str
) -> AssetChoice:
    """Top-level asset selection: published Linux CUDA bundles first, else upstream."""
    if host.is_linux and host.is_x86_64 and host.has_usable_nvidia:
        return resolve_linux_cuda_choice(
            host, llama_tag, published_repo, published_release_tag
        ).primary
    return resolve_upstream_asset_choice(host, llama_tag)


def extract_archive(archive_path: Path, destination: Path) -> None:
    """Safely extract a .zip or .tar.gz into *destination*.

    Rejects absolute paths, path traversal, zip symlink entries and tar
    entries that are neither files, directories nor links.  Tar symlinks
    are re-created (targets must stay inside the destination); tar hard
    links are materialized as file copies.

    Bug fix: hard-link targets (``member.islnk()``) are now resolved
    against the extraction root — per the tar format a hard link's
    ``linkname`` is an archive-root-relative path — while symlink targets
    remain relative to the link's own directory.  Previously both were
    resolved against the link's directory, so hard links inside
    subdirectories failed to resolve and aborted the extraction.
    """

    def safe_extract_path(base: Path, member_name: str) -> Path:
        # Normalize separators and refuse absolute or escaping members.
        normalized = member_name.replace("\\", "/")
        member_path = Path(normalized)
        if member_path.is_absolute():
            raise PrebuiltFallback(
                f"archive member used an absolute path: {member_name}"
            )

        target = (base / member_path).resolve()
        base_resolved = base.resolve()
        try:
            target.relative_to(base_resolved)
        except ValueError as exc:
            raise PrebuiltFallback(
                f"archive member escaped destination: {member_name}"
            ) from exc
        return target

    def safe_link_target(
        base: Path,
        member_name: str,
        link_name: str,
        target: Path,
        *,
        is_symlink: bool,
    ) -> tuple[str, Path]:
        # Validate a link target and return (normalized name, resolved path).
        normalized = link_name.replace("\\", "/")
        link_path = Path(normalized)
        if link_path.is_absolute():
            raise PrebuiltFallback(
                f"archive link used an absolute target: {member_name} -> {link_name}"
            )
        if not normalized:
            raise PrebuiltFallback(f"archive link used an empty target: {member_name}")

        # Symlink targets are relative to the link's directory; hard-link
        # targets are relative to the archive root.
        anchor = target.parent if is_symlink else base
        resolved = (anchor / link_path).resolve()
        base_resolved = base.resolve()
        try:
            resolved.relative_to(base_resolved)
        except ValueError as exc:
            raise PrebuiltFallback(
                f"archive link escaped destination: {member_name} -> {link_name}"
            ) from exc
        return normalized, resolved

    def extract_zip_safely(source: Path, base: Path) -> None:
        with zipfile.ZipFile(source) as archive:
            for member in archive.infolist():
                target = safe_extract_path(base, member.filename)
                # High bits of external_attr hold the POSIX file type.
                mode = (member.external_attr >> 16) & 0o170000
                if mode == 0o120000:
                    raise PrebuiltFallback(
                        f"zip archive contained a symlink entry: {member.filename}"
                    )
                if member.is_dir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                with archive.open(member, "r") as src, target.open("wb") as dst:
                    shutil.copyfileobj(src, dst)

    def extract_tar_safely(source: Path, base: Path) -> None:
        pending_links: list[tuple[tarfile.TarInfo, Path]] = []
        with tarfile.open(source, "r:gz") as archive:
            for member in archive.getmembers():
                target = safe_extract_path(base, member.name)
                if member.isdir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                if member.islnk() or member.issym():
                    # Defer links until their targets exist on disk.
                    pending_links.append((member, target))
                    continue
                if not member.isfile():
                    raise PrebuiltFallback(
                        f"tar archive contained an unsupported entry: {member.name}"
                    )
                target.parent.mkdir(parents=True, exist_ok=True)
                extracted = archive.extractfile(member)
                if extracted is None:
                    raise PrebuiltFallback(
                        f"tar archive entry could not be read: {member.name}"
                    )
                with extracted, target.open("wb") as dst:
                    shutil.copyfileobj(extracted, dst)

        # Resolve links iteratively so chains (link -> link -> file) settle.
        unresolved = list(pending_links)
        while unresolved:
            next_round: list[tuple[tarfile.TarInfo, Path]] = []
            progressed = False
            for member, target in unresolved:
                normalized_link, resolved_target = safe_link_target(
                    base,
                    member.name,
                    member.linkname,
                    target,
                    is_symlink=member.issym(),
                )
                if not resolved_target.exists() and not resolved_target.is_symlink():
                    next_round.append((member, target))
                    continue
                if resolved_target.is_dir():
                    raise PrebuiltFallback(
                        f"archive link targeted a directory: {member.name} -> {member.linkname}"
                    )

                target.parent.mkdir(parents=True, exist_ok=True)
                if target.exists() or target.is_symlink():
                    target.unlink()

                if member.issym():
                    target.symlink_to(normalized_link)
                else:
                    # Hard links become plain copies for portability.
                    shutil.copy2(resolved_target, target)
                progressed = True

            if not progressed:
                details = ", ".join(
                    f"{member.name} -> {member.linkname}" for member, _ in next_round
                )
                raise PrebuiltFallback(
                    f"tar archive contained unresolved link entries: {details}"
                )
            unresolved = next_round

    destination.mkdir(parents=True, exist_ok=True)
    if archive_path.name.endswith(".zip"):
        extract_zip_safely(archive_path, destination)
        return
    if archive_path.name.endswith(".tar.gz"):
        extract_tar_safely(archive_path, destination)
        return
    raise PrebuiltFallback(f"unsupported archive format: {archive_path.name}")


def copy_globs(
    source_dir: Path, destination: Path, patterns: list[str], *, required: bool = True
) -> None:
    """Copy files matching any of *patterns* from *source_dir* into *destination*.

    Matching is by basename, searched recursively, shallowest path first;
    two distinct files with the same basename are rejected as ambiguous.
    Raises PrebuiltFallback when *required* and nothing matched.
    """
    destination.mkdir(parents=True, exist_ok=True)
    matched_sources: dict[str, Path] = {}
    for path in sorted(
        (candidate for candidate in source_dir.rglob("*") if candidate.is_file()),
        key=lambda candidate: (
            len(candidate.relative_to(source_dir).parts),
            str(candidate),
        ),
    ):
        for pattern in patterns:
            if fnmatch.fnmatch(path.name, pattern):
                previous = matched_sources.get(path.name)
                if previous is not None and previous != path:
                    raise PrebuiltFallback(
                        f"ambiguous archive layout for {path.name}: "
                        f"{previous.relative_to(source_dir)} and {path.relative_to(source_dir)}"
                    )
                matched_sources[path.name] = path
                break

    if required and not matched_sources:
        raise PrebuiltFallback(f"required files missing from {source_dir}: {patterns}")

    for name, path in matched_sources.items():
        shutil.copy2(path, destination / name)


def ensure_converter_scripts(install_dir: Path, llama_tag: str) -> None:
    """Ensure convert_hf_to_gguf.py exists, fetching it from GitHub if needed,
    and keep the legacy dashed filename as a symlink (or copy) alias."""
    canonical = install_dir / "convert_hf_to_gguf.py"
    if not canonical.exists():
        # Hydrated source tree should have placed this file already.
        # Fall back to a network fetch so the install is not blocked.
        raw_base = f"https://raw.githubusercontent.com/ggml-org/llama.cpp/{llama_tag}"
        source_url = f"{raw_base}/convert_hf_to_gguf.py"
        data = download_bytes(
            source_url,
            progress_label=f"Downloading {download_label_from_url(source_url)}",
        )
        if not data:
            raise RuntimeError(f"downloaded empty converter script from {source_url}")
        # Cheap sanity check that we fetched Python, not an HTML error page.
        if b"import " not in data and b"def " not in data and b"#!/" not in data:
            raise RuntimeError(
                f"downloaded converter script did not look like Python source: {source_url}"
            )
        atomic_write_bytes(canonical, data)
    legacy = install_dir / "convert-hf-to-gguf.py"
    if legacy.exists() or legacy.is_symlink():
        legacy.unlink()
    try:
        legacy.symlink_to("convert_hf_to_gguf.py")
    except OSError:
        shutil.copy2(canonical, legacy)


def extracted_archive_root(extract_dir: Path) -> Path:
    """Return the single top-level directory of an extraction, else the dir itself."""
    children = [path for path in extract_dir.iterdir()]
    if len(children) == 1 and children[0].is_dir():
        return children[0]
    return extract_dir


def copy_directory_contents(source_dir: Path, destination: Path) -> None:
    """Merge-copy everything inside *source_dir* into *destination*."""
    destination.mkdir(parents=True, exist_ok=True)
    for item in source_dir.iterdir():
        target = destination / item.name
        if item.is_dir():
            shutil.copytree(item, target, dirs_exist_ok=True)
        else:
            shutil.copy2(item, target)


def hydrate_source_tree(
    upstream_tag: str,
    install_dir: Path,
    work_dir: Path,
    *,
    expected_sha256: str,
) -> None:
    """Download, verify and unpack the upstream source tree into *install_dir*.

    Tries each candidate URL in order, verifies the archive checksum,
    validates that key repo files are present, then merges the tree into
    the install directory.  Any failure is surfaced as PrebuiltFallback.
    """
    archive_path = work_dir / f"llama.cpp-source-{upstream_tag}.tar.gz"
    source_urls = upstream_source_archive_urls(upstream_tag)
    extract_dir = Path(tempfile.mkdtemp(prefix="source-extract-", dir=work_dir))

    try:
        log(f"downloading llama.cpp source tree for upstream tag {upstream_tag}")
        last_exc: Exception | None = None
        downloaded = False
        for index, source_url in enumerate(source_urls):
            try:
                if index > 0:
                    log(
                        f"retrying source tree download from fallback URL: {source_url}"
                    )
                download_file_verified(
                    source_url,
                    archive_path,
                    expected_sha256=expected_sha256,
                    label=f"llama.cpp source tree for {upstream_tag}",
                )
                downloaded = True
                break
            except Exception as exc:
                last_exc = exc
                # Re-raise only after the final candidate URL failed.
                if index == len(source_urls) - 1:
                    raise
                log(f"source tree download failed from {source_url}: {exc}")
        if not downloaded:
            assert last_exc is not None
            raise last_exc
        extract_archive(archive_path, extract_dir)
        source_root = extracted_archive_root(extract_dir)
        required_paths = [
            source_root / "CMakeLists.txt",
            source_root / "convert_hf_to_gguf.py",
            source_root / "gguf-py",
        ]
        missing = [
            str(path.relative_to(source_root))
            for path in required_paths
            if not path.exists()
        ]
        if missing:
            raise PrebuiltFallback(
                "upstream source archive was missing required repo files: "
                + ", ".join(missing)
            )
        copy_directory_contents(source_root, install_dir)
    except PrebuiltFallback:
        raise
    except Exception as exc:
        raise PrebuiltFallback(
            f"failed to hydrate upstream llama.cpp source tree for {upstream_tag}: {exc}"
        ) from exc
    finally:
        remove_tree(extract_dir)
def normalize_install_layout(install_dir: Path, host: HostInfo) -> tuple[Path, Path]:
    """Create the platform build layout and return (server, quantize) paths.

    Windows binaries live under build/bin/Release; POSIX entrypoints sit at
    the install root with build/bin alongside.
    """
    build_bin = install_dir / "build" / "bin"
    if host.is_windows:
        exec_dir = build_bin / "Release"
        exec_dir.mkdir(parents=True, exist_ok=True)
        return exec_dir / "llama-server.exe", exec_dir / "llama-quantize.exe"

    install_dir.mkdir(parents=True, exist_ok=True)
    build_bin.mkdir(parents=True, exist_ok=True)
    return install_dir / "llama-server", install_dir / "llama-quantize"


def discover_installed_executable(install_dir: Path, executable_name: str) -> Path:
    """Locate *executable_name* at the install root or anywhere beneath it."""
    direct = install_dir / executable_name
    if direct.exists() and direct.is_file():
        return direct
    candidate = next(
        (path for path in install_dir.rglob(executable_name) if path.is_file()), None
    )
    if candidate is None:
        raise PrebuiltFallback(f"{executable_name} was not installed")
    return candidate


def write_exec_wrapper(entrypoint: Path, target: Path) -> None:
    """Write an executable POSIX shell shim that execs *target* relative to itself."""
    relative_target = os.path.relpath(target, entrypoint.parent)
    script = "\n".join(
        [
            "#!/bin/sh",
            f'exec "$(dirname "$0")/{relative_target}" "$@"',
            "",
        ]
    )
    atomic_write_bytes(entrypoint, script.encode("utf-8"))
    os.chmod(entrypoint, 0o755)


def create_exec_entrypoint(entrypoint: Path, target: Path) -> None:
    """Expose *target* at *entrypoint* via relative symlink, else a shell shim."""
    if entrypoint == target:
        return
    if entrypoint.exists() or entrypoint.is_symlink():
        entrypoint.unlink()
    try:
        entrypoint.symlink_to(os.path.relpath(target, entrypoint.parent))
    except Exception:
        # Symlinks can fail (e.g. restricted Windows); fall back to a wrapper.
        write_exec_wrapper(entrypoint, target)


def overlay_directory_for_choice(
    install_dir: Path, choice: AssetChoice, host: HostInfo
) -> Path:
    """Return (and create) the directory where this asset's binaries overlay."""
    if host.is_windows or choice.install_kind.startswith("windows"):
        path = install_dir / "build" / "bin" / "Release"
    else:
        path = install_dir / "build" / "bin"
    path.mkdir(parents=True, exist_ok=True)
    return path


def runtime_patterns_for_choice(choice: AssetChoice) -> list[str]:
    """Glob patterns for the runtime files to copy for this install kind."""
    if choice.install_kind in {"linux-cpu", "linux-cuda"}:
        return [
            "llama-server",
            "llama-quantize",
            "libllama.so*",
            "libggml.so*",
            "libggml-base.so*",
            "libmtmd.so*",
            "libggml-cpu-*.so*",
            "libggml-cuda.so*",
            "libggml-rpc.so*",
        ]
    if choice.install_kind in {"macos-arm64", "macos-x64"}:
        return ["llama-server", "llama-quantize", "lib*.dylib"]
    if choice.install_kind in {"windows-cpu", "windows-cuda"}:
        return ["*.exe", "*.dll"]
    raise PrebuiltFallback(
        f"unsupported install kind for runtime overlay: {choice.install_kind}"
    )


def metadata_patterns_for_choice(choice: AssetChoice) -> list[str]:
    """License/build-info filenames to copy; Windows ships LICENSE.txt."""
    patterns = ["BUILD_INFO.txt", "THIRD_PARTY_LICENSES.txt"]
    if choice.install_kind.startswith("windows"):
        patterns.append("LICENSE.txt")
    else:
        patterns.append("LICENSE")
    return patterns


@contextmanager
def install_lock(lock_path: Path) -> Iterator[None]:
    """Serialize concurrent installs via filelock, or an O_EXCL fallback.

    The fallback writes our PID into the lock file so locks left by crashed
    processes can be detected and reclaimed.

    Bug fix: the "lock file exists but PID not yet written" wait branch now
    honors the overall deadline.  Previously it looped with a bare
    ``sleep(0.1); continue``, so a holder that crashed between creating the
    lock file and writing its PID left an empty file that made every waiter
    spin forever instead of timing out.
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    if FileLock is None:
        # Fallback: exclusive file creation as a simple lock.
        # Write our PID so stale locks from crashed processes can be detected.
        fd: int | None = None
        deadline = time.monotonic() + INSTALL_LOCK_TIMEOUT_SECONDS
        while True:
            try:
                fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_RDWR)
                os.write(fd, f"{os.getpid()}\n".encode())
                os.fsync(fd)
                break
            except FileExistsError:
                # Check if the holder process is still alive
                stale = False
                try:
                    raw = lock_path.read_text().strip()
                except FileNotFoundError:
                    # Lock vanished between our open attempt and read -- retry
                    continue
                if not raw:
                    # File exists but PID not yet written -- another process
                    # just created it. Wait briefly for the write to land,
                    # but never past the overall deadline (the creator may
                    # have crashed before writing its PID).
                    if time.monotonic() >= deadline:
                        raise RuntimeError(
                            f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}"
                        )
                    time.sleep(0.1)
                    continue
                try:
                    holder_pid = int(raw)
                    os.kill(holder_pid, 0)  # signal 0 = existence check
                except ValueError:
                    # PID unreadable (corrupted file)
                    stale = True
                except ProcessLookupError:
                    # Process is dead
                    stale = True
                except PermissionError:
                    # Process is alive but owned by another user -- not stale
                    pass
                if stale:
                    lock_path.unlink(missing_ok=True)
                    continue
                if time.monotonic() >= deadline:
                    raise RuntimeError(
                        f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}"
                    )
                time.sleep(0.5)
        try:
            yield
        finally:
            if fd is not None:
                os.close(fd)
            lock_path.unlink(missing_ok=True)
        return

    try:
        with FileLock(lock_path, timeout=INSTALL_LOCK_TIMEOUT_SECONDS):
            yield
    except FileLockTimeout as exc:
        raise RuntimeError(
            f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}"
        ) from exc
+ time.sleep(0.1) + continue + try: + holder_pid = int(raw) + os.kill(holder_pid, 0) # signal 0 = existence check + except ValueError: + # PID unreadable (corrupted file) + stale = True + except ProcessLookupError: + # Process is dead + stale = True + except PermissionError: + # Process is alive but owned by another user -- not stale + pass + if stale: + lock_path.unlink(missing_ok = True) + continue + if time.monotonic() >= deadline: + raise RuntimeError( + f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}" + ) + time.sleep(0.5) + try: + yield + finally: + if fd is not None: + os.close(fd) + lock_path.unlink(missing_ok = True) + return + + try: + with FileLock(lock_path, timeout = INSTALL_LOCK_TIMEOUT_SECONDS): + yield + except FileLockTimeout as exc: + raise RuntimeError( + f"timed out after {INSTALL_LOCK_TIMEOUT_SECONDS}s waiting for concurrent install lock: {lock_path}" + ) from exc + + +def install_lock_path(install_dir: Path) -> Path: + return install_dir.parent / f".{install_dir.name}.install.lock" + + +def install_staging_root(install_dir: Path) -> Path: + root = install_dir.parent / INSTALL_STAGING_ROOT_NAME + root.mkdir(parents = True, exist_ok = True) + return root + + +def prune_install_staging_root(install_dir: Path) -> None: + root = install_dir.parent / INSTALL_STAGING_ROOT_NAME + try: + root.rmdir() + except OSError: + pass + + +def create_install_staging_dir(install_dir: Path) -> Path: + staging_dir = Path( + tempfile.mkdtemp( + prefix = f"{install_dir.name}.staging-", dir = install_staging_root(install_dir) + ) + ) + log(f"created install staging dir {staging_dir}") + return staging_dir + + +def unique_install_side_path(install_dir: Path, label: str) -> Path: + root = install_staging_root(install_dir) + timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime()) + prefix = f"{install_dir.name}.{label}-{timestamp}-{os.getpid()}" + candidate = root / prefix + counter = 0 + while candidate.exists(): + 
counter += 1
        candidate = root / f"{prefix}-{counter}"
    return candidate


def remove_tree(path: Path | None) -> None:
    """Silently delete a directory tree if it exists (errors ignored)."""
    if path and path.exists():
        shutil.rmtree(path, ignore_errors = True)


def remove_tree_logged(path: Path | None, label: str) -> None:
    """Delete a tree with logging; unlike remove_tree, re-raises on failure."""
    if not path:
        return
    if not path.exists():
        log(f"{label} already absent at {path}")
        return
    log(f"removing {label} at {path}")
    try:
        shutil.rmtree(path)
    except Exception as exc:
        log(f"failed to remove {label} at {path}: {exc}")
        raise


def cleanup_install_side_paths(
    install_dir: Path,
    *,
    staging_dir: Path | None = None,
    rollback_dir: Path | None = None,
    failed_dir: Path | None = None,
    active_dir: Path | None = None,
) -> None:
    """Remove every leftover side tree; collect failures and raise them together at the end."""
    cleanup_failures: list[str] = []
    for label, path in (
        ("failed install path", failed_dir),
        ("rollback path", rollback_dir),
        ("active install path", active_dir),
        ("staging dir", staging_dir),
    ):
        if not path:
            continue
        try:
            remove_tree_logged(path, label)
        except Exception as exc:
            # Keep going so one failed removal does not strand the others.
            cleanup_failures.append(f"{label} ({path}): {exc}")
    prune_install_staging_root(install_dir)
    if cleanup_failures:
        raise RuntimeError("cleanup failed for " + "; ".join(cleanup_failures))


def confirm_install_tree(install_dir: Path, host: HostInfo) -> None:
    """Verify the activated tree contains the expected executables and converter files."""
    if host.is_windows:
        expected = [
            install_dir / "build" / "bin" / "Release" / "llama-server.exe",
            install_dir / "build" / "bin" / "Release" / "llama-quantize.exe",
            install_dir / "convert_hf_to_gguf.py",
            install_dir / "gguf-py",
        ]
    else:
        expected = [
            install_dir / "llama-server",
            install_dir / "llama-quantize",
            install_dir / "build" / "bin" / "llama-server",
            install_dir / "build" / "bin" / "llama-quantize",
            install_dir / "convert_hf_to_gguf.py",
            install_dir / "gguf-py",
        ]

    expected.append(install_dir / "UNSLOTH_PREBUILT_INFO.json")
    missing = [str(path) for path in expected if not path.exists()]
    if missing:
        raise RuntimeError(
            "activated install was 
missing expected files: " + ", ".join(missing)
        )


def activate_install_tree(staging_dir: Path, install_dir: Path, host: HostInfo) -> None:
    """Atomically swap the validated staging tree into place.

    Existing installs are moved aside to a rollback path first; on activation
    failure the rollback is restored, and if even that fails, all side trees
    are cleaned so the caller can fall back to a source build.
    """
    rollback_dir: Path | None = None
    failed_dir: Path | None = None
    try:
        if install_dir.exists():
            rollback_dir = unique_install_side_path(install_dir, "rollback")
            log(f"moving existing install to rollback path {rollback_dir}")
            os.replace(install_dir, rollback_dir)
            log(f"moved existing install to rollback path {rollback_dir.name}")

        log(f"activating staged install {staging_dir} -> {install_dir}")
        os.replace(staging_dir, install_dir)
        log(f"activated staged install at {install_dir}")
        log(f"confirming activated install tree at {install_dir}")
        confirm_install_tree(install_dir, host)
        log(f"activated install tree confirmed at {install_dir}")
    except Exception as exc:
        log(f"activation failed for staged install: {exc}")
        try:
            if install_dir.exists():
                failed_dir = unique_install_side_path(install_dir, "failed")
                log(f"moving failed active install to {failed_dir}")
                os.replace(install_dir, failed_dir)
            elif staging_dir.exists():
                # Staging never made it into place; retain it as the failed tree
                # and clear staging_dir so the finally block does not delete it twice.
                failed_dir = staging_dir
                staging_dir = None
                log(f"retaining failed staging tree at {failed_dir}")

            if rollback_dir and rollback_dir.exists():
                log(f"restoring rollback path {rollback_dir} -> {install_dir}")
                os.replace(rollback_dir, install_dir)
                log(f"restored previous install from rollback path {rollback_dir.name}")
            raise PrebuiltFallback(
                "staged prebuilt validation passed but activation failed; restored previous install "
                f"({textwrap.shorten(str(exc), width = 200, placeholder = '...')})"
            ) from exc
        except PrebuiltFallback:
            raise
        except Exception as rollback_exc:
            log(f"rollback after failed activation also failed: {rollback_exc}")

            log(
                "rollback restoration failed; cleaning staging, install, and rollback paths before source build fallback"
            )
            cleanup_error: Exception | None = None
            try:
                cleanup_install_side_paths(
                    install_dir,
                    staging_dir = staging_dir,
                    rollback_dir = rollback_dir,
                    failed_dir = failed_dir,
                    active_dir = install_dir,
                )
            except Exception as cleanup_exc:
                cleanup_error = cleanup_exc
                log(f"cleanup after rollback failure also failed: {cleanup_exc}")
            details = textwrap.shorten(str(exc), width = 200, placeholder = "...")
            if cleanup_error is not None:
                raise PrebuiltFallback(
                    "staged prebuilt validation passed but activation and rollback failed; "
                    f"cleanup also reported errors ({details}; cleanup={cleanup_error})"
                ) from exc
            raise PrebuiltFallback(
                "staged prebuilt validation passed but activation and rollback failed; "
                f"cleaned install state for fresh source build ({details})"
            ) from exc
    else:
        if rollback_dir:
            remove_tree_logged(rollback_dir, "rollback path")
    finally:
        remove_tree(failed_dir)
        remove_tree(staging_dir)
        prune_install_staging_root(install_dir)


def install_from_archives(
    choice: AssetChoice, host: HostInfo, install_dir: Path, work_dir: Path
) -> tuple[Path, Path]:
    """Download, checksum-verify, extract, and overlay the chosen prebuilt archive.

    Returns the (llama-server, llama-quantize) paths inside *install_dir*.
    """
    main_archive = work_dir / choice.name
    log(f"downloading {choice.name} from {choice.source_label} release")
    if not choice.expected_sha256:
        raise PrebuiltFallback(
            f"approved checksum was missing for selected asset {choice.name}"
        )
    download_file_verified(
        choice.url,
        main_archive,
        expected_sha256 = choice.expected_sha256,
        label = f"prebuilt archive {choice.name}",
    )

    install_dir.mkdir(parents = True, exist_ok = True)
    extract_dir = Path(tempfile.mkdtemp(prefix = "extract-", dir = work_dir))

    try:
        extract_archive(main_archive, extract_dir)
        source_dir = extract_dir
        overlay_dir = overlay_directory_for_choice(install_dir, choice, host)
        copy_globs(
            source_dir, overlay_dir, runtime_patterns_for_choice(choice), required = True
        )
        copy_globs(
            source_dir,
            install_dir,
            metadata_patterns_for_choice(choice),
            required = False,
        )
    finally:
        remove_tree(extract_dir)

    if host.is_windows:
        exec_dir = install_dir / 
"build" / "bin" / "Release" + server_src = next(exec_dir.glob("llama-server.exe"), None) + quantize_src = next(exec_dir.glob("llama-quantize.exe"), None) + if server_src is None or quantize_src is None: + raise PrebuiltFallback("windows executables were not installed correctly") + return server_src, quantize_src + + build_bin = install_dir / "build" / "bin" + source_server = build_bin / "llama-server" + source_quantize = build_bin / "llama-quantize" + if not source_server.exists() or not source_quantize.exists(): + raise PrebuiltFallback( + "unix executables were not installed correctly into build/bin" + ) + os.chmod(source_server, 0o755) + os.chmod(source_quantize, 0o755) + + root_server = install_dir / "llama-server" + root_quantize = install_dir / "llama-quantize" + if source_server != root_server: + create_exec_entrypoint(root_server, source_server) + if source_quantize != root_quantize: + create_exec_entrypoint(root_quantize, source_quantize) + build_server = build_bin / "llama-server" + build_quantize = build_bin / "llama-quantize" + if source_server != build_server: + create_exec_entrypoint(build_server, source_server) + if source_quantize != build_quantize: + create_exec_entrypoint(build_quantize, source_quantize) + + return source_server, source_quantize + + +def ensure_repo_shape(install_dir: Path) -> None: + required = [ + install_dir / "CMakeLists.txt", + install_dir / "convert_hf_to_gguf.py", + install_dir / "gguf-py", + ] + missing = [ + str(path.relative_to(install_dir)) for path in required if not path.exists() + ] + if missing: + raise PrebuiltFallback( + "hydrated llama.cpp source tree was missing: " + ", ".join(missing) + ) + + +def validation_model_cache_path(install_dir: Path) -> Path: + cache_dir = install_dir.parent / VALIDATION_MODEL_CACHE_DIRNAME + cache_dir.mkdir(parents = True, exist_ok = True) + return cache_dir / VALIDATION_MODEL_CACHE_FILENAME + + +def validated_validation_model_bytes(data: bytes) -> bytes: + if not data: + raise 
RuntimeError(f"downloaded empty validation model from {TEST_MODEL_URL}") + digest = hashlib.sha256(data).hexdigest() + if digest != TEST_MODEL_SHA256: + raise RuntimeError( + "validation model checksum mismatch: " + f"expected={TEST_MODEL_SHA256} actual={digest}" + ) + return data + + +def download_validation_model(path: Path, cache_path: Path | None = None) -> None: + try: + data: bytes | None = None + if cache_path and cache_path.exists(): + try: + data = validated_validation_model_bytes(cache_path.read_bytes()) + log(f"using cached tiny GGUF validation model from {cache_path}") + except Exception as exc: + log( + f"cached tiny GGUF validation model was invalid; refreshing cache ({exc})" + ) + data = None + if data is None: + log("downloading tiny GGUF validation model") + data = validated_validation_model_bytes( + download_bytes( + TEST_MODEL_URL, + progress_label = f"Downloading {download_label_from_url(TEST_MODEL_URL)}", + ) + ) + if cache_path is not None: + atomic_write_bytes(cache_path, data) + atomic_write_bytes(path, data) + except Exception as exc: + raise PrebuiltFallback(f"validation model unavailable: {exc}") from exc + + +def free_local_port() -> int: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind(("127.0.0.1", 0)) + _, port = sock.getsockname() + sock.close() + return int(port) + + +def read_log_excerpt(log_path: Path, *, max_lines: int = 60) -> str: + try: + content = log_path.read_text(encoding = "utf-8", errors = "replace") + except FileNotFoundError: + return "" + return "\n".join(content.splitlines()[-max_lines:]) + + +def is_retryable_server_bind_error( + exc: Exception | None, + output: str = "", + *, + exited_quickly: bool = False, +) -> bool: + haystack = output.lower() + bind_markers = ( + "address already in use", + "only one usage of each socket address", + "failed to bind", + "bind failed", + "failed to listen", + "errno 98", + "errno 10048", + ) + if any(marker in haystack for marker in bind_markers): + return 
True

    if isinstance(exc, urllib.error.URLError):
        reason = exc.reason
        if exited_quickly and isinstance(reason, ConnectionRefusedError):
            return True
        if isinstance(reason, OSError) and reason.errno in {
            98,
            99,
            111,
            10048,
            10049,
            10061,
        }:
            return exited_quickly
    if exited_quickly and isinstance(exc, ConnectionRefusedError):
        return True
    if isinstance(exc, OSError) and exc.errno in {98, 99, 111, 10048, 10049, 10061}:
        return exited_quickly
    return False


def dedupe_existing_dirs(paths: Iterable[str | Path]) -> list[str]:
    """Resolve paths, keep only existing directories, and dedupe preserving order."""
    unique: list[str] = []
    seen: set[str] = set()
    for raw in paths:
        if not raw:
            continue
        path = Path(raw).expanduser()
        if not path.is_dir():
            continue
        resolved = str(path.resolve())
        if resolved in seen:
            continue
        seen.add(resolved)
        unique.append(resolved)
    return unique


def linux_missing_libraries(
    binary_path: Path, *, env: dict[str, str] | None = None
) -> list[str]:
    """Return shared library names ldd reports as '=> not found' for *binary_path*."""
    try:
        result = run_capture(["ldd", str(binary_path)], timeout = 20, env = env)
    except Exception:
        # ldd unavailable or failed; treat as "nothing detectably missing".
        return []

    missing: list[str] = []
    for line in (result.stdout + result.stderr).splitlines():
        line = line.strip()
        if "=> not found" not in line:
            continue
        library = line.split("=>", 1)[0].strip()
        if library and library not in missing:
            missing.append(library)
    return missing


def python_runtime_dirs() -> list[str]:
    """Collect nvidia/torch library dirs shipped inside Python site-packages."""
    candidates: list[Path] = []
    search_roots = [Path(entry) for entry in sys.path if entry]
    try:
        search_roots.extend(Path(path) for path in site.getsitepackages())
    except Exception:
        pass
    try:
        user_site = site.getusersitepackages()
        if user_site:
            search_roots.append(Path(user_site))
    except Exception:
        pass

    for root in search_roots:
        if not root.is_dir():
            continue
        candidates.extend(root.glob("nvidia/*/lib"))
        candidates.extend(root.glob("nvidia/*/bin"))
        candidates.extend(root.glob("torch/lib"))
    return dedupe_existing_dirs(candidates)


def 
ldconfig_runtime_dirs(required_libraries: Iterable[str]) -> list[str]:
    """Map required library names to their directories via 'ldconfig -p' output."""
    try:
        result = run_capture(["ldconfig", "-p"], timeout = 20)
    except Exception:
        return []

    required = set(required_libraries)
    candidates: list[str] = []
    for line in result.stdout.splitlines():
        if "=>" not in line:
            continue
        library, _, location = line.partition("=>")
        # ldconfig lines look like "libfoo.so.1 (libc6,x86-64) => /lib/libfoo.so.1".
        library = library.strip().split()[0]
        if required and library not in required:
            continue
        path = Path(location.strip()).parent
        candidates.append(str(path))
    return dedupe_existing_dirs(candidates)


def linux_runtime_dirs(binary_path: Path) -> list[str]:
    """Runtime dirs needed to satisfy libraries ldd reports missing, if any."""
    missing = linux_missing_libraries(binary_path)
    if not missing:
        return []
    return linux_runtime_dirs_for_required_libraries(missing)


def preflight_linux_installed_binaries(
    binaries: Iterable[Path],
    install_dir: Path,
    host: HostInfo,
) -> None:
    """On Linux, raise PrebuiltFallback if any installed binary still has unresolved libraries."""
    if not host.is_linux:
        return

    issues: list[str] = []
    for binary_path in binaries:
        env = binary_env(binary_path, install_dir, host)
        missing = linux_missing_libraries(binary_path, env = env)
        if not missing:
            continue
        runtime_dirs = [
            part for part in env.get("LD_LIBRARY_PATH", "").split(os.pathsep) if part
        ]
        issues.append(
            f"{binary_path.name}: missing={','.join(missing)} "
            f"ld_library_path={','.join(runtime_dirs) if runtime_dirs else 'none'}"
        )

    if issues:
        raise PrebuiltFallback(
            "linux extracted binary preflight failed:\n" + "\n".join(issues)
        )


def glob_paths(*patterns: str) -> list[str]:
    """Expand absolute glob patterns against /; pass literal paths through unchanged."""
    matches: list[str] = []
    for pattern in patterns:
        if any(char in pattern for char in "*?[]"):
            matches.extend(str(path) for path in Path("/").glob(pattern.lstrip("/")))
        else:
            matches.append(pattern)
    return matches


def windows_runtime_dirs() -> list[str]:
    """Candidate directories that may hold CUDA/runtime DLLs on Windows."""
    candidates: list[str | Path] = []

    env_dirs = os.environ.get("CUDA_RUNTIME_DLL_DIR", "")
    if env_dirs:
        candidates.extend(part for part in env_dirs.split(os.pathsep) if part)

    path_dirs = 
os.environ.get("PATH", "") + if path_dirs: + candidates.extend(part for part in path_dirs.split(os.pathsep) if part) + + cuda_roots: list[Path] = [] + for name in ("CUDA_PATH", "CUDA_HOME", "CUDA_ROOT"): + value = os.environ.get(name) + if value: + cuda_roots.append(Path(value)) + + for root in cuda_roots: + candidates.extend([root / "bin", root / "lib" / "x64"]) + + program_files = os.environ.get("ProgramFiles", r"C:\Program Files") + toolkit_base = Path(program_files) / "NVIDIA GPU Computing Toolkit" / "CUDA" + if toolkit_base.is_dir(): + candidates.extend(toolkit_base.glob("v*/bin")) + candidates.extend(toolkit_base.glob("v*/lib/x64")) + + candidates.extend(Path(path) for path in python_runtime_dirs()) + return dedupe_existing_dirs(candidates) + + +def windows_runtime_dirs_for_patterns( + required_patterns: Iterable[str], + candidate_dirs: Iterable[str] | None = None, +) -> list[str]: + directories = ( + list(candidate_dirs) if candidate_dirs is not None else windows_runtime_dirs() + ) + matching_dirs: list[str] = [] + for pattern in required_patterns: + matched_dirs = [ + directory for directory in directories if any(Path(directory).glob(pattern)) + ] + if not matched_dirs: + return [] + for directory in matched_dirs: + if directory not in matching_dirs: + matching_dirs.append(directory) + return matching_dirs + + +def windows_runtime_dirs_for_runtime_line(runtime_line: str | None) -> list[str]: + if not runtime_line: + return [] + patterns = windows_runtime_line_info().get(runtime_line) + if not patterns: + return [] + return windows_runtime_dirs_for_patterns(patterns) + + +def binary_env( + binary_path: Path, + install_dir: Path, + host: HostInfo, + *, + runtime_line: str | None = None, +) -> dict[str, str]: + env = os.environ.copy() + if host.is_windows: + path_dirs = [ + str(binary_path.parent), + *windows_runtime_dirs_for_runtime_line(runtime_line), + ] + existing = [part for part in env.get("PATH", "").split(os.pathsep) if part] + env["PATH"] = 
os.pathsep.join(dedupe_existing_dirs([*path_dirs, *existing]))
    elif host.is_linux:
        ld_dirs = [
            str(binary_path.parent),
            str(install_dir),
            *linux_runtime_dirs(binary_path),
        ]
        existing = [
            part for part in env.get("LD_LIBRARY_PATH", "").split(os.pathsep) if part
        ]
        env["LD_LIBRARY_PATH"] = os.pathsep.join(
            dedupe_existing_dirs([*ld_dirs, *existing])
        )
    elif host.is_macos:
        dyld_dirs = [str(binary_path.parent), str(install_dir)]
        existing = [
            part for part in env.get("DYLD_LIBRARY_PATH", "").split(os.pathsep) if part
        ]
        env["DYLD_LIBRARY_PATH"] = os.pathsep.join(
            dedupe_existing_dirs([*dyld_dirs, *existing])
        )
    return env


def validate_quantize(
    quantize_path: Path,
    probe_path: Path,
    quantized_path: Path,
    install_dir: Path,
    host: HostInfo,
    *,
    runtime_line: str | None = None,
) -> None:
    """Run llama-quantize on the probe model; raise PrebuiltFallback on any failure."""
    command = [str(quantize_path), str(probe_path), str(quantized_path), "Q6_K", "2"]
    result = subprocess.run(
        command,
        capture_output = True,
        text = True,
        timeout = 120,
        env = binary_env(quantize_path, install_dir, host, runtime_line = runtime_line),
    )
    # Also treat a missing/empty output file as failure even on exit code 0.
    if (
        result.returncode != 0
        or not quantized_path.exists()
        or quantized_path.stat().st_size == 0
    ):
        raise PrebuiltFallback(
            "llama-quantize validation failed:\n"
            + result.stdout
            + ("\n" + result.stderr if result.stderr else "")
        )


def validate_server(
    server_path: Path,
    probe_path: Path,
    host: HostInfo,
    install_dir: Path,
    *,
    runtime_line: str | None = None,
) -> None:
    """Boot llama-server on the probe model and require one successful /completion call.

    Retries with a fresh port when startup failure looks like a port-bind race.
    """
    last_failure: PrebuiltFallback | None = None
    for port_attempt in range(1, SERVER_PORT_BIND_ATTEMPTS + 1):
        port = free_local_port()
        command = [
            str(server_path),
            "-m",
            str(probe_path),
            "--host",
            "127.0.0.1",
            "--port",
            str(port),
            "-c",
            "32",
            "--parallel",
            "1",
            "--threads",
            "1",
            "--ubatch-size",
            "32",
            "--batch-size",
            "32",
        ]
        if host.has_usable_nvidia or (host.is_macos and host.is_arm64):
            command.extend(["--n-gpu-layers", "1"])

        log_fd, log_name = tempfile.mkstemp(prefix = "llama-server-", suffix = ".log")
        os.close(log_fd)
        log_path = Path(log_name)
        process: subprocess.Popen[str] | None = None
        try:
            with log_path.open("w", encoding = "utf-8", errors = "replace") as log_handle:
                process = subprocess.Popen(
                    command,
                    stdout = log_handle,
                    stderr = subprocess.STDOUT,
                    text = True,
                    env = binary_env(
                        server_path, install_dir, host, runtime_line = runtime_line
                    ),
                )
                deadline = time.time() + 20
                startup_started = time.time()
                response_body = ""
                last_error: Exception | None = None
                while time.time() < deadline:
                    # Server died: decide between a port-race retry and a hard failure.
                    if process.poll() is not None:
                        process.wait(timeout = 5)
                        log_handle.flush()
                        output = read_log_excerpt(log_path)
                        exited_quickly = (
                            time.time() - startup_started
                        ) <= SERVER_BIND_RETRY_WINDOW_SECONDS
                        failure = PrebuiltFallback(
                            "llama-server exited during startup:\n" + output
                        )
                        if (
                            port_attempt < SERVER_PORT_BIND_ATTEMPTS
                            and is_retryable_server_bind_error(
                                last_error,
                                output,
                                exited_quickly = exited_quickly,
                            )
                        ):
                            log(
                                f"llama-server startup hit a port race on {port}; retrying with a fresh port "
                                f"({port_attempt}/{SERVER_PORT_BIND_ATTEMPTS})"
                            )
                            last_failure = failure
                            break
                        raise failure

                    payload = json.dumps({"prompt": "a", "n_predict": 1}).encode(
                        "utf-8"
                    )
                    request = urllib.request.Request(
                        f"http://127.0.0.1:{port}/completion",
                        data = payload,
                        headers = {"Content-Type": "application/json"},
                    )
                    try:
                        with urllib.request.urlopen(request, timeout = 5) as response:
                            status_code = response.status
                            response_body = response.read().decode("utf-8", "replace")
                            if status_code == 200:
                                # One successful completion request is the pass criterion.
                                return
                            last_error = RuntimeError(
                                f"unexpected HTTP status {status_code}"
                            )
                    except urllib.error.HTTPError as exc:
                        response_body = exc.read().decode("utf-8", "replace")
                        last_error = exc
                    except Exception as exc:
                        last_error = exc
                    time.sleep(0.5)
                else:
                    # while/else: loop ran out of time without a success or break.
                    log_handle.flush()
                    output = read_log_excerpt(log_path)
                    raise PrebuiltFallback(
                        "llama-server completion validation timed out"
                        + (f" ({last_error})" if last_error else "")
                        + ":\n"
                        + output
                        + ("\n" + response_body if response_body else "")
                    )
        finally:
            if process is not None and process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout = 5)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait(timeout = 5)
            try:
                log_path.unlink(missing_ok = True)
            except Exception:
                pass
    if last_failure is not None:
        raise last_failure
    raise PrebuiltFallback("llama-server validation failed unexpectedly")


def collect_system_report(
    host: HostInfo, choice: AssetChoice | None, install_dir: Path
) -> str:
    """Assemble a multi-line diagnostic report printed when falling back to source build."""
    lines = [
        f"platform={host.system} machine={host.machine}",
        f"driver_cuda_version={host.driver_cuda_version}",
        f"compute_caps={','.join(host.compute_caps) if host.compute_caps else 'unknown'}",
        f"cuda_visible_devices={host.visible_cuda_devices if host.visible_cuda_devices is not None else 'unset'}",
        f"has_physical_nvidia={host.has_physical_nvidia}",
        f"has_usable_nvidia={host.has_usable_nvidia}",
        f"chosen_asset={(choice.name if choice else 'none')}",
        f"asset_source={(choice.source_label if choice else 'none')}",
    ]
    if host.is_linux and host.has_physical_nvidia:
        runtime_lines, runtime_dirs = detected_linux_runtime_lines()
        lines.append(
            "linux_runtime_lines="
            + (",".join(runtime_lines) if runtime_lines else "none")
        )
        for runtime_line in ("cuda13", "cuda12"):
            lines.append(
                f"linux_runtime_dirs_{runtime_line}="
                + (
                    ",".join(runtime_dirs.get(runtime_line, []))
                    if runtime_dirs.get(runtime_line)
                    else "none"
                )
            )
    if choice and choice.selection_log:
        lines.append("selection_log:")
        lines.extend(choice.selection_log)
    if host.nvidia_smi:
        try:
            smi = run_capture([host.nvidia_smi], timeout = 20)
            excerpt = "\n".join((smi.stdout + smi.stderr).splitlines()[:20])
            lines.append("nvidia-smi:")
lines.append(excerpt)
        except Exception as exc:
            lines.append(f"nvidia-smi error: {exc}")

    if host.is_linux:
        server_binary = install_dir / "llama-server"
        if server_binary.exists():
            server_env = binary_env(server_binary, install_dir, host)
            lines.append(
                "linux_missing_libs="
                + (
                    ",".join(linux_missing_libraries(server_binary, env = server_env))
                    or "none"
                )
            )
            lines.append(
                "linux_runtime_dirs="
                + (
                    ",".join(
                        [
                            part
                            for part in server_env.get("LD_LIBRARY_PATH", "").split(
                                os.pathsep
                            )
                            if part
                        ]
                    )
                    or "none"
                )
            )
            try:
                ldd = run_capture(
                    ["ldd", str(server_binary)], timeout = 20, env = server_env
                )
                lines.append("ldd llama-server:")
                lines.append((ldd.stdout + ldd.stderr).strip())
            except Exception as exc:
                lines.append(f"ldd error: {exc}")
    elif host.is_windows:
        lines.append(
            "windows_runtime_dirs=" + (",".join(windows_runtime_dirs()) or "none")
        )
        runtime_lines, runtime_dirs = detected_windows_runtime_lines()
        lines.append(
            "windows_runtime_lines="
            + (",".join(runtime_lines) if runtime_lines else "none")
        )
        for runtime_line in ("cuda13", "cuda12"):
            lines.append(
                f"windows_runtime_dirs_{runtime_line}="
                + (
                    ",".join(runtime_dirs.get(runtime_line, []))
                    if runtime_dirs.get(runtime_line)
                    else "none"
                )
            )
    elif host.is_macos:
        server_binary = install_dir / "llama-server"
        if server_binary.exists():
            try:
                otool = run_capture(["otool", "-L", str(server_binary)], timeout = 20)
                lines.append("otool -L llama-server:")
                lines.append((otool.stdout + otool.stderr).strip())
            except Exception as exc:
                lines.append(f"otool error: {exc}")

    return "\n".join(lines)


def apply_approved_hashes(
    attempts: Iterable[AssetChoice],
    checksums: ApprovedReleaseChecksums,
) -> list[AssetChoice]:
    """Attach approved SHA-256 digests to attempts; drop (and record) unapproved assets."""
    approved_attempts: list[AssetChoice] = []
    missing_assets: list[str] = []
    for attempt in attempts:
        approved = checksums.artifacts.get(attempt.name)
        if approved is None:
missing_assets.append(attempt.name)
            continue
        attempt.expected_sha256 = approved.sha256
        approved_attempts.append(attempt)
    if not approved_attempts:
        missing_text = ", ".join(missing_assets) if missing_assets else "none"
        raise PrebuiltFallback(
            "approved checksum asset did not contain the selected prebuilt archive(s): "
            f"{missing_text}"
        )
    return approved_attempts


def require_approved_source_hash(
    checksums: ApprovedReleaseChecksums, llama_tag: str
) -> ApprovedArtifactHash:
    """Return the approved hash for the source archive, or raise PrebuiltFallback."""
    source_asset_name = source_archive_logical_name(llama_tag)
    approved_source = checksums.artifacts.get(source_asset_name)
    if approved_source is None:
        raise PrebuiltFallback(
            f"approved checksum asset did not contain source archive {source_asset_name}"
        )
    return approved_source


def resolve_install_attempts(
    llama_tag: str,
    host: HostInfo,
    published_repo: str,
    published_release_tag: str,
) -> tuple[str, str, list[AssetChoice], ApprovedReleaseChecksums]:
    """Resolve the ordered list of prebuilt attempts (with approved hashes) for this host.

    Returns (requested_tag, resolved_tag, attempts, checksums).
    """
    requested_tag = llama_tag
    resolved_tag = resolve_requested_install_tag(llama_tag, published_release_tag)
    checksums = load_approved_release_checksums(published_repo, resolved_tag)
    # Fail early if the source archive itself has no approved hash.
    require_approved_source_hash(checksums, resolved_tag)

    if host.is_linux and host.is_x86_64 and host.has_usable_nvidia:
        linux_cuda_selection = resolve_linux_cuda_choice(
            host, resolved_tag, published_repo, published_release_tag
        )
        attempts = apply_approved_hashes(linux_cuda_selection.attempts, checksums)
        if not attempts:
            raise PrebuiltFallback("no compatible Linux CUDA asset was found")
        log_lines(linux_cuda_selection.selection_log)
        return requested_tag, resolved_tag, attempts, checksums

    if host.is_windows and host.is_x86_64 and host.has_usable_nvidia:
        upstream_assets = github_release_assets(UPSTREAM_REPO, resolved_tag)
        attempts = apply_approved_hashes(
            resolve_windows_cuda_choices(host, resolved_tag, upstream_assets), checksums
        )
        if not attempts:
            raise PrebuiltFallback("no 
compatible Windows CUDA asset was found")
        if attempts[0].selection_log:
            log_lines(attempts[0].selection_log)
        return requested_tag, resolved_tag, attempts, checksums

    # CPU / macOS / non-CUDA path: a single asset choice.
    choice = resolve_asset_choice(
        host, resolved_tag, published_repo, published_release_tag
    )
    approved_attempts = apply_approved_hashes([choice], checksums)
    if choice.selection_log:
        log_lines(choice.selection_log)
    return requested_tag, resolved_tag, approved_attempts, checksums


def write_prebuilt_metadata(
    install_dir: Path,
    *,
    requested_tag: str,
    llama_tag: str,
    choice: AssetChoice,
    prebuilt_fallback_used: bool,
) -> None:
    """Record what was installed (tag, asset, provenance) for later inspection."""
    metadata = {
        "requested_tag": requested_tag,
        "tag": llama_tag,
        "asset": choice.name,
        "source": choice.source_label,
        "bundle_profile": choice.bundle_profile,
        "runtime_line": choice.runtime_line,
        "coverage_class": choice.coverage_class,
        "prebuilt_fallback_used": prebuilt_fallback_used,
        "installed_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    (install_dir / "UNSLOTH_PREBUILT_INFO.json").write_text(
        json.dumps(metadata, indent = 2) + "\n"
    )


def validate_prebuilt_choice(
    choice: AssetChoice,
    host: HostInfo,
    install_dir: Path,
    work_dir: Path,
    probe_path: Path,
    *,
    requested_tag: str,
    llama_tag: str,
    approved_checksums: ApprovedReleaseChecksums,
    prebuilt_fallback_used: bool,
    quantized_path: Path,
) -> tuple[Path, Path]:
    """Hydrate source, overlay the bundle, and run quantize+server validation in *install_dir*."""
    source_archive = approved_checksums.artifacts.get(
        source_archive_logical_name(llama_tag)
    )
    if source_archive is None:
        raise PrebuiltFallback(
            f"approved checksum asset did not contain source archive {source_archive_logical_name(llama_tag)}"
        )
    log(f"hydrating upstream llama.cpp source for {llama_tag} into {install_dir}")
    hydrate_source_tree(
        llama_tag,
        install_dir,
        work_dir,
        expected_sha256 = source_archive.sha256,
    )
    log(f"overlaying prebuilt bundle {choice.name} into {install_dir}")
    server_path, quantize_path = install_from_archives(
choice, host, install_dir, work_dir
    )
    preflight_linux_installed_binaries((server_path, quantize_path), install_dir, host)
    ensure_repo_shape(install_dir)
    write_prebuilt_metadata(
        install_dir,
        requested_tag = requested_tag,
        llama_tag = llama_tag,
        choice = choice,
        prebuilt_fallback_used = prebuilt_fallback_used,
    )
    validate_quantize(
        quantize_path,
        probe_path,
        quantized_path,
        install_dir,
        host,
        runtime_line = choice.runtime_line,
    )
    validate_server(
        server_path,
        probe_path,
        host,
        install_dir,
        runtime_line = choice.runtime_line,
    )
    log(f"staged prebuilt validation succeeded for {choice.name}")
    return server_path, quantize_path


def validate_prebuilt_attempts(
    attempts: Iterable[AssetChoice],
    host: HostInfo,
    install_dir: Path,
    work_dir: Path,
    probe_path: Path,
    *,
    requested_tag: str,
    llama_tag: str,
    approved_checksums: ApprovedReleaseChecksums,
) -> tuple[AssetChoice, Path, bool]:
    """Try each candidate bundle in order; return (choice, staging_dir, used_fallback) for the first that validates."""
    attempt_list = list(attempts)
    if not attempt_list:
        raise PrebuiltFallback("no prebuilt bundle attempts were available")

    tried_fallback = False
    for index, attempt in enumerate(attempt_list):
        if index > 0:
            tried_fallback = True
            log(
                "retrying CUDA prebuilt "
                f"{attempt.name} install_kind={attempt.install_kind} "
                f"runtime_line={attempt.runtime_line} coverage_class={attempt.coverage_class}"
            )

        staging_dir = create_install_staging_dir(install_dir)
        # Separate quantized output per attempt so a stale file cannot mask failure.
        quantized_path = work_dir / f"stories260K-q4-{index}.gguf"
        if quantized_path.exists():
            quantized_path.unlink()
        try:
            validate_prebuilt_choice(
                attempt,
                host,
                staging_dir,
                work_dir,
                probe_path,
                requested_tag = requested_tag,
                llama_tag = llama_tag,
                approved_checksums = approved_checksums,
                prebuilt_fallback_used = tried_fallback,
                quantized_path = quantized_path,
            )
        except Exception as exc:
            remove_tree(staging_dir)
            prune_install_staging_root(install_dir)
            if isinstance(exc, PrebuiltFallback):
                attempt_error = exc
            else:
attempt_error = PrebuiltFallback(
                    f"candidate attempt failed before activation for {attempt.name}: {exc}"
                )
            if index == len(attempt_list) - 1:
                # No more fallbacks; surface the last failure.
                raise attempt_error from exc
            log(
                "selected CUDA bundle failed before activation; trying next prebuilt fallback "
                f"({textwrap.shorten(str(attempt_error), width = 200, placeholder = '...')})"
            )
            continue

        return attempt, staging_dir, tried_fallback

    raise PrebuiltFallback("no prebuilt bundle passed validation")


def install_prebuilt(
    install_dir: Path, llama_tag: str, published_repo: str, published_release_tag: str
) -> None:
    """Top-level prebuilt install: lock, resolve, validate in staging, activate, report.

    On PrebuiltFallback, prints a diagnostic report and exits with EXIT_FALLBACK
    so the caller can fall back to a source build.
    """
    host = detect_host()
    choice: AssetChoice | None = None
    try:
        with install_lock(install_lock_path(install_dir)):
            if install_dir.exists():
                log(
                    f"existing llama.cpp install detected at {install_dir}; validating staged prebuilt update before replacement"
                )
            else:
                log(
                    f"no existing llama.cpp install detected at {install_dir}; performing fresh prebuilt install"
                )
            requested_tag, llama_tag, attempts, approved_checksums = (
                resolve_install_attempts(
                    llama_tag,
                    host,
                    published_repo,
                    published_release_tag,
                )
            )
            choice = attempts[0]
            log(
                f"selected {choice.name} ({choice.source_label}) for {host.system} {host.machine}"
            )
            with tempfile.TemporaryDirectory(prefix = "unsloth-llama-prebuilt-") as tmp:
                work_dir = Path(tmp)
                probe_path = work_dir / "stories260K.gguf"
                download_validation_model(
                    probe_path, validation_model_cache_path(install_dir)
                )
                choice, selected_staging_dir, _ = validate_prebuilt_attempts(
                    attempts,
                    host,
                    install_dir,
                    work_dir,
                    probe_path,
                    requested_tag = requested_tag,
                    llama_tag = llama_tag,
                    approved_checksums = approved_checksums,
                )
                activate_install_tree(selected_staging_dir, install_dir, host)
                try:
                    ensure_converter_scripts(install_dir, llama_tag)
                except Exception as exc:
                    # Converter scripts are a nice-to-have; the activated
                    # install stays valid without them.
                    log(
                        "converter script fetch failed after activation; install remains valid "
                        f"({textwrap.shorten(str(exc), 
width = 200, placeholder = '...')})" + ) + except PrebuiltFallback as exc: + log("prebuilt install path failed; falling back to source build") + log(f"prebuilt fallback reason: {exc}") + report = collect_system_report(host, choice, install_dir) + print(report) + raise SystemExit(EXIT_FALLBACK) from exc + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description = "Install and validate a prebuilt llama.cpp bundle for Unsloth Studio." + ) + parser.add_argument("--install-dir", help = "Target ~/.unsloth/llama.cpp directory") + parser.add_argument( + "--llama-tag", + default = DEFAULT_LLAMA_TAG, + help = f"llama.cpp release tag. Prebuilt installs are pinned to the approved tag {APPROVED_PREBUILT_LLAMA_TAG}.", + ) + parser.add_argument( + "--published-repo", + default = DEFAULT_PUBLISHED_REPO, + help = "Published bundle repository", + ) + parser.add_argument( + "--published-release-tag", + default = DEFAULT_PUBLISHED_TAG, + help = "Published GitHub release tag to pin. 
By default, scan releases until a compatible llama.cpp bundle is found.", + ) + resolve_group = parser.add_mutually_exclusive_group() + resolve_group.add_argument( + "--resolve-llama-tag", + nargs = "?", + const = "latest", + help = "Resolve a llama.cpp tag such as 'latest' to the logical upstream release tag.", + ) + resolve_group.add_argument( + "--resolve-install-tag", + nargs = "?", + const = "latest", + help = "Resolve a llama.cpp tag such as 'latest' to the concrete tag installable on the current host.", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + if args.resolve_llama_tag is not None: + print(resolve_requested_llama_tag(args.resolve_llama_tag)) + return EXIT_SUCCESS + + if args.resolve_install_tag is not None: + print( + resolve_requested_install_tag( + args.resolve_install_tag, args.published_release_tag or "" + ) + ) + return EXIT_SUCCESS + + if not args.install_dir: + raise SystemExit( + "install_llama_prebuilt.py: --install-dir is required unless --resolve-llama-tag or --resolve-install-tag is used" + ) + install_prebuilt( + install_dir = Path(args.install_dir).expanduser().resolve(), + llama_tag = args.llama_tag, + published_repo = args.published_repo, + published_release_tag = args.published_release_tag or "", + ) + return EXIT_SUCCESS + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except SystemExit: + raise + except Exception as exc: + message = textwrap.shorten(str(exc), width = 400, placeholder = "...") + log(f"fatal helper error: {message}") + raise SystemExit(EXIT_ERROR) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index c58bcd5c8..d8465fd03 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -503,7 +503,6 @@ if ($DriverMaxCuda) { $isCompat = ($tkMaj -lt $drMajorCuda) -or ($tkMaj -eq $drMajorCuda -and $tkMin -le $drMinorCuda) if ($isCompat) { # Also verify the toolkit supports our GPU architecture - Write-Host " [DEBUG] Checking CUDA compatibility: toolkit=$tkMaj.$tkMin 
arch=sm_$CudaArch" -ForegroundColor Magenta $archOk = $true if ($CudaArch) { $archOk = Test-NvccArchSupport -NvccExe $candidateNvcc -Arch $CudaArch @@ -1296,6 +1295,93 @@ if ($LASTEXITCODE -ne 0) { $ErrorActionPreference = $prevEAP_t5 Write-Host "[OK] Transformers 5.x pre-installed to .venv_t5/" -ForegroundColor Green +# ========================================================================== +# PHASE 3.4: Prefer prebuilt llama.cpp bundles before source build +# ========================================================================== +$UnslothHome = Join-Path $env:USERPROFILE ".unsloth" +if (-not (Test-Path $UnslothHome)) { New-Item -ItemType Directory -Force $UnslothHome | Out-Null } +$LlamaCppDir = Join-Path $UnslothHome "llama.cpp" +$NeedLlamaSourceBuild = $false +$SkipPrebuiltInstall = $false +$RequestedLlamaTag = if ($env:UNSLOTH_LLAMA_TAG) { $env:UNSLOTH_LLAMA_TAG } else { "latest" } +$HelperReleaseRepo = if ($env:UNSLOTH_LLAMA_RELEASE_REPO) { $env:UNSLOTH_LLAMA_RELEASE_REPO } else { "unslothai/llama.cpp" } +$resolveOutput = & python "$PSScriptRoot\install_llama_prebuilt.py" --resolve-install-tag $RequestedLlamaTag --published-repo $HelperReleaseRepo 2>&1 +$resolveExit = $LASTEXITCODE +$ResolvedLlamaTag = if ($resolveOutput) { ($resolveOutput | Select-Object -Last 1).ToString().Trim() } else { "" } +if ($resolveExit -ne 0 -or [string]::IsNullOrWhiteSpace($ResolvedLlamaTag)) { + Write-Host "" + Write-Host "[WARN] Failed to resolve an installable prebuilt llama.cpp tag via $HelperReleaseRepo" -ForegroundColor Yellow + if ($resolveOutput) { + $resolveOutput | ForEach-Object { Write-Host $_ } + } + $fallbackOutput = & python "$PSScriptRoot\install_llama_prebuilt.py" --resolve-llama-tag $RequestedLlamaTag 2>$null + $fallbackExit = $LASTEXITCODE + $ResolvedLlamaTag = if ($fallbackExit -eq 0 -and $fallbackOutput) { + ($fallbackOutput | Select-Object -Last 1).ToString().Trim() + } elseif ($RequestedLlamaTag -eq "latest") { + # Try Unsloth release repo first, then 
fall back to ggml-org upstream + $resolvedLatest = $null + try { + $latestRelease = Invoke-RestMethod -Uri "https://api.github.com/repos/$HelperReleaseRepo/releases/latest" -ErrorAction Stop + $resolvedLatest = $latestRelease.tag_name + } catch {} + if (-not $resolvedLatest) { + try { + $latestRelease = Invoke-RestMethod -Uri "https://api.github.com/repos/ggml-org/llama.cpp/releases/latest" -ErrorAction Stop + $resolvedLatest = $latestRelease.tag_name + } catch {} + } + if ($resolvedLatest) { $resolvedLatest } else { $RequestedLlamaTag } + } else { + $RequestedLlamaTag + } + $NeedLlamaSourceBuild = $true + $SkipPrebuiltInstall = $true +} + +Write-Host "" +Write-Host "Resolved llama.cpp release tag: $ResolvedLlamaTag" -ForegroundColor Gray + +if ($env:UNSLOTH_LLAMA_FORCE_COMPILE -eq "1") { + Write-Host "" + Write-Host "[WARN] UNSLOTH_LLAMA_FORCE_COMPILE=1 -- skipping prebuilt llama.cpp install" -ForegroundColor Yellow + $NeedLlamaSourceBuild = $true +} else { + Write-Host "" + Write-Host "Installing prebuilt llama.cpp bundle (preferred path)..." 
-ForegroundColor Cyan + if (Test-Path $LlamaCppDir) { + Write-Host "Existing llama.cpp install detected -- validating staged prebuilt update before replacement" -ForegroundColor Gray + } + if ($SkipPrebuiltInstall) { + Write-Host "[WARN] Skipping prebuilt install because prebuilt tag resolution failed -- falling back to source build" -ForegroundColor Yellow + } else { + $prebuiltArgs = @( + "$PSScriptRoot\install_llama_prebuilt.py", + "--install-dir", $LlamaCppDir, + "--llama-tag", $ResolvedLlamaTag, + "--published-repo", $HelperReleaseRepo + ) + if ($env:UNSLOTH_LLAMA_RELEASE_TAG) { + $prebuiltArgs += @("--published-release-tag", $env:UNSLOTH_LLAMA_RELEASE_TAG) + } + $prevEAPPrebuilt = $ErrorActionPreference + $ErrorActionPreference = "Continue" + & python @prebuiltArgs + $prebuiltExit = $LASTEXITCODE + $ErrorActionPreference = $prevEAPPrebuilt + + if ($prebuiltExit -eq 0) { + Write-Host "[OK] Prebuilt llama.cpp installed and validated" -ForegroundColor Green + } else { + if (Test-Path $LlamaCppDir) { + Write-Host "[WARN] Prebuilt update failed; existing install was restored or cleaned before source build fallback" -ForegroundColor Yellow + } + Write-Host "[WARN] Prebuilt llama.cpp path unavailable or failed validation -- falling back to source build" -ForegroundColor Yellow + $NeedLlamaSourceBuild = $true + } + } +} + # ========================================================================== # PHASE 3.5: Install OpenSSL dev (for HTTPS support in llama-server) # ========================================================================== @@ -1303,42 +1389,46 @@ Write-Host "[OK] Transformers 5.x pre-installed to .venv_t5/" -ForegroundColor G # ShiningLight.OpenSSL.Dev includes headers + libs that cmake can find. 
$OpenSslAvailable = $false -# Check if OpenSSL dev is already installed (look for include dir) -$OpenSslRoots = @( - 'C:\Program Files\OpenSSL-Win64', - 'C:\Program Files\OpenSSL', - 'C:\OpenSSL-Win64' -) -$OpenSslRoot = $null -foreach ($root in $OpenSslRoots) { - if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { - $OpenSslRoot = $root - break - } -} - -if ($OpenSslRoot) { - $OpenSslAvailable = $true - Write-Host "[OK] OpenSSL dev found at $OpenSslRoot" -ForegroundColor Green -} else { - Write-Host "" - Write-Host "Installing OpenSSL dev (for HTTPS in llama-server)..." -ForegroundColor Cyan - $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue) - if ($HasWinget) { - winget install -e --id ShiningLight.OpenSSL.Dev --accept-package-agreements --accept-source-agreements - # Re-check after install - foreach ($root in $OpenSslRoots) { - if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { - $OpenSslRoot = $root - $OpenSslAvailable = $true - Write-Host "[OK] OpenSSL dev installed at $OpenSslRoot" -ForegroundColor Green - break - } +if ($NeedLlamaSourceBuild) { + # Check if OpenSSL dev is already installed (look for include dir) + $OpenSslRoots = @( + 'C:\Program Files\OpenSSL-Win64', + 'C:\Program Files\OpenSSL', + 'C:\OpenSSL-Win64' + ) + $OpenSslRoot = $null + foreach ($root in $OpenSslRoots) { + if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { + $OpenSslRoot = $root + break } } - if (-not $OpenSslAvailable) { - Write-Host "[WARN] OpenSSL dev not available -- llama-server will be built without HTTPS" -ForegroundColor Yellow + + if ($OpenSslRoot) { + $OpenSslAvailable = $true + Write-Host "[OK] OpenSSL dev found at $OpenSslRoot" -ForegroundColor Green + } else { + Write-Host "" + Write-Host "Installing OpenSSL dev (for HTTPS in llama-server)..." 
-ForegroundColor Cyan + $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue) + if ($HasWinget) { + winget install -e --id ShiningLight.OpenSSL.Dev --accept-package-agreements --accept-source-agreements + # Re-check after install + foreach ($root in $OpenSslRoots) { + if (Test-Path (Join-Path $root 'include\openssl\ssl.h')) { + $OpenSslRoot = $root + $OpenSslAvailable = $true + Write-Host "[OK] OpenSSL dev installed at $OpenSslRoot" -ForegroundColor Green + break + } + } + } + if (-not $OpenSslAvailable) { + Write-Host "[WARN] OpenSSL dev not available -- llama-server will be built without HTTPS" -ForegroundColor Yellow + } } +} else { + Write-Host "[SKIP] OpenSSL dev install -- prebuilt llama.cpp already validated" -ForegroundColor Yellow } # ========================================================================== @@ -1351,9 +1441,7 @@ if ($OpenSslRoot) { # - llama-server: for GGUF model inference (with HTTPS if OpenSSL available) # - llama-quantize: for GGUF export quantization # Prerequisites (git, cmake, VS Build Tools, CUDA Toolkit) already installed in Phase 1. 
-$UnslothHome = Join-Path $env:USERPROFILE ".unsloth" -if (-not (Test-Path $UnslothHome)) { New-Item -ItemType Directory -Force $UnslothHome | Out-Null } -$LlamaCppDir = Join-Path $UnslothHome "llama.cpp" +$OriginalLlamaCppDir = $LlamaCppDir $BuildDir = Join-Path $LlamaCppDir "build" $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" @@ -1376,7 +1464,10 @@ if (Test-Path $LlamaServerBin) { } } -if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { +if (-not $NeedLlamaSourceBuild) { + Write-Host "" + Write-Host "[OK] Using validated prebuilt llama.cpp install at $LlamaCppDir" -ForegroundColor Green +} elseif ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { Write-Host "" Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green } elseif (-not $HasCmakeForBuild) { @@ -1432,29 +1523,49 @@ if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { # -- Step A: Clone or pull llama.cpp -- + $UseConcreteRef = ($ResolvedLlamaTag -ne "latest" -and -not [string]::IsNullOrWhiteSpace($ResolvedLlamaTag)) + if (Test-Path (Join-Path $LlamaCppDir ".git")) { - Write-Host " llama.cpp repo already cloned, pulling latest..." -ForegroundColor Gray - git -C $LlamaCppDir pull 2>&1 | Out-Null + Write-Host " Syncing llama.cpp to $ResolvedLlamaTag..." -ForegroundColor Gray + if ($UseConcreteRef) { + git -C $LlamaCppDir fetch --depth 1 origin $ResolvedLlamaTag 2>&1 | Out-Null + } else { + git -C $LlamaCppDir fetch --depth 1 origin 2>&1 | Out-Null + } if ($LASTEXITCODE -ne 0) { - Write-Host " [WARN] git pull failed -- using existing source" -ForegroundColor Yellow + Write-Host " [WARN] git fetch failed -- using existing source" -ForegroundColor Yellow + } else { + git -C $LlamaCppDir checkout -B unsloth-llama-build FETCH_HEAD 2>&1 | Out-Null + if ($LASTEXITCODE -ne 0) { + $BuildOk = $false + $FailedStep = "git checkout" + } else { + git -C $LlamaCppDir clean -fdx 2>&1 | Out-Null + } } } else { - Write-Host " Cloning llama.cpp..." 
-ForegroundColor Gray - if (Test-Path $LlamaCppDir) { Remove-Item -Recurse -Force $LlamaCppDir } - git clone --depth 1 https://github.com/ggml-org/llama.cpp.git $LlamaCppDir 2>&1 | Out-Null + Write-Host " Cloning llama.cpp @ $ResolvedLlamaTag..." -ForegroundColor Gray + $buildTmp = "$LlamaCppDir.build.$PID" + if (Test-Path $buildTmp) { Remove-Item -Recurse -Force $buildTmp } + $cloneArgs = @("clone", "--depth", "1") + if ($UseConcreteRef) { + $cloneArgs += @("--branch", $ResolvedLlamaTag) + } + $cloneArgs += @("https://github.com/ggml-org/llama.cpp.git", $buildTmp) + git @cloneArgs 2>&1 | Out-Null if ($LASTEXITCODE -ne 0) { $BuildOk = $false $FailedStep = "git clone" + if (Test-Path $buildTmp) { Remove-Item -Recurse -Force $buildTmp } + } + # Use temp dir for build; swap into $LlamaCppDir only after build succeeds + if ($BuildOk) { + $LlamaCppDir = $buildTmp + $BuildDir = Join-Path $LlamaCppDir "build" } } # -- Step B: cmake configure -- - # Clean stale CMake cache to prevent previous CUDA settings from leaking - # into a CPU-only rebuild (or vice versa). 
- $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt" - if (Test-Path $CmakeCacheFile) { - Remove-Item -Recurse -Force $BuildDir - } if ($BuildOk) { Write-Host "" @@ -1555,6 +1666,21 @@ if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { } } + # Swap temp build dir into final location (only if we built in a temp dir) + if ($BuildOk -and $LlamaCppDir -ne $OriginalLlamaCppDir) { + if (Test-Path $OriginalLlamaCppDir) { Remove-Item -Recurse -Force $OriginalLlamaCppDir } + Move-Item $LlamaCppDir $OriginalLlamaCppDir + $LlamaCppDir = $OriginalLlamaCppDir + $BuildDir = Join-Path $LlamaCppDir "build" + $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" + } elseif (-not $BuildOk -and $LlamaCppDir -ne $OriginalLlamaCppDir) { + # Build failed -- clean up temp dir, preserve existing install + if (Test-Path $LlamaCppDir) { Remove-Item -Recurse -Force $LlamaCppDir } + $LlamaCppDir = $OriginalLlamaCppDir + $BuildDir = Join-Path $LlamaCppDir "build" + $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" + } + # Restore ErrorActionPreference $ErrorActionPreference = $prevEAP diff --git a/studio/setup.sh b/studio/setup.sh index 0e9917375..4cfabec95 100755 --- a/studio/setup.sh +++ b/studio/setup.sh @@ -341,10 +341,98 @@ else echo "✅ Python dependencies up to date — skipping" fi -# ── 7. WSL: pre-install GGUF build dependencies ── +# ── 7. 
Prefer prebuilt llama.cpp bundles before any source build path ── +UNSLOTH_HOME="$HOME/.unsloth" +mkdir -p "$UNSLOTH_HOME" +LLAMA_CPP_DIR="$UNSLOTH_HOME/llama.cpp" +LLAMA_SERVER_BIN="$LLAMA_CPP_DIR/build/bin/llama-server" +_NEED_LLAMA_SOURCE_BUILD=false +_LLAMA_FORCE_COMPILE="${UNSLOTH_LLAMA_FORCE_COMPILE:-0}" +_REQUESTED_LLAMA_TAG="${UNSLOTH_LLAMA_TAG:-latest}" +_HELPER_RELEASE_REPO="${UNSLOTH_LLAMA_RELEASE_REPO:-unslothai/llama.cpp}" +_RESOLVE_LLAMA_LOG="$(mktemp)" +set +e +python "$SCRIPT_DIR/install_llama_prebuilt.py" \ + --resolve-install-tag "$_REQUESTED_LLAMA_TAG" \ + --published-repo "$_HELPER_RELEASE_REPO" >"$_RESOLVE_LLAMA_LOG" 2>&1 +_RESOLVE_LLAMA_STATUS=$? +set -e +if [ "$_RESOLVE_LLAMA_STATUS" -eq 0 ]; then + _RESOLVED_LLAMA_TAG="$(tail -n 1 "$_RESOLVE_LLAMA_LOG" | tr -d '\r')" +else + _RESOLVED_LLAMA_TAG="" +fi +if [ -z "$_RESOLVED_LLAMA_TAG" ]; then + echo "" + echo "⚠️ Failed to resolve an installable prebuilt llama.cpp tag via $_HELPER_RELEASE_REPO" + cat "$_RESOLVE_LLAMA_LOG" >&2 || true + set +e + _RESOLVED_LLAMA_TAG="$(python "$SCRIPT_DIR/install_llama_prebuilt.py" --resolve-llama-tag "$_REQUESTED_LLAMA_TAG" 2>/dev/null)" + _RESOLVE_UPSTREAM_STATUS=$? 
+ set -e + if [ "$_RESOLVE_UPSTREAM_STATUS" -ne 0 ] || [ -z "$_RESOLVED_LLAMA_TAG" ]; then + if [ "$_REQUESTED_LLAMA_TAG" = "latest" ]; then + # Try Unsloth release repo first, then fall back to ggml-org upstream + _RESOLVED_LLAMA_TAG="$(curl -fsSL "https://api.github.com/repos/${_HELPER_RELEASE_REPO}/releases/latest" 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG="" + if [ -z "$_RESOLVED_LLAMA_TAG" ]; then + _RESOLVED_LLAMA_TAG="$(curl -fsSL https://api.github.com/repos/ggml-org/llama.cpp/releases/latest 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG="" + fi + fi + if [ -z "$_RESOLVED_LLAMA_TAG" ]; then + _RESOLVED_LLAMA_TAG="$_REQUESTED_LLAMA_TAG" + fi + fi + _NEED_LLAMA_SOURCE_BUILD=true + _SKIP_PREBUILT_INSTALL=true +fi +rm -f "$_RESOLVE_LLAMA_LOG" + +echo "" +echo "Resolved llama.cpp release tag: $_RESOLVED_LLAMA_TAG" + +if [ "$_LLAMA_FORCE_COMPILE" = "1" ]; then + echo "" + echo "⚠️ UNSLOTH_LLAMA_FORCE_COMPILE=1 -- skipping prebuilt llama.cpp install" + _NEED_LLAMA_SOURCE_BUILD=true +else + echo "" + echo "Installing prebuilt llama.cpp bundle (preferred path)..." + if [ -d "$LLAMA_CPP_DIR" ]; then + echo "Existing llama.cpp install detected -- validating staged prebuilt update before replacement" + fi + if [ "${_SKIP_PREBUILT_INSTALL:-false}" = true ]; then + echo "⚠️ Skipping prebuilt install because prebuilt tag resolution failed -- falling back to source build" + else + _PREBUILT_CMD=( + python "$SCRIPT_DIR/install_llama_prebuilt.py" + --install-dir "$LLAMA_CPP_DIR" + --llama-tag "$_RESOLVED_LLAMA_TAG" + --published-repo "$_HELPER_RELEASE_REPO" + ) + if [ -n "${UNSLOTH_LLAMA_RELEASE_TAG:-}" ]; then + _PREBUILT_CMD+=(--published-release-tag "$UNSLOTH_LLAMA_RELEASE_TAG") + fi + set +e + "${_PREBUILT_CMD[@]}" + _PREBUILT_STATUS=$? 
+ set -e + + if [ "$_PREBUILT_STATUS" -eq 0 ]; then + echo "✅ Prebuilt llama.cpp installed and validated" + else + if [ -d "$LLAMA_CPP_DIR" ]; then + echo "⚠️ Prebuilt update failed; existing install was restored or cleaned before source build fallback" + fi + echo "⚠️ Prebuilt llama.cpp path unavailable or failed validation -- falling back to source build" + _NEED_LLAMA_SOURCE_BUILD=true + fi + fi +fi + +# ── 8. WSL: pre-install GGUF build dependencies for fallback source builds ── # On WSL, sudo requires a password and can't be entered during GGUF export # (runs in a non-interactive subprocess). Install build deps here instead. -if grep -qi microsoft /proc/version 2>/dev/null; then +if [ "$_NEED_LLAMA_SOURCE_BUILD" = true ] && grep -qi microsoft /proc/version 2>/dev/null; then echo "" echo "⚠️ WSL detected -- installing build dependencies for GGUF export..." _GGUF_DEPS="pciutils build-essential cmake curl git libcurl4-openssl-dev" @@ -402,22 +490,19 @@ if grep -qi microsoft /proc/version 2>/dev/null; then fi fi -# ── 8. Build llama.cpp binaries for GGUF inference + export ── +# ── 9. Build llama.cpp binaries for GGUF inference + export when prebuilt install fails ── # Builds at ~/.unsloth/llama.cpp — a single shared location under the user's # home directory. This is used by both the inference server and the GGUF # export pipeline (unsloth-zoo). # - llama-server: for GGUF model inference # - llama-quantize: for GGUF export quantization (symlinked to root for check_llama_cpp()) -UNSLOTH_HOME="$HOME/.unsloth" -mkdir -p "$UNSLOTH_HOME" -LLAMA_CPP_DIR="$UNSLOTH_HOME/llama.cpp" -LLAMA_SERVER_BIN="$LLAMA_CPP_DIR/build/bin/llama-server" -if [ "${_SKIP_GGUF_BUILD:-}" = true ]; then +if [ "$_NEED_LLAMA_SOURCE_BUILD" = false ]; then + : +elif [ "${_SKIP_GGUF_BUILD:-}" = true ]; then echo "" echo "Skipping llama-server build (missing dependencies)" echo " Install the missing packages and re-run setup to enable GGUF inference." 
else -rm -rf "$LLAMA_CPP_DIR" { # Check prerequisites if ! command -v cmake &>/dev/null; then @@ -432,7 +517,13 @@ rm -rf "$LLAMA_CPP_DIR" echo "Building llama-server for GGUF inference..." BUILD_OK=true - run_quiet_no_exit "clone llama.cpp" git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "$LLAMA_CPP_DIR" || BUILD_OK=false + _CLONE_BRANCH_ARGS=() + if [ "$_RESOLVED_LLAMA_TAG" != "latest" ] && [ -n "$_RESOLVED_LLAMA_TAG" ]; then + _CLONE_BRANCH_ARGS=(--branch "$_RESOLVED_LLAMA_TAG") + fi + _BUILD_TMP="${LLAMA_CPP_DIR}.build.$$" + rm -rf "$_BUILD_TMP" + run_quiet_no_exit "clone llama.cpp" git clone --depth 1 "${_CLONE_BRANCH_ARGS[@]}" https://github.com/ggml-org/llama.cpp.git "$_BUILD_TMP" || BUILD_OK=false if [ "$BUILD_OK" = true ]; then # Skip tests/examples we don't need (faster build) @@ -571,21 +662,29 @@ rm -rf "$LLAMA_CPP_DIR" CMAKE_GENERATOR_ARGS="-G Ninja" fi - run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$LLAMA_CPP_DIR" -B "$LLAMA_CPP_DIR/build" $CMAKE_ARGS || BUILD_OK=false + run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS || BUILD_OK=false fi if [ "$BUILD_OK" = true ]; then - run_quiet_no_exit "build llama-server" cmake --build "$LLAMA_CPP_DIR/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false + run_quiet_no_exit "build llama-server" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false fi # Also build llama-quantize (needed by unsloth-zoo's GGUF export pipeline) if [ "$BUILD_OK" = true ]; then - run_quiet_no_exit "build llama-quantize" cmake --build "$LLAMA_CPP_DIR/build" --config Release --target llama-quantize -j"$NCPU" || true - # Symlink to llama.cpp root — check_llama_cpp() looks for the binary there + run_quiet_no_exit "build llama-quantize" cmake --build "$_BUILD_TMP/build" --config Release --target llama-quantize -j"$NCPU" || true + fi + + # Swap only after build succeeds -- 
preserves existing install on failure + if [ "$BUILD_OK" = true ]; then + rm -rf "$LLAMA_CPP_DIR" + mv "$_BUILD_TMP" "$LLAMA_CPP_DIR" + # Symlink to llama.cpp root -- check_llama_cpp() looks for the binary there QUANTIZE_BIN="$LLAMA_CPP_DIR/build/bin/llama-quantize" if [ -f "$QUANTIZE_BIN" ]; then ln -sf build/bin/llama-quantize "$LLAMA_CPP_DIR/llama-quantize" fi + else + rm -rf "$_BUILD_TMP" fi if [ "$BUILD_OK" = true ]; then diff --git a/tests/studio/install/smoke_test_llama_prebuilt.py b/tests/studio/install/smoke_test_llama_prebuilt.py new file mode 100644 index 000000000..994757d2e --- /dev/null +++ b/tests/studio/install/smoke_test_llama_prebuilt.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import importlib.util +import shutil +import sys +import tempfile +import time +from pathlib import Path + + +PACKAGE_ROOT = Path(__file__).resolve().parents[3] +INSTALLER_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py" + + +def load_installer_module(): + spec = importlib.util.spec_from_file_location( + "studio_install_llama_prebuilt", INSTALLER_PATH + ) + if spec is None or spec.loader is None: + raise RuntimeError(f"unable to load installer module from {INSTALLER_PATH}") + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +installer = load_installer_module() + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description = ( + "Run a real end-to-end prebuilt llama.cpp install into an isolated temporary " + "directory on the current machine." + ) + ) + parser.add_argument( + "--llama-tag", + default = "latest", + help = "llama.cpp tag to resolve. 
Defaults to the approved prebuilt tag for this host.", + ) + parser.add_argument( + "--published-repo", + default = installer.DEFAULT_PUBLISHED_REPO, + help = "Published bundle repository used for Linux CUDA selection.", + ) + parser.add_argument( + "--published-release-tag", + default = installer.DEFAULT_PUBLISHED_TAG or "", + help = "Optional published GitHub release tag to pin.", + ) + parser.add_argument( + "--work-dir", + default = "", + help = ( + "Optional directory under which the smoke install temp dir will be created. " + "If omitted, defaults to ./.tmp/llama-prebuilt-smoke under the current directory." + ), + ) + parser.add_argument( + "--keep-temp", + action = "store_true", + help = "Keep the temporary smoke install directory after success.", + ) + return parser.parse_args() + + +def smoke_root_base(work_dir: str) -> Path: + if work_dir: + return Path(work_dir).expanduser().resolve() + return (Path.cwd() / ".tmp" / "llama-prebuilt-smoke").resolve() + + +def make_smoke_root(base_dir: Path) -> Path: + base_dir.mkdir(parents = True, exist_ok = True) + timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime()) + return Path(tempfile.mkdtemp(prefix = f"run-{timestamp}-", dir = base_dir)) + + +def main() -> int: + args = parse_args() + host = installer.detect_host() + smoke_base = smoke_root_base(args.work_dir) + smoke_root = make_smoke_root(smoke_base) + install_dir = smoke_root / "install" / "llama.cpp" + choice = None + + print(f"[smoke] host={host.system} machine={host.machine}") + print(f"[smoke] temp_root={smoke_root}") + + try: + requested_tag, resolved_tag, attempts, _approved_checksums = ( + installer.resolve_install_attempts( + args.llama_tag, + host, + args.published_repo, + args.published_release_tag, + ) + ) + choice = attempts[0] + print(f"[smoke] requested_tag={requested_tag}") + print(f"[smoke] resolved_tag={resolved_tag}") + print(f"[smoke] selected_asset={choice.name}") + print(f"[smoke] selected_source={choice.source_label}") + 
print(f"[smoke] install_dir={install_dir}") + installer.install_prebuilt( + install_dir = install_dir, + llama_tag = args.llama_tag, + published_repo = args.published_repo, + published_release_tag = args.published_release_tag, + ) + print(f"[smoke] PASS install_dir={install_dir}") + print( + "[smoke] note=This was a real prebuilt install into an isolated temp directory." + ) + return installer.EXIT_SUCCESS + except SystemExit as exc: + code = int(exc.code) if isinstance(exc.code, int) else installer.EXIT_ERROR + if code == installer.EXIT_FALLBACK: + print(f"[smoke] FALLBACK install_dir={install_dir}") + print( + "[smoke] note=Prebuilt path failed and would fall back to source build in setup." + ) + print(installer.collect_system_report(host, choice, install_dir)) + else: + print(f"[smoke] ERROR exit_code={code} install_dir={install_dir}") + return code + except Exception as exc: + print(f"[smoke] ERROR {exc}") + print(installer.collect_system_report(host, choice, install_dir)) + return installer.EXIT_ERROR + finally: + if args.keep_temp: + print(f"[smoke] keeping_temp_root={smoke_root}") + elif smoke_root.exists(): + shutil.rmtree(smoke_root, ignore_errors = True) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/studio/install/test_install_llama_prebuilt_logic.py b/tests/studio/install/test_install_llama_prebuilt_logic.py new file mode 100644 index 000000000..eb30ac274 --- /dev/null +++ b/tests/studio/install/test_install_llama_prebuilt_logic.py @@ -0,0 +1,630 @@ +import importlib.util +import io +import json +import os +import sys +import tarfile +import zipfile +from pathlib import Path + +import pytest + + +PACKAGE_ROOT = Path(__file__).resolve().parents[3] +MODULE_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py" +SPEC = importlib.util.spec_from_file_location( + "studio_install_llama_prebuilt", MODULE_PATH +) +assert SPEC is not None and SPEC.loader is not None +INSTALL_LLAMA_PREBUILT = 
importlib.util.module_from_spec(SPEC) +sys.modules[SPEC.name] = INSTALL_LLAMA_PREBUILT +SPEC.loader.exec_module(INSTALL_LLAMA_PREBUILT) + +PrebuiltFallback = INSTALL_LLAMA_PREBUILT.PrebuiltFallback +extract_archive = INSTALL_LLAMA_PREBUILT.extract_archive +binary_env = INSTALL_LLAMA_PREBUILT.binary_env +HostInfo = INSTALL_LLAMA_PREBUILT.HostInfo +AssetChoice = INSTALL_LLAMA_PREBUILT.AssetChoice +ApprovedArtifactHash = INSTALL_LLAMA_PREBUILT.ApprovedArtifactHash +ApprovedReleaseChecksums = INSTALL_LLAMA_PREBUILT.ApprovedReleaseChecksums +hydrate_source_tree = INSTALL_LLAMA_PREBUILT.hydrate_source_tree +validate_prebuilt_choice = INSTALL_LLAMA_PREBUILT.validate_prebuilt_choice +activate_install_tree = INSTALL_LLAMA_PREBUILT.activate_install_tree +create_install_staging_dir = INSTALL_LLAMA_PREBUILT.create_install_staging_dir +sha256_file = INSTALL_LLAMA_PREBUILT.sha256_file +source_archive_logical_name = INSTALL_LLAMA_PREBUILT.source_archive_logical_name + + +def approved_checksums_for( + upstream_tag: str, *, source_archive: Path, bundle_archive: Path, bundle_name: str +) -> ApprovedReleaseChecksums: + return ApprovedReleaseChecksums( + repo = "local", + release_tag = upstream_tag, + upstream_tag = upstream_tag, + source_commit = None, + artifacts = { + source_archive_logical_name(upstream_tag): ApprovedArtifactHash( + asset_name = source_archive_logical_name(upstream_tag), + sha256 = sha256_file(source_archive), + repo = "ggml-org/llama.cpp", + kind = "upstream-source", + ), + bundle_name: ApprovedArtifactHash( + asset_name = bundle_name, + sha256 = sha256_file(bundle_archive), + repo = "local", + kind = "local-test-bundle", + ), + }, + ) + + +def test_extract_archive_allows_safe_tar_symlink_chain(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + payload = b"shared-object" + + with tarfile.open(archive_path, "w:gz") as archive: + versioned = tarfile.TarInfo("libllama.so.0.0.1") + versioned.size = len(payload) + archive.addfile(versioned, 
io_bytes(payload)) + + soname = tarfile.TarInfo("libllama.so.0") + soname.type = tarfile.SYMTYPE + soname.linkname = "libllama.so.0.0.1" + archive.addfile(soname) + + linker_name = tarfile.TarInfo("libllama.so") + linker_name.type = tarfile.SYMTYPE + linker_name.linkname = "libllama.so.0" + archive.addfile(linker_name) + + destination = tmp_path / "extract" + extract_archive(archive_path, destination) + + assert (destination / "libllama.so.0.0.1").read_bytes() == payload + assert (destination / "libllama.so.0").is_symlink() + assert (destination / "libllama.so").is_symlink() + assert (destination / "libllama.so").resolve().read_bytes() == payload + + +def test_extract_archive_allows_safe_tar_hardlink(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + payload = b"quantize" + + with tarfile.open(archive_path, "w:gz") as archive: + target = tarfile.TarInfo("llama-quantize") + target.size = len(payload) + archive.addfile(target, io_bytes(payload)) + + hardlink = tarfile.TarInfo("llama-quantize-copy") + hardlink.type = tarfile.LNKTYPE + hardlink.linkname = "llama-quantize" + archive.addfile(hardlink) + + destination = tmp_path / "extract" + extract_archive(archive_path, destination) + + assert (destination / "llama-quantize-copy").read_bytes() == payload + assert not (destination / "llama-quantize-copy").is_symlink() + + +def test_extract_archive_rejects_absolute_tar_symlink_target(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + + with tarfile.open(archive_path, "w:gz") as archive: + entry = tarfile.TarInfo("libllama.so") + entry.type = tarfile.SYMTYPE + entry.linkname = "/tmp/libllama.so.0" + archive.addfile(entry) + + with pytest.raises(PrebuiltFallback, match = "archive link used an absolute target"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_extract_archive_rejects_escaping_tar_symlink_target(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + + with tarfile.open(archive_path, "w:gz") as archive: + 
entry = tarfile.TarInfo("libllama.so") + entry.type = tarfile.SYMTYPE + entry.linkname = "../outside/libllama.so.0" + archive.addfile(entry) + + with pytest.raises(PrebuiltFallback, match = "archive link escaped destination"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_extract_archive_rejects_unresolved_tar_symlink_target(tmp_path: Path): + archive_path = tmp_path / "bundle.tar.gz" + + with tarfile.open(archive_path, "w:gz") as archive: + entry = tarfile.TarInfo("libllama.so") + entry.type = tarfile.SYMTYPE + entry.linkname = "libllama.so.0" + archive.addfile(entry) + + with pytest.raises(PrebuiltFallback, match = "unresolved link entries"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_extract_archive_rejects_zip_symlink_entry(tmp_path: Path): + archive_path = tmp_path / "bundle.zip" + + with zipfile.ZipFile(archive_path, "w") as archive: + info = zipfile.ZipInfo("libllama.so") + info.create_system = 3 + info.external_attr = 0o120777 << 16 + archive.writestr(info, "libllama.so.0") + + with pytest.raises(PrebuiltFallback, match = "zip archive contained a symlink entry"): + extract_archive(archive_path, tmp_path / "extract") + + +def test_hydrate_source_tree_extracts_upstream_archive_contents( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + upstream_tag = "b9999" + archive_path = tmp_path / "llama.cpp-source.tar.gz" + with tarfile.open(archive_path, "w:gz") as archive: + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/CMakeLists.txt", + b"cmake_minimum_required(VERSION 3.14)\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/convert_hf_to_gguf.py", + b"#!/usr/bin/env python3\nimport gguf\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/gguf-py/gguf/__init__.py", + b"__all__ = []\n", + ) + + source_urls = set(INSTALL_LLAMA_PREBUILT.upstream_source_archive_urls(upstream_tag)) + + def fake_download_file(url: str, destination: Path) -> None: + assert url in source_urls + 
destination.write_bytes(archive_path.read_bytes()) + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "download_file", fake_download_file) + + install_dir = tmp_path / "install" + work_dir = tmp_path / "work" + work_dir.mkdir() + hydrate_source_tree( + upstream_tag, install_dir, work_dir, expected_sha256 = sha256_file(archive_path) + ) + + assert (install_dir / "CMakeLists.txt").exists() + assert (install_dir / "convert_hf_to_gguf.py").exists() + assert (install_dir / "gguf-py" / "gguf" / "__init__.py").exists() + assert not (install_dir / f"llama.cpp-{upstream_tag}").exists() + + +def test_validate_prebuilt_choice_creates_repo_shaped_linux_install( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + upstream_tag = "b9998" + bundle_name = "app-b9998-linux-x64-cuda13-newer.tar.gz" + source_archive = tmp_path / "source.tar.gz" + bundle_archive = tmp_path / "bundle.tar.gz" + with tarfile.open(source_archive, "w:gz") as archive: + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/CMakeLists.txt", + b"cmake_minimum_required(VERSION 3.14)\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/convert_hf_to_gguf.py", + b"#!/usr/bin/env python3\nimport gguf\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/gguf-py/gguf/__init__.py", + b"__all__ = []\n", + ) + with tarfile.open(bundle_archive, "w:gz") as archive: + add_bytes_to_tar(archive, "llama-server", b"#!/bin/sh\nexit 0\n", mode = 0o755) + add_bytes_to_tar(archive, "llama-quantize", b"#!/bin/sh\nexit 0\n", mode = 0o755) + add_bytes_to_tar(archive, "libllama.so.0.0.1", b"libllama") + add_symlink_to_tar(archive, "libllama.so.0", "libllama.so.0.0.1") + add_symlink_to_tar(archive, "libllama.so", "libllama.so.0") + add_bytes_to_tar(archive, "libggml.so.0.9.8", b"libggml") + add_symlink_to_tar(archive, "libggml.so.0", "libggml.so.0.9.8") + add_symlink_to_tar(archive, "libggml.so", "libggml.so.0") + add_bytes_to_tar(archive, "libggml-base.so.0.9.8", b"libggml-base") + 
add_symlink_to_tar(archive, "libggml-base.so.0", "libggml-base.so.0.9.8") + add_symlink_to_tar(archive, "libggml-base.so", "libggml-base.so.0") + add_bytes_to_tar(archive, "libggml-cpu-x64.so.0.9.8", b"libggml-cpu") + add_symlink_to_tar(archive, "libggml-cpu-x64.so.0", "libggml-cpu-x64.so.0.9.8") + add_symlink_to_tar(archive, "libggml-cpu-x64.so", "libggml-cpu-x64.so.0") + add_bytes_to_tar(archive, "libmtmd.so.0.0.1", b"libmtmd") + add_symlink_to_tar(archive, "libmtmd.so.0", "libmtmd.so.0.0.1") + add_symlink_to_tar(archive, "libmtmd.so", "libmtmd.so.0") + add_bytes_to_tar(archive, "BUILD_INFO.txt", b"bundle metadata\n") + add_bytes_to_tar(archive, "THIRD_PARTY_LICENSES.txt", b"licenses\n") + + source_urls = set(INSTALL_LLAMA_PREBUILT.upstream_source_archive_urls(upstream_tag)) + + def fake_download_file(url: str, destination: Path) -> None: + if url in source_urls: + destination.write_bytes(source_archive.read_bytes()) + return + if url == "file://bundle": + destination.write_bytes(bundle_archive.read_bytes()) + return + raise AssertionError(f"unexpected download url: {url}") + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "download_file", fake_download_file) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "download_bytes", + lambda url, **_: b"#!/usr/bin/env python3\nimport gguf\n", + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "preflight_linux_installed_binaries", + lambda *args, **kwargs: None, + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_quantize", lambda *args, **kwargs: None + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_server", lambda *args, **kwargs: None + ) + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + choice = AssetChoice( + 
repo = "local", + tag = upstream_tag, + name = bundle_name, + url = "file://bundle", + source_label = "local", + is_ready_bundle = True, + install_kind = "linux-cuda", + bundle_profile = "cuda13-newer", + runtime_line = "cuda13", + expected_sha256 = sha256_file(bundle_archive), + ) + + install_dir = tmp_path / "install" + work_dir = tmp_path / "work" + work_dir.mkdir() + probe_path = tmp_path / "stories260K.gguf" + quantized_path = tmp_path / "stories260K-q4.gguf" + validate_prebuilt_choice( + choice, + host, + install_dir, + work_dir, + probe_path, + requested_tag = upstream_tag, + llama_tag = upstream_tag, + approved_checksums = approved_checksums_for( + upstream_tag, + source_archive = source_archive, + bundle_archive = bundle_archive, + bundle_name = bundle_name, + ), + prebuilt_fallback_used = False, + quantized_path = quantized_path, + ) + + assert (install_dir / "gguf-py" / "gguf" / "__init__.py").exists() + assert (install_dir / "convert_hf_to_gguf.py").exists() + assert (install_dir / "build" / "bin" / "llama-server").exists() + assert (install_dir / "build" / "bin" / "llama-quantize").exists() + assert (install_dir / "build" / "bin" / "libllama.so").exists() + assert (install_dir / "llama-server").exists() + assert (install_dir / "llama-quantize").exists() + assert (install_dir / "UNSLOTH_PREBUILT_INFO.json").exists() + assert (install_dir / "BUILD_INFO.txt").exists() + + +def test_validate_prebuilt_choice_creates_repo_shaped_windows_install( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + upstream_tag = "b9997" + bundle_name = "app-b9997-windows-x64-cpu.zip" + source_archive = tmp_path / "source.tar.gz" + bundle_archive = tmp_path / "bundle.zip" + with tarfile.open(source_archive, "w:gz") as archive: + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/CMakeLists.txt", + b"cmake_minimum_required(VERSION 3.14)\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/convert_hf_to_gguf.py", + b"#!/usr/bin/env python3\nimport 
gguf\n", + ) + add_bytes_to_tar( + archive, + f"llama.cpp-{upstream_tag}/gguf-py/gguf/__init__.py", + b"__all__ = []\n", + ) + with zipfile.ZipFile(bundle_archive, "w") as archive: + archive.writestr("llama-server.exe", b"MZ") + archive.writestr("llama-quantize.exe", b"MZ") + archive.writestr("llama.dll", b"DLL") + archive.writestr("BUILD_INFO.txt", b"bundle metadata\n") + + source_urls = set(INSTALL_LLAMA_PREBUILT.upstream_source_archive_urls(upstream_tag)) + + def fake_download_file(url: str, destination: Path) -> None: + if url in source_urls: + destination.write_bytes(source_archive.read_bytes()) + return + if url == "file://bundle.zip": + destination.write_bytes(bundle_archive.read_bytes()) + return + raise AssertionError(f"unexpected download url: {url}") + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "download_file", fake_download_file) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "download_bytes", + lambda url, **_: b"#!/usr/bin/env python3\nimport gguf\n", + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "preflight_linux_installed_binaries", + lambda *args, **kwargs: None, + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_quantize", lambda *args, **kwargs: None + ) + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, "validate_server", lambda *args, **kwargs: None + ) + + host = HostInfo( + system = "Windows", + machine = "AMD64", + is_windows = True, + is_linux = False, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + choice = AssetChoice( + repo = "local", + tag = upstream_tag, + name = bundle_name, + url = "file://bundle.zip", + source_label = "local", + is_ready_bundle = True, + install_kind = "windows-cpu", + expected_sha256 = sha256_file(bundle_archive), + ) + + install_dir = tmp_path / "install" + work_dir = tmp_path / "work" + work_dir.mkdir() + 
probe_path = tmp_path / "stories260K.gguf" + quantized_path = tmp_path / "stories260K-q4.gguf" + validate_prebuilt_choice( + choice, + host, + install_dir, + work_dir, + probe_path, + requested_tag = upstream_tag, + llama_tag = upstream_tag, + approved_checksums = approved_checksums_for( + upstream_tag, + source_archive = source_archive, + bundle_archive = bundle_archive, + bundle_name = bundle_name, + ), + prebuilt_fallback_used = False, + quantized_path = quantized_path, + ) + + assert (install_dir / "gguf-py" / "gguf" / "__init__.py").exists() + assert (install_dir / "convert_hf_to_gguf.py").exists() + assert (install_dir / "build" / "bin" / "Release" / "llama-server.exe").exists() + assert (install_dir / "build" / "bin" / "Release" / "llama-quantize.exe").exists() + assert (install_dir / "build" / "bin" / "Release" / "llama.dll").exists() + assert not (install_dir / "llama-server.exe").exists() + assert (install_dir / "UNSLOTH_PREBUILT_INFO.json").exists() + assert (install_dir / "BUILD_INFO.txt").exists() + + +def test_activate_install_tree_restores_existing_install_after_activation_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +): + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir() + (install_dir / "old.txt").write_text("old install\n") + + staging_dir = create_install_staging_dir(install_dir) + (staging_dir / "new.txt").write_text("new install\n") + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "confirm_install_tree", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + RuntimeError("activation confirm failed") + ), + ) + + with pytest.raises( + PrebuiltFallback, + 
match = "activation failed; restored previous install", + ): + activate_install_tree(staging_dir, install_dir, host) + + assert (install_dir / "old.txt").read_text() == "old install\n" + assert not (install_dir / "new.txt").exists() + assert not staging_dir.exists() + assert not (tmp_path / ".staging").exists() + + output = capsys.readouterr().out + assert "moving existing install to rollback path" in output + assert "restored previous install from rollback path" in output + + +def test_activate_install_tree_cleans_all_paths_when_rollback_restore_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +): + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir() + (install_dir / "old.txt").write_text("old install\n") + + staging_dir = create_install_staging_dir(install_dir) + (staging_dir / "new.txt").write_text("new install\n") + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + monkeypatch.setattr( + INSTALL_LLAMA_PREBUILT, + "confirm_install_tree", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + RuntimeError("activation confirm failed") + ), + ) + + original_replace = INSTALL_LLAMA_PREBUILT.os.replace + + def flaky_replace(src, dst): + src_path = Path(src) + dst_path = Path(dst) + if "rollback-" in src_path.name and dst_path == install_dir: + raise OSError("restore failed") + return original_replace(src, dst) + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT.os, "replace", flaky_replace) + + with pytest.raises( + PrebuiltFallback, + match = "activation and rollback failed; cleaned install state for fresh source build", + ): + activate_install_tree(staging_dir, install_dir, host) + + assert not install_dir.exists() + assert not 
staging_dir.exists() + assert not (tmp_path / ".staging").exists() + + output = capsys.readouterr().out + assert "rollback after failed activation also failed: restore failed" in output + assert ( + "cleaning staging, install, and rollback paths before source build fallback" + in output + ) + assert "removing failed install path" in output + assert "removing rollback path" in output + + +def test_binary_env_linux_includes_binary_parent_in_ld_library_path( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = HostInfo( + system = "Linux", + machine = "x86_64", + is_windows = False, + is_linux = True, + is_macos = False, + is_x86_64 = True, + is_arm64 = False, + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + monkeypatch.setattr(INSTALL_LLAMA_PREBUILT, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + assert ( + str(bin_dir) in ld_dirs + ), f"binary_path.parent ({bin_dir}) must be in LD_LIBRARY_PATH, got: {ld_dirs}" + assert str(install_dir) in ld_dirs + + +def io_bytes(data: bytes): + return io.BytesIO(data) + + +def add_bytes_to_tar( + archive: tarfile.TarFile, name: str, data: bytes, *, mode: int = 0o644 +) -> None: + info = tarfile.TarInfo(name) + info.size = len(data) + info.mode = mode + archive.addfile(info, io_bytes(data)) + + +def add_symlink_to_tar(archive: tarfile.TarFile, name: str, target: str) -> None: + info = tarfile.TarInfo(name) + info.type = tarfile.SYMTYPE + info.linkname = target + archive.addfile(info) diff --git a/tests/studio/install/test_pr4562_bugfixes.py b/tests/studio/install/test_pr4562_bugfixes.py new file mode 100644 index 
000000000..9b8c6219d --- /dev/null +++ b/tests/studio/install/test_pr4562_bugfixes.py @@ -0,0 +1,687 @@ +""" +Comprehensive tests for PR #4562 bug fixes. + +Tests cover: + - Bug 1: PS1 detached HEAD on re-run (fetch + checkout -B pattern) + - Bug 2: Source-build fallback ignores pinned tag (both .sh and .ps1) + - Bug 3: Unix fallback deletes install before checking prerequisites + - Bug 4: Linux LD_LIBRARY_PATH missing build/bin + - "latest" tag resolution fallback chain (Unsloth -> ggml-org -> raw) + - Cross-platform binary_env (Linux, macOS, Windows) + - Edge cases: malformed JSON, empty responses, env overrides + +Run: pytest tests/studio/install/test_pr4562_bugfixes.py -v +""" + +import importlib.util +import json +import os +import subprocess +import sys +import textwrap +from pathlib import Path +from unittest.mock import patch + +import pytest + +# --------------------------------------------------------------------------- +# Load the module under test (same pattern as existing test files) +# --------------------------------------------------------------------------- +PACKAGE_ROOT = Path(__file__).resolve().parents[3] +MODULE_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py" +SPEC = importlib.util.spec_from_file_location( + "studio_install_llama_prebuilt", MODULE_PATH +) +assert SPEC is not None and SPEC.loader is not None +MOD = importlib.util.module_from_spec(SPEC) +sys.modules[SPEC.name] = MOD +SPEC.loader.exec_module(MOD) + +binary_env = MOD.binary_env +HostInfo = MOD.HostInfo +resolve_requested_llama_tag = MOD.resolve_requested_llama_tag + +SETUP_SH = PACKAGE_ROOT / "studio" / "setup.sh" +SETUP_PS1 = PACKAGE_ROOT / "studio" / "setup.ps1" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- +def make_host(*, system: str) -> HostInfo: + """Create a HostInfo for the given OS.""" + return HostInfo( + system = system, + machine = 
"x86_64" if system != "Darwin" else "arm64", + is_windows = (system == "Windows"), + is_linux = (system == "Linux"), + is_macos = (system == "Darwin"), + is_x86_64 = (system != "Darwin"), + is_arm64 = (system == "Darwin"), + nvidia_smi = None, + driver_cuda_version = None, + compute_caps = [], + visible_cuda_devices = None, + has_physical_nvidia = False, + has_usable_nvidia = False, + ) + + +BASH = "/bin/bash" + + +def run_bash(script: str, *, timeout: int = 10, env: dict | None = None) -> str: + """Run a bash script fragment and return its stdout.""" + run_env = os.environ.copy() + if env: + run_env.update(env) + result = subprocess.run( + [BASH, "-c", script], + capture_output = True, + text = True, + timeout = timeout, + env = run_env, + ) + return result.stdout.strip() + + +# ========================================================================= +# TEST GROUP A: binary_env across all platforms (Bug 4 + cross-platform) +# ========================================================================= +class TestBinaryEnvCrossPlatform: + """Test that binary_env returns correct library paths for all OSes.""" + + def test_linux_includes_binary_parent_in_ld_library_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + assert str(bin_dir) in ld_dirs, f"build/bin not in LD_LIBRARY_PATH: {ld_dirs}" + assert ( + str(install_dir) in ld_dirs + ), f"install_dir not in LD_LIBRARY_PATH: {ld_dirs}" + + def test_linux_binary_parent_comes_before_install_dir( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """build/bin should be searched before install_dir for 
.so files.""" + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + bin_idx = ld_dirs.index(str(bin_dir)) + install_idx = ld_dirs.index(str(install_dir)) + assert ( + bin_idx < install_idx + ), "binary_path.parent should come before install_dir" + + def test_linux_deduplicates_when_binary_parent_equals_install_dir( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + """When binary is directly in install_dir, no duplicate entries.""" + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir(parents = True) + binary_path = install_dir / "llama-server" + binary_path.write_bytes(b"fake") + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + + env = binary_env(binary_path, install_dir, host) + ld_dirs = [d for d in env["LD_LIBRARY_PATH"].split(os.pathsep) if d] + count = ld_dirs.count(str(install_dir)) + assert count == 1, f"install_dir appears {count} times in LD_LIBRARY_PATH" + + def test_linux_preserves_existing_ld_library_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server" + binary_path.write_bytes(b"fake") + + # Create real directories so dedupe_existing_dirs keeps them + custom_lib = tmp_path / "custom_lib" + other_lib = tmp_path / "other_lib" + custom_lib.mkdir() + other_lib.mkdir() + + host = make_host(system = "Linux") + monkeypatch.setattr(MOD, "linux_runtime_dirs", lambda _bp: []) + original = os.environ.get("LD_LIBRARY_PATH", "") + os.environ["LD_LIBRARY_PATH"] = 
f"{custom_lib}:{other_lib}" + try: + env = binary_env(binary_path, install_dir, host) + finally: + if original: + os.environ["LD_LIBRARY_PATH"] = original + else: + os.environ.pop("LD_LIBRARY_PATH", None) + ld_dirs = env["LD_LIBRARY_PATH"].split(os.pathsep) + assert str(custom_lib.resolve()) in ld_dirs + assert str(other_lib.resolve()) in ld_dirs + + def test_windows_includes_binary_parent_in_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + bin_dir = install_dir / "build" / "bin" / "Release" + bin_dir.mkdir(parents = True) + binary_path = bin_dir / "llama-server.exe" + binary_path.write_bytes(b"MZ") + + host = make_host(system = "Windows") + monkeypatch.setattr( + MOD, "windows_runtime_dirs_for_runtime_line", lambda _rt: [] + ) + + env = binary_env(binary_path, install_dir, host) + path_dirs = env["PATH"].split(os.pathsep) + assert str(bin_dir) in path_dirs, f"build/bin/Release not in PATH: {path_dirs}" + + def test_macos_sets_dyld_library_path( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + install_dir = tmp_path / "llama.cpp" + install_dir.mkdir(parents = True) + bin_dir = install_dir / "build" / "bin" + binary_path = bin_dir / "llama-server" + binary_path.parent.mkdir(parents = True) + binary_path.write_bytes(b"fake") + + host = make_host(system = "Darwin") + monkeypatch.delenv("DYLD_LIBRARY_PATH", raising = False) + + env = binary_env(binary_path, install_dir, host) + dyld_parts = [p for p in env["DYLD_LIBRARY_PATH"].split(os.pathsep) if p] + assert ( + str(bin_dir) in dyld_parts + ), f"build/bin not in DYLD_LIBRARY_PATH: {dyld_parts}" + assert ( + str(install_dir) in dyld_parts + ), f"install_dir not in DYLD_LIBRARY_PATH: {dyld_parts}" + # binary_path.parent (build/bin) should come before install_dir + assert dyld_parts.index(str(bin_dir)) < dyld_parts.index(str(install_dir)) + + +# ========================================================================= +# TEST GROUP B: 
resolve_requested_llama_tag (Python function) +# ========================================================================= +class TestResolveRequestedLlamaTag: + def test_concrete_tag_passes_through(self): + assert resolve_requested_llama_tag("b8508") == "b8508" + + def test_none_resolves_to_latest(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(MOD, "latest_upstream_release_tag", lambda: "b9999") + assert resolve_requested_llama_tag(None) == "b9999" + + def test_latest_resolves_to_upstream(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(MOD, "latest_upstream_release_tag", lambda: "b1234") + assert resolve_requested_llama_tag("latest") == "b1234" + + def test_empty_string_resolves_to_latest(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(MOD, "latest_upstream_release_tag", lambda: "b5555") + assert resolve_requested_llama_tag("") == "b5555" + + +# ========================================================================= +# TEST GROUP C: setup.sh logic (bash subprocess tests) +# ========================================================================= +class TestSetupShLogic: + """Test setup.sh fragments via bash subprocess with controlled PATH.""" + + def test_cmake_missing_preserves_install(self, tmp_path: Path): + """Bug 3: When cmake is missing, rm -rf should NOT run.""" + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + marker = llama_dir / "marker.txt" + marker.write_text("existing") + + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + # Create mock git but NOT cmake + (mock_bin / "git").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "git").chmod(0o755) + + # Build PATH: mock_bin first, then system dirs WITHOUT cmake + safe_dirs = [str(mock_bin)] + for d in os.environ.get("PATH", "").split(":"): + if d and not os.path.isfile(os.path.join(d, "cmake")): + safe_dirs.append(d) + + script = textwrap.dedent(f"""\ + export LLAMA_CPP_DIR="{llama_dir}" + if ! 
command -v cmake &>/dev/null; then + echo "cmake_missing" + elif ! command -v git &>/dev/null; then + echo "git_missing" + else + rm -rf "$LLAMA_CPP_DIR" + echo "would_clone" + fi + """) + output = run_bash(script, env = {"PATH": ":".join(safe_dirs)}) + assert "cmake_missing" in output + assert marker.exists(), "Install dir was deleted despite cmake missing!" + + def test_git_missing_preserves_install(self, tmp_path: Path): + """Bug 3: When git is missing, rm -rf should NOT run.""" + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + marker = llama_dir / "marker.txt" + marker.write_text("existing") + + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + # Create mock cmake but NOT git + (mock_bin / "cmake").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "cmake").chmod(0o755) + + # Build PATH: mock_bin first, then system dirs WITHOUT git + safe_dirs = [str(mock_bin)] + for d in os.environ.get("PATH", "").split(":"): + if d and not os.path.isfile(os.path.join(d, "git")): + safe_dirs.append(d) + + script = textwrap.dedent(f"""\ + export LLAMA_CPP_DIR="{llama_dir}" + if ! command -v cmake &>/dev/null; then + echo "cmake_missing" + elif ! command -v git &>/dev/null; then + echo "git_missing" + else + rm -rf "$LLAMA_CPP_DIR" + echo "would_clone" + fi + """) + output = run_bash(script, env = {"PATH": ":".join(safe_dirs)}) + assert "git_missing" in output + assert marker.exists(), "Install dir was deleted despite git missing!" 
+ + def test_both_present_runs_rm_and_clone(self, tmp_path: Path): + """Bug 3: When both present, rm -rf runs before clone.""" + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + marker = llama_dir / "marker.txt" + marker.write_text("existing") + + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + (mock_bin / "cmake").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "cmake").chmod(0o755) + (mock_bin / "git").write_text("#!/bin/bash\nexit 0\n") + (mock_bin / "git").chmod(0o755) + + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + export LLAMA_CPP_DIR="{llama_dir}" + if ! command -v cmake &>/dev/null; then + echo "cmake_missing" + elif ! command -v git &>/dev/null; then + echo "git_missing" + else + rm -rf "$LLAMA_CPP_DIR" + echo "would_clone" + fi + """) + output = run_bash(script) + assert "would_clone" in output + assert not marker.exists(), "Install dir should have been deleted" + + def test_clone_uses_pinned_tag(self, tmp_path: Path): + """Bug 2: git clone should use --branch with the resolved tag.""" + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + log_file = tmp_path / "git_calls.log" + (mock_bin / "git").write_text(f'#!/bin/bash\necho "$*" >> {log_file}\nexit 0\n') + (mock_bin / "git").chmod(0o755) + + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + git clone --depth 1 --branch "b8508" https://github.com/ggml-org/llama.cpp.git /tmp/llama_test + """) + run_bash(script) + log = log_file.read_text() + assert "--branch b8508" in log, f"Expected --branch b8508 in: {log}" + + def test_fetch_checkout_b_pattern(self, tmp_path: Path): + """Bug 1: Re-run should use fetch + checkout -B, not pull + checkout FETCH_HEAD.""" + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + log_file = tmp_path / "git_calls.log" + (mock_bin / "git").write_text(f'#!/bin/bash\necho "$*" >> {log_file}\nexit 0\n') + (mock_bin / "git").chmod(0o755) + + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + (llama_dir / ".git").mkdir() 
+ + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + LlamaCppDir="{llama_dir}" + ResolvedLlamaTag="b8508" + if [ -d "$LlamaCppDir/.git" ]; then + git -C "$LlamaCppDir" fetch --depth 1 origin "$ResolvedLlamaTag" + if [ $? -ne 0 ]; then + echo "WARN: fetch failed" + else + git -C "$LlamaCppDir" checkout -B unsloth-llama-build FETCH_HEAD + fi + fi + """) + run_bash(script) + log = log_file.read_text() + assert "fetch --depth 1 origin b8508" in log + assert "checkout -B unsloth-llama-build FETCH_HEAD" in log + assert "pull" not in log, "Should use fetch, not pull" + + def test_fetch_failure_warns_not_aborts(self, tmp_path: Path): + """Bug 1: fetch failure should warn and continue, not set BuildOk=false.""" + mock_bin = tmp_path / "mock_bin" + mock_bin.mkdir() + (mock_bin / "git").write_text( + '#!/bin/bash\nif echo "$*" | grep -q fetch; then exit 1; fi\nexit 0\n' + ) + (mock_bin / "git").chmod(0o755) + + llama_dir = tmp_path / "llama.cpp" + llama_dir.mkdir() + (llama_dir / ".git").mkdir() + + script = textwrap.dedent(f"""\ + export PATH="{mock_bin}:$PATH" + LlamaCppDir="{llama_dir}" + ResolvedLlamaTag="b8508" + BuildOk=true + if [ -d "$LlamaCppDir/.git" ]; then + git -C "$LlamaCppDir" fetch --depth 1 origin "$ResolvedLlamaTag" + if [ $? 
# =========================================================================
# TEST GROUP D: "latest" tag resolution (bash subprocess)
# =========================================================================
class TestLatestTagResolution:
    """Exercise the tag-resolution fallback chain.

    Order under test: Unsloth release API first, then the ggml-org release
    API, and finally the raw requested tag when both lookups fail.
    """

    RESOLVE_TEMPLATE = textwrap.dedent("""\
        export PATH="{mock_bin}:$PATH"
        _REQUESTED_LLAMA_TAG="{requested_tag}"
        _RESOLVED_LLAMA_TAG=""
        _RESOLVE_UPSTREAM_STATUS=1
        _HELPER_RELEASE_REPO="unslothai/llama.cpp"
        if [ "$_RESOLVE_UPSTREAM_STATUS" -ne 0 ] || [ -z "$_RESOLVED_LLAMA_TAG" ]; then
            if [ "$_REQUESTED_LLAMA_TAG" = "latest" ]; then
                _RESOLVED_LLAMA_TAG="$(curl -fsSL "https://api.github.com/repos/${{_HELPER_RELEASE_REPO}}/releases/latest" 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG=""
                if [ -z "$_RESOLVED_LLAMA_TAG" ]; then
                    _RESOLVED_LLAMA_TAG="$(curl -fsSL https://api.github.com/repos/ggml-org/llama.cpp/releases/latest 2>/dev/null | python -c "import sys,json; print(json.load(sys.stdin)['tag_name'])" 2>/dev/null)" || _RESOLVED_LLAMA_TAG=""
                fi
            fi
            if [ -z "$_RESOLVED_LLAMA_TAG" ]; then
                _RESOLVED_LLAMA_TAG="$_REQUESTED_LLAMA_TAG"
            fi
        fi
        echo "$_RESOLVED_LLAMA_TAG"
        """)

    @staticmethod
    def _make_curl_mock(
        mock_bin: Path, unsloth_response: str | None, ggml_response: str | None
    ):
        """Write a fake ``curl`` answering per-repo with canned JSON or failure.

        ``None`` for a response makes the mock exit non-zero for that repo.
        """
        script_lines = ["#!/bin/bash"]
        if unsloth_response is None:
            script_lines.append(
                'if echo "$*" | grep -q "unslothai/llama.cpp"; then exit 1; fi'
            )
        else:
            script_lines.append(
                f'if echo "$*" | grep -q "unslothai/llama.cpp"; then echo \'{unsloth_response}\'; exit 0; fi'
            )
        if ggml_response is None:
            script_lines.append('if echo "$*" | grep -q "ggml-org/llama.cpp"; then exit 1; fi')
        else:
            script_lines.append(
                f'if echo "$*" | grep -q "ggml-org/llama.cpp"; then echo \'{ggml_response}\'; exit 0; fi'
            )
        script_lines.append("exit 1")
        mock = mock_bin / "curl"
        mock.write_text("\n".join(script_lines) + "\n")
        mock.chmod(0o755)

    def _run_resolve(
        self,
        tmp_path: Path,
        requested_tag: str,
        unsloth_resp: str | None,
        ggml_resp: str | None,
    ) -> str:
        """Run the resolution snippet under the mocked curl and return its stdout."""
        sandbox_bin = tmp_path / "mock_bin"
        sandbox_bin.mkdir(exist_ok = True)
        self._make_curl_mock(sandbox_bin, unsloth_resp, ggml_resp)
        return run_bash(
            self.RESOLVE_TEMPLATE.format(
                mock_bin = sandbox_bin, requested_tag = requested_tag
            )
        )

    def test_unsloth_succeeds(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"tag_name":"b8508"}',
            ggml_resp = '{"tag_name":"b9000"}',
        )
        assert resolved == "b8508"

    def test_unsloth_fails_ggml_succeeds(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = None,
            ggml_resp = '{"tag_name":"b9000"}',
        )
        assert resolved == "b9000"

    def test_both_fail_raw_fallback(self, tmp_path: Path):
        # With neither API answering, the literal requested tag is echoed back.
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = None,
            ggml_resp = None,
        )
        assert resolved == "latest"

    def test_concrete_tag_passes_through(self, tmp_path: Path):
        # A non-"latest" tag bypasses both API lookups entirely.
        resolved = self._run_resolve(
            tmp_path,
            "b7777",
            unsloth_resp = '{"tag_name":"b8508"}',
            ggml_resp = '{"tag_name":"b9000"}',
        )
        assert resolved == "b7777"

    def test_unsloth_malformed_json_falls_through(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"bad_key":"no_tag"}',
            ggml_resp = '{"tag_name":"b9001"}',
        )
        assert resolved == "b9001"

    def test_both_malformed_json_raw_fallback(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"bad":"data"}',
            ggml_resp = '{"also":"bad"}',
        )
        assert resolved == "latest"

    def test_unsloth_empty_body_falls_through(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = "",
            ggml_resp = '{"tag_name":"b7000"}',
        )
        assert resolved == "b7000"

    def test_unsloth_empty_tag_name_falls_through(self, tmp_path: Path):
        resolved = self._run_resolve(
            tmp_path,
            "latest",
            unsloth_resp = '{"tag_name":""}',
            ggml_resp = '{"tag_name":"b6000"}',
        )
        assert resolved == "b6000"

    def test_env_override_unsloth_llama_tag(self):
        observed = run_bash(
            'echo "${UNSLOTH_LLAMA_TAG:-latest}"',
            env = {"UNSLOTH_LLAMA_TAG": "b1234"},
        )
        assert observed == "b1234"

    def test_env_unset_defaults_to_latest(self):
        clean_env = os.environ.copy()
        clean_env.pop("UNSLOTH_LLAMA_TAG", None)
        assert run_bash('echo "${UNSLOTH_LLAMA_TAG:-latest}"', env = clean_env) == "latest"

    def test_env_empty_defaults_to_latest(self):
        # ${VAR:-default} treats empty-but-set the same as unset.
        observed = run_bash(
            'echo "${UNSLOTH_LLAMA_TAG:-latest}"',
            env = {"UNSLOTH_LLAMA_TAG": ""},
        )
        assert observed == "latest"
# =========================================================================
# TEST GROUP E: Source file verification
# =========================================================================
class TestSourceCodePatterns:
    """Verify the actual source files contain the expected fix patterns."""

    def test_setup_sh_no_rm_before_prereq_check(self):
        """rm -rf must appear AFTER cmake/git checks, not before."""
        content = SETUP_SH.read_text()
        # Find the source-build block
        idx_else = content.find("# Check prerequisites")
        assert idx_else != -1
        block = content[idx_else:]
        # rm -rf should appear after the cmake/git checks
        idx_cmake = block.find("command -v cmake")
        idx_git = block.find("command -v git")
        idx_rm = block.find("rm -rf")
        # str.find returns -1 when a marker is missing; without these guards a
        # missing cmake/git check would let the ordering asserts pass vacuously
        # (any idx_rm >= 0 is greater than -1).
        assert idx_cmake != -1, "cmake prerequisite check not found"
        assert idx_git != -1, "git prerequisite check not found"
        assert idx_rm != -1, "rm -rf not found in source-build block"
        assert idx_rm > idx_cmake, "rm -rf should come after cmake check"
        assert idx_rm > idx_git, "rm -rf should come after git check"

    def test_setup_sh_clone_uses_branch_tag(self):
        """git clone in source-build should use --branch via _CLONE_BRANCH_ARGS."""
        content = SETUP_SH.read_text()
        # The clone line should use _CLONE_BRANCH_ARGS (which conditionally includes --branch)
        assert (
            "_CLONE_BRANCH_ARGS" in content
        ), "Clone should use _CLONE_BRANCH_ARGS array"
        assert (
            '--branch "$_RESOLVED_LLAMA_TAG"' in content
        ), "_CLONE_BRANCH_ARGS should be set to --branch $_RESOLVED_LLAMA_TAG"
        # Verify the guard: --branch is only used when tag is not "latest"
        assert (
            '_RESOLVED_LLAMA_TAG" != "latest"' in content
        ), "Should guard against literal 'latest' tag"

    def test_setup_sh_latest_resolution_queries_unsloth_first(self):
        """The Unsloth repo should be queried before ggml-org."""
        content = SETUP_SH.read_text()
        idx_unsloth = content.find("_HELPER_RELEASE_REPO}/releases/latest")
        idx_ggml = content.find("ggml-org/llama.cpp/releases/latest")
        assert idx_unsloth != -1, "Unsloth API query not found"
        assert idx_ggml != -1, "ggml-org API query not found"
        assert idx_unsloth < idx_ggml, "Unsloth should be queried before ggml-org"

    def test_setup_ps1_uses_checkout_b(self):
        """PS1 should use checkout -B, not checkout --force FETCH_HEAD."""
        content = SETUP_PS1.read_text()
        assert "checkout -B unsloth-llama-build" in content
        assert "checkout --force FETCH_HEAD" not in content

    def test_setup_ps1_clone_uses_branch_tag(self):
        """PS1 clone should use --branch with the resolved tag."""
        content = SETUP_PS1.read_text()
        assert "--branch" in content and "$ResolvedLlamaTag" in content
        # The old commented-out line should be gone
        assert "# git clone --depth 1 --branch" not in content

    def test_setup_ps1_no_git_pull(self):
        """PS1 should use fetch, not pull (which fails in detached HEAD)."""
        content = SETUP_PS1.read_text()
        # In the source-build section, there should be no "git pull"
        # (git pull is only valid on a branch)
        lines = content.splitlines()
        for i, line in enumerate(lines):
            stripped = line.strip()
            if "git pull" in stripped and not stripped.startswith("#"):
                # Check context -- should not be in the llama.cpp build section
                # Allow git pull in other contexts
                context = "\n".join(lines[max(0, i - 5) : i + 5])
                if "LlamaCppDir" in context:
                    pytest.fail(
                        f"Found 'git pull' in llama.cpp build section at line {i+1}"
                    )

    def test_setup_ps1_latest_resolution_queries_unsloth_first(self):
        """PS1 should query Unsloth repo before ggml-org."""
        content = SETUP_PS1.read_text()
        idx_unsloth = content.find("$HelperReleaseRepo/releases/latest")
        idx_ggml = content.find("ggml-org/llama.cpp/releases/latest")
        assert idx_unsloth != -1, "Unsloth API query not found in PS1"
        assert idx_ggml != -1, "ggml-org API query not found in PS1"
        assert idx_unsloth < idx_ggml, "Unsloth should be queried before ggml-org"

    def test_binary_env_linux_has_binary_parent(self):
        """The Linux branch of binary_env should include binary_path.parent."""
        content = MODULE_PATH.read_text()
        # Find the binary_env function; scope ends at the next top-level def.
        in_func = False
        in_linux = False
        found = False
        for line in content.splitlines():
            if "def binary_env(" in line:
                in_func = True
            elif in_func and line and not line[0].isspace() and "def " in line:
                break
            if in_func and "host.is_linux" in line:
                in_linux = True
            if in_linux and "binary_path.parent" in line:
                found = True
                break
        assert found, "binary_path.parent not found in Linux branch of binary_env"
"""Tests for binary selection logic in install_llama_prebuilt.py.

Covers: normalize_compute_cap, normalize_compute_caps, parse_cuda_visible_devices,
supports_explicit_visible_device_matching, select_visible_gpu_rows,
compatible_linux_runtime_lines, pick_windows_cuda_runtime,
compatible_windows_runtime_lines, runtime_line_from_cuda_version,
apply_approved_hashes, linux_cuda_choice_from_release, windows_cuda_attempts,
resolve_upstream_asset_choice.

No GPU, no network, no torch required -- all I/O is monkeypatched.
"""

import importlib.util
import sys
from pathlib import Path

import pytest


# Load the installer script as a module by path; it lives under studio/ and is
# not an importable package member.
PACKAGE_ROOT = Path(__file__).resolve().parents[3]
MODULE_PATH = PACKAGE_ROOT / "studio" / "install_llama_prebuilt.py"
SPEC = importlib.util.spec_from_file_location(
    "studio_install_llama_prebuilt", MODULE_PATH
)
assert SPEC is not None and SPEC.loader is not None
INSTALL_LLAMA_PREBUILT = importlib.util.module_from_spec(SPEC)
sys.modules[SPEC.name] = INSTALL_LLAMA_PREBUILT
SPEC.loader.exec_module(INSTALL_LLAMA_PREBUILT)

HostInfo = INSTALL_LLAMA_PREBUILT.HostInfo
AssetChoice = INSTALL_LLAMA_PREBUILT.AssetChoice
PublishedLlamaArtifact = INSTALL_LLAMA_PREBUILT.PublishedLlamaArtifact
PublishedReleaseBundle = INSTALL_LLAMA_PREBUILT.PublishedReleaseBundle
ApprovedArtifactHash = INSTALL_LLAMA_PREBUILT.ApprovedArtifactHash
ApprovedReleaseChecksums = INSTALL_LLAMA_PREBUILT.ApprovedReleaseChecksums
PrebuiltFallback = INSTALL_LLAMA_PREBUILT.PrebuiltFallback
LinuxCudaSelection = INSTALL_LLAMA_PREBUILT.LinuxCudaSelection
UPSTREAM_REPO = INSTALL_LLAMA_PREBUILT.UPSTREAM_REPO

normalize_compute_cap = INSTALL_LLAMA_PREBUILT.normalize_compute_cap
normalize_compute_caps = INSTALL_LLAMA_PREBUILT.normalize_compute_caps
parse_cuda_visible_devices = INSTALL_LLAMA_PREBUILT.parse_cuda_visible_devices
supports_explicit_visible_device_matching = (
    INSTALL_LLAMA_PREBUILT.supports_explicit_visible_device_matching
)
select_visible_gpu_rows = INSTALL_LLAMA_PREBUILT.select_visible_gpu_rows
compatible_linux_runtime_lines = (
    INSTALL_LLAMA_PREBUILT.compatible_linux_runtime_lines
)
pick_windows_cuda_runtime = INSTALL_LLAMA_PREBUILT.pick_windows_cuda_runtime
compatible_windows_runtime_lines = (
    INSTALL_LLAMA_PREBUILT.compatible_windows_runtime_lines
)
runtime_line_from_cuda_version = INSTALL_LLAMA_PREBUILT.runtime_line_from_cuda_version
apply_approved_hashes = INSTALL_LLAMA_PREBUILT.apply_approved_hashes
linux_cuda_choice_from_release = INSTALL_LLAMA_PREBUILT.linux_cuda_choice_from_release
windows_cuda_attempts = INSTALL_LLAMA_PREBUILT.windows_cuda_attempts
resolve_upstream_asset_choice = INSTALL_LLAMA_PREBUILT.resolve_upstream_asset_choice


# ---------------------------------------------------------------------------
# Helper factories
# ---------------------------------------------------------------------------


def make_host(**overrides):
    """Return a HostInfo for a CUDA-capable Linux/x86_64 box, overridable per test."""
    system = overrides.pop("system", "Linux")
    machine = overrides.pop("machine", "x86_64")
    machine_key = machine.lower()
    fields = {
        "system": system,
        "machine": machine,
        "is_linux": system == "Linux",
        "is_windows": system == "Windows",
        "is_macos": system == "Darwin",
        "is_x86_64": machine_key in {"x86_64", "amd64"},
        "is_arm64": machine_key in {"arm64", "aarch64"},
        "nvidia_smi": "/usr/bin/nvidia-smi",
        "driver_cuda_version": (12, 8),
        "compute_caps": ["86"],
        "visible_cuda_devices": None,
        "has_physical_nvidia": True,
        "has_usable_nvidia": True,
    }
    fields.update(overrides)
    return HostInfo(**fields)


def make_artifact(asset_name, **overrides):
    """Return a targeted cuda12 Linux PublishedLlamaArtifact, overridable per test."""
    fields = {
        "asset_name": asset_name,
        "install_kind": "linux-cuda",
        "runtime_line": "cuda12",
        "coverage_class": "targeted",
        "supported_sms": ["75", "80", "86", "89", "90"],
        "min_sm": 75,
        "max_sm": 90,
        "bundle_profile": "cuda12-newer",
        "rank": 100,
    }
    fields.update(overrides)
    return PublishedLlamaArtifact(**fields)


def make_release(artifacts, **overrides):
    """Return a PublishedReleaseBundle whose asset map covers *artifacts*."""
    fields = {
        "repo": "unslothai/llama.cpp",
        "release_tag": "v1.0",
        "upstream_tag": "b8508",
        "assets": {
            a.asset_name: f"https://example.com/{a.asset_name}" for a in artifacts
        },
        "manifest_asset_name": "llama-prebuilt-manifest.json",
        "artifacts": artifacts,
        "selection_log": [],
    }
    fields.update(overrides)
    return PublishedReleaseBundle(**fields)


def make_checksums(asset_names):
    """Approve every asset in *asset_names* with a dummy all-'a' sha256."""
    approved = {}
    for name in asset_names:
        approved[name] = ApprovedArtifactHash(
            asset_name = name,
            sha256 = "a" * 64,
            repo = "unslothai/llama.cpp",
            kind = "prebuilt",
        )
    return ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "v1.0",
        upstream_tag = "b8508",
        source_commit = None,
        artifacts = approved,
    )


def mock_linux_runtime(monkeypatch, lines):
    """Force detected_linux_runtime_lines() to report *lines* with stub dirs."""
    stub_dirs = {line: ["/usr/lib/stub"] for line in lines}
    monkeypatch.setattr(
        INSTALL_LLAMA_PREBUILT,
        "detected_linux_runtime_lines",
        lambda: (list(lines), dict(stub_dirs)),
    )


def mock_windows_runtime(monkeypatch, lines):
    """Force detected_windows_runtime_lines() to report *lines* with stub dirs."""
    stub_dirs = {line: ["C:\\Windows\\System32"] for line in lines}
    monkeypatch.setattr(
        INSTALL_LLAMA_PREBUILT,
        "detected_windows_runtime_lines",
        lambda: (list(lines), dict(stub_dirs)),
    )
# ===========================================================================
# A. normalize_compute_cap
# ===========================================================================


class TestNormalizeComputeCap:
    """normalize_compute_cap collapses 'major.minor' strings into SM codes."""

    def test_dotted_86(self):
        assert "86" == normalize_compute_cap("8.6")

    def test_dotted_leading_zero(self):
        # Leading zeros in either part are dropped.
        assert "75" == normalize_compute_cap("07.05")

    def test_already_normalized(self):
        assert "75" == normalize_compute_cap("75")

    def test_int_input(self):
        assert "86" == normalize_compute_cap(86)

    def test_empty_string(self):
        assert normalize_compute_cap("") is None

    def test_whitespace(self):
        assert normalize_compute_cap(" ") is None

    def test_non_numeric(self):
        assert normalize_compute_cap("x.y") is None

    def test_triple_part(self):
        # Three dotted components is not a valid compute capability.
        assert normalize_compute_cap("8.6.0") is None

    def test_zero_minor(self):
        assert "90" == normalize_compute_cap("9.0")


# ===========================================================================
# B. normalize_compute_caps
# ===========================================================================


class TestNormalizeComputeCaps:
    """normalize_compute_caps dedupes, sorts numerically, and drops junk."""

    def test_deduplication(self):
        normalized = normalize_compute_caps(["8.6", "86", "8.6"])
        assert normalized == ["86"]

    def test_numeric_sort(self):
        normalized = normalize_compute_caps(["9.0", "7.5", "8.6"])
        assert normalized == ["75", "86", "90"]

    def test_drops_invalid(self):
        normalized = normalize_compute_caps(["8.6", "bad", "", "7.5"])
        assert normalized == ["75", "86"]

    def test_empty_input(self):
        assert normalize_compute_caps([]) == []
# ===========================================================================
# C. parse_cuda_visible_devices
# ===========================================================================


class TestParseCudaVisibleDevices:
    """Parsing of the CUDA_VISIBLE_DEVICES environment value."""

    def test_none(self):
        # Unset variable: no filtering requested at all.
        assert parse_cuda_visible_devices(None) is None

    def test_empty(self):
        assert [] == parse_cuda_visible_devices("")

    def test_minus_one(self):
        # "-1" is the conventional "hide all GPUs" value.
        assert [] == parse_cuda_visible_devices("-1")

    def test_single(self):
        assert ["0"] == parse_cuda_visible_devices("0")

    def test_multi(self):
        assert ["0", "1", "2"] == parse_cuda_visible_devices("0,1,2")

    def test_whitespace_stripped(self):
        assert ["0", "1"] == parse_cuda_visible_devices(" 0 , 1 ")


# ===========================================================================
# D. supports_explicit_visible_device_matching
# ===========================================================================


class TestSupportsExplicitVisibleDeviceMatching:
    """Only plain indices or GPU-UUID tokens allow explicit matching."""

    def test_all_digits(self):
        assert supports_explicit_visible_device_matching(["0", "1", "2"]) is True

    def test_gpu_prefix(self):
        assert supports_explicit_visible_device_matching(["GPU-abc123"]) is True

    def test_none(self):
        assert supports_explicit_visible_device_matching(None) is False

    def test_empty(self):
        assert supports_explicit_visible_device_matching([]) is False

    def test_mixed_invalid(self):
        # One unmatchable token (MIG-...) disables explicit matching entirely.
        assert supports_explicit_visible_device_matching(["0", "MIG-device"]) is False
# ===========================================================================
# E. select_visible_gpu_rows
# ===========================================================================


class TestSelectVisibleGpuRows:
    """Filtering of nvidia-smi rows by CUDA_VISIBLE_DEVICES tokens."""

    # (index, uuid, compute capability) as reported by nvidia-smi.
    ROWS = [
        ("0", "GPU-aaa", "8.6"),
        ("1", "GPU-bbb", "7.5"),
        ("2", "GPU-ccc", "8.9"),
    ]

    def test_none_returns_all(self):
        # No filter -> every row survives.
        assert select_visible_gpu_rows(self.ROWS, None) == list(self.ROWS)

    def test_empty_returns_empty(self):
        assert select_visible_gpu_rows(self.ROWS, []) == []

    def test_filter_by_index(self):
        selected = select_visible_gpu_rows(self.ROWS, ["0", "2"])
        assert selected == [self.ROWS[0], self.ROWS[2]]

    def test_filter_by_uuid_case_insensitive(self):
        selected = select_visible_gpu_rows(self.ROWS, ["gpu-bbb"])
        assert selected == [self.ROWS[1]]

    def test_dedup_same_device(self):
        # Repeating a token must not duplicate the matching row.
        selected = select_visible_gpu_rows(self.ROWS, ["0", "0"])
        assert selected == [self.ROWS[0]]

    def test_missing_token(self):
        assert select_visible_gpu_rows(self.ROWS, ["99"]) == []


# ===========================================================================
# F. compatible_linux_runtime_lines
# ===========================================================================


class TestCompatibleLinuxRuntimeLines:
    """Driver version gates which CUDA runtime lines are usable on Linux."""

    def _lines_for(self, driver):
        return compatible_linux_runtime_lines(
            make_host(driver_cuda_version = driver)
        )

    def test_no_driver(self):
        assert self._lines_for(None) == []

    def test_driver_11_8(self):
        # CUDA 11 drivers cannot host any supported runtime line.
        assert self._lines_for((11, 8)) == []

    def test_driver_12_4(self):
        assert self._lines_for((12, 4)) == ["cuda12"]

    def test_driver_13_0(self):
        # A CUDA 13 driver can run both lines, newest first.
        assert self._lines_for((13, 0)) == ["cuda13", "cuda12"]
# ===========================================================================
# G. pick_windows_cuda_runtime + compatible_windows_runtime_lines
# ===========================================================================


class TestPickWindowsCudaRuntime:
    """Driver-version to win-cuda runtime mapping."""

    def _pick(self, driver):
        return pick_windows_cuda_runtime(make_host(driver_cuda_version = driver))

    def test_no_driver(self):
        assert self._pick(None) is None

    def test_below_threshold(self):
        # 12.4 is the minimum supported driver CUDA version.
        assert self._pick((12, 3)) is None

    def test_driver_12_4(self):
        assert self._pick((12, 4)) == "12.4"

    def test_driver_13_1(self):
        assert self._pick((13, 1)) == "13.1"


class TestCompatibleWindowsRuntimeLines:
    """Driver version gates runtime lines on Windows as well."""

    def _lines_for(self, driver):
        return compatible_windows_runtime_lines(
            make_host(driver_cuda_version = driver)
        )

    def test_no_driver(self):
        assert self._lines_for(None) == []

    def test_driver_12_4(self):
        assert self._lines_for((12, 4)) == ["cuda12"]

    def test_driver_13_1(self):
        assert self._lines_for((13, 1)) == ["cuda13", "cuda12"]


# ===========================================================================
# H. runtime_line_from_cuda_version
# ===========================================================================


class TestRuntimeLineFromCudaVersion:
    """String CUDA versions map onto cudaNN runtime-line labels."""

    def test_cuda_12(self):
        assert "cuda12" == runtime_line_from_cuda_version("12.6")

    def test_cuda_13(self):
        assert "cuda13" == runtime_line_from_cuda_version("13.0")

    def test_cuda_11(self):
        # CUDA 11 has no supported runtime line.
        assert runtime_line_from_cuda_version("11.8") is None

    def test_none(self):
        assert runtime_line_from_cuda_version(None) is None

    def test_empty(self):
        assert runtime_line_from_cuda_version("") is None
# ===========================================================================
# I. apply_approved_hashes
# ===========================================================================


class TestApplyApprovedHashes:
    """apply_approved_hashes keeps only choices that have an approved sha256."""

    def _choice(self, name):
        """Build a minimal AssetChoice for *name*."""
        url = f"https://x/{name}"
        return AssetChoice(
            repo = "test",
            tag = "v1",
            name = name,
            url = url,
            source_label = "test",
        )

    def test_both_approved(self):
        choices = [self._choice("a.tar.gz"), self._choice("b.tar.gz")]
        checksums = make_checksums(["a.tar.gz", "b.tar.gz"])
        approved = apply_approved_hashes(choices, checksums)
        assert len(approved) == 2
        # Every surviving choice carries the approved digest.
        assert all(choice.expected_sha256 == "a" * 64 for choice in approved)

    def test_one_approved(self):
        choices = [self._choice("a.tar.gz"), self._choice("missing.tar.gz")]
        approved = apply_approved_hashes(choices, make_checksums(["a.tar.gz"]))
        assert len(approved) == 1
        assert approved[0].name == "a.tar.gz"

    def test_none_approved(self):
        checksums = make_checksums(["other.tar.gz"])
        with pytest.raises(PrebuiltFallback, match = "approved checksum"):
            apply_approved_hashes([self._choice("missing.tar.gz")], checksums)

    def test_empty_input(self):
        # No candidates at all is treated the same as none approved.
        with pytest.raises(PrebuiltFallback, match = "approved checksum"):
            apply_approved_hashes([], make_checksums(["a.tar.gz"]))
# ===========================================================================
# J. linux_cuda_choice_from_release -- core selection
# ===========================================================================


class TestLinuxCudaChoiceFromRelease:
    """End-to-end artifact selection behavior of linux_cuda_choice_from_release."""

    # --- Runtime line resolution ---

    def test_no_runtime_lines_detected(self, monkeypatch):
        mock_linux_runtime(monkeypatch, [])
        host = make_host(driver_cuda_version = (12, 8))
        release = make_release([make_artifact("bundle-cuda12.tar.gz")])
        assert linux_cuda_choice_from_release(host, release) is None

    def test_detected_lines_incompatible_with_driver(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda13"])
        host = make_host(driver_cuda_version = (12, 4))
        release = make_release(
            [make_artifact("bundle-cuda13.tar.gz", runtime_line = "cuda13")]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_driver_13_only_cuda12_detected(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(driver_cuda_version = (13, 0))
        release = make_release(
            [make_artifact("bundle-cuda12.tar.gz", runtime_line = "cuda12")]
        )
        selection = linux_cuda_choice_from_release(host, release)
        assert selection is not None
        assert selection.primary.runtime_line == "cuda12"

    def test_preferred_runtime_line_reorders(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda13", "cuda12"])
        host = make_host(driver_cuda_version = (13, 0))
        bundle12 = make_artifact("bundle-cuda12.tar.gz", runtime_line = "cuda12")
        bundle13 = make_artifact("bundle-cuda13.tar.gz", runtime_line = "cuda13")
        selection = linux_cuda_choice_from_release(
            host, make_release([bundle12, bundle13]), preferred_runtime_line = "cuda12"
        )
        assert selection is not None
        assert selection.primary.runtime_line == "cuda12"

    def test_preferred_runtime_line_unavailable(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(driver_cuda_version = (12, 8))
        release = make_release(
            [make_artifact("bundle-cuda12.tar.gz", runtime_line = "cuda12")]
        )
        selection = linux_cuda_choice_from_release(
            host, release, preferred_runtime_line = "cuda13"
        )
        assert selection is not None
        assert selection.primary.runtime_line == "cuda12"
        # The miss on the preferred line must be recorded in the log.
        assert any(
            "unavailable_on_host" in entry for entry in selection.selection_log
        )

    # --- SM matching ---

    def test_exact_sm_match(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["75", "86", "89"], min_sm = 75, max_sm = 89
        )
        selection = linux_cuda_choice_from_release(host, make_release([artifact]))
        assert selection is not None
        assert selection.primary.name == "bundle.tar.gz"

    def test_sm_not_in_supported_sms(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["75", "80", "89"], min_sm = 75, max_sm = 89
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    def test_sm_outside_min_range(self, monkeypatch):
        # SM listed but below min_sm: the range bound wins.
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["50"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["50", "75", "86"], min_sm = 75, max_sm = 90
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    def test_sm_outside_max_range(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["100"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["100", "75", "86"], min_sm = 75, max_sm = 90
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    def test_very_old_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["50"])
        release = make_release([make_artifact("bundle.tar.gz", min_sm = 75, max_sm = 90)])
        assert linux_cuda_choice_from_release(host, release) is None

    def test_very_new_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["100"])
        release = make_release([make_artifact("bundle.tar.gz", min_sm = 75, max_sm = 90)])
        assert linux_cuda_choice_from_release(host, release) is None

    # --- Unknown compute caps (empty list) ---

    def test_unknown_caps_only_portable(self, monkeypatch):
        # Without known caps, only a portable bundle is safe to pick.
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = [])
        targeted = make_artifact("targeted.tar.gz", coverage_class = "targeted")
        portable = make_artifact("portable.tar.gz", coverage_class = "portable")
        selection = linux_cuda_choice_from_release(
            host, make_release([targeted, portable])
        )
        assert selection is not None
        assert selection.primary.name == "portable.tar.gz"

    def test_unknown_caps_no_portable(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = [])
        release = make_release(
            [make_artifact("targeted.tar.gz", coverage_class = "targeted")]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    # --- Multi-GPU ---

    def test_multi_gpu_all_covered(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["75", "89"])
        artifact = make_artifact(
            "bundle.tar.gz",
            supported_sms = ["75", "80", "86", "89", "90"],
            min_sm = 75,
            max_sm = 90,
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is not None

    def test_multi_gpu_not_all_covered(self, monkeypatch):
        # Every GPU must be covered; one uncovered SM rejects the bundle.
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["50", "89"])
        artifact = make_artifact(
            "bundle.tar.gz", supported_sms = ["75", "89"], min_sm = 75, max_sm = 89
        )
        assert linux_cuda_choice_from_release(host, make_release([artifact])) is None

    # --- Artifact selection priority ---

    def test_narrowest_sm_range_wins(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        wide = make_artifact(
            "wide.tar.gz",
            supported_sms = ["75", "86", "90"],
            min_sm = 75,
            max_sm = 90,
            rank = 100,
        )
        narrow = make_artifact(
            "narrow.tar.gz",
            supported_sms = ["80", "86", "89"],
            min_sm = 80,
            max_sm = 89,
            rank = 100,
        )
        selection = linux_cuda_choice_from_release(host, make_release([wide, narrow]))
        assert selection is not None
        assert selection.primary.name == "narrow.tar.gz"

    def test_range_tie_lower_rank_wins(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        high = make_artifact(
            "high.tar.gz",
            supported_sms = ["75", "86", "90"],
            min_sm = 75,
            max_sm = 90,
            rank = 200,
        )
        low = make_artifact(
            "low.tar.gz",
            supported_sms = ["75", "86", "90"],
            min_sm = 75,
            max_sm = 90,
            rank = 50,
        )
        selection = linux_cuda_choice_from_release(host, make_release([high, low]))
        assert selection is not None
        assert selection.primary.name == "low.tar.gz"

    def test_targeted_preferred_portable_fallback(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        targeted = make_artifact("targeted.tar.gz", coverage_class = "targeted", rank = 100)
        portable = make_artifact("portable.tar.gz", coverage_class = "portable", rank = 100)
        selection = linux_cuda_choice_from_release(
            host, make_release([targeted, portable])
        )
        assert selection is not None
        assert selection.primary.name == "targeted.tar.gz"
        # The portable bundle is kept as the second attempt.
        assert len(selection.attempts) == 2
        assert selection.attempts[1].name == "portable.tar.gz"

    # --- Edge cases ---

    def test_asset_missing_from_release_assets(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release([make_artifact("bundle.tar.gz")], assets = {})
        assert linux_cuda_choice_from_release(host, release) is None

    def test_artifact_empty_supported_sms(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release([make_artifact("bundle.tar.gz", supported_sms = [])])
        assert linux_cuda_choice_from_release(host, release) is None

    def test_artifact_missing_min_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release(
            [make_artifact("bundle.tar.gz", min_sm = None, max_sm = 90)]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_artifact_missing_max_sm(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release(
            [make_artifact("bundle.tar.gz", min_sm = 75, max_sm = None)]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_no_linux_cuda_artifacts(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        release = make_release(
            [make_artifact("bundle.tar.gz", install_kind = "windows-cuda")]
        )
        assert linux_cuda_choice_from_release(host, release) is None

    def test_empty_artifacts_list(self, monkeypatch):
        mock_linux_runtime(monkeypatch, ["cuda12"])
        host = make_host(compute_caps = ["86"])
        assert linux_cuda_choice_from_release(host, make_release([])) is None
# ===========================================================================
# K. windows_cuda_attempts
# ===========================================================================


class TestWindowsCudaAttempts:
    """Attempt ordering produced by windows_cuda_attempts."""

    TAG = "b8508"

    def _upstream(self, *runtime_versions):
        """Build an upstream asset map with one win-cuda zip per runtime version."""
        asset_map = {}
        for version in runtime_versions:
            asset = f"llama-{self.TAG}-bin-win-cuda-{version}-x64.zip"
            asset_map[asset] = f"https://example.com/{asset}"
        return asset_map

    @staticmethod
    def _host(driver):
        """Windows/AMD64 host with the given driver CUDA version."""
        return make_host(
            system = "Windows", machine = "AMD64", driver_cuda_version = driver
        )

    def test_driver_12_4_no_dlls_fallback(self, monkeypatch):
        mock_windows_runtime(monkeypatch, [])
        attempts = windows_cuda_attempts(
            self._host((12, 4)), self.TAG, self._upstream("12.4"), None
        )
        assert len(attempts) == 1
        assert attempts[0].runtime_line == "cuda12"

    def test_driver_13_1_both_dlls(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda13", "cuda12"])
        attempts = windows_cuda_attempts(
            self._host((13, 1)), self.TAG, self._upstream("13.1", "12.4"), None
        )
        # Newest runtime line is attempted first.
        assert len(attempts) == 2
        assert attempts[0].runtime_line == "cuda13"
        assert attempts[1].runtime_line == "cuda12"

    def test_preferred_reorders(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda13", "cuda12"])
        attempts = windows_cuda_attempts(
            self._host((13, 1)), self.TAG, self._upstream("13.1", "12.4"), "cuda12"
        )
        assert len(attempts) == 2
        assert attempts[0].runtime_line == "cuda12"

    def test_preferred_unavailable(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda12"])
        attempts = windows_cuda_attempts(
            self._host((12, 4)), self.TAG, self._upstream("12.4"), "cuda13"
        )
        assert len(attempts) == 1
        assert attempts[0].runtime_line == "cuda12"

    def test_detected_incompatible_with_driver(self, monkeypatch):
        # Detected cuda13 DLLs are useless on a 12.4 driver; fall back to cuda12.
        mock_windows_runtime(monkeypatch, ["cuda13"])
        attempts = windows_cuda_attempts(
            self._host((12, 4)), self.TAG, self._upstream("12.4"), None
        )
        assert len(attempts) == 1
        assert attempts[0].runtime_line == "cuda12"

    def test_driver_too_old(self, monkeypatch):
        mock_windows_runtime(monkeypatch, [])
        attempts = windows_cuda_attempts(
            self._host((11, 8)), self.TAG, self._upstream("12.4"), None
        )
        assert attempts == []

    def test_asset_missing_from_upstream(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda12"])
        assert windows_cuda_attempts(self._host((12, 4)), self.TAG, {}, None) == []

    def test_both_assets_present(self, monkeypatch):
        mock_windows_runtime(monkeypatch, ["cuda13", "cuda12"])
        attempts = windows_cuda_attempts(
            self._host((13, 1)), self.TAG, self._upstream("13.1", "12.4"), None
        )
        assert len(attempts) == 2
resolve_upstream_asset_choice
# ===========================================================================


class TestResolveUpstreamAssetChoice:
    """Platform routing performed by resolve_upstream_asset_choice()."""

    TAG = "b8508"

    def _mock_github_assets(self, monkeypatch, assets):
        # Serve a canned asset map instead of hitting the GitHub releases API.
        monkeypatch.setattr(
            INSTALL_LLAMA_PREBUILT,
            "github_release_assets",
            lambda repo, tag: assets,
        )

    @staticmethod
    def _gpu_less(**overrides):
        # Host with every NVIDIA signal switched off; platform fields may be
        # overridden per test (extra overrides are forwarded to make_host).
        kwargs = {
            "has_usable_nvidia": False,
            "nvidia_smi": None,
            "has_physical_nvidia": False,
        }
        kwargs.update(overrides)
        return make_host(**kwargs)

    def test_linux_x86_64_cpu(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-ubuntu-x64.tar.gz"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        choice = resolve_upstream_asset_choice(self._gpu_less(), self.TAG)
        assert (choice.install_kind, choice.name) == ("linux-cpu", name)

    def test_linux_cpu_missing(self, monkeypatch):
        self._mock_github_assets(monkeypatch, {})
        with pytest.raises(PrebuiltFallback, match="Linux CPU"):
            resolve_upstream_asset_choice(self._gpu_less(), self.TAG)

    def test_windows_x86_64_cpu(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-win-cpu-x64.zip"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        host = self._gpu_less(system="Windows", machine="AMD64")
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("windows-cpu", name)

    def test_windows_cpu_missing(self, monkeypatch):
        self._mock_github_assets(monkeypatch, {})
        host = self._gpu_less(system="Windows", machine="AMD64")
        with pytest.raises(PrebuiltFallback, match="Windows CPU"):
            resolve_upstream_asset_choice(host, self.TAG)

    def test_macos_arm64(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-macos-arm64.tar.gz"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        host = self._gpu_less(
            system="Darwin",
            machine="arm64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("macos-arm64", name)

    def test_macos_arm64_missing(self, monkeypatch):
        self._mock_github_assets(monkeypatch, {})
        host = self._gpu_less(
            system="Darwin",
            machine="arm64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        with pytest.raises(PrebuiltFallback, match="macOS arm64"):
            resolve_upstream_asset_choice(host, self.TAG)

    def test_macos_x86_64(self, monkeypatch):
        name = f"llama-{self.TAG}-bin-macos-x64.tar.gz"
        self._mock_github_assets(monkeypatch, {name: f"https://x/{name}"})
        host = self._gpu_less(
            system="Darwin",
            machine="x86_64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("macos-x64", name)

    def test_linux_aarch64(self, monkeypatch):
        # No prebuilt policy exists for this platform at all.
        self._mock_github_assets(monkeypatch, {})
        host = self._gpu_less(
            system="Linux",
            machine="aarch64",
            driver_cuda_version=None,
            compute_caps=[],
        )
        with pytest.raises(
            PrebuiltFallback, match="no prebuilt policy exists for Linux aarch64"
        ):
            resolve_upstream_asset_choice(host, self.TAG)

    def test_windows_usable_nvidia_delegates(self, monkeypatch):
        # A usable NVIDIA GPU on Windows routes through the CUDA chooser.
        cuda_name = f"llama-{self.TAG}-bin-win-cuda-12.4-x64.zip"
        self._mock_github_assets(monkeypatch, {cuda_name: f"https://x/{cuda_name}"})
        mock_windows_runtime(monkeypatch, ["cuda12"])
        delegated = [
            AssetChoice(
                repo=UPSTREAM_REPO,
                tag=self.TAG,
                name=cuda_name,
                url=f"https://x/{cuda_name}",
                source_label="upstream",
                install_kind="windows-cuda",
                runtime_line="cuda12",
            )
        ]
        monkeypatch.setattr(
            INSTALL_LLAMA_PREBUILT,
            "resolve_windows_cuda_choices",
            lambda host, tag, assets: delegated,
        )
        host = make_host(
            system="Windows",
            machine="AMD64",
            driver_cuda_version=(12, 4),
            has_usable_nvidia=True,
        )
        choice = resolve_upstream_asset_choice(host, self.TAG)
        assert (choice.install_kind, choice.name) == ("windows-cuda", cuda_name)