Fix custom llama.cpp source builds and macos metal source builds (#4762)

* Fix script unbound variable error

* remove stale test script, add llama.cpp metal source builds, update tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix Metal precedence, test sync, and add behavioral tests

- Move macOS arm64 Metal check before CUDA/ROCm in GPU backend
  decision chain so Metal is not bypassed when nvcc is in PATH
- Remove RPATH flags from CPU fallback CMAKE_ARGS (only needed
  for Metal library linking)
- Update test_llama_pr_force_and_source.py to match _CLONE_ARGS
  rename from _CLONE_BRANCH_ARGS in setup.sh
- Add confirm_install_tree guard test for
  existing_install_matches_choice
- Add TestMacOSMetalBuildLogic bash subprocess tests verifying
  Metal flag selection, nvcc precedence, and CPU fallback behavior

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix Metal CPU fallback to also cover cmake build failures and update tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* 1. _GPU_BACKEND_FRAGMENT synced -- removed dead CPU_FALLBACK_CMAKE_ARGS= init (6/8)
2. RPATH assertion replaced -- new test_macos_arm64_cpu_fallback_args_exclude_rpath checks the actual runtime CPU_FALLBACK_CMAKE_ARGS output for @loader_path and -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON (6/8)
3. _TRY_METAL_CPU_FALLBACK=false reset after both configure-failure and build-failure fallback branches in setup.sh (4/8)
4. macOS test now removes libmtmd.0.dylib instead of the platform-agnostic convert_hf_to_gguf.py (3/8)
5. Empty-string tag test added -- test_empty_tag_omits_branch_flag for resolved_tag= (2/8)
6. RPATH checks on cmake call logs -- both fallback tests now assert @loader_path and -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON are absent from CPU fallback cmake calls, plus baseline flag preservation (multiple)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* tests clean up

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
DoubleMathew 2026-04-01 14:06:39 -05:00 committed by GitHub
parent 39fe23ded8
commit 71b934ef9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 616 additions and 195 deletions

View file

@ -3739,6 +3739,11 @@ def existing_install_matches_choice(
if metadata is None:
return False
try:
confirm_install_tree(install_dir, host)
except Exception:
return False
if not runtime_payload_is_healthy(install_dir, host, choice):
return False

View file

@ -716,20 +716,29 @@ else
git -C "$_BUILD_TMP" checkout "pr-$_LLAMA_PR" || BUILD_OK=false
fi
else
_CLONE_BRANCH_ARGS=()
_CLONE_ARGS=(git clone --depth 1)
if [ "$_RESOLVED_LLAMA_TAG" != "latest" ] && [ -n "$_RESOLVED_LLAMA_TAG" ]; then
_CLONE_BRANCH_ARGS=(--branch "$_RESOLVED_LLAMA_TAG")
_CLONE_ARGS+=(--branch "$_RESOLVED_LLAMA_TAG")
fi
_CLONE_ARGS+=("${_LLAMA_SOURCE}.git" "$_BUILD_TMP")
run_quiet_no_exit "clone llama.cpp" \
git clone --depth 1 "${_CLONE_BRANCH_ARGS[@]}" "${_LLAMA_SOURCE}.git" "$_BUILD_TMP" || BUILD_OK=false
"${_CLONE_ARGS[@]}" || BUILD_OK=false
fi
if [ "$BUILD_OK" = true ]; then
CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_NATIVE=ON"
_TRY_METAL_CPU_FALLBACK=false
_HOST_SYSTEM="$(uname -s 2>/dev/null || true)"
_HOST_MACHINE="$(uname -m 2>/dev/null || true)"
_IS_MACOS_ARM64=false
if [ "$_HOST_SYSTEM" = "Darwin" ] && { [ "$_HOST_MACHINE" = "arm64" ] || [ "$_HOST_MACHINE" = "aarch64" ]; }; then
_IS_MACOS_ARM64=true
fi
if command -v ccache &>/dev/null; then
CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
fi
CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
GPU_BACKEND=""
NVCC_PATH=""
@ -765,7 +774,13 @@ else
fi
_BUILD_DESC="building"
if [ -n "$NVCC_PATH" ]; then
if [ "$_IS_MACOS_ARM64" = true ]; then
# Metal takes precedence on Apple Silicon (CUDA/ROCm not functional on macOS)
_BUILD_DESC="building (Metal)"
CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
_TRY_METAL_CPU_FALLBACK=true
elif [ -n "$NVCC_PATH" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DGGML_CUDA=ON"
CUDA_ARCHS=""
@ -847,11 +862,37 @@ else
CMAKE_GENERATOR_ARGS="-G Ninja"
fi
run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS || BUILD_OK=false
if ! run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS; then
if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
_TRY_METAL_CPU_FALLBACK=false
substep "Metal configure failed; retrying CPU build..." "$C_WARN"
rm -rf "$_BUILD_TMP/build"
run_quiet_no_exit "cmake llama.cpp (cpu fallback)" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS || BUILD_OK=false
if [ "$BUILD_OK" = true ]; then
_BUILD_DESC="building (CPU fallback)"
fi
else
BUILD_OK=false
fi
fi
fi
if [ "$BUILD_OK" = true ]; then
run_quiet_no_exit "build llama-server" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false
if ! run_quiet_no_exit "build llama-server" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU"; then
if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
_TRY_METAL_CPU_FALLBACK=false
substep "Metal build failed; retrying CPU build..." "$C_WARN"
rm -rf "$_BUILD_TMP/build"
if run_quiet_no_exit "cmake llama.cpp (cpu fallback)" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS; then
_BUILD_DESC="building (CPU fallback)"
run_quiet_no_exit "build llama-server (cpu fallback)" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false
else
BUILD_OK=false
fi
else
BUILD_OK=false
fi
fi
fi
if [ "$BUILD_OK" = true ]; then

View file

@ -1864,3 +1864,184 @@ def add_symlink_to_tar(archive: tarfile.TarFile, name: str, target: str) -> None
info.type = tarfile.SYMTYPE
info.linkname = target
archive.addfile(info)
def test_existing_install_matches_choice_fails_when_install_tree_incomplete(
    tmp_path: Path,
):
    """confirm_install_tree guard rejects installs missing critical files."""
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    # Lay down a complete Linux-shaped install tree so the baseline check passes.
    write_linux_install_shape(install_dir)
    host = HostInfo(
        system = "Linux",
        machine = "x86_64",
        is_windows = False,
        is_linux = True,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = None,
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = False,
    )
    choice = AssetChoice(
        repo = "unslothai/llama.cpp",
        tag = "release-1",
        name = "llama-b9001-bin-ubuntu-x64.tar.gz",
        url = "https://example.com/llama-b9001-bin-ubuntu-x64.tar.gz",
        source_label = "upstream",
        install_kind = "linux-cpu",
        expected_sha256 = "a" * 64,
    )
    # Approved checksums cover both the source archive and the chosen asset,
    # matching what write_prebuilt_metadata records below.
    checksums = ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "release-1",
        upstream_tag = "b9001",
        source_commit = "deadbeef",
        artifacts = {
            source_archive_logical_name("b9001"): ApprovedArtifactHash(
                asset_name = source_archive_logical_name("b9001"),
                sha256 = "b" * 64,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-source",
            ),
            choice.name: ApprovedArtifactHash(
                asset_name = choice.name,
                sha256 = choice.expected_sha256,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-prebuilt",
            ),
        },
    )
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = "b9001",
        release_tag = "release-1",
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )
    # Full install should match
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is True
    )
    # Remove convert_hf_to_gguf.py (checked by confirm_install_tree but not
    # runtime_payload_is_healthy) and verify the guard catches it
    (install_dir / "convert_hf_to_gguf.py").unlink()
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is False
    )
def test_existing_install_matches_choice_fails_when_install_tree_incomplete_macos(
    tmp_path: Path,
):
    """confirm_install_tree guard rejects macOS arm64 installs missing critical files."""
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    # Lay down a complete macOS-shaped install tree so the baseline check passes.
    write_macos_install_shape(install_dir)
    host = HostInfo(
        system = "Darwin",
        machine = "arm64",
        is_windows = False,
        is_linux = False,
        is_macos = True,
        is_x86_64 = False,
        is_arm64 = True,
        nvidia_smi = None,
        driver_cuda_version = None,
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = False,
    )
    choice = AssetChoice(
        repo = "unslothai/llama.cpp",
        tag = "release-1",
        name = "llama-b9001-bin-macos-arm64.tar.gz",
        url = "https://example.com/llama-b9001-bin-macos-arm64.tar.gz",
        source_label = "upstream",
        install_kind = "macos-arm64",
        expected_sha256 = "a" * 64,
    )
    # Approved checksums cover both the source archive and the chosen asset,
    # matching what write_prebuilt_metadata records below.
    checksums = ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "release-1",
        upstream_tag = "b9001",
        source_commit = "deadbeef",
        artifacts = {
            source_archive_logical_name("b9001"): ApprovedArtifactHash(
                asset_name = source_archive_logical_name("b9001"),
                sha256 = "b" * 64,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-source",
            ),
            choice.name: ApprovedArtifactHash(
                asset_name = choice.name,
                sha256 = choice.expected_sha256,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-prebuilt",
            ),
        },
    )
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = "b9001",
        release_tag = "release-1",
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )
    # Full install should match
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is True
    )
    # Remove a macOS-specific runtime artifact and verify the guard catches it
    (install_dir / "build" / "bin" / "libmtmd.0.dylib").unlink()
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is False
    )

View file

@ -302,12 +302,13 @@ class TestBashCloneUrlParameterized:
run_quiet_no_exit "clone llama.cpp" \\
git clone --depth 1 "${{_LLAMA_SOURCE}}.git" "$_BUILD_TMP" || BUILD_OK=false
else
_CLONE_BRANCH_ARGS=()
_CLONE_ARGS=(git clone --depth 1)
if [ "$_RESOLVED_LLAMA_TAG" != "latest" ] && [ -n "$_RESOLVED_LLAMA_TAG" ]; then
_CLONE_BRANCH_ARGS=(--branch "$_RESOLVED_LLAMA_TAG")
_CLONE_ARGS+=(--branch "$_RESOLVED_LLAMA_TAG")
fi
_CLONE_ARGS+=("${{_LLAMA_SOURCE}}.git" "$_BUILD_TMP")
run_quiet_no_exit "clone llama.cpp" \\
git clone --depth 1 "${{_CLONE_BRANCH_ARGS[@]}}" "${{_LLAMA_SOURCE}}.git" "$_BUILD_TMP" || BUILD_OK=false
"${{_CLONE_ARGS[@]}}" || BUILD_OK=false
fi
echo "BUILD_OK=$BUILD_OK"
""")
@ -350,6 +351,36 @@ class TestBashCloneUrlParameterized:
log = log_file.read_text()
assert "ggml-org/llama.cpp.git" in log
def test_latest_tag_omits_branch_flag(self, tmp_path: Path):
    """resolved_tag='latest' should not pass --branch to git clone."""
    # Intercept git with a logging mock so we can inspect the clone args.
    mock_bin, log_file = make_mock_git(tmp_path)
    clone_target = str(tmp_path / "build_tmp")
    result = run_bash(
        self._clone_script(
            mock_bin,
            clone_target,
            resolved_tag = "latest",
        )
    )
    assert result.returncode == 0
    recorded = log_file.read_text()
    # "latest" is a sentinel, not a git ref: no --branch may reach git.
    assert "--branch" not in recorded
    assert "ggml-org/llama.cpp.git" in recorded
def test_empty_tag_omits_branch_flag(self, tmp_path: Path):
    """resolved_tag='' (empty) should not pass --branch to git clone."""
    # Intercept git with a logging mock so we can inspect the clone args.
    mock_bin, log_file = make_mock_git(tmp_path)
    clone_target = str(tmp_path / "build_tmp")
    result = run_bash(
        self._clone_script(
            mock_bin,
            clone_target,
            resolved_tag = "",
        )
    )
    assert result.returncode == 0
    recorded = log_file.read_text()
    # An empty tag means "no specific ref": no --branch may reach git.
    assert "--branch" not in recorded
    assert "ggml-org/llama.cpp.git" in recorded
# =========================================================================
# TEST GROUP D: Static source patterns -- setup.sh
@ -395,8 +426,8 @@ class TestSourcePatternsSh:
def test_clone_urls_parameterized_tag_path(self):
"""Non-PR clone path uses ${_LLAMA_SOURCE}.git, not hardcoded URL."""
# Find the non-PR clone line (after _CLONE_BRANCH_ARGS)
idx = self.content.index("_CLONE_BRANCH_ARGS=()")
# Find the non-PR clone line (after _CLONE_ARGS)
idx = self.content.index("_CLONE_ARGS=(git clone --depth 1)")
block = self.content[idx : idx + 400]
assert '"${_LLAMA_SOURCE}.git"' in block
assert "ggml-org/llama.cpp.git" not in block

View file

@ -14,13 +14,11 @@ Run: pytest tests/studio/install/test_pr4562_bugfixes.py -v
"""
import importlib.util
import json
import os
import subprocess
import sys
import textwrap
from pathlib import Path
from unittest.mock import patch
import pytest
@ -86,6 +84,9 @@ def run_bash(script: str, *, timeout: int = 10, env: dict | None = None) -> str:
timeout = timeout,
env = run_env,
)
assert (
result.returncode == 0
), f"bash script failed (exit {result.returncode}):\n{result.stderr}"
return result.stdout.strip()
@ -581,15 +582,12 @@ class TestSourceCodePatterns:
assert idx_rm > idx_git, "rm -rf should come after git check"
def test_setup_sh_clone_uses_branch_tag(self):
"""git clone in source-build should use --branch via _CLONE_BRANCH_ARGS."""
"""git clone in source-build should use --branch via the clone args array."""
content = SETUP_SH.read_text()
# The clone line should use _CLONE_BRANCH_ARGS (which conditionally includes --branch)
assert "_CLONE_ARGS=(git clone --depth 1)" in content
assert (
"_CLONE_BRANCH_ARGS" in content
), "Clone should use _CLONE_BRANCH_ARGS array"
assert (
'--branch "$_RESOLVED_LLAMA_TAG"' in content
), "_CLONE_BRANCH_ARGS should be set to --branch $_RESOLVED_LLAMA_TAG"
'_CLONE_ARGS+=(--branch "$_RESOLVED_LLAMA_TAG")' in content
), "_CLONE_ARGS should be extended with --branch $_RESOLVED_LLAMA_TAG"
# Verify the guard: --branch is only used when tag is not "latest"
assert (
'_RESOLVED_LLAMA_TAG" != "latest"' in content
@ -598,10 +596,78 @@ class TestSourceCodePatterns:
def test_setup_sh_latest_resolution_uses_helper_only(self):
"""Shell fallback should rely on helper output, not raw GitHub API tag_name."""
content = SETUP_SH.read_text()
assert "--resolve-install-tag" in content
assert "--resolve-llama-tag" in content
assert "_HELPER_RELEASE_REPO}/releases/latest" not in content
assert "ggml-org/llama.cpp/releases/latest" not in content
def test_setup_sh_macos_arm64_uses_metal_flags(self):
    """Apple Silicon source builds should explicitly enable Metal like upstream."""
    content = SETUP_SH.read_text()
    # Every Metal-related snippet the arm64 branch of setup.sh must contain.
    required_snippets = (
        "_IS_MACOS_ARM64=true",
        'if [ "$_IS_MACOS_ARM64" = true ]; then',
        "-DGGML_METAL=ON",
        "-DGGML_METAL_EMBED_LIBRARY=ON",
        "-DGGML_METAL_USE_BF16=ON",
        "-DCMAKE_INSTALL_RPATH=@loader_path",
        "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON",
    )
    for snippet in required_snippets:
        assert snippet in content, f"setup.sh is missing expected snippet: {snippet}"
def test_setup_sh_macos_metal_configure_has_cpu_fallback(self):
    """If Metal configure or build fails, setup should retry with CPU fallback."""
    content = SETUP_SH.read_text()
    assert "_TRY_METAL_CPU_FALLBACK=true" in content
    # Both failure paths (configure and build) must emit their warning substep.
    assert (
        'substep "Metal configure failed; retrying CPU build..." "$C_WARN"'
        in content
    )
    assert (
        'substep "Metal build failed; retrying CPU build..." "$C_WARN"' in content
    )
    assert 'run_quiet_no_exit "cmake llama.cpp (cpu fallback)"' in content
    assert "-DGGML_METAL=OFF" in content
    # _TRY_METAL_CPU_FALLBACK must be reset to false in both fallback branches
    # (1 init + 2 resets = at least 3 occurrences of =false)
    assert content.count("_TRY_METAL_CPU_FALLBACK=false") >= 3, (
        "_TRY_METAL_CPU_FALLBACK=false should appear at least 3 times "
        "(init + configure fallback + build fallback)"
    )
def test_macos_arm64_cpu_fallback_args_exclude_rpath(self):
    """CPU fallback args must NOT contain Metal-only RPATH flags at runtime."""
    # Drive the mirrored GPU-backend fragment as macOS arm64 with no CUDA.
    script = (
        '_IS_MACOS_ARM64=true\nNVCC_PATH=""\nGPU_BACKEND=""\n'
        + _GPU_BACKEND_FRAGMENT
    )
    output = run_bash(script)
    # The fragment echoes CPU_FALLBACK_CMAKE_ARGS=...; pull out that line.
    fallback_line = next(
        line
        for line in output.splitlines()
        if line.startswith("CPU_FALLBACK_CMAKE_ARGS=")
    )
    assert "-DGGML_METAL=OFF" in fallback_line
    assert (
        "@loader_path" not in fallback_line
    ), "CPU fallback args should not contain RPATH flags"
    assert (
        "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON" not in fallback_line
    ), "CPU fallback args should not contain RPATH build flag"
def test_setup_sh_does_not_enable_metal_for_intel_macos(self):
    """Intel macOS should stay on the existing non-Metal path in this patch."""
    content = SETUP_SH.read_text()
    # Metal must be gated behind the arm64 flag...
    assert 'if [ "$_IS_MACOS_ARM64" = true ]; then' in content
    # ...and the flag itself must only be set for Darwin + arm64/aarch64.
    assert (
        'Darwin" ] && { [ "$_HOST_MACHINE" = "arm64" ] || [ "$_HOST_MACHINE" = "aarch64" ]; }'
        in content
    )
    # No x86_64 handling may appear near the Metal enable line.
    metal_pos = content.find("-DGGML_METAL=ON")
    neighborhood = content[metal_pos - 200 : metal_pos + 200]
    assert "x86_64" not in neighborhood
def test_setup_ps1_uses_checkout_b(self):
"""PS1 should use checkout -B, not checkout --force FETCH_HEAD."""
content = SETUP_PS1.read_text()
@ -635,6 +701,7 @@ class TestSourceCodePatterns:
def test_setup_ps1_latest_resolution_uses_helper_only(self):
"""PS1 fallback should rely on helper output, not raw GitHub API tag_name."""
content = SETUP_PS1.read_text()
assert "--resolve-install-tag" in content
assert "--resolve-llama-tag" in content
assert "$HelperReleaseRepo/releases/latest" not in content
assert "ggml-org/llama.cpp/releases/latest" not in content
@ -657,3 +724,274 @@ class TestSourceCodePatterns:
found = True
break
assert found, "binary_path.parent not found in Linux branch of binary_env"
# =========================================================================
# TEST GROUP F: macOS Metal build logic (bash subprocess tests)
# =========================================================================
# Minimal bash fragment that mirrors setup.sh's GPU backend decision chain.
# Variables _IS_MACOS_ARM64, NVCC_PATH, GPU_BACKEND are injected by tests.
_GPU_BACKEND_FRAGMENT = textwrap.dedent("""\
CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF"
_TRY_METAL_CPU_FALLBACK=false
CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
_BUILD_DESC="building"
if [ "$_IS_MACOS_ARM64" = true ]; then
_BUILD_DESC="building (Metal)"
CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
_TRY_METAL_CPU_FALLBACK=true
elif [ -n "$NVCC_PATH" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DGGML_CUDA=ON"
_BUILD_DESC="building (CUDA)"
elif [ "$GPU_BACKEND" = "rocm" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DGGML_HIP=ON"
_BUILD_DESC="building (ROCm)"
else
_BUILD_DESC="building (CPU)"
fi
echo "CMAKE_ARGS=$CMAKE_ARGS"
echo "CPU_FALLBACK_CMAKE_ARGS=$CPU_FALLBACK_CMAKE_ARGS"
echo "BUILD_DESC=$_BUILD_DESC"
echo "TRY_METAL_CPU_FALLBACK=$_TRY_METAL_CPU_FALLBACK"
""")
class TestMacOSMetalBuildLogic:
    """Behavioral bash subprocess tests for the Metal GPU backend logic."""

    def test_macos_arm64_cmake_args_contain_metal_flags(self):
        """macOS arm64 should enable Metal, not CUDA."""
        script = (
            '_IS_MACOS_ARM64=true\nNVCC_PATH=""\nGPU_BACKEND=""\n'
            + _GPU_BACKEND_FRAGMENT
        )
        output = run_bash(script)
        assert "-DGGML_METAL=ON" in output
        assert "-DGGML_CUDA=ON" not in output
        assert "BUILD_DESC=building (Metal)" in output

    def test_intel_macos_no_metal_flags(self):
        """Intel macOS (not arm64) should not get Metal flags."""
        script = (
            '_IS_MACOS_ARM64=false\nNVCC_PATH=""\nGPU_BACKEND=""\n'
            + _GPU_BACKEND_FRAGMENT
        )
        output = run_bash(script)
        assert "-DGGML_METAL=ON" not in output
        assert "BUILD_DESC=building (CPU)" in output

    def test_macos_arm64_metal_precedes_nvcc(self):
        """Even with nvcc in PATH, macOS arm64 should use Metal, not CUDA."""
        # nvcc present AND GPU_BACKEND=cuda: the arm64 branch must still win.
        script = (
            '_IS_MACOS_ARM64=true\nNVCC_PATH="/usr/local/cuda/bin/nvcc"\n'
            'GPU_BACKEND="cuda"\n' + _GPU_BACKEND_FRAGMENT
        )
        output = run_bash(script)
        assert "-DGGML_METAL=ON" in output
        assert "-DGGML_CUDA=ON" not in output
        assert "BUILD_DESC=building (Metal)" in output

    def test_metal_cpu_fallback_triggers_on_cmake_failure(self, tmp_path: Path):
        """When cmake fails on Metal, the fallback should retry with -DGGML_METAL=OFF."""
        mock_bin = tmp_path / "mock_bin"
        mock_bin.mkdir()
        calls_file = tmp_path / "cmake_calls.log"
        # cmake that logs args and fails on first call (Metal), succeeds on second (CPU fallback)
        cmake_script = mock_bin / "cmake"
        cmake_script.write_text(
            textwrap.dedent(f"""\
                #!/bin/bash
                echo "$*" >> "{calls_file}"
                COUNTER_FILE="{tmp_path}/cmake_counter"
                if [ ! -f "$COUNTER_FILE" ]; then
                    echo 1 > "$COUNTER_FILE"
                    exit 1
                fi
                exit 0
                """)
        )
        cmake_script.chmod(0o755)
        # Mirror the configure-failure fallback branch of setup.sh with the
        # mock cmake first in PATH.
        script = textwrap.dedent(f"""\
            export PATH="{mock_bin}:$PATH"
            _IS_MACOS_ARM64=true
            NVCC_PATH=""
            GPU_BACKEND=""
            CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF"
            _TRY_METAL_CPU_FALLBACK=false
            CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
            _BUILD_DESC="building"
            if [ "$_IS_MACOS_ARM64" = true ]; then
                _BUILD_DESC="building (Metal)"
                CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
                CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
                _TRY_METAL_CPU_FALLBACK=true
            fi
            BUILD_OK=true
            _BUILD_TMP="{tmp_path}/build_tmp"
            mkdir -p "$_BUILD_TMP"
            if ! cmake -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS; then
                if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
                    _TRY_METAL_CPU_FALLBACK=false
                    echo "FALLBACK_TRIGGERED"
                    rm -rf "$_BUILD_TMP/build"
                    cmake -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS || BUILD_OK=false
                    if [ "$BUILD_OK" = true ]; then
                        _BUILD_DESC="building (CPU fallback)"
                    fi
                else
                    BUILD_OK=false
                fi
            fi
            echo "BUILD_OK=$BUILD_OK"
            echo "BUILD_DESC=$_BUILD_DESC"
            echo "TRY_METAL_CPU_FALLBACK=$_TRY_METAL_CPU_FALLBACK"
            """)
        output = run_bash(script)
        assert "FALLBACK_TRIGGERED" in output
        assert "BUILD_OK=true" in output
        assert "BUILD_DESC=building (CPU fallback)" in output
        assert (
            "TRY_METAL_CPU_FALLBACK=false" in output
        ), "Fallback flag should be reset to false after configure fallback"
        # Verify cmake args: first call has Metal ON, second has Metal OFF
        calls = calls_file.read_text().splitlines()
        assert len(calls) >= 2, f"Expected >= 2 cmake calls, got {len(calls)}"
        assert (
            "-DGGML_METAL=ON" in calls[0]
        ), f"First cmake call should have Metal ON: {calls[0]}"
        assert (
            "-DGGML_METAL=OFF" in calls[1]
        ), f"Second cmake call should have Metal OFF: {calls[1]}"
        assert (
            "-DGGML_METAL=ON" not in calls[1]
        ), f"Second cmake call should NOT have Metal ON: {calls[1]}"
        assert (
            "@loader_path" not in calls[1]
        ), f"CPU fallback should not have RPATH: {calls[1]}"
        assert (
            "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON" not in calls[1]
        ), f"CPU fallback should not have RPATH build flag: {calls[1]}"

    def test_metal_build_failure_retries_cpu_fallback(self, tmp_path: Path):
        """When cmake --build fails on Metal, the fallback should re-configure and rebuild with CPU."""
        mock_bin = tmp_path / "mock_bin"
        mock_bin.mkdir()
        calls_file = tmp_path / "cmake_calls.log"
        # cmake mock: configure always succeeds; first --build fails, rest succeed
        cmake_script = mock_bin / "cmake"
        cmake_script.write_text(
            textwrap.dedent(f"""\
                #!/bin/bash
                echo "$*" >> "{calls_file}"
                if [ "$1" = "--build" ]; then
                    BUILD_COUNTER_FILE="{tmp_path}/build_counter"
                    if [ ! -f "$BUILD_COUNTER_FILE" ]; then
                        echo 1 > "$BUILD_COUNTER_FILE"
                        exit 1
                    fi
                fi
                exit 0
                """)
        )
        cmake_script.chmod(0o755)
        # Mirror both the configure-failure and build-failure fallback
        # branches of setup.sh; only the build branch should fire here.
        script = textwrap.dedent(f"""\
            export PATH="{mock_bin}:$PATH"
            _IS_MACOS_ARM64=true
            NVCC_PATH=""
            GPU_BACKEND=""
            CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF"
            _TRY_METAL_CPU_FALLBACK=false
            CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
            CMAKE_GENERATOR_ARGS=""
            NCPU=2
            _BUILD_DESC="building"
            if [ "$_IS_MACOS_ARM64" = true ]; then
                _BUILD_DESC="building (Metal)"
                CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
                CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
                _TRY_METAL_CPU_FALLBACK=true
            fi
            BUILD_OK=true
            _BUILD_TMP="{tmp_path}/build_tmp"
            mkdir -p "$_BUILD_TMP"
            # Configure (succeeds)
            if ! cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS; then
                if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
                    _TRY_METAL_CPU_FALLBACK=false
                    echo "CONFIGURE_FALLBACK"
                    rm -rf "$_BUILD_TMP/build"
                    cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS || BUILD_OK=false
                    if [ "$BUILD_OK" = true ]; then
                        _BUILD_DESC="building (CPU fallback)"
                    fi
                else
                    BUILD_OK=false
                fi
            fi
            # Build (first --build fails, triggers fallback)
            if [ "$BUILD_OK" = true ]; then
                if ! cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU"; then
                    if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
                        _TRY_METAL_CPU_FALLBACK=false
                        echo "BUILD_FALLBACK_TRIGGERED"
                        rm -rf "$_BUILD_TMP/build"
                        if cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS; then
                            _BUILD_DESC="building (CPU fallback)"
                            cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false
                        else
                            BUILD_OK=false
                        fi
                    else
                        BUILD_OK=false
                    fi
                fi
            fi
            echo "BUILD_OK=$BUILD_OK"
            echo "BUILD_DESC=$_BUILD_DESC"
            echo "TRY_METAL_CPU_FALLBACK=$_TRY_METAL_CPU_FALLBACK"
            """)
        output = run_bash(script)
        assert "CONFIGURE_FALLBACK" not in output, "Configure should have succeeded"
        assert "BUILD_FALLBACK_TRIGGERED" in output
        assert "BUILD_OK=true" in output
        assert "BUILD_DESC=building (CPU fallback)" in output
        assert (
            "TRY_METAL_CPU_FALLBACK=false" in output
        ), "Fallback flag should be reset to false after build fallback"
        # Verify: configure with Metal ON, build fails, re-configure with Metal OFF, rebuild
        calls = calls_file.read_text().splitlines()
        assert len(calls) >= 4, f"Expected >= 4 cmake calls, got {len(calls)}: {calls}"
        # First call: configure with Metal ON
        assert "-DGGML_METAL=ON" in calls[0]
        # Second call: build (fails)
        assert "--build" in calls[1]
        # Third call: re-configure with Metal OFF and no RPATH flags
        assert "-DGGML_METAL=OFF" in calls[2]
        assert "-DGGML_METAL=ON" not in calls[2]
        assert (
            "@loader_path" not in calls[2]
        ), f"CPU fallback should not have RPATH: {calls[2]}"
        assert (
            "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON" not in calls[2]
        ), f"CPU fallback should not have RPATH build flag: {calls[2]}"
        assert (
            "-DLLAMA_BUILD_TESTS=OFF" in calls[2]
        ), f"CPU fallback should preserve baseline flags: {calls[2]}"
        # Fourth call: rebuild (succeeds)
        assert "--build" in calls[3]

View file

@ -1,175 +0,0 @@
import importlib.util
import sys
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[3]
MODULE_PATH = REPO_ROOT / "validate-llama-prebuilt.py"
if not MODULE_PATH.is_file():
pytest.skip(
f"validate-llama-prebuilt.py not present at {MODULE_PATH}",
allow_module_level = True,
)
SPEC = importlib.util.spec_from_file_location("validate_llama_prebuilt", MODULE_PATH)
assert SPEC is not None and SPEC.loader is not None
VALIDATE = importlib.util.module_from_spec(SPEC)
sys.modules[SPEC.name] = VALIDATE
SPEC.loader.exec_module(VALIDATE)
def test_build_local_approved_checksums_uses_staged_upstream_tag(
tmp_path: Path, monkeypatch
):
stage_dir = tmp_path / "release-1"
stage_dir.mkdir()
asset_path = stage_dir / "app-test-linux-x64-cuda12-newer.tar.gz"
asset_path.write_bytes(b"bundle")
sibling_checksums = stage_dir / VALIDATE.installer.DEFAULT_PUBLISHED_SHA256_ASSET
sibling_checksums.write_text(
"""
{
"schema_version": 1,
"component": "llama.cpp",
"release_tag": "release-1",
"upstream_tag": "b9001",
"source_commit": "deadbeef",
"artifacts": {
"llama.cpp-source-b9001.tar.gz": {
"sha256": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
"repo": "ggml-org/llama.cpp",
"kind": "upstream-source"
}
}
}
""".strip()
+ "\n",
encoding = "utf-8",
)
asset = VALIDATE.LocalAsset(
path = asset_path,
tag = "test",
name = asset_path.name,
install_kind = "linux-cuda",
source_kind = "app-bundle",
native_runnable = True,
bundle_profile = "cuda12-newer",
runtime_line = "cuda12",
)
checksums = VALIDATE.build_local_approved_checksums(
asset,
allow_network_source_hash = False,
)
assert checksums.release_tag == "release-1"
assert checksums.upstream_tag == "b9001"
assert "llama.cpp-source-b9001.tar.gz" in checksums.artifacts
assert "llama.cpp-source-test.tar.gz" not in checksums.artifacts
def test_validate_native_asset_passes_release_tag_and_upstream_tag(
tmp_path: Path, monkeypatch
):
stage_dir = tmp_path / "release-7"
stage_dir.mkdir()
asset_path = stage_dir / "app-test-linux-x64-cuda12-newer.tar.gz"
asset_path.write_bytes(b"bundle")
sibling_checksums = stage_dir / VALIDATE.installer.DEFAULT_PUBLISHED_SHA256_ASSET
sibling_checksums.write_text(
"""
{
"schema_version": 1,
"component": "llama.cpp",
"release_tag": "release-7",
"upstream_tag": "b9007",
"source_commit": "deadbeef",
"artifacts": {
"llama.cpp-source-b9007.tar.gz": {
"sha256": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
"repo": "ggml-org/llama.cpp",
"kind": "upstream-source"
}
}
}
""".strip()
+ "\n",
encoding = "utf-8",
)
asset = VALIDATE.LocalAsset(
path = asset_path,
tag = "test",
name = asset_path.name,
install_kind = "linux-cuda",
source_kind = "app-bundle",
native_runnable = True,
bundle_profile = "cuda12-newer",
runtime_line = "cuda12",
)
host = VALIDATE.installer.HostInfo(
system = "Linux",
machine = "x86_64",
is_windows = False,
is_linux = True,
is_macos = False,
is_x86_64 = True,
is_arm64 = False,
nvidia_smi = None,
driver_cuda_version = None,
compute_caps = [],
visible_cuda_devices = None,
has_physical_nvidia = False,
has_usable_nvidia = False,
)
monkeypatch.setattr(VALIDATE.installer, "detect_host", lambda: host)
monkeypatch.setattr(
VALIDATE.installer,
"download_validation_model",
lambda probe_path, cache_path: probe_path.write_bytes(b"probe"),
)
captured = {}
def fake_validate_prebuilt_attempts(
attempts,
host,
install_dir,
work_dir,
probe_path,
*,
requested_tag,
llama_tag,
release_tag,
approved_checksums,
initial_fallback_used = False,
existing_install_dir = None,
):
captured["requested_tag"] = requested_tag
captured["llama_tag"] = llama_tag
captured["release_tag"] = release_tag
staging_dir = VALIDATE.installer.create_install_staging_dir(install_dir)
return attempts[0], staging_dir, False
monkeypatch.setattr(
VALIDATE.installer,
"validate_prebuilt_attempts",
fake_validate_prebuilt_attempts,
)
record = VALIDATE.validate_native_asset(
asset,
keep_temp = False,
allow_network_source_hash = False,
)
assert record.status == "PASS"
assert captured == {
"requested_tag": "test",
"llama_tag": "b9007",
"release_tag": "release-7",
}