Fix custom llama.cpp source builds and macos metal source builds (#4762)

* Fix script unbound variable error

* remove stale test script, add llama.cpp metal source builds, update tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix Metal precedence, test sync, and add behavioral tests

- Move macOS arm64 Metal check before CUDA/ROCm in GPU backend
  decision chain so Metal is not bypassed when nvcc is in PATH
- Remove RPATH flags from CPU fallback CMAKE_ARGS (only needed
  for Metal library linking)
- Update test_llama_pr_force_and_source.py to match _CLONE_ARGS
  rename from _CLONE_BRANCH_ARGS in setup.sh
- Add confirm_install_tree guard test for
  existing_install_matches_choice
- Add TestMacOSMetalBuildLogic bash subprocess tests verifying
  Metal flag selection, nvcc precedence, and CPU fallback behavior

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix Metal CPU fallback to also cover cmake build failures and update tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* 1. _GPU_BACKEND_FRAGMENT synced -- removed dead CPU_FALLBACK_CMAKE_ARGS= init (6/8)
2. RPATH assertion replaced -- new test_macos_arm64_cpu_fallback_args_exclude_rpath checks the actual runtime CPU_FALLBACK_CMAKE_ARGS output for @loader_path and -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON (6/8)
3. _TRY_METAL_CPU_FALLBACK=false reset after both configure-failure and build-failure fallback branches in setup.sh (4/8)
4. macOS test now removes libmtmd.0.dylib instead of the platform-agnostic convert_hf_to_gguf.py (3/8)
5. Empty-string tag test added -- test_empty_tag_omits_branch_flag for resolved_tag= (2/8)
6. RPATH checks on cmake call logs -- both fallback tests now assert @loader_path and -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON are absent from CPU fallback cmake calls, plus baseline flag preservation (multiple)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* tests clean up

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
DoubleMathew 2026-04-01 14:06:39 -05:00 committed by GitHub
parent 39fe23ded8
commit 71b934ef9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 616 additions and 195 deletions

View file

@ -3739,6 +3739,11 @@ def existing_install_matches_choice(
if metadata is None:
return False
try:
confirm_install_tree(install_dir, host)
except Exception:
return False
if not runtime_payload_is_healthy(install_dir, host, choice):
return False

View file

@ -716,20 +716,29 @@ else
git -C "$_BUILD_TMP" checkout "pr-$_LLAMA_PR" || BUILD_OK=false
fi
else
_CLONE_BRANCH_ARGS=()
_CLONE_ARGS=(git clone --depth 1)
if [ "$_RESOLVED_LLAMA_TAG" != "latest" ] && [ -n "$_RESOLVED_LLAMA_TAG" ]; then
_CLONE_BRANCH_ARGS=(--branch "$_RESOLVED_LLAMA_TAG")
_CLONE_ARGS+=(--branch "$_RESOLVED_LLAMA_TAG")
fi
_CLONE_ARGS+=("${_LLAMA_SOURCE}.git" "$_BUILD_TMP")
run_quiet_no_exit "clone llama.cpp" \
git clone --depth 1 "${_CLONE_BRANCH_ARGS[@]}" "${_LLAMA_SOURCE}.git" "$_BUILD_TMP" || BUILD_OK=false
"${_CLONE_ARGS[@]}" || BUILD_OK=false
fi
if [ "$BUILD_OK" = true ]; then
CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_NATIVE=ON"
_TRY_METAL_CPU_FALLBACK=false
_HOST_SYSTEM="$(uname -s 2>/dev/null || true)"
_HOST_MACHINE="$(uname -m 2>/dev/null || true)"
_IS_MACOS_ARM64=false
if [ "$_HOST_SYSTEM" = "Darwin" ] && { [ "$_HOST_MACHINE" = "arm64" ] || [ "$_HOST_MACHINE" = "aarch64" ]; }; then
_IS_MACOS_ARM64=true
fi
if command -v ccache &>/dev/null; then
CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
fi
CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
GPU_BACKEND=""
NVCC_PATH=""
@ -765,7 +774,13 @@ else
fi
_BUILD_DESC="building"
if [ -n "$NVCC_PATH" ]; then
if [ "$_IS_MACOS_ARM64" = true ]; then
# Metal takes precedence on Apple Silicon (CUDA/ROCm not functional on macOS)
_BUILD_DESC="building (Metal)"
CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
_TRY_METAL_CPU_FALLBACK=true
elif [ -n "$NVCC_PATH" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DGGML_CUDA=ON"
CUDA_ARCHS=""
@ -847,11 +862,37 @@ else
CMAKE_GENERATOR_ARGS="-G Ninja"
fi
run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS || BUILD_OK=false
if ! run_quiet_no_exit "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS; then
if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
_TRY_METAL_CPU_FALLBACK=false
substep "Metal configure failed; retrying CPU build..." "$C_WARN"
rm -rf "$_BUILD_TMP/build"
run_quiet_no_exit "cmake llama.cpp (cpu fallback)" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS || BUILD_OK=false
if [ "$BUILD_OK" = true ]; then
_BUILD_DESC="building (CPU fallback)"
fi
else
BUILD_OK=false
fi
fi
fi
if [ "$BUILD_OK" = true ]; then
run_quiet_no_exit "build llama-server" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false
if ! run_quiet_no_exit "build llama-server" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU"; then
if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
_TRY_METAL_CPU_FALLBACK=false
substep "Metal build failed; retrying CPU build..." "$C_WARN"
rm -rf "$_BUILD_TMP/build"
if run_quiet_no_exit "cmake llama.cpp (cpu fallback)" cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS; then
_BUILD_DESC="building (CPU fallback)"
run_quiet_no_exit "build llama-server (cpu fallback)" cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false
else
BUILD_OK=false
fi
else
BUILD_OK=false
fi
fi
fi
if [ "$BUILD_OK" = true ]; then

View file

@ -1864,3 +1864,184 @@ def add_symlink_to_tar(archive: tarfile.TarFile, name: str, target: str) -> None
info.type = tarfile.SYMTYPE
info.linkname = target
archive.addfile(info)
def test_existing_install_matches_choice_fails_when_install_tree_incomplete(
    tmp_path: Path,
):
    """confirm_install_tree guard rejects installs missing critical files."""
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    # Lay down a complete Linux-shaped install tree so the baseline check passes.
    write_linux_install_shape(install_dir)
    host = HostInfo(
        system = "Linux",
        machine = "x86_64",
        is_windows = False,
        is_linux = True,
        is_macos = False,
        is_x86_64 = True,
        is_arm64 = False,
        nvidia_smi = None,
        driver_cuda_version = None,
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = False,
    )
    choice = AssetChoice(
        repo = "unslothai/llama.cpp",
        tag = "release-1",
        name = "llama-b9001-bin-ubuntu-x64.tar.gz",
        url = "https://example.com/llama-b9001-bin-ubuntu-x64.tar.gz",
        source_label = "upstream",
        install_kind = "linux-cpu",
        expected_sha256 = "a" * 64,
    )
    # Approved checksums cover both the source archive and the chosen asset,
    # matching what write_prebuilt_metadata records below.
    checksums = ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "release-1",
        upstream_tag = "b9001",
        source_commit = "deadbeef",
        artifacts = {
            source_archive_logical_name("b9001"): ApprovedArtifactHash(
                asset_name = source_archive_logical_name("b9001"),
                sha256 = "b" * 64,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-source",
            ),
            choice.name: ApprovedArtifactHash(
                asset_name = choice.name,
                sha256 = choice.expected_sha256,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-prebuilt",
            ),
        },
    )
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = "b9001",
        release_tag = "release-1",
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )
    # Full install should match
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is True
    )
    # Remove convert_hf_to_gguf.py (checked by confirm_install_tree but not
    # runtime_payload_is_healthy) and verify the guard catches it
    (install_dir / "convert_hf_to_gguf.py").unlink()
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is False
    )
def test_existing_install_matches_choice_fails_when_install_tree_incomplete_macos(
    tmp_path: Path,
):
    """confirm_install_tree guard rejects macOS arm64 installs missing critical files."""
    install_dir = tmp_path / "llama.cpp"
    install_dir.mkdir()
    # Lay down a complete macOS-shaped install tree so the baseline check passes.
    write_macos_install_shape(install_dir)
    host = HostInfo(
        system = "Darwin",
        machine = "arm64",
        is_windows = False,
        is_linux = False,
        is_macos = True,
        is_x86_64 = False,
        is_arm64 = True,
        nvidia_smi = None,
        driver_cuda_version = None,
        compute_caps = [],
        visible_cuda_devices = None,
        has_physical_nvidia = False,
        has_usable_nvidia = False,
    )
    choice = AssetChoice(
        repo = "unslothai/llama.cpp",
        tag = "release-1",
        name = "llama-b9001-bin-macos-arm64.tar.gz",
        url = "https://example.com/llama-b9001-bin-macos-arm64.tar.gz",
        source_label = "upstream",
        install_kind = "macos-arm64",
        expected_sha256 = "a" * 64,
    )
    # Approved checksums cover both the source archive and the chosen asset,
    # matching what write_prebuilt_metadata records below.
    checksums = ApprovedReleaseChecksums(
        repo = "unslothai/llama.cpp",
        release_tag = "release-1",
        upstream_tag = "b9001",
        source_commit = "deadbeef",
        artifacts = {
            source_archive_logical_name("b9001"): ApprovedArtifactHash(
                asset_name = source_archive_logical_name("b9001"),
                sha256 = "b" * 64,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-source",
            ),
            choice.name: ApprovedArtifactHash(
                asset_name = choice.name,
                sha256 = choice.expected_sha256,
                repo = "ggml-org/llama.cpp",
                kind = "upstream-prebuilt",
            ),
        },
    )
    write_prebuilt_metadata(
        install_dir,
        requested_tag = "latest",
        llama_tag = "b9001",
        release_tag = "release-1",
        choice = choice,
        approved_checksums = checksums,
        prebuilt_fallback_used = False,
    )
    # Full install should match
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is True
    )
    # Remove a macOS-specific runtime artifact and verify the guard catches it
    (install_dir / "build" / "bin" / "libmtmd.0.dylib").unlink()
    assert (
        existing_install_matches_choice(
            install_dir,
            host,
            llama_tag = "b9001",
            release_tag = "release-1",
            choice = choice,
            approved_checksums = checksums,
        )
        is False
    )

View file

@ -302,12 +302,13 @@ class TestBashCloneUrlParameterized:
run_quiet_no_exit "clone llama.cpp" \\
git clone --depth 1 "${{_LLAMA_SOURCE}}.git" "$_BUILD_TMP" || BUILD_OK=false
else
_CLONE_BRANCH_ARGS=()
_CLONE_ARGS=(git clone --depth 1)
if [ "$_RESOLVED_LLAMA_TAG" != "latest" ] && [ -n "$_RESOLVED_LLAMA_TAG" ]; then
_CLONE_BRANCH_ARGS=(--branch "$_RESOLVED_LLAMA_TAG")
_CLONE_ARGS+=(--branch "$_RESOLVED_LLAMA_TAG")
fi
_CLONE_ARGS+=("${{_LLAMA_SOURCE}}.git" "$_BUILD_TMP")
run_quiet_no_exit "clone llama.cpp" \\
git clone --depth 1 "${{_CLONE_BRANCH_ARGS[@]}}" "${{_LLAMA_SOURCE}}.git" "$_BUILD_TMP" || BUILD_OK=false
"${{_CLONE_ARGS[@]}}" || BUILD_OK=false
fi
echo "BUILD_OK=$BUILD_OK"
""")
@ -350,6 +351,36 @@ class TestBashCloneUrlParameterized:
log = log_file.read_text()
assert "ggml-org/llama.cpp.git" in log
def test_latest_tag_omits_branch_flag(self, tmp_path: Path):
    """resolved_tag='latest' should not pass --branch to git clone."""
    # Intercept git with a logging mock so we can inspect the clone args.
    mock_bin, log_file = make_mock_git(tmp_path)
    clone_target = str(tmp_path / "build_tmp")
    result = run_bash(
        self._clone_script(
            mock_bin,
            clone_target,
            resolved_tag = "latest",
        )
    )
    assert result.returncode == 0
    recorded = log_file.read_text()
    # "latest" is a sentinel, not a git ref: no --branch may reach git.
    assert "--branch" not in recorded
    assert "ggml-org/llama.cpp.git" in recorded
def test_empty_tag_omits_branch_flag(self, tmp_path: Path):
    """resolved_tag='' (empty) should not pass --branch to git clone."""
    # Intercept git with a logging mock so we can inspect the clone args.
    mock_bin, log_file = make_mock_git(tmp_path)
    clone_target = str(tmp_path / "build_tmp")
    result = run_bash(
        self._clone_script(
            mock_bin,
            clone_target,
            resolved_tag = "",
        )
    )
    assert result.returncode == 0
    recorded = log_file.read_text()
    # An empty tag means "no specific ref": no --branch may reach git.
    assert "--branch" not in recorded
    assert "ggml-org/llama.cpp.git" in recorded
# =========================================================================
# TEST GROUP D: Static source patterns -- setup.sh
@ -395,8 +426,8 @@ class TestSourcePatternsSh:
def test_clone_urls_parameterized_tag_path(self):
"""Non-PR clone path uses ${_LLAMA_SOURCE}.git, not hardcoded URL."""
# Find the non-PR clone line (after _CLONE_BRANCH_ARGS)
idx = self.content.index("_CLONE_BRANCH_ARGS=()")
# Find the non-PR clone line (after _CLONE_ARGS)
idx = self.content.index("_CLONE_ARGS=(git clone --depth 1)")
block = self.content[idx : idx + 400]
assert '"${_LLAMA_SOURCE}.git"' in block
assert "ggml-org/llama.cpp.git" not in block

View file

@ -14,13 +14,11 @@ Run: pytest tests/studio/install/test_pr4562_bugfixes.py -v
"""
import importlib.util
import json
import os
import subprocess
import sys
import textwrap
from pathlib import Path
from unittest.mock import patch
import pytest
@ -86,6 +84,9 @@ def run_bash(script: str, *, timeout: int = 10, env: dict | None = None) -> str:
timeout = timeout,
env = run_env,
)
assert (
result.returncode == 0
), f"bash script failed (exit {result.returncode}):\n{result.stderr}"
return result.stdout.strip()
@ -581,15 +582,12 @@ class TestSourceCodePatterns:
assert idx_rm > idx_git, "rm -rf should come after git check"
def test_setup_sh_clone_uses_branch_tag(self):
"""git clone in source-build should use --branch via _CLONE_BRANCH_ARGS."""
"""git clone in source-build should use --branch via the clone args array."""
content = SETUP_SH.read_text()
# The clone line should use _CLONE_BRANCH_ARGS (which conditionally includes --branch)
assert "_CLONE_ARGS=(git clone --depth 1)" in content
assert (
"_CLONE_BRANCH_ARGS" in content
), "Clone should use _CLONE_BRANCH_ARGS array"
assert (
'--branch "$_RESOLVED_LLAMA_TAG"' in content
), "_CLONE_BRANCH_ARGS should be set to --branch $_RESOLVED_LLAMA_TAG"
'_CLONE_ARGS+=(--branch "$_RESOLVED_LLAMA_TAG")' in content
), "_CLONE_ARGS should be extended with --branch $_RESOLVED_LLAMA_TAG"
# Verify the guard: --branch is only used when tag is not "latest"
assert (
'_RESOLVED_LLAMA_TAG" != "latest"' in content
@ -598,10 +596,78 @@ class TestSourceCodePatterns:
def test_setup_sh_latest_resolution_uses_helper_only(self):
"""Shell fallback should rely on helper output, not raw GitHub API tag_name."""
content = SETUP_SH.read_text()
assert "--resolve-install-tag" in content
assert "--resolve-llama-tag" in content
assert "_HELPER_RELEASE_REPO}/releases/latest" not in content
assert "ggml-org/llama.cpp/releases/latest" not in content
def test_setup_sh_macos_arm64_uses_metal_flags(self):
    """Apple Silicon source builds should explicitly enable Metal like upstream."""
    content = SETUP_SH.read_text()
    # Every Metal-related snippet the arm64 branch of setup.sh must contain.
    required_snippets = (
        "_IS_MACOS_ARM64=true",
        'if [ "$_IS_MACOS_ARM64" = true ]; then',
        "-DGGML_METAL=ON",
        "-DGGML_METAL_EMBED_LIBRARY=ON",
        "-DGGML_METAL_USE_BF16=ON",
        "-DCMAKE_INSTALL_RPATH=@loader_path",
        "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON",
    )
    for snippet in required_snippets:
        assert snippet in content, f"setup.sh is missing expected snippet: {snippet}"
def test_setup_sh_macos_metal_configure_has_cpu_fallback(self):
    """If Metal configure or build fails, setup should retry with CPU fallback."""
    content = SETUP_SH.read_text()
    assert "_TRY_METAL_CPU_FALLBACK=true" in content
    # Both failure paths (configure and build) must emit their warning substep.
    assert (
        'substep "Metal configure failed; retrying CPU build..." "$C_WARN"'
        in content
    )
    assert (
        'substep "Metal build failed; retrying CPU build..." "$C_WARN"' in content
    )
    assert 'run_quiet_no_exit "cmake llama.cpp (cpu fallback)"' in content
    assert "-DGGML_METAL=OFF" in content
    # _TRY_METAL_CPU_FALLBACK must be reset to false in both fallback branches
    # (1 init + 2 resets = at least 3 occurrences of =false)
    assert content.count("_TRY_METAL_CPU_FALLBACK=false") >= 3, (
        "_TRY_METAL_CPU_FALLBACK=false should appear at least 3 times "
        "(init + configure fallback + build fallback)"
    )
def test_macos_arm64_cpu_fallback_args_exclude_rpath(self):
    """CPU fallback args must NOT contain Metal-only RPATH flags at runtime."""
    # Drive the mirrored GPU-backend fragment as macOS arm64 with no CUDA.
    script = (
        '_IS_MACOS_ARM64=true\nNVCC_PATH=""\nGPU_BACKEND=""\n'
        + _GPU_BACKEND_FRAGMENT
    )
    output = run_bash(script)
    # The fragment echoes CPU_FALLBACK_CMAKE_ARGS=...; pull out that line.
    fallback_line = next(
        line
        for line in output.splitlines()
        if line.startswith("CPU_FALLBACK_CMAKE_ARGS=")
    )
    assert "-DGGML_METAL=OFF" in fallback_line
    assert (
        "@loader_path" not in fallback_line
    ), "CPU fallback args should not contain RPATH flags"
    assert (
        "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON" not in fallback_line
    ), "CPU fallback args should not contain RPATH build flag"
def test_setup_sh_does_not_enable_metal_for_intel_macos(self):
    """Intel macOS should stay on the existing non-Metal path in this patch."""
    content = SETUP_SH.read_text()
    # Metal must be gated behind the arm64 flag...
    assert 'if [ "$_IS_MACOS_ARM64" = true ]; then' in content
    # ...and the flag itself must only be set for Darwin + arm64/aarch64.
    assert (
        'Darwin" ] && { [ "$_HOST_MACHINE" = "arm64" ] || [ "$_HOST_MACHINE" = "aarch64" ]; }'
        in content
    )
    # No x86_64 handling may appear near the Metal enable line.
    metal_pos = content.find("-DGGML_METAL=ON")
    neighborhood = content[metal_pos - 200 : metal_pos + 200]
    assert "x86_64" not in neighborhood
def test_setup_ps1_uses_checkout_b(self):
"""PS1 should use checkout -B, not checkout --force FETCH_HEAD."""
content = SETUP_PS1.read_text()
@ -635,6 +701,7 @@ class TestSourceCodePatterns:
def test_setup_ps1_latest_resolution_uses_helper_only(self):
"""PS1 fallback should rely on helper output, not raw GitHub API tag_name."""
content = SETUP_PS1.read_text()
assert "--resolve-install-tag" in content
assert "--resolve-llama-tag" in content
assert "$HelperReleaseRepo/releases/latest" not in content
assert "ggml-org/llama.cpp/releases/latest" not in content
@ -657,3 +724,274 @@ class TestSourceCodePatterns:
found = True
break
assert found, "binary_path.parent not found in Linux branch of binary_env"
# =========================================================================
# TEST GROUP F: macOS Metal build logic (bash subprocess tests)
# =========================================================================
# Minimal bash fragment that mirrors setup.sh's GPU backend decision chain.
# Variables _IS_MACOS_ARM64, NVCC_PATH, GPU_BACKEND are injected by tests.
_GPU_BACKEND_FRAGMENT = textwrap.dedent("""\
CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF"
_TRY_METAL_CPU_FALLBACK=false
CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
_BUILD_DESC="building"
if [ "$_IS_MACOS_ARM64" = true ]; then
_BUILD_DESC="building (Metal)"
CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
_TRY_METAL_CPU_FALLBACK=true
elif [ -n "$NVCC_PATH" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DGGML_CUDA=ON"
_BUILD_DESC="building (CUDA)"
elif [ "$GPU_BACKEND" = "rocm" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DGGML_HIP=ON"
_BUILD_DESC="building (ROCm)"
else
_BUILD_DESC="building (CPU)"
fi
echo "CMAKE_ARGS=$CMAKE_ARGS"
echo "CPU_FALLBACK_CMAKE_ARGS=$CPU_FALLBACK_CMAKE_ARGS"
echo "BUILD_DESC=$_BUILD_DESC"
echo "TRY_METAL_CPU_FALLBACK=$_TRY_METAL_CPU_FALLBACK"
""")
class TestMacOSMetalBuildLogic:
    """Behavioral bash subprocess tests for the Metal GPU backend logic."""

    def test_macos_arm64_cmake_args_contain_metal_flags(self):
        """macOS arm64 should enable Metal, not CUDA."""
        script = (
            '_IS_MACOS_ARM64=true\nNVCC_PATH=""\nGPU_BACKEND=""\n'
            + _GPU_BACKEND_FRAGMENT
        )
        output = run_bash(script)
        assert "-DGGML_METAL=ON" in output
        assert "-DGGML_CUDA=ON" not in output
        assert "BUILD_DESC=building (Metal)" in output

    def test_intel_macos_no_metal_flags(self):
        """Intel macOS (not arm64) should not get Metal flags."""
        script = (
            '_IS_MACOS_ARM64=false\nNVCC_PATH=""\nGPU_BACKEND=""\n'
            + _GPU_BACKEND_FRAGMENT
        )
        output = run_bash(script)
        assert "-DGGML_METAL=ON" not in output
        assert "BUILD_DESC=building (CPU)" in output

    def test_macos_arm64_metal_precedes_nvcc(self):
        """Even with nvcc in PATH, macOS arm64 should use Metal, not CUDA."""
        # nvcc present AND GPU_BACKEND=cuda: the arm64 branch must still win.
        script = (
            '_IS_MACOS_ARM64=true\nNVCC_PATH="/usr/local/cuda/bin/nvcc"\n'
            'GPU_BACKEND="cuda"\n' + _GPU_BACKEND_FRAGMENT
        )
        output = run_bash(script)
        assert "-DGGML_METAL=ON" in output
        assert "-DGGML_CUDA=ON" not in output
        assert "BUILD_DESC=building (Metal)" in output

    def test_metal_cpu_fallback_triggers_on_cmake_failure(self, tmp_path: Path):
        """When cmake fails on Metal, the fallback should retry with -DGGML_METAL=OFF."""
        mock_bin = tmp_path / "mock_bin"
        mock_bin.mkdir()
        calls_file = tmp_path / "cmake_calls.log"
        # cmake that logs args and fails on first call (Metal), succeeds on second (CPU fallback)
        cmake_script = mock_bin / "cmake"
        cmake_script.write_text(
            textwrap.dedent(f"""\
                #!/bin/bash
                echo "$*" >> "{calls_file}"
                COUNTER_FILE="{tmp_path}/cmake_counter"
                if [ ! -f "$COUNTER_FILE" ]; then
                    echo 1 > "$COUNTER_FILE"
                    exit 1
                fi
                exit 0
                """)
        )
        cmake_script.chmod(0o755)
        # Mirror the configure-failure fallback branch of setup.sh with the
        # mock cmake first in PATH.
        script = textwrap.dedent(f"""\
            export PATH="{mock_bin}:$PATH"
            _IS_MACOS_ARM64=true
            NVCC_PATH=""
            GPU_BACKEND=""
            CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF"
            _TRY_METAL_CPU_FALLBACK=false
            CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
            _BUILD_DESC="building"
            if [ "$_IS_MACOS_ARM64" = true ]; then
                _BUILD_DESC="building (Metal)"
                CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
                CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
                _TRY_METAL_CPU_FALLBACK=true
            fi
            BUILD_OK=true
            _BUILD_TMP="{tmp_path}/build_tmp"
            mkdir -p "$_BUILD_TMP"
            if ! cmake -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS; then
                if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
                    _TRY_METAL_CPU_FALLBACK=false
                    echo "FALLBACK_TRIGGERED"
                    rm -rf "$_BUILD_TMP/build"
                    cmake -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS || BUILD_OK=false
                    if [ "$BUILD_OK" = true ]; then
                        _BUILD_DESC="building (CPU fallback)"
                    fi
                else
                    BUILD_OK=false
                fi
            fi
            echo "BUILD_OK=$BUILD_OK"
            echo "BUILD_DESC=$_BUILD_DESC"
            echo "TRY_METAL_CPU_FALLBACK=$_TRY_METAL_CPU_FALLBACK"
            """)
        output = run_bash(script)
        assert "FALLBACK_TRIGGERED" in output
        assert "BUILD_OK=true" in output
        assert "BUILD_DESC=building (CPU fallback)" in output
        assert (
            "TRY_METAL_CPU_FALLBACK=false" in output
        ), "Fallback flag should be reset to false after configure fallback"
        # Verify cmake args: first call has Metal ON, second has Metal OFF
        calls = calls_file.read_text().splitlines()
        assert len(calls) >= 2, f"Expected >= 2 cmake calls, got {len(calls)}"
        assert (
            "-DGGML_METAL=ON" in calls[0]
        ), f"First cmake call should have Metal ON: {calls[0]}"
        assert (
            "-DGGML_METAL=OFF" in calls[1]
        ), f"Second cmake call should have Metal OFF: {calls[1]}"
        assert (
            "-DGGML_METAL=ON" not in calls[1]
        ), f"Second cmake call should NOT have Metal ON: {calls[1]}"
        assert (
            "@loader_path" not in calls[1]
        ), f"CPU fallback should not have RPATH: {calls[1]}"
        assert (
            "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON" not in calls[1]
        ), f"CPU fallback should not have RPATH build flag: {calls[1]}"

    def test_metal_build_failure_retries_cpu_fallback(self, tmp_path: Path):
        """When cmake --build fails on Metal, the fallback should re-configure and rebuild with CPU."""
        mock_bin = tmp_path / "mock_bin"
        mock_bin.mkdir()
        calls_file = tmp_path / "cmake_calls.log"
        # cmake mock: configure always succeeds; first --build fails, rest succeed
        cmake_script = mock_bin / "cmake"
        cmake_script.write_text(
            textwrap.dedent(f"""\
                #!/bin/bash
                echo "$*" >> "{calls_file}"
                if [ "$1" = "--build" ]; then
                    BUILD_COUNTER_FILE="{tmp_path}/build_counter"
                    if [ ! -f "$BUILD_COUNTER_FILE" ]; then
                        echo 1 > "$BUILD_COUNTER_FILE"
                        exit 1
                    fi
                fi
                exit 0
                """)
        )
        cmake_script.chmod(0o755)
        # Mirror both the configure-failure and build-failure fallback
        # branches of setup.sh; only the build branch should fire here.
        script = textwrap.dedent(f"""\
            export PATH="{mock_bin}:$PATH"
            _IS_MACOS_ARM64=true
            NVCC_PATH=""
            GPU_BACKEND=""
            CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF"
            _TRY_METAL_CPU_FALLBACK=false
            CPU_FALLBACK_CMAKE_ARGS="$CMAKE_ARGS"
            CMAKE_GENERATOR_ARGS=""
            NCPU=2
            _BUILD_DESC="building"
            if [ "$_IS_MACOS_ARM64" = true ]; then
                _BUILD_DESC="building (Metal)"
                CMAKE_ARGS="$CMAKE_ARGS -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON -DCMAKE_INSTALL_RPATH=@loader_path -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
                CPU_FALLBACK_CMAKE_ARGS="$CPU_FALLBACK_CMAKE_ARGS -DGGML_METAL=OFF"
                _TRY_METAL_CPU_FALLBACK=true
            fi
            BUILD_OK=true
            _BUILD_TMP="{tmp_path}/build_tmp"
            mkdir -p "$_BUILD_TMP"
            # Configure (succeeds)
            if ! cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CMAKE_ARGS; then
                if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
                    _TRY_METAL_CPU_FALLBACK=false
                    echo "CONFIGURE_FALLBACK"
                    rm -rf "$_BUILD_TMP/build"
                    cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS || BUILD_OK=false
                    if [ "$BUILD_OK" = true ]; then
                        _BUILD_DESC="building (CPU fallback)"
                    fi
                else
                    BUILD_OK=false
                fi
            fi
            # Build (first --build fails, triggers fallback)
            if [ "$BUILD_OK" = true ]; then
                if ! cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU"; then
                    if [ "$_TRY_METAL_CPU_FALLBACK" = true ]; then
                        _TRY_METAL_CPU_FALLBACK=false
                        echo "BUILD_FALLBACK_TRIGGERED"
                        rm -rf "$_BUILD_TMP/build"
                        if cmake $CMAKE_GENERATOR_ARGS -S "$_BUILD_TMP" -B "$_BUILD_TMP/build" $CPU_FALLBACK_CMAKE_ARGS; then
                            _BUILD_DESC="building (CPU fallback)"
                            cmake --build "$_BUILD_TMP/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false
                        else
                            BUILD_OK=false
                        fi
                    else
                        BUILD_OK=false
                    fi
                fi
            fi
            echo "BUILD_OK=$BUILD_OK"
            echo "BUILD_DESC=$_BUILD_DESC"
            echo "TRY_METAL_CPU_FALLBACK=$_TRY_METAL_CPU_FALLBACK"
            """)
        output = run_bash(script)
        assert "CONFIGURE_FALLBACK" not in output, "Configure should have succeeded"
        assert "BUILD_FALLBACK_TRIGGERED" in output
        assert "BUILD_OK=true" in output
        assert "BUILD_DESC=building (CPU fallback)" in output
        assert (
            "TRY_METAL_CPU_FALLBACK=false" in output
        ), "Fallback flag should be reset to false after build fallback"
        # Verify: configure with Metal ON, build fails, re-configure with Metal OFF, rebuild
        calls = calls_file.read_text().splitlines()
        assert len(calls) >= 4, f"Expected >= 4 cmake calls, got {len(calls)}: {calls}"
        # First call: configure with Metal ON
        assert "-DGGML_METAL=ON" in calls[0]
        # Second call: build (fails)
        assert "--build" in calls[1]
        # Third call: re-configure with Metal OFF and no RPATH flags
        assert "-DGGML_METAL=OFF" in calls[2]
        assert "-DGGML_METAL=ON" not in calls[2]
        assert (
            "@loader_path" not in calls[2]
        ), f"CPU fallback should not have RPATH: {calls[2]}"
        assert (
            "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON" not in calls[2]
        ), f"CPU fallback should not have RPATH build flag: {calls[2]}"
        assert (
            "-DLLAMA_BUILD_TESTS=OFF" in calls[2]
        ), f"CPU fallback should preserve baseline flags: {calls[2]}"
        # Fourth call: rebuild (succeeds)
        assert "--build" in calls[3]

View file

@ -1,175 +0,0 @@
import importlib.util
import sys
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[3]
MODULE_PATH = REPO_ROOT / "validate-llama-prebuilt.py"
if not MODULE_PATH.is_file():
pytest.skip(
f"validate-llama-prebuilt.py not present at {MODULE_PATH}",
allow_module_level = True,
)
SPEC = importlib.util.spec_from_file_location("validate_llama_prebuilt", MODULE_PATH)
assert SPEC is not None and SPEC.loader is not None
VALIDATE = importlib.util.module_from_spec(SPEC)
sys.modules[SPEC.name] = VALIDATE
SPEC.loader.exec_module(VALIDATE)
def test_build_local_approved_checksums_uses_staged_upstream_tag(
tmp_path: Path, monkeypatch
):
stage_dir = tmp_path / "release-1"
stage_dir.mkdir()
asset_path = stage_dir / "app-test-linux-x64-cuda12-newer.tar.gz"
asset_path.write_bytes(b"bundle")
sibling_checksums = stage_dir / VALIDATE.installer.DEFAULT_PUBLISHED_SHA256_ASSET
sibling_checksums.write_text(
"""
{
"schema_version": 1,
"component": "llama.cpp",
"release_tag": "release-1",
"upstream_tag": "b9001",
"source_commit": "deadbeef",
"artifacts": {
"llama.cpp-source-b9001.tar.gz": {
"sha256": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
"repo": "ggml-org/llama.cpp",
"kind": "upstream-source"
}
}
}
""".strip()
+ "\n",
encoding = "utf-8",
)
asset = VALIDATE.LocalAsset(
path = asset_path,
tag = "test",
name = asset_path.name,
install_kind = "linux-cuda",
source_kind = "app-bundle",
native_runnable = True,
bundle_profile = "cuda12-newer",
runtime_line = "cuda12",
)
checksums = VALIDATE.build_local_approved_checksums(
asset,
allow_network_source_hash = False,
)
assert checksums.release_tag == "release-1"
assert checksums.upstream_tag == "b9001"
assert "llama.cpp-source-b9001.tar.gz" in checksums.artifacts
assert "llama.cpp-source-test.tar.gz" not in checksums.artifacts
def test_validate_native_asset_passes_release_tag_and_upstream_tag(
tmp_path: Path, monkeypatch
):
stage_dir = tmp_path / "release-7"
stage_dir.mkdir()
asset_path = stage_dir / "app-test-linux-x64-cuda12-newer.tar.gz"
asset_path.write_bytes(b"bundle")
sibling_checksums = stage_dir / VALIDATE.installer.DEFAULT_PUBLISHED_SHA256_ASSET
sibling_checksums.write_text(
"""
{
"schema_version": 1,
"component": "llama.cpp",
"release_tag": "release-7",
"upstream_tag": "b9007",
"source_commit": "deadbeef",
"artifacts": {
"llama.cpp-source-b9007.tar.gz": {
"sha256": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
"repo": "ggml-org/llama.cpp",
"kind": "upstream-source"
}
}
}
""".strip()
+ "\n",
encoding = "utf-8",
)
asset = VALIDATE.LocalAsset(
path = asset_path,
tag = "test",
name = asset_path.name,
install_kind = "linux-cuda",
source_kind = "app-bundle",
native_runnable = True,
bundle_profile = "cuda12-newer",
runtime_line = "cuda12",
)
host = VALIDATE.installer.HostInfo(
system = "Linux",
machine = "x86_64",
is_windows = False,
is_linux = True,
is_macos = False,
is_x86_64 = True,
is_arm64 = False,
nvidia_smi = None,
driver_cuda_version = None,
compute_caps = [],
visible_cuda_devices = None,
has_physical_nvidia = False,
has_usable_nvidia = False,
)
monkeypatch.setattr(VALIDATE.installer, "detect_host", lambda: host)
monkeypatch.setattr(
VALIDATE.installer,
"download_validation_model",
lambda probe_path, cache_path: probe_path.write_bytes(b"probe"),
)
captured = {}
def fake_validate_prebuilt_attempts(
attempts,
host,
install_dir,
work_dir,
probe_path,
*,
requested_tag,
llama_tag,
release_tag,
approved_checksums,
initial_fallback_used = False,
existing_install_dir = None,
):
captured["requested_tag"] = requested_tag
captured["llama_tag"] = llama_tag
captured["release_tag"] = release_tag
staging_dir = VALIDATE.installer.create_install_staging_dir(install_dir)
return attempts[0], staging_dir, False
monkeypatch.setattr(
VALIDATE.installer,
"validate_prebuilt_attempts",
fake_validate_prebuilt_attempts,
)
record = VALIDATE.validate_native_asset(
asset,
keep_temp = False,
allow_network_source_hash = False,
)
assert record.status == "PASS"
assert captured == {
"requested_tag": "test",
"llama_tag": "b9007",
"release_tag": "release-7",
}