From 1ccfd2e0a5e56e78ed36ee93cbaaa12462e5b5b3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 15 Apr 2026 05:24:41 -0700 Subject: [PATCH] fix(rocm): tighten gfx regex to ignore generic ISA lines (#5033) * fix(rocm): tighten gfx regex to ignore generic ISA lines ROCm 6.1+ rocminfo emits generic ISA names such as "amdgcn-amd-amdhsa--gfx11-generic" and "amdgcn-amd-amdhsa--gfx9-4-generic" alongside the real GPU name. The previous `gfx[1-9]` regex used in `_has_rocm_gpu` matched both, so a host with only a generic ISA entry would be reported as having a usable AMD GPU. Tighten the pattern to `gfx[1-9][0-9a-z]{2,3}` so only real gfx ids match. This covers every documented target from GFX6 (gfx600) through GFX12 (gfx1201), including letter-suffixed ids like gfx90a (MI250 / MI250X) and gfx90c. Documented generic ISA names always have 1 or 2 digits before the dash and no longer match. Applied to both `studio/install_python_stack.py` and `studio/install_llama_prebuilt.py` so the two detection paths agree. Co-authored-by: Martin Hoyer * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Martin Hoyer Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- studio/install_llama_prebuilt.py | 10 ++++++++-- studio/install_python_stack.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/studio/install_llama_prebuilt.py b/studio/install_llama_prebuilt.py index e43a78bae..fd2b78d05 100755 --- a/studio/install_llama_prebuilt.py +++ b/studio/install_llama_prebuilt.py @@ -2576,8 +2576,14 @@ def detect_host() -> HostInfo: has_rocm = False if is_linux: for _cmd, _check in ( - # rocminfo: look for "gfxNNNN" with nonzero first digit (gfx000 is CPU agent) - (["rocminfo"], lambda out: bool(re.search(r"gfx[1-9]", out.lower()))), + # rocminfo: look for a real gfx GPU id (3-4 chars, nonzero first digit). + # gfx000 is the CPU agent; ROCm 6.1+ also emits generic ISA lines like + # "gfx11-generic" or "gfx9-4-generic" which only have 1-2 digits before + # the dash and must not be treated as a real GPU. + ( + ["rocminfo"], + lambda out: bool(re.search(r"gfx[1-9][0-9a-z]{2,3}", out.lower())), + ), (["amd-smi", "list"], _amd_smi_has_gpu), ): _exe = shutil.which(_cmd[0]) diff --git a/studio/install_python_stack.py b/studio/install_python_stack.py index cf024d407..2b9fd084d 100644 --- a/studio/install_python_stack.py +++ b/studio/install_python_stack.py @@ -183,8 +183,14 @@ def _has_rocm_gpu() -> bool: import re for cmd, check_fn in ( - # rocminfo: look for "Name: gfxNNNN" with nonzero first digit (gfx000 is the CPU agent) - (["rocminfo"], lambda out: bool(re.search(r"gfx[1-9]", out.lower()))), + # rocminfo: look for a real gfx GPU id (3-4 chars, nonzero first digit). + # gfx000 is the CPU agent; ROCm 6.1+ also emits generic ISA lines like + # "gfx11-generic" or "gfx9-4-generic" which only have 1-2 digits before + # the dash and must not be treated as a real GPU. + ( + ["rocminfo"], + lambda out: bool(re.search(r"gfx[1-9][0-9a-z]{2,3}", out.lower())), + ), # amd-smi list: require "GPU: " data rows, not just a header ( ["amd-smi", "list"],