Auto-configure AMDGPU_ASIC_ID_TABLE_PATH on ROCm startup (#4060)

* Auto-configure AMDGPU_ASIC_ID_TABLE_PATH on ROCm startup

* Remove ROCm fd2 amdgpu.ids noise filter wrappers

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Use PyPI bitsandbytes for amd extra to avoid malformed wheel URL

* Add amd-preview extra for bitsandbytes continuous wheel channel

* Keep amd extra on bitsandbytes>=0.49.1 and remove amd-preview

---------

Co-authored-by: Daniel Hanchen <danielhanchen@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Daniel Han 2026-02-14 21:52:31 -08:00 committed by GitHub
parent 842099f2b0
commit defcbf8bea
3 changed files with 84 additions and 85 deletions

View file

@@ -994,9 +994,8 @@ intel = [
]
amd = [
"unsloth[huggingfacenotorch]",
"bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
"bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-win_amd64.whl ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
"bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_aarch64.whl ; ('linux' in sys_platform) and (platform_machine == 'aarch64')",
"bitsandbytes>=0.49.1 ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64' or platform_machine == 'aarch64')",
"bitsandbytes>=0.49.1 ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]
[project.urls]

View file

@@ -29,18 +29,21 @@ from .import_fixes import (
fix_message_factory_issue,
check_fbgemm_gpu_version,
disable_broken_causal_conv1d,
_filter_rocm_amdgpu_ids_fd2_noise,
configure_amdgpu_asic_id_table_path,
torchvision_compatibility_check,
fix_diffusers_warnings,
fix_huggingface_hub,
)
# Configure libdrm ids table path early so ROCm can resolve AMD GPU names.
configure_amdgpu_asic_id_table_path()
disable_broken_causal_conv1d()
fix_message_factory_issue()
check_fbgemm_gpu_version()
torchvision_compatibility_check()
fix_diffusers_warnings()
fix_huggingface_hub()
del configure_amdgpu_asic_id_table_path
del disable_broken_causal_conv1d
del fix_message_factory_issue
del check_fbgemm_gpu_version
@@ -96,9 +99,7 @@ try:
# os.system("pip install --upgrade --no-cache-dir --no-deps --user unsloth_zoo")
# except:
# raise ImportError("Unsloth: Please update unsloth_zoo via `pip install --upgrade --no-cache-dir --no-deps unsloth_zoo`")
# Filter native fd=2 amdgpu.ids noise during early unsloth_zoo import.
with _filter_rocm_amdgpu_ids_fd2_noise():
import unsloth_zoo
import unsloth_zoo
except PackageNotFoundError:
raise ImportError(
f"Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo` then retry!"
@@ -109,9 +110,7 @@ del PackageNotFoundError, importlib_version
# Try importing PyTorch and check version
try:
# Filter native fd=2 amdgpu.ids noise during torch import on ROCm.
with _filter_rocm_amdgpu_ids_fd2_noise():
import torch
import torch
except ModuleNotFoundError:
raise ImportError(
"Unsloth: Pytorch is not installed. Go to https://pytorch.org/.\n"
@@ -120,16 +119,14 @@ except ModuleNotFoundError:
except:
raise
# Filter native fd=2 amdgpu.ids noise during early device detection import.
with _filter_rocm_amdgpu_ids_fd2_noise():
from unsloth_zoo.device_type import (
is_hip,
get_device_type,
DEVICE_TYPE,
DEVICE_TYPE_TORCH,
DEVICE_COUNT,
ALLOW_PREQUANTIZED_MODELS,
)
from unsloth_zoo.device_type import (
is_hip,
get_device_type,
DEVICE_TYPE,
DEVICE_TYPE_TORCH,
DEVICE_COUNT,
ALLOW_PREQUANTIZED_MODELS,
)
# Fix other issues
from .import_fixes import (
@@ -305,10 +302,8 @@ elif DEVICE_TYPE == "xpu":
# TODO: check triton for intel installed properly.
pass
# Filter native fd=2 amdgpu.ids noise during model import startup.
with _filter_rocm_amdgpu_ids_fd2_noise():
from .models import *
from .models import __version__
from .models import *
from .models import __version__
from .save import *
from .chat_templates import *
from .tokenizer_utils import *

View file

@@ -16,7 +16,6 @@ import os
import importlib.abc
import importlib.machinery
import importlib.util
import contextlib
from pathlib import Path
from importlib.metadata import version as importlib_version
from packaging.version import Version as TrueVersion
@@ -26,7 +25,6 @@ import textwrap
import warnings
import sys
import functools
import tempfile
# We cannot do from unsloth_zoo.log import logger since FBGEMM might cause seg faults.
UNSLOTH_ENABLE_LOGGING = os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") in (
@@ -1191,6 +1189,13 @@ _ROCM_PATH_HINTS = (
Path("/dev/kfd"),
Path("/sys/module/amdgpu"),
)
_AMDGPU_ASIC_ID_TABLE_PATH_ENV = "AMDGPU_ASIC_ID_TABLE_PATH"
_AMDGPU_ASIC_ID_CANDIDATE_PATHS = (
Path("/usr/share/libdrm/amdgpu.ids"),
Path("/usr/local/share/libdrm/amdgpu.ids"),
Path("/opt/rocm/share/libdrm/amdgpu.ids"),
Path("/opt/amdgpu/share/libdrm/amdgpu.ids"),
)
def _log_rocm_detection(message):
@@ -1236,68 +1241,70 @@ def _is_rocm_torch_build() -> bool:
return False
@contextlib.contextmanager
def _filter_stderr_fd(
suppressed_substrings = (_AMDGPU_IDS_MISSING_TEXT,),
):
"""
Capture low-level fd=2 writes, drop only known noisy substrings, and replay
everything else after the protected block.
"""
saved_stderr_fd = None
temp_file = None
redirected = False
def _iter_amdgpu_asic_id_table_candidates():
# Try torch-adjacent ids table paths first without importing torch.
try:
saved_stderr_fd = os.dup(2)
temp_file = tempfile.TemporaryFile(mode = "w+b")
os.dup2(temp_file.fileno(), 2)
redirected = True
torch_spec = importlib.util.find_spec("torch")
except Exception:
redirected = False
torch_spec = None
try:
yield
finally:
captured = b""
if redirected and temp_file is not None:
try:
temp_file.flush()
temp_file.seek(0)
captured = temp_file.read()
except Exception:
captured = b""
if redirected and saved_stderr_fd is not None:
try:
os.dup2(saved_stderr_fd, 2)
except Exception:
pass
if captured and saved_stderr_fd is not None:
try:
for raw_line in captured.splitlines(keepends = True):
line = raw_line.decode("utf-8", errors = "ignore")
if any(s in line for s in suppressed_substrings):
continue
os.write(saved_stderr_fd, raw_line)
except Exception:
pass
if temp_file is not None:
try:
temp_file.close()
except Exception:
pass
if saved_stderr_fd is not None:
try:
os.close(saved_stderr_fd)
except Exception:
pass
roots = []
if torch_spec is not None:
if torch_spec.origin:
roots.append(Path(torch_spec.origin).resolve().parent)
if torch_spec.submodule_search_locations:
for location in torch_spec.submodule_search_locations:
roots.append(Path(location).resolve())
seen = set()
for root in roots:
for candidate in (
root / "share" / "libdrm" / "amdgpu.ids",
root.parent / "share" / "libdrm" / "amdgpu.ids",
root.parent.parent / "share" / "libdrm" / "amdgpu.ids",
):
candidate_str = str(candidate)
if candidate_str in seen:
continue
seen.add(candidate_str)
yield candidate
for candidate in _AMDGPU_ASIC_ID_CANDIDATE_PATHS:
candidate_str = str(candidate)
if candidate_str in seen:
continue
seen.add(candidate_str)
yield candidate
def _filter_rocm_amdgpu_ids_fd2_noise():
# ROCm/libdrm can emit amdgpu.ids missing errors via low-level fd=2 writes.
# Python-level stderr filters cannot intercept those writes.
def configure_amdgpu_asic_id_table_path():
# Honor an existing valid user-provided path.
configured = os.environ.get(_AMDGPU_ASIC_ID_TABLE_PATH_ENV, "").strip()
if configured:
configured_path = Path(configured)
try:
if configured_path.is_file():
return str(configured_path)
except Exception:
pass
# Only attempt this on ROCm-like environments.
if not _is_rocm_torch_build():
return contextlib.nullcontext()
return _filter_stderr_fd()
return None
for candidate in _iter_amdgpu_asic_id_table_candidates():
try:
if candidate.is_file():
os.environ[_AMDGPU_ASIC_ID_TABLE_PATH_ENV] = str(candidate)
if UNSLOTH_ENABLE_LOGGING:
logger.info(
f"Unsloth: Set {_AMDGPU_ASIC_ID_TABLE_PATH_ENV}={candidate}"
)
return str(candidate)
except Exception:
continue
return None
def _is_causal_conv1d_name(module_name: str) -> bool:
@@ -1432,9 +1439,7 @@ def disable_broken_causal_conv1d():
return
try:
# Suppress only native fd=2 amdgpu.ids noise during causal_conv1d probe.
with _filter_rocm_amdgpu_ids_fd2_noise():
import causal_conv1d # noqa: F401
import causal_conv1d # noqa: F401
return
except Exception as error: