mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
Add ROCm (AMD GPU) support to studio setup (#4585)
* Add support for ROCm in studio setup
* Fix ROCm detection bugs: ROCM_PATH resolution, CUDA guard, compiler selection
- Set GPU_BACKEND="cuda" when nvcc is found (CUDA path was unreachable)
- Guard ROCm detection with `if [ -z "$GPU_BACKEND" ]` so CUDA takes
priority on mixed-toolchain hosts
- Rename ROCM_PATH to ROCM_HIPCC for the hipcc binary; resolve the
actual ROCm root via readlink -f and hipconfig -R into ROCM_ROOT
- Export both ROCM_PATH and HIP_PATH as the resolved root directory
- Use HIPCXX via hipconfig -l instead of legacy CMAKE_C_COMPILER=hipcc
- Switch grep -oP to grep -oE for portability across Linux distros
- Use GPU_TARGETS (upstream cmake variable) instead of AMDGPU_TARGETS
- Remove stale hardcoded fallback targets; let cmake auto-detect instead
* Fix gfx regex to match gfx90a (MI210/MI250/MI250X)
The grep and bash regex used {3,4} digits after 'gfx', which silently
excluded gfx90a (2 digits + letter 'a') -- the architecture for AMD
Instinct MI210, MI250, and MI250X data-center GPUs. Change to {2,4}
so all real gfx targets from gfx90a through gfx1200 are matched.
---------
Co-authored-by: edamamez <eda.zhou@amd.com>
This commit is contained in:
parent
7eb48512bc
commit
3446e0c489
1 changed file with 68 additions and 1 deletion
|
|
@ -490,17 +490,40 @@ rm -rf "$LLAMA_CPP_DIR"
|
|||
echo " Using ccache for faster compilation"
|
||||
fi
|
||||
|
||||
# Detect CUDA: check nvcc on PATH, then common install locations
|
||||
# Detect GPU backend: CUDA (NVIDIA) or ROCm (AMD)

# Print the path of a GPU toolkit compiler, or nothing when none is found.
# Arguments:
#   $1 - compiler name (e.g. nvcc, hipcc)
#   $2 - default toolkit root (e.g. /usr/local/cuda, /opt/rocm)
# Search order: PATH, then <root>/bin, then the highest-versioned
# <root>-<version>/bin install. Only executable files are accepted.
# Always returns 0 so it is safe in a plain assignment even under `set -e`.
_find_gpu_compiler() {
    local tool="$1" root="$2" candidate
    if command -v "$tool" >/dev/null 2>&1; then
        command -v "$tool"
    elif [ -x "$root/bin/$tool" ]; then
        printf '%s\n' "$root/bin/$tool"
    else
        # Glob instead of parsing `ls`; an unmatched pattern stays literal and
        # is rejected by the -x test. `sort -V` puts the newest version last.
        for candidate in "$root"-*/bin/"$tool"; do
            if [ -x "$candidate" ]; then
                printf '%s\n' "$candidate"
            fi
        done | sort -V | tail -n 1
    fi
}

GPU_BACKEND=""

# Check for CUDA: check nvcc on PATH, then common install locations
NVCC_PATH="$(_find_gpu_compiler nvcc /usr/local/cuda)"
if [ -n "$NVCC_PATH" ]; then
    GPU_BACKEND="cuda"
    # Only extend PATH when nvcc was found outside of it
    if ! command -v nvcc >/dev/null 2>&1; then
        export PATH="${NVCC_PATH%/*}:$PATH"
    fi
fi

# Check for ROCm (AMD) only if CUDA was not already selected, so CUDA takes
# priority on hosts that have both toolchains installed
ROCM_HIPCC=""
if [ -z "$GPU_BACKEND" ]; then
    ROCM_HIPCC="$(_find_gpu_compiler hipcc /opt/rocm)"
    if [ -n "$ROCM_HIPCC" ]; then
        GPU_BACKEND="rocm"
        # Only extend PATH when hipcc was found outside of it
        if ! command -v hipcc >/dev/null 2>&1; then
            export PATH="${ROCM_HIPCC%/*}:$PATH"
        fi
    fi
fi
|
||||
|
||||
if [ -n "$NVCC_PATH" ]; then
|
||||
|
|
@ -535,9 +558,53 @@ rm -rf "$LLAMA_CPP_DIR"
|
|||
|
||||
# Multi-threaded nvcc compilation (uses all CPU cores per .cu file)
|
||||
CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CUDA_FLAGS=--threads=0"
|
||||
elif [ "$GPU_BACKEND" = "rocm" ]; then
|
||||
# Print the ';'-separated list of AMD gfx architectures reported by rocminfo
# (e.g. "gfx1100;gfx90a"), or nothing when rocminfo is missing or reports none.
# Whole tokens are extracted first and then validated, so names that merely
# start like an architecture (e.g. "gfx11-generic", which some ROCm releases
# appear to list as a generic ISA) are dropped instead of being truncated to an
# invalid target such as "gfx11".
_detect_amd_gfx_targets() {
    local token targets=""
    command -v rocminfo &>/dev/null || return 0
    while IFS= read -r token; do
        # 2-4 digits plus an optional letter covers gfx90a through gfx1200
        if [[ "$token" =~ ^gfx[0-9]{2,4}[a-z]?$ ]]; then
            targets="${targets}${targets:+;}$token"
        fi
    done < <(rocminfo 2>/dev/null \
        | grep -oE 'gfx[0-9a-z]+(-[0-9a-z-]+)?' \
        | LC_ALL=C sort -u || true)
    printf '%s' "$targets"
}

# Resolve hipcc symlinks to find the real ROCm root
_HIPCC_REAL="$(readlink -f "$ROCM_HIPCC" 2>/dev/null || printf '%s' "$ROCM_HIPCC")"
ROCM_ROOT=""
if command -v hipconfig &>/dev/null; then
    ROCM_ROOT="$(hipconfig -R 2>/dev/null || true)"
fi
if [ -z "$ROCM_ROOT" ]; then
    # Fall back to the parent of hipcc's bin directory; best-effort, so a
    # failing cd must not abort the script
    ROCM_ROOT="$(cd "$(dirname "$_HIPCC_REAL")/.." 2>/dev/null && pwd || true)"
fi

echo "   Building with ROCm support (AMD GPU, hipcc: $_HIPCC_REAL)..."
CMAKE_ARGS="$CMAKE_ARGS -DGGML_HIP=ON"
# Do not export empty paths: an empty ROCM_PATH/HIP_PATH would override the
# toolchain's own defaults with nothing
if [ -n "$ROCM_ROOT" ]; then
    export ROCM_PATH="$ROCM_ROOT"
    export HIP_PATH="$ROCM_ROOT"
fi

# Use upstream-recommended HIP compiler (not legacy hipcc-as-CXX)
if command -v hipconfig &>/dev/null; then
    _HIP_CLANG_DIR="$(hipconfig -l 2>/dev/null || true)"
    # Only point HIPCXX at clang when it actually exists there
    if [ -n "$_HIP_CLANG_DIR" ] && [ -x "$_HIP_CLANG_DIR/clang" ]; then
        export HIPCXX="$_HIP_CLANG_DIR/clang"
    fi
fi

# Detect AMD GPU architecture (gfx target)
GPU_TARGETS="$(_detect_amd_gfx_targets)"

if [ -n "$GPU_TARGETS" ]; then
    echo "   AMD GPU architectures: ${GPU_TARGETS//;/, } -- limiting build to detected targets"
    CMAKE_ARGS="$CMAKE_ARGS -DGPU_TARGETS=${GPU_TARGETS}"
else
    echo "   Could not detect AMD GPU arch -- building for default targets (cmake will auto-detect)"
fi
|
||||
elif [ -d /usr/local/cuda ] || nvidia-smi &>/dev/null; then
|
||||
echo " CUDA driver detected but nvcc not found — building CPU-only"
|
||||
echo " To enable GPU: install cuda-toolkit or add nvcc to PATH"
|
||||
elif [ -d /opt/rocm ] || command -v rocm-smi &>/dev/null; then
|
||||
echo " ROCm driver detected but hipcc not found — building CPU-only"
|
||||
echo " To enable GPU: install rocm-dev or add hipcc to PATH"
|
||||
else
|
||||
echo " Building CPU-only (no CUDA detected)..."
|
||||
fi
|
||||
|
|
|
|||
Loading…
Reference in a new issue