mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
fix(install.ps1): fix non-NVIDIA package resolution — split torch+unsloth install (#4515)
* fix(install.ps1): split torch+unsloth install to fix non-NVIDIA package resolution --torch-backend=auto on a non-NVIDIA Windows machine causes uv to resolve unsloth==2024.8 (pre-CLI, no unsloth.exe). Fix: detect GPU robustly (PATH + hardcoded fallback paths, mirrors setup.ps1), install torch first with an explicit --index-url (CUDA variant for NVIDIA, CPU for everyone else), then install unsloth separately without --torch-backend so the solver always picks a modern release that ships the Studio CLI. Closes the remaining gap flagged in #4478. * fix(install.ps1): align warning with setup.ps1, add --upgrade, handle CUDA 11.x - Match the no-GPU warning message to studio/setup.ps1 wording (chat-only GGUF mode, driver download link) - Add CUDA 11.x floor check in Get-TorchIndexUrl so old drivers fall back to CPU wheels instead of silently getting cu124 - Log a warning when nvidia-smi output cannot be parsed - Add --upgrade to both uv pip install calls so re-runs pick up newer package versions * revert --upgrade from uv pip install calls uv pip install already resolves to the latest satisfying version; --upgrade is unnecessary and could force unwanted re-installs. * fix: replace frozen cu124 fallbacks with cu126, guard CUDA 11.x cu124 wheels are frozen at torch 2.6.0 -- falling back to them pins users to an outdated PyTorch. Three issues fixed in both install.ps1 and setup.ps1: 1. CUDA 12.0-12.5 now maps to cu126 (was cu124). 2. CUDA 11.x and older now falls back to cpu (was cu124, which would silently install incompatible GPU wheels). 3. Parse-failure and no-nvidia-smi fallbacks updated to cu126/cpu. Adds tests/test_cuda_wheel_mapping.py covering the mapping logic, nvidia-smi parsing, PS1 file sync, PyTorch index URL validation, and sandbox torch installs. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove test file from PR branch Test file kept locally, not needed in the PR. 
* fix: map CUDA 11.x to cu118 instead of cpu PyTorch still publishes cu118 wheels (up to torch 2.7.1), so CUDA 11.x users get GPU-accelerated torch rather than being forced to CPU-only. Only CUDA 10.x and older fall back to cpu. * fix: revert CUDA 12.0-12.5 to cu124, handle cpu tag in setup.ps1 CUDA 12.0-12.5 drivers only support up to their reported CUDA version, so cu126 wheels (built with CUDA 12.6) fail to load. Revert the catch- all for 12.0-12.5 back to cu124. Also fix setup.ps1 caller: when Get-PytorchCudaTag returns "cpu" (e.g. CUDA 10.x driver), the installer now correctly skips Triton and prints "CPU-only" instead of "CUDA support (cpu)". * fix: add --upgrade to unsloth install for stale venv repair On reruns against an existing venv, uv pip install unsloth makes no changes if unsloth==2024.8 is already installed (it satisfies the constraint). Adding --upgrade only to the unsloth install ensures stale installs get repaired without forcing a multi-GB torch re-download. * fix: use --upgrade-package to avoid clobbering torch CUDA wheels `--upgrade unsloth` re-resolves torch from default PyPI, stripping the +cuXXX suffix installed in step 1. `--upgrade-package unsloth unsloth` upgrades only unsloth (and pulls missing deps like transformers, trl) while preserving the pinned torch from the CUDA-specific index. * docs: explain why split-install and --upgrade-package are needed Expand the inline comment block to document both design decisions: 1. Why torch is installed separately (solver fallback to 2024.8) 2. Why --upgrade-package is used instead of --upgrade (preserves CUDA wheels) --------- Co-authored-by: LeoBorcherding <LeoBorcherding@users.noreply.github.com> Co-authored-by: Daniel Han <danielhanchen@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
100b8857f2
commit
71c77d4e96
2 changed files with 97 additions and 13 deletions
81
install.ps1
81
install.ps1
|
|
@@ -158,9 +158,86 @@ function Install-UnslothStudio {
|
|||
Write-Host "==> Virtual environment ${VenvName} already exists, skipping creation."
|
||||
}
|
||||
|
||||
# ── Install unsloth directly into the venv (no activation needed) ──
# ── Detect GPU (robust: PATH + hardcoded fallback paths, mirrors setup.ps1) ──
# Probe for a working nvidia-smi binary: first via PATH, then via the two
# well-known install locations. A candidate only counts if it actually runs
# with exit code 0, so a leftover binary with no driver does not flip us
# into GPU mode.
$HasNvidiaSmi = $false
$NvidiaSmiExe = $null
try {
    $pathCmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
    if ($pathCmd) {
        & $pathCmd.Source 2>&1 | Out-Null
        if ($LASTEXITCODE -eq 0) {
            $HasNvidiaSmi = $true
            $NvidiaSmiExe = $pathCmd.Source
        }
    }
} catch {}
if (-not $HasNvidiaSmi) {
    # PATH lookup failed — try the hardcoded install locations used by the
    # NVIDIA driver package (same list as setup.ps1).
    $fallbackPaths = @(
        "$env:ProgramFiles\NVIDIA Corporation\NVSMI\nvidia-smi.exe",
        "$env:SystemRoot\System32\nvidia-smi.exe"
    )
    foreach ($candidate in $fallbackPaths) {
        if (-not (Test-Path $candidate)) { continue }
        try {
            & $candidate 2>&1 | Out-Null
            if ($LASTEXITCODE -eq 0) {
                $HasNvidiaSmi = $true
                $NvidiaSmiExe = $candidate
                break
            }
        } catch {}
    }
}
if ($HasNvidiaSmi) {
    Write-Host "[OK] NVIDIA GPU detected" -ForegroundColor Green
} else {
    Write-Host "[WARN] No NVIDIA GPU detected. Studio will run in chat-only (GGUF) mode." -ForegroundColor Yellow
    Write-Host "   Training and GPU inference require an NVIDIA GPU with drivers installed." -ForegroundColor Yellow
    Write-Host "   https://www.nvidia.com/Download/index.aspx" -ForegroundColor Yellow
}
||||
|
||||
# ── Choose the correct PyTorch index URL based on driver CUDA version ──
# Mirrors Get-PytorchCudaTag in setup.ps1.
function Get-TorchIndexUrl {
    # Returns the PyTorch wheel index URL matching the driver's reported
    # CUDA version (read from nvidia-smi output), or the CPU index when no
    # working nvidia-smi was found. Falls back to cu126 with a warning when
    # nvidia-smi runs but its output cannot be parsed.
    $baseUrl = "https://download.pytorch.org/whl"

    # No usable nvidia-smi (set by the detection block above) → CPU wheels.
    if (-not $NvidiaSmiExe) { return "$baseUrl/cpu" }

    try {
        $smiText = & $NvidiaSmiExe 2>&1 | Out-String
        if ($smiText -match 'CUDA Version:\s+(\d+)\.(\d+)') {
            $major = [int]$Matches[1]
            $minor = [int]$Matches[2]
            # Map driver CUDA version → newest wheel tag the driver supports.
            # 12.0–12.5 stays on cu124 (cu126 wheels need a 12.6 driver);
            # 11.x gets cu118; 10.x and older has no supported GPU wheels.
            $tag =
                if ($major -ge 13)                        { 'cu130' }
                elseif ($major -eq 12 -and $minor -ge 8)  { 'cu128' }
                elseif ($major -eq 12 -and $minor -ge 6)  { 'cu126' }
                elseif ($major -ge 12)                    { 'cu124' }
                elseif ($major -ge 11)                    { 'cu118' }
                else                                      { 'cpu' }
            return "$baseUrl/$tag"
        }
    } catch {}

    # nvidia-smi exists but produced unparseable output.
    Write-Host "[WARN] Could not determine CUDA version from nvidia-smi, defaulting to cu126" -ForegroundColor Yellow
    return "$baseUrl/cu126"
}
$TorchIndexUrl = Get-TorchIndexUrl
||||
|
||||
# ── Install PyTorch first, then unsloth separately ──
|
||||
#
|
||||
# Why two steps?
|
||||
# `uv pip install unsloth --torch-backend=cpu` on Windows resolves to
|
||||
# unsloth==2024.8 (a pre-CLI release with no unsloth.exe) because the
|
||||
# cpu-only solver cannot satisfy newer unsloth's dependencies.
|
||||
# Installing torch first from the explicit CUDA index, then upgrading
|
||||
# unsloth in a second step, avoids this solver dead-end.
|
||||
#
|
||||
# Why --upgrade-package instead of --upgrade?
|
||||
# `--upgrade unsloth` re-resolves ALL dependencies including torch,
|
||||
# pulling torch from default PyPI and stripping the +cuXXX suffix
|
||||
# that step 1 installed (e.g. torch 2.5.1+cu124 -> 2.10.0 with no
|
||||
# CUDA suffix). `--upgrade-package unsloth` upgrades ONLY unsloth
|
||||
# to the latest version while preserving the already-pinned torch
|
||||
# CUDA wheels. Missing dependencies (transformers, trl, peft, etc.)
|
||||
# are still pulled in because they are new, not upgrades.
|
||||
#
|
||||
Write-Host "==> Installing PyTorch ($TorchIndexUrl)..."
|
||||
uv pip install --python $VenvPython torch torchvision torchaudio --index-url $TorchIndexUrl
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
Write-Host "[ERROR] Failed to install PyTorch (exit code $LASTEXITCODE)" -ForegroundColor Red
|
||||
return
|
||||
}
|
||||
|
||||
Write-Host "==> Installing unsloth (this may take a few minutes)..."
|
||||
uv pip install --python $VenvPython unsloth --torch-backend=auto
|
||||
uv pip install --python $VenvPython --upgrade-package unsloth unsloth
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
Write-Host "[ERROR] Failed to install unsloth (exit code $LASTEXITCODE)" -ForegroundColor Red
|
||||
return
|
||||
|
|
|
|||
|
|
@@ -176,7 +176,7 @@ function Get-PytorchCudaTag {
|
|||
$cmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
|
||||
if ($cmd) { $cmd.Source } else { $null }
|
||||
}
|
||||
if (-not $smiExe) { return "cu124" }
|
||||
if (-not $smiExe) { return "cu126" }
|
||||
|
||||
try {
|
||||
# 2>&1 | Out-String merges stderr into stdout then converts to a single
|
||||
|
|
@@ -190,11 +190,13 @@ function Get-PytorchCudaTag {
|
|||
if ($major -ge 13) { return "cu130" }
|
||||
if ($major -eq 12 -and $minor -ge 8) { return "cu128" }
|
||||
if ($major -eq 12 -and $minor -ge 6) { return "cu126" }
|
||||
return "cu124"
|
||||
if ($major -ge 12) { return "cu124" }
|
||||
if ($major -ge 11) { return "cu118" }
|
||||
return "cpu"
|
||||
}
|
||||
} catch { }
|
||||
|
||||
return "cu124"
|
||||
return "cu126"
|
||||
}
|
||||
|
||||
# Find Visual Studio Build Tools for cmake -G flag.
|
||||
|
|
@@ -1111,6 +1113,19 @@ Write-Host "[OK] TORCHINDUCTOR_CACHE_DIR set to $TorchCacheDir (avoids MAX_PATH
|
|||
|
||||
if ($HasNvidiaSmi) {
|
||||
$CuTag = Get-PytorchCudaTag
|
||||
} else {
|
||||
$CuTag = "cpu"
|
||||
}
|
||||
|
||||
if ($CuTag -eq "cpu") {
|
||||
Write-Host " Installing PyTorch (CPU-only)..." -ForegroundColor Cyan
|
||||
$output = Fast-Install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/cpu" | Out-String
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
Write-Host "[FAILED] PyTorch install failed (exit code $LASTEXITCODE)" -ForegroundColor Red
|
||||
Write-Host $output -ForegroundColor Red
|
||||
exit 1
|
||||
}
|
||||
} else {
|
||||
Write-Host " Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan
|
||||
Write-Host " (This download is ~2.8 GB -- may take a few minutes)" -ForegroundColor Gray
|
||||
$output = Fast-Install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" | Out-String
|
||||
|
|
@@ -1129,14 +1144,6 @@ if ($HasNvidiaSmi) {
|
|||
} else {
|
||||
Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
|
||||
}
|
||||
} else {
|
||||
Write-Host " Installing PyTorch (CPU-only)..." -ForegroundColor Cyan
|
||||
$output = Fast-Install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/cpu" | Out-String
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
Write-Host "[FAILED] PyTorch install failed (exit code $LASTEXITCODE)" -ForegroundColor Red
|
||||
Write-Host $output -ForegroundColor Red
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
|
||||
# Ordered heavy dependency installation -- shared cross-platform script
|
||||
|
|
|
|||
Loading…
Reference in a new issue