mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 21:47:51 +00:00
* fix: prevent UnicodeEncodeError on Windows CP1252 consoles in studio setup On Windows, `unsloth studio setup` crashes with a UnicodeEncodeError when install_python_stack.py tries to print Unicode status glyphs (✅, ❌, ⚠️) to a console that uses a legacy code page like CP1252. Add a _safe_print() helper that catches UnicodeEncodeError and gracefully degrades emoji to ASCII equivalents ([OK], [FAIL], [!]). Replace all print() calls that emit Unicode glyphs with _safe_print(). Fixes #4509 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Replace Unicode dashes with ASCII in install_python_stack.py Box-drawing (U+2500) and em dash (U+2014) chars in section dividers and comments are themselves not representable on CP1252 -- replace with plain ASCII dashes for consistency with the fix. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Daniel Han <danielhanchen@gmail.com>
464 lines
15 KiB
Python
464 lines
15 KiB
Python
#!/usr/bin/env python3
|
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
|
|
|
|
"""Cross-platform Python dependency installer for Unsloth Studio.
|
|
|
|
Called by both setup.sh (Linux / WSL) and setup.ps1 (Windows) after the
|
|
virtual environment is already activated. Expects `pip` and `python` on
|
|
PATH to point at the venv.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import urllib.request
|
|
from pathlib import Path
|
|
|
|
IS_WINDOWS = sys.platform == "win32"
|
|
|
|
# -- Verbosity control ----------------------------------------------------------
|
|
# By default the installer shows a minimal progress bar (one line, in-place).
|
|
# Set UNSLOTH_VERBOSE=1 in the environment to restore full per-step output:
|
|
# Linux/Mac: UNSLOTH_VERBOSE=1 ./studio/setup.sh
|
|
# Windows: $env:UNSLOTH_VERBOSE="1" ; .\studio\setup.ps1
|
|
VERBOSE: bool = os.environ.get("UNSLOTH_VERBOSE", "0") == "1"
|
|
|
|
# Progress bar state -- updated by _progress() as each install step runs.
|
|
# _TOTAL counts: pip-upgrade + 7 shared steps + triton (non-Windows) + local-plugin + finalize
|
|
# Update _TOTAL here if you add or remove install steps in install_python_stack().
|
|
_STEP: int = 0
|
|
_TOTAL: int = 0 # set at runtime in install_python_stack() based on platform
|
|
|
|
# -- Paths --------------------------------------------------------------
|
|
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
REQ_ROOT = SCRIPT_DIR / "backend" / "requirements"
|
|
SINGLE_ENV = REQ_ROOT / "single-env"
|
|
CONSTRAINTS = SINGLE_ENV / "constraints.txt"
|
|
LOCAL_DD_UNSTRUCTURED_PLUGIN = (
|
|
SCRIPT_DIR / "backend" / "plugins" / "data-designer-unstructured-seed"
|
|
)
|
|
|
|
# -- Unicode-safe printing ---------------------------------------------
|
|
# On Windows the default console encoding can be a legacy code page
|
|
# (e.g. CP1252) that cannot represent Unicode glyphs such as ✅ or ❌.
|
|
# _safe_print() gracefully degrades to ASCII equivalents so the
|
|
# installer never crashes just because of a status glyph.
|
|
|
|
_UNICODE_TO_ASCII: dict[str, str] = {
|
|
"\u2705": "[OK]", # ✅
|
|
"\u274c": "[FAIL]", # ❌
|
|
"\u26a0\ufe0f": "[!]", # ⚠️ (warning + variation selector)
|
|
"\u26a0": "[!]", # ⚠ (warning without variation selector)
|
|
}
|
|
|
|
|
|
def _safe_print(*args: object, **kwargs: object) -> None:
|
|
"""Drop-in print() replacement that survives non-UTF-8 consoles."""
|
|
try:
|
|
print(*args, **kwargs)
|
|
except UnicodeEncodeError:
|
|
# Stringify, then swap emoji for ASCII equivalents
|
|
text = " ".join(str(a) for a in args)
|
|
for uni, ascii_alt in _UNICODE_TO_ASCII.items():
|
|
text = text.replace(uni, ascii_alt)
|
|
# Final fallback: replace any remaining unencodable chars
|
|
print(
|
|
text.encode(sys.stdout.encoding or "ascii", errors = "replace").decode(
|
|
sys.stdout.encoding or "ascii", errors = "replace"
|
|
),
|
|
**kwargs,
|
|
)
|
|
|
|
|
|
# -- Color support ------------------------------------------------------
|
|
|
|
|
|
def _enable_colors() -> bool:
|
|
"""Try to enable ANSI color support. Returns True if available."""
|
|
if not hasattr(sys.stdout, "fileno"):
|
|
return False
|
|
try:
|
|
if not os.isatty(sys.stdout.fileno()):
|
|
return False
|
|
except Exception:
|
|
return False
|
|
if IS_WINDOWS:
|
|
try:
|
|
import ctypes
|
|
|
|
kernel32 = ctypes.windll.kernel32
|
|
# Enable ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x0004) on stdout
|
|
handle = kernel32.GetStdHandle(-11) # STD_OUTPUT_HANDLE
|
|
mode = ctypes.c_ulong()
|
|
kernel32.GetConsoleMode(handle, ctypes.byref(mode))
|
|
kernel32.SetConsoleMode(handle, mode.value | 0x0004)
|
|
return True
|
|
except Exception:
|
|
return False
|
|
return True # Unix terminals support ANSI by default
|
|
|
|
|
|
# Colors disabled -- Colab and most CI runners render ANSI fine, but plain output
|
|
# is cleaner in the notebook cell. Re-enable by setting _HAS_COLOR = _enable_colors()
|
|
_HAS_COLOR = False
|
|
|
|
|
|
def _green(msg: str) -> str:
|
|
return f"\033[92m{msg}\033[0m" if _HAS_COLOR else msg
|
|
|
|
|
|
def _cyan(msg: str) -> str:
|
|
return f"\033[96m{msg}\033[0m" if _HAS_COLOR else msg
|
|
|
|
|
|
def _red(msg: str) -> str:
|
|
return f"\033[91m{msg}\033[0m" if _HAS_COLOR else msg
|
|
|
|
|
|
def _progress(label: str) -> None:
|
|
"""Print an in-place progress bar for the current install step.
|
|
|
|
Uses only stdlib (sys.stdout) -- no extra packages required.
|
|
In VERBOSE mode this is a no-op; per-step labels are printed by run() instead.
|
|
"""
|
|
global _STEP
|
|
_STEP += 1
|
|
if VERBOSE:
|
|
return # verbose mode: run() already printed the label
|
|
width = 20
|
|
filled = int(width * _STEP / _TOTAL)
|
|
bar = "=" * filled + "-" * (width - filled)
|
|
end = "\n" if _STEP >= _TOTAL else "" # newline only on the final step
|
|
sys.stdout.write(f"\r[{bar}] {_STEP:2}/{_TOTAL} {label:<40}{end}")
|
|
sys.stdout.flush()
|
|
|
|
|
|
def run(
|
|
label: str, cmd: list[str], *, quiet: bool = True
|
|
) -> subprocess.CompletedProcess[bytes]:
|
|
"""Run a command; on failure print output and exit."""
|
|
if VERBOSE:
|
|
print(f" {label}...")
|
|
result = subprocess.run(
|
|
cmd,
|
|
stdout = subprocess.PIPE if quiet else None,
|
|
stderr = subprocess.STDOUT if quiet else None,
|
|
)
|
|
if result.returncode != 0:
|
|
_safe_print(_red(f"❌ {label} failed (exit code {result.returncode}):"))
|
|
if result.stdout:
|
|
print(result.stdout.decode(errors = "replace"))
|
|
sys.exit(result.returncode)
|
|
return result
|
|
|
|
|
|
# Packages to skip on Windows (require special build steps)
|
|
WINDOWS_SKIP_PACKAGES = {"open_spiel", "triton_kernels"}
|
|
|
|
# -- uv bootstrap ------------------------------------------------------
|
|
|
|
USE_UV = False # Set by _bootstrap_uv() at the start of install_python_stack()
|
|
UV_NEEDS_SYSTEM = False # Set by _bootstrap_uv() via probe
|
|
|
|
|
|
def _bootstrap_uv() -> bool:
|
|
"""Check if uv is available and probe whether --system is needed."""
|
|
global UV_NEEDS_SYSTEM
|
|
if not shutil.which("uv"):
|
|
return False
|
|
# Probe: try a dry-run install targeting the current Python explicitly.
|
|
# Without --python, uv can ignore the activated venv on some platforms.
|
|
probe = subprocess.run(
|
|
["uv", "pip", "install", "--dry-run", "--python", sys.executable, "pip"],
|
|
stdout = subprocess.PIPE,
|
|
stderr = subprocess.STDOUT,
|
|
)
|
|
if probe.returncode != 0:
|
|
# Retry with --system (some envs need it when uv can't find a venv)
|
|
probe_sys = subprocess.run(
|
|
["uv", "pip", "install", "--dry-run", "--system", "pip"],
|
|
stdout = subprocess.PIPE,
|
|
stderr = subprocess.STDOUT,
|
|
)
|
|
if probe_sys.returncode != 0:
|
|
return False # uv is broken, fall back to pip
|
|
UV_NEEDS_SYSTEM = True
|
|
return True
|
|
|
|
|
|
def _filter_requirements(req: Path, skip: set[str]) -> Path:
|
|
"""Return a temp copy of a requirements file with certain packages removed."""
|
|
lines = req.read_text(encoding = "utf-8").splitlines(keepends = True)
|
|
filtered = [
|
|
line
|
|
for line in lines
|
|
if not any(line.strip().lower().startswith(pkg) for pkg in skip)
|
|
]
|
|
tmp = tempfile.NamedTemporaryFile(
|
|
mode = "w",
|
|
suffix = ".txt",
|
|
delete = False,
|
|
encoding = "utf-8",
|
|
)
|
|
tmp.writelines(filtered)
|
|
tmp.close()
|
|
return Path(tmp.name)
|
|
|
|
|
|
def _translate_pip_args_for_uv(args: tuple[str, ...]) -> list[str]:
|
|
"""Translate pip flags to their uv equivalents."""
|
|
translated: list[str] = []
|
|
for arg in args:
|
|
if arg == "--no-cache-dir":
|
|
continue # uv cache is fast; drop this flag
|
|
elif arg == "--force-reinstall":
|
|
translated.append("--reinstall")
|
|
else:
|
|
translated.append(arg)
|
|
return translated
|
|
|
|
|
|
def _build_pip_cmd(args: tuple[str, ...]) -> list[str]:
|
|
"""Build a standard pip install command."""
|
|
cmd = [sys.executable, "-m", "pip", "install"]
|
|
cmd.extend(args)
|
|
return cmd
|
|
|
|
|
|
def _build_uv_cmd(args: tuple[str, ...]) -> list[str]:
|
|
"""Build a uv pip install command with translated flags."""
|
|
cmd = ["uv", "pip", "install"]
|
|
if UV_NEEDS_SYSTEM:
|
|
cmd.append("--system")
|
|
# Always pass --python so uv targets the correct environment.
|
|
# Without this, uv can ignore an activated venv and install into
|
|
# the system Python (observed on Colab and similar environments).
|
|
cmd.extend(["--python", sys.executable])
|
|
cmd.extend(_translate_pip_args_for_uv(args))
|
|
cmd.append("--torch-backend=auto")
|
|
return cmd
|
|
|
|
|
|
def pip_install(
|
|
label: str,
|
|
*args: str,
|
|
req: Path | None = None,
|
|
constrain: bool = True,
|
|
) -> None:
|
|
"""Build and run a pip install command (uses uv when available, falls back to pip)."""
|
|
constraint_args: list[str] = []
|
|
if constrain and CONSTRAINTS.is_file():
|
|
constraint_args = ["-c", str(CONSTRAINTS)]
|
|
|
|
actual_req = req
|
|
if req is not None and IS_WINDOWS and WINDOWS_SKIP_PACKAGES:
|
|
actual_req = _filter_requirements(req, WINDOWS_SKIP_PACKAGES)
|
|
req_args: list[str] = []
|
|
if actual_req is not None:
|
|
req_args = ["-r", str(actual_req)]
|
|
|
|
try:
|
|
if USE_UV:
|
|
uv_cmd = _build_uv_cmd(args) + constraint_args + req_args
|
|
if VERBOSE:
|
|
print(f" {label}...")
|
|
result = subprocess.run(
|
|
uv_cmd,
|
|
stdout = subprocess.PIPE,
|
|
stderr = subprocess.STDOUT,
|
|
)
|
|
if result.returncode == 0:
|
|
return
|
|
print(_red(f" uv failed, falling back to pip..."))
|
|
if result.stdout:
|
|
print(result.stdout.decode(errors = "replace"))
|
|
|
|
pip_cmd = _build_pip_cmd(args) + constraint_args + req_args
|
|
run(f"{label} (pip)" if USE_UV else label, pip_cmd)
|
|
finally:
|
|
if actual_req is not None and actual_req != req:
|
|
actual_req.unlink(missing_ok = True)
|
|
|
|
|
|
def download_file(url: str, dest: Path) -> None:
|
|
"""Download a file using urllib (no curl dependency)."""
|
|
urllib.request.urlretrieve(url, dest)
|
|
|
|
|
|
def patch_package_file(package_name: str, relative_path: str, url: str) -> None:
|
|
"""Download a file from url and overwrite a file inside an installed package."""
|
|
result = subprocess.run(
|
|
[sys.executable, "-m", "pip", "show", package_name],
|
|
capture_output = True,
|
|
text = True,
|
|
)
|
|
if result.returncode != 0:
|
|
_safe_print(
|
|
_red(f" ⚠️ Could not find package {package_name}, skipping patch")
|
|
)
|
|
return
|
|
|
|
location = None
|
|
for line in result.stdout.splitlines():
|
|
if line.lower().startswith("location:"):
|
|
location = line.split(":", 1)[1].strip()
|
|
break
|
|
|
|
if not location:
|
|
_safe_print(_red(f" ⚠️ Could not determine location of {package_name}"))
|
|
return
|
|
|
|
dest = Path(location) / relative_path
|
|
print(_cyan(f" Patching {dest.name} in {package_name}..."))
|
|
download_file(url, dest)
|
|
|
|
|
|
# -- Main install sequence ---------------------------------------------
|
|
|
|
|
|
def install_python_stack() -> int:
|
|
global USE_UV, _STEP, _TOTAL
|
|
_STEP = 0
|
|
_TOTAL = 10 if IS_WINDOWS else 11
|
|
|
|
# 1. Upgrade pip (needed even with uv as fallback and for bootstrapping)
|
|
_progress("pip upgrade")
|
|
run("Upgrading pip", [sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
|
|
|
|
# Try to use uv for faster installs
|
|
USE_UV = _bootstrap_uv()
|
|
|
|
# 2. Core packages: unsloth-zoo + unsloth
|
|
_progress("base packages")
|
|
pip_install(
|
|
"Installing base packages",
|
|
"--no-cache-dir",
|
|
req = REQ_ROOT / "base.txt",
|
|
)
|
|
|
|
# 3. Extra dependencies
|
|
_progress("unsloth extras")
|
|
pip_install(
|
|
"Installing additional unsloth dependencies",
|
|
"--no-cache-dir",
|
|
req = REQ_ROOT / "extras.txt",
|
|
)
|
|
|
|
# 3b. Extra dependencies (no-deps) -- audio model support etc.
|
|
_progress("extra codecs")
|
|
pip_install(
|
|
"Installing extras (no-deps)",
|
|
"--no-deps",
|
|
"--no-cache-dir",
|
|
req = REQ_ROOT / "extras-no-deps.txt",
|
|
)
|
|
|
|
# 4. Overrides (torchao, transformers) -- force-reinstall
|
|
_progress("dependency overrides")
|
|
pip_install(
|
|
"Installing dependency overrides",
|
|
"--force-reinstall",
|
|
"--no-cache-dir",
|
|
req = REQ_ROOT / "overrides.txt",
|
|
)
|
|
|
|
# 5. Triton kernels (no-deps, from source)
|
|
if not IS_WINDOWS:
|
|
_progress("triton kernels")
|
|
pip_install(
|
|
"Installing triton kernels",
|
|
"--no-deps",
|
|
"--no-cache-dir",
|
|
req = REQ_ROOT / "triton-kernels.txt",
|
|
constrain = False,
|
|
)
|
|
|
|
# # 6. Patch: override llama_cpp.py with fix from unsloth-zoo feature/llama-cpp-windows-support branch
|
|
# patch_package_file(
|
|
# "unsloth-zoo",
|
|
# os.path.join("unsloth_zoo", "llama_cpp.py"),
|
|
# "https://raw.githubusercontent.com/unslothai/unsloth-zoo/refs/heads/main/unsloth_zoo/llama_cpp.py",
|
|
# )
|
|
|
|
# # 7a. Patch: override vision.py with fix from unsloth PR #4091
|
|
# patch_package_file(
|
|
# "unsloth",
|
|
# os.path.join("unsloth", "models", "vision.py"),
|
|
# "https://raw.githubusercontent.com/unslothai/unsloth/80e0108a684c882965a02a8ed851e3473c1145ab/unsloth/models/vision.py",
|
|
# )
|
|
|
|
# # 7b. Patch : override save.py with fix from feature/llama-cpp-windows-support
|
|
# patch_package_file(
|
|
# "unsloth",
|
|
# os.path.join("unsloth", "save.py"),
|
|
# "https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/unsloth/save.py",
|
|
# )
|
|
|
|
# 8. Studio dependencies
|
|
_progress("studio deps")
|
|
pip_install(
|
|
"Installing studio dependencies",
|
|
"--no-cache-dir",
|
|
req = REQ_ROOT / "studio.txt",
|
|
)
|
|
|
|
# 9. Data-designer dependencies
|
|
_progress("data designer deps")
|
|
pip_install(
|
|
"Installing data-designer base dependencies",
|
|
"--no-cache-dir",
|
|
req = SINGLE_ENV / "data-designer-deps.txt",
|
|
)
|
|
|
|
# 10. Data-designer packages (no-deps to avoid conflicts)
|
|
_progress("data designer")
|
|
pip_install(
|
|
"Installing data-designer",
|
|
"--no-cache-dir",
|
|
"--no-deps",
|
|
req = SINGLE_ENV / "data-designer.txt",
|
|
)
|
|
|
|
# 11. Local Data Designer seed plugin
|
|
if not LOCAL_DD_UNSTRUCTURED_PLUGIN.is_dir():
|
|
_safe_print(
|
|
_red(
|
|
f"❌ Missing local plugin directory: {LOCAL_DD_UNSTRUCTURED_PLUGIN}",
|
|
),
|
|
)
|
|
return 1
|
|
_progress("local plugin")
|
|
pip_install(
|
|
"Installing local data-designer unstructured plugin",
|
|
"--no-cache-dir",
|
|
"--no-deps",
|
|
str(LOCAL_DD_UNSTRUCTURED_PLUGIN),
|
|
constrain = False,
|
|
)
|
|
|
|
# 12. Patch metadata for single-env compatibility
|
|
_progress("finalizing")
|
|
run(
|
|
"Patching single-env metadata",
|
|
[sys.executable, str(SINGLE_ENV / "patch_metadata.py")],
|
|
)
|
|
|
|
# 13. Final check (silent; third-party conflicts are expected)
|
|
subprocess.run(
|
|
[sys.executable, "-m", "pip", "check"],
|
|
stdout = subprocess.DEVNULL,
|
|
stderr = subprocess.DEVNULL,
|
|
)
|
|
|
|
_safe_print(_green("✅ Python dependencies installed"))
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(install_python_stack())
|