mirror of https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
Bug fixes (#249)
* Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update llama.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update cross_entropy_loss.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * rope * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * llama * Update llama.py * gemma * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update save.py * RoPE * Update llama.py * Update llama.py * Update llama.py * Update gemma.py * correct_dtype * Update gemma.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Chat Templates * Update README.md * Update README.md * Update llama.py * DoRA * Update _utils.py * Update chat_templates.py * Update llama.py * Hotfix - fix DoRA, Gemma prompt template (#202) (#203) * Update save.py * saving * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update save.py * Update __init__.py * Update save.py * Update save.py * Update save.py * save * trainer * spaces * original * Gemma * Update pyproject.toml * Update mapper.py * Update fast_lora.py * FastGemmaModel * model_type * Update llama.py * Update llama.py * Update gemma.py * Update gemma.py * Update gemma.py * Update llama.py * Update llama.py * Update fast_lora.py * Update llama.py * Update llama.py * Update cross_entropy_loss.py * Update llama.py * Update llama.py * gemma * Update llama.py * Update llama.py * Update llama.py * Update llama.py * Update fast_lora.py * Update fast_lora.py * Fast CE 
Loss * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * CE * Update llama.py * Update llama.py * Update cross_entropy_loss.py * Update geglu.py * Update cross_entropy_loss.py * revert * Update llama.py * Update llama.py * norm * Update gemma.py * Update gemma.py * position_ids * Update gemma.py * Update gemma.py * pos * Update llama.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update cross_entropy_loss.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update llama.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update llama.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * revert * revert * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update llama.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update cross_entropy_loss.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * rope * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * llama * Update llama.py * gemma * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update 
gemma.py * Update gemma.py * Update save.py * RoPE * Update llama.py * Update llama.py * Update llama.py * Update gemma.py * correct_dtype * Update gemma.py * Update cross_entropy_loss.py * Update cross_entropy_loss.py * Chat Templates * Update README.md * Update README.md * Update llama.py * DoRA * Update _utils.py * Update chat_templates.py * Update pyproject.toml * Small fixes * Update pyproject.toml * Approx gelu * Update geglu.py * Approx gelu * Update llama.py * Update __init__.py * Update __init__.py * Update _utils.py * Update geglu.py * Update gemma.py * Update rms_layernorm.py * Update rms_layernorm.py * Update rms_layernorm.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Update gemma.py * Fix Gemma merging * Update rms_layernorm.py * Update gemma.py * Update pyproject.toml * Layernorms * Gemma precision * Update gemma.py * sqrt * Update gemma.py * Update save.py * RoPE and Gemma precision * Update rms_layernorm.py * Fix warning * Update chat_templates.py * Update chat_templates.py * Update save.py * Update save.py * Update save.py * Update chat_templates.py * Update llama.py * model_name * Update loader.py * Tokenizer overwritten * Update llama.py * Update llama.py * Update llama.py * Update save.py * Accuracy * Revert * Update save.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update chat_templates.py * Update save.py * Update save.py * Update llama.py * Update llama.py * Account for DoRA * Update llama.py * Update save.py * GGUF incorrect * Update save.py * Update pyproject.toml * kaggle new * Update pyproject.toml * Update pyproject.toml * upcasting * Fix Colab * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update chat_templates.py * Update chat_templates.py * Update chat_templates.py * Update chat_templates.py * Update chat_templates.py * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update rope_embedding.py * Update rope_embedding.py * Fix bugs * Update fast_lora.py * Update fast_lora.py * Update README.md * Update README.md * GGUF * Update save.py * Update save.py * Update save.py * Update save.py * Update README.md * Update README.md
This commit is contained in:
parent 39713e66ed
commit c599ae0f27
5 changed files with 116 additions and 37 deletions
24  README.md
@@ -91,13 +91,11 @@ Select either `pytorch-cuda=11.8` for CUDA 11.8 or `pytorch-cuda=12.1` for CUDA
conda create --name unsloth_env python=3.10
conda activate unsloth_env

conda install pytorch cudatoolkit torchvision torchaudio pytorch-cuda=<12.1/11.8> -c pytorch -c nvidia
conda install pytorch-cuda=<12.1/11.8> pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers

conda install xformers -c xformers
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

pip install bitsandbytes

pip install "unsloth[conda] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps trl peft accelerate bitsandbytes
```

### Pip Installation
@@ -144,6 +142,22 @@ pip install "unsloth[cu121-ampere-torch220] @ git+https://github.com/unslothai/u
```bash
pip install --upgrade pip
```
6. For Pytorch 2.2.1:
```bash
# RTX 3090, 4090 Ampere GPUs:
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes

# Pre Ampere RTX 2080, T4, GTX 1080 GPUs:
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps xformers trl peft accelerate bitsandbytes
```
7. To troubleshoot installs try the below (all must succeed). Xformers should mostly all be available.
```bash
nvcc
python -m xformers.info
python -m bitsandbytes
```

## 📜 Documentation
- Go to our [Wiki page](https://github.com/unslothai/unsloth/wiki) for saving to GGUF, checkpointing, evaluation and more!
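The two pip variants above differ only in whether `flash-attn` is installed, which needs an Ampere-class (compute capability 8.0+) GPU. A minimal sketch, not part of this commit, for checking which case applies once `torch` is installed:

```python
# Check the CUDA compute capability to pick between the Ampere and
# pre-Ampere install commands above (illustrative helper, not unsloth code).
import torch

if torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("Ampere or newer (RTX 30/40 series, A100): flash-attn line applies")
    else:
        print("Pre-Ampere (RTX 20 series, T4, GTX 10 series): xformers-only line applies")
else:
    print("No CUDA device visible")
```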
@@ -18,9 +18,9 @@ import importlib
# Currently only supports 1 GPU, or else seg faults will occur.
if "CUDA_VISIBLE_DEVICES" in os.environ:
devices = os.environ["CUDA_VISIBLE_DEVICES"]
# check if there are multiple cuda devices set in env
# Check if there are multiple cuda devices set in env
if not devices.isdigit():
first_id = devices.split(',')[0]
first_id = devices.split(",")[0]
warnings.warn(
f"Unsloth: 'CUDA_VISIBLE_DEVICES' is currently {devices} \n"\
"Multiple CUDA devices detected but we require a single device.\n"\
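A user-side sketch (illustrative, not part of the diff) of what satisfying this check looks like, since the environment variable must be set before CUDA is initialised:

```python
# Pin a single GPU before importing torch/unsloth so the single-device
# check above passes without warnings (illustrative only).
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"   # one device id, e.g. the first GPU

import torch
print(torch.cuda.device_count())           # expected output: 1
```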
@@ -33,20 +33,29 @@ else:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
pass

# Reduce VRAM usage by reducing fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

try:
import torch
except:
raise ImportError("Pytorch is not installed. Go to https://pytorch.org/.\n"\
"We have some installation instructions on our Github page.")

# We support torch 2.1 and 2.1.1
# We support Pytorch 2
# Fixes https://github.com/unslothai/unsloth/issues/38
torch_version = torch.__version__.split(".")
major_torch, minor_torch = torch_version[0], torch_version[1]
major_torch, minor_torch = int(major_torch), int(minor_torch)
if (major_torch != 2):# or (major_torch == 2 and minor_torch < 1):
raise ImportError("Unsloth only supports Pytorch 2.1 for now. Please update your Pytorch to 2.1.\n"\
if (major_torch < 2):
raise ImportError("Unsloth only supports Pytorch 2 for now. Please update your Pytorch to 2.1.\n"\
"We have some installation instructions on our Github page.")
elif (major_torch == 2) and (minor_torch < 2):
# Disable expandable_segments
del os.environ["PYTORCH_CUDA_ALLOC_CONF"]
# Must reimport Pytorch!
importlib.reload(torch)
pass

# Try loading bitsandbytes and triton
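The effect of the new gate is that `expandable_segments:True` is only kept when the installed PyTorch is 2.2 or newer; on older 2.x versions the variable is removed again and `torch` is reloaded. A standalone sketch of the same version check (variable names are local to this example):

```python
# Sketch of the version gate above: keep the allocator option only on PyTorch >= 2.2.
import os
import torch

major, minor = (int(part) for part in torch.__version__.split(".")[:2])
if major < 2:
    raise ImportError("PyTorch 2.x is required")
elif (major, minor) < (2, 2):
    # Older 2.x: drop the option rather than risk an unsupported allocator config.
    os.environ.pop("PYTORCH_CUDA_ALLOC_CONF", None)
else:
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
```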
@@ -17,6 +17,7 @@ import triton.language as tl
import torch
from .utils import calculate_settings

ROPE_GROUP_SIZE = 4

@triton.heuristics({"BACKWARD_PASS": lambda args: args["BACKWARD_PASS"],})
@triton.jit
@@ -24,9 +25,11 @@ def _rope_embedding(
Q, Q_row_stride,
cos, cos_row_stride,
sin, sin_row_stride,
seqlen, head_dim, group_size, n_heads,
BACKWARD_PASS: tl.constexpr,
BLOCK_SIZE : tl.constexpr,
seqlen,
head_dim : tl.constexpr,
n_heads : tl.constexpr,
BACKWARD_PASS : tl.constexpr,
BLOCK_SIZE : tl.constexpr,
):
"""
Calculates the RoPE Embedding quickly
@@ -49,16 +52,18 @@ def _rope_embedding(
sin1 = -sin1
pass

head_start = group_head_position * group_size
head_end = tl.math.min((head_start + group_size), n_heads)
# [TODO] Autotune ROPE_GROUP_SIZE to be 1, 2, 4, 8
head_start = group_head_position * ROPE_GROUP_SIZE
head_end = min((head_start + ROPE_GROUP_SIZE), n_heads)

for i in range(head_start, head_end):
offs_q1 = row_position * Q_row_stride + i * head_dim + col_offsets
offs_q2 = row_position * Q_row_stride + i * head_dim + col_offsets + half_head_dim
# 10% Faster kernel from [HuyNguyen-hust](https://github.com/unslothai/unsloth/pull/238)
for k in range(head_start, head_end):
offs_q1 = row_position * Q_row_stride + k * head_dim + col_offsets
offs_q2 = row_position * Q_row_stride + k * head_dim + col_offsets + half_head_dim

# For Gemma - sometimes RoPE must be done in float32 and not bfloat16
Q1 = tl.load(Q + offs_q1, mask = mask, other = 0).to(sin1.dtype)
Q2 = tl.load(Q + offs_q2, mask = mask, other = 0).to(sin1.dtype)
Q1 = tl.load(Q + offs_q1, mask = mask, other = 0).to(sin1.dtype)
Q2 = tl.load(Q + offs_q2, mask = mask, other = 0).to(sin1.dtype)

tl.store(Q + offs_q1, Q1*cos1 - Q2*sin1, mask = mask)
tl.store(Q + offs_q2, Q2*cos1 + Q1*sin1, mask = mask)
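The two `tl.store` lines implement the usual rotary update on the two halves of each head, `q1' = q1*cos - q2*sin` and `q2' = q2*cos + q1*sin`. A plain PyTorch reference of the same transform (shapes and names are illustrative, not the kernel's interface):

```python
# Reference for the rotation the Triton kernel applies in place on Q.
import torch

def rope_reference(q, cos, sin):
    # q: (seq_len, n_heads, head_dim); cos/sin: (seq_len, head_dim // 2)
    half = q.shape[-1] // 2
    q1, q2 = q[..., :half], q[..., half:]
    cos, sin = cos[:, None, :], sin[:, None, :]   # broadcast over heads
    return torch.cat([q1 * cos - q2 * sin, q2 * cos + q1 * sin], dim = -1)
```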
@@ -78,21 +83,24 @@ class Fast_RoPE_Embedding(torch.autograd.Function):
# [TODO] Changing blocksize to head_dim//2 seems to have
# some concurrency / un-deterministic issues.
BLOCK_SIZE, num_warps = calculate_settings(head_dim//2) # (head_dim//2)
group_size = 4 # 4 or 8, too large group_size can hurt performance.
n_groups = triton.cdiv(n_heads, group_size)

# group_size = 4 # 4 or 8, too large group_size can hurt performance.
div, mod = divmod(n_heads, ROPE_GROUP_SIZE)
n_groups = div + (mod != 0)

grid = (n_rows, n_groups, )
_rope_embedding[grid](
_rope_embedding[(n_rows, n_groups, )](
Q, Q.stride(0),
cos, cos.stride(0),
sin, sin.stride(0),
seq_len, head_dim, group_size, n_heads,
seq_len,
head_dim, n_heads,
BACKWARD_PASS = False,
BLOCK_SIZE = BLOCK_SIZE,
num_warps = num_warps,
)
ctx.BLOCK_SIZE = BLOCK_SIZE
ctx.num_warps = num_warps
ctx.n_groups = n_groups
ctx.cos = cos
ctx.sin = sin
return Q.view(batch, seq_len, n_heads, head_dim)
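`div + (mod != 0)` is a ceiling division, so the launch grid still covers every head when `n_heads` is not a multiple of `ROPE_GROUP_SIZE`; it matches the `triton.cdiv` call it replaces. A quick sketch:

```python
# Ceiling division used for the kernel grid (equivalent to triton.cdiv(n_heads, ROPE_GROUP_SIZE)).
ROPE_GROUP_SIZE = 4

def n_rope_groups(n_heads):
    div, mod = divmod(n_heads, ROPE_GROUP_SIZE)
    return div + (mod != 0)

assert n_rope_groups(32) == 8   # exact multiple of the group size
assert n_rope_groups(14) == 4   # three full groups plus one partial group
```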
@@ -108,15 +116,11 @@ class Fast_RoPE_Embedding(torch.autograd.Function):
cos = ctx.cos
sin = ctx.sin

group_size = 4 # 4 or 8, too large group_size can hurt performance.
n_groups = triton.cdiv(n_heads, group_size)

grid = (n_rows, n_groups, )
_rope_embedding[grid](
_rope_embedding[(n_rows, ctx.n_groups, )](
dY, dY .stride(0),
cos, cos.stride(0),
sin, sin.stride(0),
seq_len, head_dim, group_size, n_heads,
seq_len, head_dim, n_heads,
BACKWARD_PASS = True,
BLOCK_SIZE = ctx.BLOCK_SIZE,
num_warps = ctx.num_warps,
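The backward pass reuses the cached `ctx.n_groups` and launches the same kernel with `BACKWARD_PASS = True`, which negates `sin` (the `sin1 = -sin1` branch above); rotating by the negative angle is the inverse of the forward rotation. A small PyTorch sketch of that gradient (names are illustrative):

```python
# Gradient of the rotation: the forward formula with sin replaced by -sin.
import torch

def rope_backward_reference(dy, cos, sin):
    # dy: (seq_len, n_heads, head_dim); cos/sin: (seq_len, head_dim // 2)
    half = dy.shape[-1] // 2
    d1, d2 = dy[..., :half], dy[..., half:]
    cos, sin = cos[:, None, :], sin[:, None, :]
    return torch.cat([d1 * cos + d2 * sin, d2 * cos - d1 * sin], dim = -1)
```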
@@ -18,6 +18,8 @@ import warnings
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "torch")
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "huggingface_hub")
warnings.filterwarnings(action = "ignore", category = RuntimeWarning, module = "subprocess")
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "transformers")
warnings.filterwarnings(action = "ignore", category = FutureWarning, module = "accelerate")
import bitsandbytes as bnb
from transformers.models.llama.modeling_llama import logger
from transformers import AutoTokenizer
@@ -593,16 +593,17 @@ def install_llama_cpp_old(version = -10):
pass

# Clone a specific commit
# Also don't use the GPU!
commands = [
"git clone https://github.com/ggerganov/llama.cpp",
f"cd llama.cpp && git reset --hard {version} && git clean -df && "\
f"make clean && LLAMA_CUBLAS=1 make all -j{psutil.cpu_count()*2}",
f"make clean make all -j{psutil.cpu_count()*2}",
"pip install gguf protobuf",
]
for command in commands:
with subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, bufsize = 1) as sp:
for line in sp.stdout:
print(line.decode("utf-8"), flush = True, end = "")
print(line.decode("utf-8", errors = "replace"), flush = True, end = "")
pass
pass
# Check if successful
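The functional change in this loop is `errors = "replace"`: llama.cpp's build output is not guaranteed to be valid UTF-8, and a strict decode would abort the whole install on a single bad byte. A small illustration (the log line is made up):

```python
# Why errors="replace" matters when streaming build logs byte-by-byte.
raw = b"compiling ggml.c \xfe done\n"          # hypothetical line with an invalid UTF-8 byte

print(raw.decode("utf-8", errors = "replace"), end = "")   # prints, with U+FFFD for \xfe
try:
    raw.decode("utf-8")
except UnicodeDecodeError as exc:
    print(f"strict decode fails: {exc}")
```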
@@ -625,12 +626,55 @@ def install_llama_cpp_blocking():
for command in commands:
with subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, bufsize = 1) as sp:
for line in sp.stdout:
print(line.decode("utf-8"), flush = True, end = "")
print(line.decode("utf-8", errors = "replace"), flush = True, end = "")
pass
pass
pass

def _fix_gemma_gguf():
# Fixes Gemma saving to GGUF to float32 instead of float16!
with open("llama.cpp/convert-hf-to-gguf.py", "rb") as file:
text = file.read()
pass

gemma_start = text.find(b"class GemmaModel(Model):")
if gemma_start == -1: return

gemma_end = text.find(b"self.gguf_writer.add_tensor(new_name, data)", gemma_start)
if gemma_end == -1: return

gemma_text = text[gemma_start : gemma_end]
bad_text = \
b""" data = data.astype(np.float32)

# if f16 desired, convert any float32 2-dim weight tensors to float16
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16)"""
good_text = \
b""" # if f32 desired, convert any float16 to float32
if self.ftype == 0 and data_dtype == np.float16:
data = data.astype(np.float32)

# TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
data = data.astype(np.float32)

# if f16 desired, convert any float32 2-dim weight tensors to float16
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16)"""
find_bad = gemma_text.find(bad_text)
if find_bad == -1: return

gemma_text = gemma_text[:find_bad] + good_text + gemma_text[find_bad + len(bad_text):]
text = text[:gemma_start] + gemma_text + text[gemma_end:]

with open("llama.cpp/convert-hf-to-gguf.py", "w+b") as file:
file.write(text)
pass
pass


def save_to_gguf(
model_type : str,
model_directory : str = "unsloth_finetuned_model",
@@ -686,7 +730,10 @@ def save_to_gguf(
install_llama_cpp_blocking()
pass
# Check if successful. If not install 10th latest release
if error != 0 or not os.path.exists("llama.cpp/quantize"): install_llama_cpp_old(-10)
if error != 0 or not os.path.exists("llama.cpp/quantize"):
print(f"Unsloth: llama.cpp error code = {error}.")
install_llama_cpp_old(-10)
pass

if quantization_method == "f32": first_conversion = "f32"
elif quantization_method == "f16": first_conversion = "f16"
@@ -723,6 +770,9 @@ def save_to_gguf(
f"--outfile {final_location} --vocab-type hfft "\
f"--outtype {first_conversion} --concurrency {n_cpus}"
else:
# Need to fix convert-hf-to-gguf.py for some models!
_fix_gemma_gguf()

command = f"python llama.cpp/convert-hf-to-gguf.py {model_directory} "\
f"--outfile {final_location} "\
f"--outtype {first_conversion}"
@@ -730,7 +780,7 @@ def save_to_gguf(

with subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, stderr = subprocess.PIPE, bufsize = 1) as sp:
for line in sp.stdout:
print(line.decode("utf-8"), flush = True, end = "")
print(line.decode("utf-8", errors = "replace"), flush = True, end = "")
if sp.returncode is not None and sp.returncode != 0:
raise subprocess.CalledProcessError(sp.returncode, sp.args)
pass
@@ -760,7 +810,7 @@ def save_to_gguf(
# quantize uses stderr
with subprocess.Popen(command, shell = True, stderr = subprocess.PIPE, bufsize = 1) as sp:
for line in sp.stderr:
print(line.decode("utf-8"), flush = True, end = "")
print(line.decode("utf-8", errors = "replace"), flush = True, end = "")
if sp.returncode is not None and sp.returncode != 0:
raise subprocess.CalledProcessError(sp.returncode, sp.args)
pass