mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
More bug fixes (#133)
* faster saving & inference * Update llama.py * Update save.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * Update mistral.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * fast inference * Update llama.py * Update save.py * Update llama.py * Mistral correct RoPE scaling * Max sequence lengths * Apache 2 * fast_linear_forward * Update utils.py * Update utils.py * No print * Update utils.py * Update utils.py * inference * Update llama.py * Fast inference RoPE * Update llama.py * Update llama.py * RoPE * Update llama.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * Update llama.py * LoRA * Fast LoRA saving * Update llama.py * hidden_states * q_len == 1 * q_len issue * Update mistral.py * Update mistral.py * incorrect inference * Update to transformers 4.37 * Graceful FA2 error + torch 2.1.1 * Update mapper.py * Update pyproject.toml * Fix saving and bnb-4bit * Update fast_lora.py * Update fast_lora.py * remove patching * Update llama.py * Update llama.py * Update swiglu.py * Repatch * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update llama.py * Update fast_lora.py * Update llama.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update swiglu.py * Update fast_lora.py * Update swiglu.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update fast_lora.py * Update save.py * Update fast_lora.py * Update utils.py * Update llama.py * Update fast_lora.py * Update swiglu.py * Update save.py * Update save.py * Update llama.py * Update llama.py * Update llama.py
This commit is contained in:
parent 62fae3aa74
commit 7da0c50f75

3 changed files with 21 additions and 10 deletions
@@ -179,7 +179,10 @@ pass
 def fast_linear_forward(proj, X, temp_lora = None, out = None):
     W, W_quant, lora_A, lora_B, lora_S = get_lora_parameters(proj)
-    out = fast_gemv(X, W, W_quant, out = out)
+    if W_quant is None:
+        out = torch.matmul(X, W.t())
+    else:
+        out = fast_gemv(X, W, W_quant, out = out)
     if lora_A is not None:

         # Save LoRAs for inference to stop data movement costs
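This hunk changes fast_linear_forward so the fused 4-bit GEMV kernel is only called when the projection actually carries quantization state; unquantized weights now take a plain dense matmul instead of being pushed down the quantized path. A minimal sketch of that dispatch with hypothetical names (linear_forward_sketch and the shapes below are illustrative, not the unsloth API):

import torch

def linear_forward_sketch(X, W, W_quant = None, fast_gemv = None):
    # Hypothetical stand-in for the patched dispatch: unquantized weights use
    # a plain dense matmul; the fused 4-bit GEMV path is taken only when
    # quantization state is present.
    if W_quant is None:
        return torch.matmul(X, W.t())
    return fast_gemv(X, W, W_quant)

# Single-token decode shapes (batch = 1, q_len = 1) with an unquantized weight:
X = torch.randn(1, 1, 4096)
W = torch.randn(11008, 4096)
out = linear_forward_sketch(X, W)   # takes the torch.matmul branch
print(out.shape)                    # torch.Size([1, 1, 11008])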
@@ -489,7 +489,7 @@ def LlamaModel_fast_forward(
     # Ignore attention_mask
     if attention_mask is None:
         padding_mask = None
-    elif True:#self.training:
+    elif self.training:
         attention_mask = None
         padding_mask = None
     else:
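This change looks like the removal of a leftover debugging override: with elif True:#self.training: the attention_mask was discarded at inference time as well, so padded batches could attend over pad tokens. Restoring elif self.training: keeps a user-supplied mask during generation. A simplified sketch of the resulting branching (an assumption for illustration; the real forward builds its padding mask from the inputs rather than passing it through directly):

def resolve_attention_mask(attention_mask, training):
    # Simplified sketch of the restored branch, not the actual forward pass.
    padding_mask = None
    if attention_mask is None:
        pass                               # nothing to mask
    elif training:
        attention_mask = None              # training path handles padding itself
    else:
        padding_mask = attention_mask      # keep the user mask for inference
    return attention_mask, padding_mask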
@@ -258,11 +258,19 @@ def unsloth_save_model(
         "private" : save_pretrained_settings["private"],
         "token"   : save_pretrained_settings["token"],
     }

+    # Check if PEFT Model or not - if yes, 3 levels. If not 2 levels.
+    from peft import PeftModelForCausalLM
+    if isinstance(model, PeftModelForCausalLM):
+        internal_model = model.model
+    else:
+        internal_model = model
+    pass
+
     # Cannot be converted properly!
     if (save_method == "merged_4bit") or (save_method == "lora") or (
         not hasattr(model, "model") or \
-        not hasattr(model.model, "model") or \
-        not hasattr(model.model.model, "layers")
+        not hasattr(internal_model.model, "layers")
     ):
         # Do general saving
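The save.py hunk adds an explicit check for whether the model is still wrapped by PEFT: a PeftModelForCausalLM adds one extra nesting level, so the underlying transformer sits at model.model.model, while a plain causal LM only needs model.model. Every later access then goes through internal_model, which is what the remaining hunks switch to. A small sketch of the unwrapping, assuming peft is installed:

from peft import PeftModelForCausalLM

def get_internal_model(model):
    # PEFT wraps the causal LM in one extra layer; strip it if present.
    if isinstance(model, PeftModelForCausalLM):
        return model.model    # PeftModelForCausalLM -> causal LM
    return model              # already the causal LM

# In both cases the decoder layers then live at:
#     get_internal_model(model).model.layers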
@@ -343,12 +351,12 @@ def unsloth_save_model(
     # HF also uses a OrderedDict
     from collections import OrderedDict
     state_dict = OrderedDict()
-    state_dict["model.embed_tokens.weight"] = model.model.model.embed_tokens.weight.data
+    state_dict["model.embed_tokens.weight"] = internal_model.model.embed_tokens.weight.data

     max_vram = int(torch.cuda.get_device_properties(0).total_memory * maximum_memory_usage)

     from tqdm import tqdm as ProgressBar
-    for j, layer in enumerate(ProgressBar(model.model.model.layers)):
+    for j, layer in enumerate(ProgressBar(internal_model.model.layers)):
         for item in LLAMA_WEIGHTS:
             proj = eval(f"layer.{item}")
             name = f"model.layers.{j}.{item}.weight"
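With internal_model in place, the weight-gathering loop no longer hard-codes the three-level model.model.model path, so merged saving works whether or not LoRA adapters still wrap the model. A hedged sketch of the loop's overall shape, assuming a Llama-style module layout as in the hunk (LLAMA_WEIGHTS and the per-projection merging are simplified to a plain parameter walk here):

import torch
from collections import OrderedDict
from tqdm import tqdm as ProgressBar

def collect_state_dict(internal_model, maximum_memory_usage = 0.75):
    # Simplified sketch: gather weights through internal_model and compute the
    # VRAM budget used while merging, mirroring the hunk above.
    state_dict = OrderedDict()
    state_dict["model.embed_tokens.weight"] = internal_model.model.embed_tokens.weight.data

    max_vram = 0
    if torch.cuda.is_available():
        max_vram = int(torch.cuda.get_device_properties(0).total_memory * maximum_memory_usage)

    for j, layer in enumerate(ProgressBar(internal_model.model.layers)):
        for name, param in layer.named_parameters():
            state_dict[f"model.layers.{j}.{name}"] = param.data
    return state_dict, max_vram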
@@ -375,8 +383,8 @@ def unsloth_save_model(
         pass
     pass

-    state_dict["model.norm.weight"] = model.model.model.norm.weight.data
-    state_dict["lm_head.weight"] = model.model.lm_head.weight.data
+    state_dict["model.norm.weight"] = internal_model.model.norm.weight.data
+    state_dict["lm_head.weight"] = internal_model.lm_head.weight.data

     # All tensors MUST be type torch.Tensor and not torch.nn.parameter.Parameter
     for key, value in state_dict.items():
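The same internal_model indirection fixes the final norm and lm_head entries; note that lm_head sits one level up from the decoder (internal_model.lm_head, not internal_model.model.lm_head). The comment about torch.Tensor versus torch.nn.parameter.Parameter is what the loop that follows enforces: downstream saving expects plain tensors, so any remaining Parameter is swapped for its underlying .data. A minimal sketch of that cleanup:

import torch

def strip_parameters(state_dict):
    # Replace any remaining nn.Parameter values with their plain torch.Tensor
    # data, as the hunk's comment requires.
    for key, value in state_dict.items():
        if isinstance(value, torch.nn.Parameter):
            state_dict[key] = value.data
    return state_dict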
@@ -418,7 +426,7 @@ def unsloth_save_model(
     model.config = new_config

     # Save!
-    model.model.save_pretrained(**save_pretrained_settings)
+    internal_model.save_pretrained(**save_pretrained_settings)

     # Revert config back
     original_model = model