Fix more bugs (#232)

* Update gemma.py

* position_ids

* Update gemma.py

* Update gemma.py

* pos

* Update llama.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update cross_entropy_loss.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update llama.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update llama.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* revert

* revert

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update llama.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update cross_entropy_loss.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* rope

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* llama

* Update llama.py

* gemma

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update save.py

* RoPE

* Update llama.py

* Update llama.py

* Update llama.py

* Update gemma.py

* correct_dtype

* Update gemma.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Chat Templates

* Update README.md

* Update README.md

* Update llama.py

* DoRA

* Update _utils.py

* Update chat_templates.py

* Update llama.py

* Hotfix - fix DoRA, Gemma prompt template (#202) (#203)

* Update save.py

* saving

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update save.py

* Update __init__.py

* Update save.py

* Update save.py

* Update save.py

* save

* trainer

* spaces

* original

* Gemma

* Update pyproject.toml

* Update mapper.py

* Update fast_lora.py

* FastGemmaModel

* model_type

* Update llama.py

* Update llama.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update llama.py

* Update llama.py

* Update fast_lora.py

* Update llama.py

* Update llama.py

* Update cross_entropy_loss.py

* Update llama.py

* Update llama.py

* gemma

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update fast_lora.py

* Update fast_lora.py

* Fast CE Loss

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update cross_entropy_loss.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* CE

* Update llama.py

* Update llama.py

* Update cross_entropy_loss.py

* Update geglu.py

* Update cross_entropy_loss.py

* revert

* Update llama.py

* Update llama.py

* norm

* Update pyproject.toml

* Small fixes

* Update pyproject.toml

* Approx gelu

* Update geglu.py

* Approx gelu

* Update llama.py

* Update __init__.py

* Update __init__.py

* Update _utils.py

* Update geglu.py

* Update gemma.py

* Update rms_layernorm.py

* Update rms_layernorm.py

* Update rms_layernorm.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Update gemma.py

* Fix Gemma merging

* Update rms_layernorm.py

* Update gemma.py

* Update pyproject.toml

* Layernorms

* Gemma precision

* Update gemma.py

* sqrt

* Update gemma.py

* Update save.py

* RoPE and Gemma precision

* Update rms_layernorm.py

* Fix warning

* Update chat_templates.py

* Update chat_templates.py

* Update save.py

* Update save.py

* Update save.py

* Update chat_templates.py

* Update llama.py

* model_name

* Update loader.py

* Tokenizer overwritten

* Update llama.py

* Update llama.py

* Update llama.py

* Update save.py

* Accuracy

* Revert

* Update save.py

* Update fast_lora.py

* Update fast_lora.py

* Update fast_lora.py

* Update fast_lora.py

* Update fast_lora.py

* Update chat_templates.py

* Update save.py

* Update save.py

* Update llama.py

* Update llama.py

* Account for DoRA

* Update llama.py
Author: Daniel Han, 2024-03-11 04:31:03 +11:00 (committed by GitHub)
Parent: 8bea94c137
Commit: 32223779c4
4 changed files with 114 additions and 25 deletions

--- a/unsloth/chat_templates.py
+++ b/unsloth/chat_templates.py

@@ -253,13 +253,15 @@ def get_chat_template(
     mapping = {"role" : "role", "content" : "content", "user" : "user", "assistant" : "assistant"},
     map_eos_token = True,
 ):
+    old_tokenizer = tokenizer
+
     if map_eos_token is False:
         assert("Unsloth: Can only map new tokens to EOS for now. Adding new tokens is not yet supported.")
     pass

-    # if tokenizer.__class__.__name__.startswith("Gemma") and chat_template == "chatml":
-    #     chat_template = "gemma_chatml"
-    # pass
+    if tokenizer.__class__.__name__.startswith("Gemma") and chat_template == "chatml":
+        chat_template = "gemma_chatml"
+    pass

     old_padding_side = tokenizer.padding_side
@@ -340,6 +342,17 @@ def get_chat_template(
     tokenizer.padding_side = old_padding_side
     tokenizer.chat_template = chat_template

+    # Also fix up other tokens
+    old_pad_token = getattr(old_tokenizer, "pad_token", None)
+    old_bos_token = getattr(old_tokenizer, "bos_token", None)
+    old_unk_token = getattr(old_tokenizer, "unk_token", None)
+    new_pad_token = getattr(tokenizer, "pad_token", None)
+    new_bos_token = getattr(tokenizer, "bos_token", None)
+    new_unk_token = getattr(tokenizer, "unk_token", None)
+    if old_pad_token != new_pad_token: tokenizer.pad_token = old_pad_token
+    if old_bos_token != new_bos_token: tokenizer.bos_token = old_bos_token
+    if old_unk_token != new_unk_token: tokenizer.unk_token = old_unk_token
+
     #stopping_criteria = create_stopping_criteria(tokenizer, stop_word)
     return tokenizer#, stopping_criteria
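The restored-token logic above guards against a template swap silently clobbering special tokens the user already had. As a standalone illustration, a minimal sketch (the helper name restore_special_tokens is hypothetical, assuming a Hugging Face-style tokenizer):

# Minimal sketch: preserve special tokens across a chat-template swap.
# `restore_special_tokens` is a hypothetical helper, not Unsloth API;
# it assumes Hugging Face-style tokenizer attributes.
def restore_special_tokens(old_tokenizer, tokenizer):
    for attr in ("pad_token", "bos_token", "unk_token"):
        old_token = getattr(old_tokenizer, attr, None)
        new_token = getattr(tokenizer, attr, None)
        # Only overwrite when the template swap actually changed the token.
        if old_token != new_token:
            setattr(tokenizer, attr, old_token)
    return tokenizer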

--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py

@@ -95,7 +95,7 @@ def prepare_model_for_kbit_training(
     # Freeze all parameters except LoRA
     for name, param in model.named_parameters():
-        if ".lora_A." in name or ".lora_B." in name:
+        if ".lora_A." in name or ".lora_B." in name or ".lora_magnitude_vector" in name:
             param.requires_grad_(True)
         else:
             param.requires_grad_(False)
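For reference, the freezing rule reads as a predicate over parameter names: DoRA stores its per-column scale as lora_magnitude_vector, and it must stay trainable alongside lora_A / lora_B. A minimal sketch, assuming a PEFT-wrapped torch.nn.Module:

# Sketch: freeze everything except LoRA A/B matrices and DoRA
# magnitude vectors. Assumes `model` is a PEFT-wrapped nn.Module.
def freeze_non_lora(model):
    trainable = (".lora_A.", ".lora_B.", ".lora_magnitude_vector")
    for name, param in model.named_parameters():
        param.requires_grad_(any(key in name for key in trainable))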

--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py

@@ -511,26 +511,36 @@ def LlamaModel_fast_forward(
     # Normalized from Gemma
     IS_GEMMA = self.config.model_type == "gemma"
+    train_embed_tokens = self.embed_tokens.weight.requires_grad
+
     if IS_GEMMA:
-        inputs_requires_grad = inputs_embeds.requires_grad
-        if not inputs_embeds.is_leaf:
-            inputs_embeds = inputs_embeds.detach()
-            inputs_requires_grad = True
-        elif inputs_requires_grad:
-            inputs_embeds.requires_grad_(False)
-        pass
         # Match Gemma exactly by casting to bfloat16 / float16
-        # inputs_embeds *= math_sqrt(self.config.hidden_size)
         # Ie 3072**0.5 = 55.5000 in bfloat16, whilst 55.4256 in float32
         # &  2048**0.5 = 45.2500 in bfloat16, whilst 45.2548 in float32
-        inputs_embeds *= torch.tensor(math_sqrt(self.config.hidden_size), dtype = inputs_embeds.dtype)
-        # inputs_embeds *= math_sqrt(self.config.hidden_size)
-        if inputs_requires_grad: inputs_embeds.requires_grad_(True)
+        normalizer = torch.tensor(math_sqrt(self.config.hidden_size), dtype = inputs_embeds.dtype)
+
+        if train_embed_tokens:
+            # Careful we must not do an inplace op!
+            inputs_embeds = inputs_embeds * normalizer
+        else:
+            inputs_requires_grad = inputs_embeds.requires_grad
+            if not inputs_embeds.is_leaf:
+                inputs_embeds = inputs_embeds.detach()
+                inputs_requires_grad = True
+            elif inputs_requires_grad:
+                inputs_embeds.requires_grad_(False)
+            pass
+            inputs_embeds *= normalizer
+            # inputs_embeds *= math_sqrt(self.config.hidden_size)
+            if inputs_requires_grad: inputs_embeds.requires_grad_(True)
+        pass
     pass

     # Fix up attention mask by setting elements to 0
     # Specifically for DPO
-    if self._has_no_labels and (attention_mask is not None) and (past_key_values is None):
+    if self._has_no_labels and (attention_mask is not None) and (past_key_values is None) and \
+        (not train_embed_tokens):
         # Careful for inference the attention_mask is size (1, kv_seq_len)
         # Whilst the input_embeds is size (1, 1, 4096)
         inputs_requires_grad = inputs_embeds.requires_grad
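The explicit dtype on the normalizer is the point of this hunk: sqrt(hidden_size) rounds differently in bfloat16 than in float32, and Gemma computes the scale in the low-precision dtype, so Unsloth must match it. The numbers quoted in the comments can be reproduced directly; a small check, assuming PyTorch:

import math
import torch

# Gemma scales embeddings by sqrt(hidden_size). The constant rounds
# differently per dtype, so the cast changes the model's numerics.
for hidden_size in (3072, 2048):
    fp32 = torch.tensor(math.sqrt(hidden_size), dtype = torch.float32)
    bf16 = torch.tensor(math.sqrt(hidden_size), dtype = torch.bfloat16)
    print(hidden_size, fp32.item(), bf16.item())
# 3072 -> ~55.4256 in float32, 55.5000 in bfloat16
# 2048 -> ~45.2548 in float32, 45.2500 in bfloat16

The non-inplace multiply in the train_embed_tokens branch is equally deliberate: an in-place *= on a tensor that autograd is tracking for the embedding gradient would raise an error or corrupt the backward pass.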
@@ -1226,6 +1236,7 @@ class FastLlamaModel:
         random_state = 3407,
         max_seq_length = 2048, # not used anymore
         use_rslora = False,
+        modules_to_save = None,
         init_lora_weights = True,
         loftq_config = {},
         **kwargs,
@@ -1312,15 +1323,45 @@ class FastLlamaModel:
         accepted_modules = frozenset(("q_proj", "k_proj", "v_proj", "o_proj",
                                       "gate_proj", "up_proj", "down_proj",),)
         model.config.update({"unsloth_version" : __version__})
+
+        train_lm_head = False
+        train_embed_tokens = False
+        final_modules = []
         for module in target_modules:
-            assert(module in accepted_modules)
+            if module == "lm_head":
+                logger.warning_once(
+                    "Unsloth: `lm_head` should be placed in `modules_to_save` and not `target_modules`."\
+                    "We shall do it for you!"
+                )
+                train_lm_head = True
+            elif module == "embed_tokens":
+                logger.warning_once(
+                    "Unsloth: `embed_tokens` should be placed in `modules_to_save` and not `target_modules`."\
+                    "We shall do it for you!"
+                )
+                train_embed_tokens = True
+            else:
+                assert(module in accepted_modules)
+                final_modules.append(module)
         pass
+
+        # Check modules_to_save
+        if modules_to_save is not None:
+            for module in modules_to_save:
+                if module == "lm_head":
+                    train_lm_head = True
+                elif module == "embed_tokens":
+                    train_embed_tokens = True
+            pass
+        pass
+
         # Get LoRA
         arguments = dict(
             r = r,
             lora_alpha = lora_alpha,
-            target_modules = target_modules,
+            target_modules = final_modules,
             lora_dropout = lora_dropout,
             bias = bias,
             task_type = TaskType.CAUSAL_LM,
@@ -1328,6 +1369,7 @@ class FastLlamaModel:
             init_lora_weights = init_lora_weights,
             loftq_config = loftq_config,
             use_rslora = use_rslora,
+            modules_to_save = modules_to_save,
             **kwargs,
         )
         if not SUPPORTS_LOFTQ: del arguments["loftq_config"]
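The net effect of the two hunks above: lm_head and embed_tokens are routed to PEFT's modules_to_save (trained and saved in full) while only the projection layers receive LoRA adapters. A minimal sketch of the equivalent plain-peft call; the hyperparameters are illustrative and the add_lora wrapper is hypothetical:

from peft import LoraConfig, TaskType, get_peft_model

# Sketch: keep lm_head / embed_tokens in modules_to_save (fully trained,
# fully saved) and give LoRA adapters only to the projection layers.
# Hyperparameters here are illustrative, not Unsloth defaults.
def add_lora(model):
    lora_config = LoraConfig(
        r               = 16,
        lora_alpha      = 16,
        target_modules  = ["q_proj", "k_proj", "v_proj", "o_proj",
                           "gate_proj", "up_proj", "down_proj"],
        modules_to_save = ["lm_head", "embed_tokens"],
        task_type       = TaskType.CAUSAL_LM,
    )
    return get_peft_model(model, lora_config)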
@@ -1337,6 +1379,14 @@ class FastLlamaModel:
         model = _get_peft_model(model, lora_config)
         model = FastLlamaModel.patch_peft_model(model, use_gradient_checkpointing)
+
+        # Now patch lm_head and embed_tokens
+        if train_embed_tokens:
+            model.model.model.embed_tokens.requires_grad_(True)
+        if train_lm_head:
+            model.model.lm_head.requires_grad_(True)
+        pass
+
         return model
     pass
@@ -1427,9 +1477,12 @@ class FastLlamaModel:
             if hasattr(gate_proj, "lora_A") and \
                hasattr(  up_proj, "lora_A") and \
                hasattr(down_proj, "lora_A") and \
-               (gate_proj.base_layer if hasattr(gate_proj, "base_layer") else gate_proj).bias is None and \
-               (  up_proj.base_layer if hasattr(  up_proj, "base_layer") else   up_proj).bias is None and \
-               (down_proj.base_layer if hasattr(down_proj, "base_layer") else down_proj).bias is None:
+               ((gate_proj.base_layer if hasattr(gate_proj, "base_layer") else gate_proj).bias is None) and \
+               ((  up_proj.base_layer if hasattr(  up_proj, "base_layer") else   up_proj).bias is None) and \
+               ((down_proj.base_layer if hasattr(down_proj, "base_layer") else down_proj).bias is None) and \
+               ((gate_proj.lora_magnitude_vector if hasattr(gate_proj, "lora_magnitude_vector") else None) is None) and \
+               ((  up_proj.lora_magnitude_vector if hasattr(  up_proj, "lora_magnitude_vector") else None) is None) and \
+               ((down_proj.lora_magnitude_vector if hasattr(down_proj, "lora_magnitude_vector") else None) is None):

                 # https://stackoverflow.com/questions/50599045/python-replacing-a-function-within-a-class-of-a-module
                 layer.mlp.forward = types.MethodType(apply_lora_mlp, layer.mlp)
@@ -1448,9 +1501,12 @@ class FastLlamaModel:
             if hasattr(q_proj, "lora_A") and \
                hasattr(k_proj, "lora_A") and \
                hasattr(v_proj, "lora_A") and \
-               (q_proj.base_layer if hasattr(q_proj, "base_layer") else q_proj).bias is None and \
-               (k_proj.base_layer if hasattr(k_proj, "base_layer") else k_proj).bias is None and \
-               (v_proj.base_layer if hasattr(v_proj, "base_layer") else v_proj).bias is None:
+               ((q_proj.base_layer if hasattr(q_proj, "base_layer") else q_proj).bias is None) and \
+               ((k_proj.base_layer if hasattr(k_proj, "base_layer") else k_proj).bias is None) and \
+               ((v_proj.base_layer if hasattr(v_proj, "base_layer") else v_proj).bias is None) and \
+               ((q_proj.lora_magnitude_vector if hasattr(q_proj, "lora_magnitude_vector") else None) is None) and \
+               ((k_proj.lora_magnitude_vector if hasattr(k_proj, "lora_magnitude_vector") else None) is None) and \
+               ((v_proj.lora_magnitude_vector if hasattr(v_proj, "lora_magnitude_vector") else None) is None):

                 layer.self_attn.apply_qkv = apply_lora_qkv
                 n_qkv += 1
@@ -1464,7 +1520,8 @@ class FastLlamaModel:
             # O attention patching
             o_proj = layer.self_attn.o_proj
             if hasattr(o_proj, "lora_A") and \
-               (o_proj.base_layer if hasattr(o_proj, "base_layer") else o_proj).bias is None:
+               ((o_proj.base_layer if hasattr(o_proj, "base_layer") else o_proj).bias is None) and \
+               ((o_proj.lora_magnitude_vector if hasattr(o_proj, "lora_magnitude_vector") else None) is None):

                 layer.self_attn.apply_o = apply_lora_o
                 n_o += 1
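All three patching sites repeat the same eligibility test per projection: LoRA weights present, no bias on the base layer, and no DoRA magnitude vector (the fast kernels do not handle DoRA). Factored out as a sketch; can_fast_patch is a hypothetical helper, not Unsloth API:

# Sketch: a module qualifies for the fast LoRA path only if it carries
# LoRA weights, has no bias, and has no DoRA magnitude vector.
def can_fast_patch(*projections):
    for proj in projections:
        if not hasattr(proj, "lora_A"):
            return False
        base = proj.base_layer if hasattr(proj, "base_layer") else proj
        if base.bias is not None:
            return False
        if getattr(proj, "lora_magnitude_vector", None) is not None:
            return False
    return True

# Usage would mirror the loops above, e.g.:
# if can_fast_patch(q_proj, k_proj, v_proj):
#     layer.self_attn.apply_qkv = apply_lora_qkv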

--- a/unsloth/save.py
+++ b/unsloth/save.py

@@ -203,7 +203,11 @@ def unsloth_save_model(
             print("Unsloth: Merging 4bit and LoRA weights to 4bit...")
             print("This might take 5 minutes...")
-            model = model.merge_and_unload()
+
+            # Counteract no LoRA adapters!
+            if hasattr(model, "merge_and_unload"):
+                model = model.merge_and_unload()
+            pass
             print("Done.")
         pass
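The hasattr guard exists because a model loaded without any LoRA adapters is not PEFT-wrapped and therefore has no merge_and_unload method. The defensive pattern in isolation (a sketch; merge_if_peft is a hypothetical name):

# Sketch: only PEFT-wrapped models expose merge_and_unload; a plain
# model with no LoRA adapters attached is returned unchanged.
def merge_if_peft(model):
    if hasattr(model, "merge_and_unload"):
        model = model.merge_and_unload()
    return model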
@@ -573,6 +577,21 @@ def install_llama_cpp_old(version = -10):
    latest = releases[-1]
    version = releases[version].split(" ")[0]

+   # Check if the llama.cpp exists
+   if os.path.exists("llama.cpp"):
+       print(
+           "**[WARNING]** You have a llama.cpp old directory which is broken.\n"\
+           "Unsloth will DELETE the broken directory and install a new one.\n"\
+           "Press CTRL + C / cancel this if this is wrong. We shall wait 10 seconds.\n"
+       )
+       import time
+       for i in range(10):
+           print(f"**[WARNING]** Deleting llama.cpp directory... {10-i} seconds left.")
+           time.sleep(1)
+       import shutil
+       shutil.rmtree("llama.cpp")
+   pass
+
    # Clone a specific commit
    commands = [
        "git clone https://github.com/ggerganov/llama.cpp",