attention mask

This commit is contained in:
Daniel Han-Chen 2024-01-28 04:18:19 +11:00
parent 6c7f0dbcb4
commit 166f8c812e
2 changed files with 2 additions and 2 deletions

View file

@@ -489,7 +489,7 @@ def LlamaModel_fast_forward(
# Ignore attention_mask
if attention_mask is None:
padding_mask = None
elif False:#self.training:
elif self.training:
attention_mask = None
padding_mask = None
else:

View file

@@ -90,7 +90,7 @@ def MistralAttention_fast_forward(
past_key_value = (K, V) if use_cache else None
# Attention module
if (attention_mask is None and not HAS_FLASH_ATTENTION):
if (not HAS_FLASH_ATTENTION):
# Xformers memory efficient attention
Q = Q.transpose(1, 2)
K = K.transpose(1, 2)