* Update _utils.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [FIX] [Transformers] VLM input embeds fix for gradients (#3715)

* Fix get_input_embeds call for VLMs

* patch input_require_grads instead

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* cleanup old patch

* cleanup old patch

* cleanup

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Apply suggestion from @danielhanchen

* use logger instead of prints

* Move unsloth present set

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Daniel Han <danielhanchen@gmail.com>

* Update rope_embedding.py

* Fixes

* Update _utils.py

* Update import_fixes.py

* Update rl_replacements.py

* fix_openenv_no_vllm

* Fix

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update import_fixes.py

* Update import_fixes.py

* Update import_fixes.py

* logger

* Update __init__.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update __init__.py

* Update import_fixes.py

* Update __init__.py

* Update import_fixes.py

* Update import_fixes.py

* Update import_fixes.py

* Update import_fixes.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update import_fixes.py

* Update unsloth/import_fixes.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Update save.py

* [fbgemm] Silence tma fbgemm (#3735)

* Silence fbgemm TMA print

Also safer .push_to_hub

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Update loader.py

* Update save.py

* Update save.py

* Update _utils.py

* Update _utils.py

* Diffusers warnings

* Update pyproject.toml

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Datta Nimmaturi <venkatadattasainimmaturi@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Daniel Han 2025-12-17 03:31:48 -08:00 committed by GitHub
parent 23a7ac5d17
commit 1e7302cd77
6 changed files with 64 additions and 16 deletions

View file

@ -60,7 +60,7 @@ huggingfacenotorch = [
]
huggingface = [
"unsloth[huggingfacenotorch]",
"unsloth_zoo>=2025.12.4",
"unsloth_zoo>=2025.12.5",
"torchvision",
"unsloth[triton]",
]
@ -523,7 +523,7 @@ colab-ampere-torch220 = [
"flash-attn>=2.6.3 ; ('linux' in sys_platform)",
]
colab-new = [
"unsloth_zoo>=2025.12.4",
"unsloth_zoo>=2025.12.5",
"packaging",
"tyro",
"transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,!=4.57.0,<=4.57.3",

View file

@ -29,14 +29,17 @@ from .import_fixes import (
fix_message_factory_issue,
check_fbgemm_gpu_version,
torchvision_compatibility_check,
fix_diffusers_warnings,
)
fix_message_factory_issue()
check_fbgemm_gpu_version()
torchvision_compatibility_check()
fix_diffusers_warnings()
del fix_message_factory_issue
del check_fbgemm_gpu_version
del torchvision_compatibility_check
del fix_diffusers_warnings
# This check is critical because Unsloth optimizes these libraries by modifying
# their code at import time. If they're imported first, the original (slower,

View file

@ -71,6 +71,36 @@ class HideLoggingMessage(logging.Filter):
return not (self.text in x.getMessage())
class HidePrintMessage:
    """File-like wrapper that drops writes containing registered substrings.

    Wraps an existing stream (e.g. ``sys.stderr``) so that any ``write``
    whose text contains one of the registered filter strings is silently
    discarded; everything else, including attribute access, is delegated
    to the wrapped stream.
    """

    __slots__ = ("_original_stream", "_hidden_texts")

    def __init__(self, original_stream):
        # Keep a handle on the real stream and start with no filters.
        self._original_stream = original_stream
        self._hidden_texts = []

    def add_filter(self, text):
        """Register *text*: future writes containing it are suppressed."""
        self._hidden_texts.append(text)

    def write(self, message):
        # Forward the message only when no registered substring occurs in it.
        for hidden in self._hidden_texts:
            if hidden in message:
                return
        self._original_stream.write(message)

    def flush(self):
        # Delegate so buffered output still reaches the real stream.
        self._original_stream.flush()

    def __getattr__(self, name):
        # Fall back to the wrapped stream for anything not defined here
        # (encoding, isatty, fileno, ...); only called for missing attributes.
        return getattr(self._original_stream, name)
# Unless the user explicitly opts into full Unsloth logging, wrap stderr so
# noisy third-party prints can be filtered out at the stream level.
if os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") != "1":
    import sys

    # Apply to stderr for FBGEMM
    sys.stderr = HidePrintMessage(sys.stderr)
    # FBGEMM prints this line when benchmarking TMA kernels; suppress it:
    # https://github.com/pytorch/FBGEMM/blob/d99cd96490ec4aabac2ee95b1e76ea4dcfcfa628/fbgemm_gpu/experimental/gemm/triton_gemm/utils.py#L43-L52
    sys.stderr.add_filter("TMA benchmarks will be running")
# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
# MUST do this at the start primarily due to tensorflow causing issues
def fix_message_factory_issue():
@ -506,3 +536,8 @@ def fix_executorch():
logger.info("Unsloth: Patching Executorch to fix get_mapped_key")
except Exception as e:
logger.info(f"Unsloth: Failed Executorch with error = {str(e)}")
def fix_diffusers_warnings():
    """Silence diffusers deprecation chatter via its verbosity env var.

    Raises the diffusers logging threshold to ``error`` so warning-level
    messages (e.g. "Flax classes are deprecated and will be removed in
    Diffusers v1.0.0.") are no longer emitted.
    """
    os.environ.update(DIFFUSERS_VERBOSITY = "error")

View file

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = "2025.12.5"
__version__ = "2025.12.6"
__all__ = [
"SUPPORTS_BFLOAT16",
@ -413,16 +413,6 @@ try:
except:
pass
# Flax classes are deprecated and will be removed in Diffusers v1.0.0.
try:
from diffusers.utils import logger as diffusers_logger
diffusers_logger.addFilter(HideLoggingMessage("are deprecated"))
del diffusers_logger
except:
pass
# Errors out on
# Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
from transformers.modeling_utils import logger as transformers_logger

View file

@ -739,6 +739,8 @@ class FastModel(FastBaseModel):
"compatible with `full_finetuning=True`. If you wish to use QAT with LoRA, "
"please pass in `qat_scheme` in `FastLanguageModel.get_peft_model(...)` instead."
)
if qat_scheme == "phone-deployment":
qat_scheme = "int8-int4"
# Check if 4bit is allowed specifically for AMD
if not ALLOW_BITSANDBYTES and not use_exact_model_name:
if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):

View file

@ -2745,6 +2745,17 @@ def _unsloth_save_torchao_with_attached_config(
"""Save a QAT-trained model by converting fake-quantized weights to real quantized weights."""
# Convert QAT fake-quantized weights to real quantized weights
_convert_torchao_model(model)
# PEFT models also might come here, so parse it
if isinstance(model, PeftModelForCausalLM):
_unsloth_save_torchao_with_given_config(
model = model,
save_directory = save_directory,
tokenizer = tokenizer,
torchao_config = model.config.quantization_config,
push_to_hub = push_to_hub,
token = token,
)
return
# TorchAO does not support safe_serialization reliably
safe_serialization = False
@ -2806,7 +2817,10 @@ def _unsloth_save_torchao_with_given_config(
)
from torchao import quantize_
quantization_config = TorchAoConfig(quant_type = torchao_config)
if isinstance(torchao_config, TorchAoConfig):
quantization_config = torchao_config
else:
quantization_config = TorchAoConfig(quant_type = torchao_config)
# Determine if this is a VLM
is_vlm = False
@ -2897,7 +2911,7 @@ def unsloth_save_pretrained_torchao(
)
if torchao_config is not None:
# PTQ path: user provided a config, model must NOT have QAT config
# PTQ path: user provided a config, model must NOT have QAT config unless PEFT
assert not has_qat_config, (
"Unsloth: You passed `torchao_config` but this model was trained with `qat_scheme`. "
"For QAT models, do not pass `torchao_config` - the quantization config is already "
@ -3010,7 +3024,11 @@ def patch_saving_functions(model, vision = False):
original_model = model
while True:
if original_model.push_to_hub.__name__ != "unsloth_push_to_hub":
# Check if push_to_hub exists before accessing its __name__
if (
hasattr(original_model, "push_to_hub")
and original_model.push_to_hub.__name__ != "unsloth_push_to_hub"
):
original_model.original_push_to_hub = original_model.push_to_hub
original_model.push_to_hub = types.MethodType(
unsloth_push_to_hub, original_model