* Update _utils.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [FIX] [Transformers] VLM input embeds fix for gradients (#3715)

* Fix get_input_embeds call for VLMs

* patch input_require_grads instead

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* cleanup old patch

* cleanup old patch

* cleanup

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Apply suggestion from @danielhanchen

* use logger instead of prints

* Move unsloth present set

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Daniel Han <danielhanchen@gmail.com>

* Update rope_embedding.py

* Fixes

* Update _utils.py

* Update import_fixes.py

* Update rl_replacements.py

* fix_openenv_no_vllm

* Fix

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update import_fixes.py

* Update import_fixes.py

* Update import_fixes.py

* logger

* Update __init__.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update __init__.py

* Update import_fixes.py

* Update __init__.py

* Update import_fixes.py

* Update import_fixes.py

* Update import_fixes.py

* Update import_fixes.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update import_fixes.py

* Update unsloth/import_fixes.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Update save.py

* [fbgemm] Silence tma fbgemm (#3735)

* Silence fbgemm TMA print

Also safer .push_to_hub

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Update loader.py

* Update save.py

* Update save.py

* Update _utils.py

* Update _utils.py

* Diffusers warnings

* Update pyproject.toml

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Datta Nimmaturi <venkatadattasainimmaturi@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Daniel Han 2025-12-17 03:31:48 -08:00 committed by GitHub
parent 23a7ac5d17
commit 1e7302cd77
6 changed files with 64 additions and 16 deletions

View file

@ -60,7 +60,7 @@ huggingfacenotorch = [
]
huggingface = [
"unsloth[huggingfacenotorch]",
"unsloth_zoo>=2025.12.4",
"unsloth_zoo>=2025.12.5",
"torchvision",
"unsloth[triton]",
]
@ -523,7 +523,7 @@ colab-ampere-torch220 = [
"flash-attn>=2.6.3 ; ('linux' in sys_platform)",
]
colab-new = [
"unsloth_zoo>=2025.12.4",
"unsloth_zoo>=2025.12.5",
"packaging",
"tyro",
"transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,!=4.57.0,<=4.57.3",

View file

@ -29,14 +29,17 @@ from .import_fixes import (
fix_message_factory_issue,
check_fbgemm_gpu_version,
torchvision_compatibility_check,
fix_diffusers_warnings,
)
fix_message_factory_issue()
check_fbgemm_gpu_version()
torchvision_compatibility_check()
fix_diffusers_warnings()
del fix_message_factory_issue
del check_fbgemm_gpu_version
del torchvision_compatibility_check
del fix_diffusers_warnings
# This check is critical because Unsloth optimizes these libraries by modifying
# their code at import time. If they're imported first, the original (slower,

View file

@ -71,6 +71,36 @@ class HideLoggingMessage(logging.Filter):
return not (self.text in x.getMessage())
class HidePrintMessage:
    """File-like wrapper that drops writes containing registered substrings.

    Wraps an existing stream (e.g. ``sys.stderr``) so that any ``write``
    whose text contains one of the registered filter strings is silently
    discarded; everything else, including attribute access, is delegated
    to the wrapped stream.
    """

    __slots__ = ("_original_stream", "_hidden_texts")

    def __init__(self, original_stream):
        # Keep a handle on the real stream and start with no filters.
        self._original_stream = original_stream
        self._hidden_texts = []

    def add_filter(self, text):
        """Register *text*: future writes containing it are suppressed."""
        self._hidden_texts.append(text)

    def write(self, message):
        # Forward the message only when no registered substring occurs in it.
        for hidden in self._hidden_texts:
            if hidden in message:
                return
        self._original_stream.write(message)

    def flush(self):
        # Delegate so buffered output still reaches the real stream.
        self._original_stream.flush()

    def __getattr__(self, name):
        # Fall back to the wrapped stream for anything not defined here
        # (encoding, isatty, fileno, ...); only called for missing attributes.
        return getattr(self._original_stream, name)
# Unless the user explicitly opts into full Unsloth logging, wrap stderr so
# noisy third-party prints can be filtered out at the stream level.
if os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") != "1":
    import sys

    # Apply to stderr for FBGEMM
    sys.stderr = HidePrintMessage(sys.stderr)
    # FBGEMM prints this line when benchmarking TMA kernels; suppress it:
    # https://github.com/pytorch/FBGEMM/blob/d99cd96490ec4aabac2ee95b1e76ea4dcfcfa628/fbgemm_gpu/experimental/gemm/triton_gemm/utils.py#L43-L52
    sys.stderr.add_filter("TMA benchmarks will be running")
# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
# MUST do this at the start primarily due to tensorflow causing issues
def fix_message_factory_issue():
@ -506,3 +536,8 @@ def fix_executorch():
logger.info("Unsloth: Patching Executorch to fix get_mapped_key")
except Exception as e:
logger.info(f"Unsloth: Failed Executorch with error = {str(e)}")
def fix_diffusers_warnings():
    """Silence diffusers deprecation chatter via its verbosity env var.

    Raises the diffusers logging threshold to ``error`` so warning-level
    messages (e.g. "Flax classes are deprecated and will be removed in
    Diffusers v1.0.0.") are no longer emitted.
    """
    os.environ.update(DIFFUSERS_VERBOSITY = "error")

View file

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = "2025.12.5"
__version__ = "2025.12.6"
__all__ = [
"SUPPORTS_BFLOAT16",
@ -413,16 +413,6 @@ try:
except:
pass
# Flax classes are deprecated and will be removed in Diffusers v1.0.0.
try:
from diffusers.utils import logger as diffusers_logger
diffusers_logger.addFilter(HideLoggingMessage("are deprecated"))
del diffusers_logger
except:
pass
# Errors out on
# Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
from transformers.modeling_utils import logger as transformers_logger

View file

@ -739,6 +739,8 @@ class FastModel(FastBaseModel):
"compatible with `full_finetuning=True`. If you wish to use QAT with LoRA, "
"please pass in `qat_scheme` in `FastLanguageModel.get_peft_model(...)` instead."
)
if qat_scheme == "phone-deployment":
qat_scheme = "int8-int4"
# Check if 4bit is allowed specifically for AMD
if not ALLOW_BITSANDBYTES and not use_exact_model_name:
if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):

View file

@ -2745,6 +2745,17 @@ def _unsloth_save_torchao_with_attached_config(
"""Save a QAT-trained model by converting fake-quantized weights to real quantized weights."""
# Convert QAT fake-quantized weights to real quantized weights
_convert_torchao_model(model)
# PEFT models also might come here, so parse it
if isinstance(model, PeftModelForCausalLM):
_unsloth_save_torchao_with_given_config(
model = model,
save_directory = save_directory,
tokenizer = tokenizer,
torchao_config = model.config.quantization_config,
push_to_hub = push_to_hub,
token = token,
)
return
# TorchAO does not support safe_serialization reliably
safe_serialization = False
@ -2806,7 +2817,10 @@ def _unsloth_save_torchao_with_given_config(
)
from torchao import quantize_
quantization_config = TorchAoConfig(quant_type = torchao_config)
if isinstance(torchao_config, TorchAoConfig):
quantization_config = torchao_config
else:
quantization_config = TorchAoConfig(quant_type = torchao_config)
# Determine if this is a VLM
is_vlm = False
@ -2897,7 +2911,7 @@ def unsloth_save_pretrained_torchao(
)
if torchao_config is not None:
# PTQ path: user provided a config, model must NOT have QAT config
# PTQ path: user provided a config, model must NOT have QAT config unless PEFT
assert not has_qat_config, (
"Unsloth: You passed `torchao_config` but this model was trained with `qat_scheme`. "
"For QAT models, do not pass `torchao_config` - the quantization config is already "
@ -3010,7 +3024,11 @@ def patch_saving_functions(model, vision = False):
original_model = model
while True:
if original_model.push_to_hub.__name__ != "unsloth_push_to_hub":
# Check if push_to_hub exists before accessing its __name__
if (
hasattr(original_model, "push_to_hub")
and original_model.push_to_hub.__name__ != "unsloth_push_to_hub"
):
original_model.original_push_to_hub = original_model.push_to_hub
original_model.push_to_hub = types.MethodType(
unsloth_push_to_hub, original_model