mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
Nightly (#3737)
* Update _utils.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [FIX] [Transformers] VLM input embeds fix for gradients (#3715) * Fix get_input_embeds call for VLMs * patch input_require_grads instead * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * cleanup old patch * cleanup old patch * cleanup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply suggestion from @danielhanchen * use logger instead of prints * Move unsloth present set * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Daniel Han <danielhanchen@gmail.com> * Update rope_embedding.py * Fixes * Update _utils.py * Update import_fixes.py * Update rl_replacements.py * fix_openenv_no_vllm * Fix * Update __init__.py * Update __init__.py * Update __init__.py * Update import_fixes.py * Update import_fixes.py * Update import_fixes.py * logger * Update __init__.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update __init__.py * Update import_fixes.py * Update __init__.py * Update import_fixes.py * Update import_fixes.py * Update import_fixes.py * Update import_fixes.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update import_fixes.py * Update unsloth/import_fixes.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Update save.py * [fbgemm] Silence tma fbgemm (#3735) * Silence fbgemm TMA print Also safer .push_to_hub * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> * Update loader.py * Update save.py * Update save.py * Update _utils.py * Update _utils.py * Diffusers warnings * Update pyproject.toml * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Datta Nimmaturi <venkatadattasainimmaturi@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
23a7ac5d17
commit
1e7302cd77
6 changed files with 64 additions and 16 deletions
|
|
@ -60,7 +60,7 @@ huggingfacenotorch = [
|
|||
]
|
||||
huggingface = [
|
||||
"unsloth[huggingfacenotorch]",
|
||||
"unsloth_zoo>=2025.12.4",
|
||||
"unsloth_zoo>=2025.12.5",
|
||||
"torchvision",
|
||||
"unsloth[triton]",
|
||||
]
|
||||
|
|
@ -523,7 +523,7 @@ colab-ampere-torch220 = [
|
|||
"flash-attn>=2.6.3 ; ('linux' in sys_platform)",
|
||||
]
|
||||
colab-new = [
|
||||
"unsloth_zoo>=2025.12.4",
|
||||
"unsloth_zoo>=2025.12.5",
|
||||
"packaging",
|
||||
"tyro",
|
||||
"transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,!=4.57.0,<=4.57.3",
|
||||
|
|
|
|||
|
|
@ -29,14 +29,17 @@ from .import_fixes import (
|
|||
fix_message_factory_issue,
|
||||
check_fbgemm_gpu_version,
|
||||
torchvision_compatibility_check,
|
||||
fix_diffusers_warnings,
|
||||
)
|
||||
|
||||
fix_message_factory_issue()
|
||||
check_fbgemm_gpu_version()
|
||||
torchvision_compatibility_check()
|
||||
fix_diffusers_warnings()
|
||||
del fix_message_factory_issue
|
||||
del check_fbgemm_gpu_version
|
||||
del torchvision_compatibility_check
|
||||
del fix_diffusers_warnings
|
||||
|
||||
# This check is critical because Unsloth optimizes these libraries by modifying
|
||||
# their code at import time. If they're imported first, the original (slower,
|
||||
|
|
|
|||
|
|
@ -71,6 +71,36 @@ class HideLoggingMessage(logging.Filter):
|
|||
return not (self.text in x.getMessage())
|
||||
|
||||
|
||||
class HidePrintMessage:
    """File-like proxy that drops writes containing any registered substring.

    Installed in place of a real stream (e.g. ``sys.stderr``) to silence
    noisy library prints such as FBGEMM's TMA benchmark banner. Everything
    except ``write``/``flush``/``add_filter`` is delegated transparently to
    the wrapped stream via ``__getattr__``.
    """

    __slots__ = ("_original_stream", "_hidden_texts")

    def __init__(self, original_stream):
        # The real stream that non-filtered output is forwarded to.
        self._original_stream = original_stream
        # Substrings: any message containing one of these is suppressed.
        self._hidden_texts = []

    def add_filter(self, text):
        """Register ``text``; any future write containing it is dropped."""
        self._hidden_texts.append(text)

    def write(self, message):
        """Forward ``message`` to the wrapped stream unless it is filtered.

        Returns the number of characters written, matching the TextIO
        protocol. (The original returned ``None``, which breaks callers
        that rely on ``stream.write`` returning a count.)
        """
        if any(text in message for text in self._hidden_texts):
            # Pretend the write succeeded so callers see a consistent count.
            return len(message)
        return self._original_stream.write(message)

    def flush(self):
        self._original_stream.flush()

    def __getattr__(self, name):
        # Delegate everything else (isatty, fileno, encoding, ...) so the
        # proxy is indistinguishable from the wrapped stream.
        return getattr(self._original_stream, name)
|
||||
|
||||
|
||||
# Unless the user explicitly opts into Unsloth logging, install a filtering
# proxy over stderr so known-noisy native-library prints are suppressed.
if os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") != "1":
    import sys

    # Apply to stderr for FBGEMM
    # https://github.com/pytorch/FBGEMM/blob/d99cd96490ec4aabac2ee95b1e76ea4dcfcfa628/fbgemm_gpu/experimental/gemm/triton_gemm/utils.py#L43-L52
    sys.stderr = HidePrintMessage(sys.stderr)
    sys.stderr.add_filter("TMA benchmarks will be running")
|
||||
|
||||
|
||||
# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
|
||||
# MUST do this at the start primarily due to tensorflow causing issues
|
||||
def fix_message_factory_issue():
|
||||
|
|
@ -506,3 +536,8 @@ def fix_executorch():
|
|||
logger.info("Unsloth: Patching Executorch to fix get_mapped_key")
|
||||
except Exception as e:
|
||||
logger.info(f"Unsloth: Failed Executorch with error = {str(e)}")
|
||||
|
||||
|
||||
def fix_diffusers_warnings():
    """Silence diffusers' Flax deprecation warning.

    Suppresses "Flax classes are deprecated and will be removed in
    Diffusers v1.0.0." by raising diffusers' logging verbosity threshold
    to "error" via its environment variable.
    """
    os.environ["DIFFUSERS_VERBOSITY"] = "error"
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__version__ = "2025.12.5"
|
||||
__version__ = "2025.12.6"
|
||||
|
||||
__all__ = [
|
||||
"SUPPORTS_BFLOAT16",
|
||||
|
|
@ -413,16 +413,6 @@ try:
|
|||
except:
|
||||
pass
|
||||
|
||||
# Flax classes are deprecated and will be removed in Diffusers v1.0.0.
|
||||
try:
|
||||
from diffusers.utils import logger as diffusers_logger
|
||||
|
||||
diffusers_logger.addFilter(HideLoggingMessage("are deprecated"))
|
||||
del diffusers_logger
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
# Errors out on
|
||||
# Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
|
||||
from transformers.modeling_utils import logger as transformers_logger
|
||||
|
|
|
|||
|
|
@ -739,6 +739,8 @@ class FastModel(FastBaseModel):
|
|||
"compatible with `full_finetuning=True`. If you wish to use QAT with LoRA, "
|
||||
"please pass in `qat_scheme` in `FastLanguageModel.get_peft_model(...)` instead."
|
||||
)
|
||||
if qat_scheme == "phone-deployment":
|
||||
qat_scheme = "int8-int4"
|
||||
# Check if 4bit is allowed specifically for AMD
|
||||
if not ALLOW_BITSANDBYTES and not use_exact_model_name:
|
||||
if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):
|
||||
|
|
|
|||
|
|
@ -2745,6 +2745,17 @@ def _unsloth_save_torchao_with_attached_config(
|
|||
"""Save a QAT-trained model by converting fake-quantized weights to real quantized weights."""
|
||||
# Convert QAT fake-quantized weights to real quantized weights
|
||||
_convert_torchao_model(model)
|
||||
# PEFT models also might come here, so parse it
|
||||
if isinstance(model, PeftModelForCausalLM):
|
||||
_unsloth_save_torchao_with_given_config(
|
||||
model = model,
|
||||
save_directory = save_directory,
|
||||
tokenizer = tokenizer,
|
||||
torchao_config = model.config.quantization_config,
|
||||
push_to_hub = push_to_hub,
|
||||
token = token,
|
||||
)
|
||||
return
|
||||
|
||||
# TorchAO does not support safe_serialization reliably
|
||||
safe_serialization = False
|
||||
|
|
@ -2806,7 +2817,10 @@ def _unsloth_save_torchao_with_given_config(
|
|||
)
|
||||
from torchao import quantize_
|
||||
|
||||
quantization_config = TorchAoConfig(quant_type = torchao_config)
|
||||
if isinstance(torchao_config, TorchAoConfig):
|
||||
quantization_config = torchao_config
|
||||
else:
|
||||
quantization_config = TorchAoConfig(quant_type = torchao_config)
|
||||
|
||||
# Determine if this is a VLM
|
||||
is_vlm = False
|
||||
|
|
@ -2897,7 +2911,7 @@ def unsloth_save_pretrained_torchao(
|
|||
)
|
||||
|
||||
if torchao_config is not None:
|
||||
# PTQ path: user provided a config, model must NOT have QAT config
|
||||
# PTQ path: user provided a config, model must NOT have QAT config unless PEFT
|
||||
assert not has_qat_config, (
|
||||
"Unsloth: You passed `torchao_config` but this model was trained with `qat_scheme`. "
|
||||
"For QAT models, do not pass `torchao_config` - the quantization config is already "
|
||||
|
|
@ -3010,7 +3024,11 @@ def patch_saving_functions(model, vision = False):
|
|||
|
||||
original_model = model
|
||||
while True:
|
||||
if original_model.push_to_hub.__name__ != "unsloth_push_to_hub":
|
||||
# Check if push_to_hub exists before accessing its __name__
|
||||
if (
|
||||
hasattr(original_model, "push_to_hub")
|
||||
and original_model.push_to_hub.__name__ != "unsloth_push_to_hub"
|
||||
):
|
||||
original_model.original_push_to_hub = original_model.push_to_hub
|
||||
original_model.push_to_hub = types.MethodType(
|
||||
unsloth_push_to_hub, original_model
|
||||
|
|
|
|||
Loading…
Reference in a new issue