From f18e9dddf0cfaf1b6382dfe937430c3d480dd83c Mon Sep 17 00:00:00 2001
From: Avaya Aggarwal <119044997+OnePunchMonk@users.noreply.github.com>
Date: Wed, 15 Apr 2026 20:09:11 +0530
Subject: [PATCH] feat: Add support for OLMo-3 model (#4678)

* feat: Add support for OLMo-3 model in mapping and tests

* Update unsloth/models/mapper.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Update tests/test_get_model_name.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Fix casing, add Think variants, and align version gate for OLMo-3 PR 4678

Mapper: switch slugs from OLMo-3 to canonical Olmo-3 mixed case, drop the non-existent unsloth/Olmo-3-7B-Instruct-bnb-4bit dead alias, and add the already-published Olmo-3-7B-Think and Olmo-3-32B-Think Unsloth mirrors.

Loader: change the olmo3 transformers version gate from Version("4.57.0") to Version("4.57.0.dev0") so nightly/source builds that already contain olmo3 are not blocked, matching the OLMo-2, Gemma 3 and Cohere patterns.

* Use canonical Olmo-3 casing and cover Think variants in OLMo-3 tests

Mirrors the mapper.py fixes on pr-4678-code: HuggingFace canonical slugs for the OLMo-3 family use mixed-case Olmo-3 (not OLMo-3 like OLMo-2), and Unsloth already hosts Olmo-3-7B-Think and Olmo-3-32B-Think mirrors, so the resolution matrix now covers all three published Olmo-3 families.

--------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Daniel Han --- tests/test_get_model_name.py | 40 ++++++++++++++++++++++++++++++++++++ unsloth/models/loader.py | 9 +++++++- unsloth/models/mapper.py | 12 +++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/tests/test_get_model_name.py b/tests/test_get_model_name.py index ad89f595f..15dd44f0e 100644 --- a/tests/test_get_model_name.py +++ b/tests/test_get_model_name.py @@ -64,6 +64,42 @@ class TestGetModelName(unittest.TestCase): "unsloth/Ministral-3-3B-Instruct-2512", True, ), + ( + "allenai/Olmo-3-7B-Instruct", + True, + "unsloth/Olmo-3-7B-Instruct-unsloth-bnb-4bit", + True, + ), + ( + "allenai/Olmo-3-7B-Instruct", + False, + "unsloth/Olmo-3-7B-Instruct", + True, + ), + ( + "allenai/Olmo-3-7B-Think", + True, + "unsloth/Olmo-3-7B-Think-unsloth-bnb-4bit", + True, + ), + ( + "allenai/Olmo-3-7B-Think", + False, + "unsloth/Olmo-3-7B-Think", + True, + ), + ( + "allenai/Olmo-3-32B-Think", + True, + "unsloth/Olmo-3-32B-Think-unsloth-bnb-4bit", + True, + ), + ( + "allenai/Olmo-3-32B-Think", + False, + "unsloth/Olmo-3-32B-Think", + True, + ), ("unsloth/Kimi-K2-Instruct", True, "unsloth/Kimi-K2-Instruct-BF16", True), ("unsloth/Kimi-K2-Instruct", False, "unsloth/Kimi-K2-Instruct", False), # Fallback-to-original behavior @@ -113,6 +149,10 @@ class TestGetModelName(unittest.TestCase): "mistralai/ministral-3-3b-instruct-2512", "unsloth/ministral-3-3b-instruct-2512-unsloth-bnb-4bit", ), + ( + "allenai/olmo-3-7b-instruct", + "unsloth/olmo-3-7b-instruct-unsloth-bnb-4bit", + ), ("unsloth/kimi-k2-instruct", "unsloth/kimi-k2-instruct-bf16"), ] for src, expected in contracts: diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index a66a7c6ab..cd12544ae 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -1210,13 +1210,20 @@ class FastModel(FastBaseModel): # Granite-4 rms norms are stored as 16 bit, but we 
upcast os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" - # Olmo 2 + # OLMo 2 elif "olmo2" in model_types_all and transformers_version < Version( "4.50.0.dev0" ): raise RuntimeError( "Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY ) + # OLMo 3 + elif "olmo3" in model_types_all and transformers_version < Version( + "4.57.0.dev0" + ): + raise RuntimeError( + "Unsloth: OLMo-3 only works on transformers >= 4.57.0." + LATEST + ) elif "falcon_h1" in model_types_all: # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee' # since Mamba kernels error out on using lower precision diff --git a/unsloth/models/mapper.py b/unsloth/models/mapper.py index f0f430eb7..8f2861db6 100644 --- a/unsloth/models/mapper.py +++ b/unsloth/models/mapper.py @@ -762,6 +762,18 @@ __INT_TO_FLOAT_MAPPER = \ "allenai/OLMo-2-0325-32B-Instruct", "unsloth/OLMo-2-0325-32B-Instruct-bnb-4bit", ), + "unsloth/Olmo-3-7B-Instruct-unsloth-bnb-4bit" : ( + "unsloth/Olmo-3-7B-Instruct", + "allenai/Olmo-3-7B-Instruct", + ), + "unsloth/Olmo-3-7B-Think-unsloth-bnb-4bit" : ( + "unsloth/Olmo-3-7B-Think", + "allenai/Olmo-3-7B-Think", + ), + "unsloth/Olmo-3-32B-Think-unsloth-bnb-4bit" : ( + "unsloth/Olmo-3-32B-Think", + "allenai/Olmo-3-32B-Think", + ), "unsloth/Mistral-Small-3.1-24B-Instruct-2503-unsloth-bnb-4bit" : ( "unsloth/Mistral-Small-3.1-24B-Instruct-2503", "mistralai/Mistral-Small-3.1-24B-Instruct-2503",