mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
Add Qwen3.6 inference defaults for Studio (#5065)
* Add Qwen3.6 inference defaults for Studio Add qwen3.6 family entry to inference_defaults.json with the recommended sampling parameters from Qwen's documentation: temperature=0.7, top_p=0.8, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0. Without this, Qwen3.6 models fall through to the generic qwen3 pattern which uses different defaults (temperature=0.6, top_p=0.95, no presence_penalty). * Add Qwen3.6-35B-A3B-GGUF to default model lists * Add Qwen3.5/3.6 presence_penalty to thinking toggle and small-model disable logic - Thinking toggle (on-load + button click) now sets presencePenalty: 1.5 for Qwen3.5 and Qwen3.6 models (both thinking-ON and thinking-OFF states) - Small-model thinking-disable check (<9B defaults to no-thinking) extended from Qwen3.5-only to also cover Qwen3.6, in all 3 locations: frontend on-load, frontend refresh, backend llama_cpp.py
This commit is contained in:
parent
d56f980452
commit
0b57884120
5 changed files with 26 additions and 13 deletions
|
|
@ -1,6 +1,14 @@
|
|||
{
|
||||
"_comment": "Per-model-family inference parameter defaults. Sources: (1) Ollama params blobs, (2) Existing Unsloth Studio YAML configs. Patterns ordered longest-match-first.",
|
||||
"families": {
|
||||
"qwen3.6": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.8,
|
||||
"top_k": 20,
|
||||
"min_p": 0.0,
|
||||
"repetition_penalty": 1.0,
|
||||
"presence_penalty": 1.5
|
||||
},
|
||||
"qwen3.5": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.8,
|
||||
|
|
@ -369,7 +377,7 @@
|
|||
}
|
||||
},
|
||||
"patterns": [
|
||||
"qwen3.5",
|
||||
"qwen3.6", "qwen3.5",
|
||||
"qwen3-coder", "qwen3-next", "qwen3-vl", "qwen3",
|
||||
"qwen2.5-coder", "qwen2.5-vl", "qwen2.5-omni", "qwen2.5-math", "qwen2.5",
|
||||
"qwen2-vl", "qwen2",
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ DEFAULT_MODELS_GGUF = [
|
|||
"unsloth/gemma-4-E4B-it-GGUF",
|
||||
"unsloth/gemma-4-31B-it-GGUF",
|
||||
"unsloth/gemma-4-26B-A4B-it-GGUF",
|
||||
"unsloth/Qwen3.6-35B-A3B-GGUF",
|
||||
"unsloth/Qwen3.5-4B-GGUF",
|
||||
"unsloth/Qwen3.5-9B-GGUF",
|
||||
"unsloth/Qwen3.5-35B-A3B-GGUF",
|
||||
|
|
@ -27,6 +28,7 @@ DEFAULT_MODELS_STANDARD = [
|
|||
"unsloth/gemma-4-E4B-it-GGUF",
|
||||
"unsloth/gemma-4-31B-it-GGUF",
|
||||
"unsloth/gemma-4-26B-A4B-it-GGUF",
|
||||
"unsloth/Qwen3.6-35B-A3B-GGUF",
|
||||
"unsloth/Qwen3.5-4B-GGUF",
|
||||
"unsloth/Qwen3.5-9B-GGUF",
|
||||
"unsloth/Qwen3.5-35B-A3B-GGUF",
|
||||
|
|
|
|||
|
|
@ -1514,12 +1514,12 @@ class LlamaCppBackend:
|
|||
)
|
||||
|
||||
# For reasoning models, set default thinking mode.
|
||||
# Qwen3.5 models below 9B (0.8B, 2B, 4B) disable thinking by default.
|
||||
# Qwen3.5/3.6 models below 9B (0.8B, 2B, 4B) disable thinking by default.
|
||||
# Only 9B and larger enable thinking.
|
||||
if self._supports_reasoning:
|
||||
thinking_default = True
|
||||
mid = (model_identifier or "").lower()
|
||||
if "qwen3.5" in mid:
|
||||
if "qwen3.5" in mid or "qwen3.6" in mid:
|
||||
size_val = _extract_model_size_b(mid)
|
||||
if size_val is not None and size_val < 9:
|
||||
thinking_default = False
|
||||
|
|
|
|||
|
|
@ -289,11 +289,11 @@ export function useChatModelRuntime() {
|
|||
loadedSpeculativeType: currentSpecType,
|
||||
});
|
||||
|
||||
// Set reasoning default for Qwen3.5 small models
|
||||
// Set reasoning default for Qwen3.5/3.6 small models
|
||||
if (supportsReasoning) {
|
||||
let reasoningDefault = true;
|
||||
const mid = statusRes.active_model.toLowerCase();
|
||||
if (mid.includes("qwen3.5")) {
|
||||
if (mid.includes("qwen3.5") || mid.includes("qwen3.6")) {
|
||||
const sizeMatch = mid.match(/(\d+\.?\d*)\s*b/);
|
||||
if (sizeMatch && parseFloat(sizeMatch[1]) < 9) {
|
||||
reasoningDefault = false;
|
||||
|
|
@ -462,11 +462,11 @@ export function useChatModelRuntime() {
|
|||
setParams(
|
||||
mergeRecommendedInference(currentParams, loadResponse, modelId),
|
||||
);
|
||||
// Qwen3.5 small models (0.8B, 2B, 4B, 9B) disable thinking by default
|
||||
// Qwen3.5/3.6 models below 9B (0.8B, 2B, 4B) disable thinking by default
|
||||
let reasoningDefault = loadResponse.supports_reasoning ?? false;
|
||||
if (reasoningDefault) {
|
||||
const mid = modelId.toLowerCase();
|
||||
if (mid.includes("qwen3.5")) {
|
||||
if (mid.includes("qwen3.5") || mid.includes("qwen3.6")) {
|
||||
const sizeMatch = mid.match(/(\d+\.?\d*)\s*b/);
|
||||
if (sizeMatch && parseFloat(sizeMatch[1]) < 9) {
|
||||
reasoningDefault = false;
|
||||
|
|
@ -509,12 +509,14 @@ export function useChatModelRuntime() {
|
|||
defaultChatTemplate: loadResponse.chat_template ?? null,
|
||||
chatTemplateOverride: null,
|
||||
});
|
||||
// Qwen3/3.5: apply thinking-mode-specific params after load
|
||||
// Qwen3/3.5/3.6: apply thinking-mode-specific params after load
|
||||
if (modelId.toLowerCase().includes("qwen3") && (loadResponse.supports_reasoning ?? false)) {
|
||||
const store = useChatRuntimeStore.getState();
|
||||
const mid = modelId.toLowerCase();
|
||||
const needsPresencePenalty = mid.includes("qwen3.5") || mid.includes("qwen3.6");
|
||||
const p = reasoningDefault
|
||||
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0 }
|
||||
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0 };
|
||||
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) }
|
||||
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) };
|
||||
store.setParams({ ...store.params, ...p });
|
||||
}
|
||||
await refresh();
|
||||
|
|
|
|||
|
|
@ -557,13 +557,14 @@ export function SharedComposer({
|
|||
if (reasoningAlwaysOn) return;
|
||||
const next = !reasoningEnabled;
|
||||
setReasoningEnabled(next);
|
||||
// Qwen3/3.5: adjust params for thinking on/off
|
||||
// Qwen3/3.5/3.6: adjust params for thinking on/off
|
||||
const store = useChatRuntimeStore.getState();
|
||||
const cp = store.params.checkpoint?.toLowerCase() ?? "";
|
||||
if (cp.includes("qwen3")) {
|
||||
const needsPresencePenalty = cp.includes("qwen3.5") || cp.includes("qwen3.6");
|
||||
const p = next
|
||||
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0 }
|
||||
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0 };
|
||||
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) }
|
||||
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) };
|
||||
store.setParams({ ...store.params, ...p });
|
||||
}
|
||||
}}
|
||||
|
|
|
|||
Loading…
Reference in a new issue