Merge branch 'main' into feature/tool-choice-kwarg-openai-format

This commit is contained in:
Roland Tannous 2026-04-16 22:43:08 +04:00 committed by GitHub
commit b4a4ac7cf0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 26 additions and 13 deletions

View file

@@ -1,6 +1,14 @@
{
"_comment": "Per-model-family inference parameter defaults. Sources: (1) Ollama params blobs, (2) Existing Unsloth Studio YAML configs. Patterns ordered longest-match-first.",
"families": {
"qwen3.6": {
"temperature": 0.7,
"top_p": 0.8,
"top_k": 20,
"min_p": 0.0,
"repetition_penalty": 1.0,
"presence_penalty": 1.5
},
"qwen3.5": {
"temperature": 0.7,
"top_p": 0.8,
@@ -369,7 +377,7 @@
}
},
"patterns": [
"qwen3.5",
"qwen3.6", "qwen3.5",
"qwen3-coder", "qwen3-next", "qwen3-vl", "qwen3",
"qwen2.5-coder", "qwen2.5-vl", "qwen2.5-omni", "qwen2.5-math", "qwen2.5",
"qwen2-vl", "qwen2",

View file

@@ -10,6 +10,7 @@ DEFAULT_MODELS_GGUF = [
"unsloth/gemma-4-E4B-it-GGUF",
"unsloth/gemma-4-31B-it-GGUF",
"unsloth/gemma-4-26B-A4B-it-GGUF",
"unsloth/Qwen3.6-35B-A3B-GGUF",
"unsloth/Qwen3.5-4B-GGUF",
"unsloth/Qwen3.5-9B-GGUF",
"unsloth/Qwen3.5-35B-A3B-GGUF",
@@ -27,6 +28,7 @@ DEFAULT_MODELS_STANDARD = [
"unsloth/gemma-4-E4B-it-GGUF",
"unsloth/gemma-4-31B-it-GGUF",
"unsloth/gemma-4-26B-A4B-it-GGUF",
"unsloth/Qwen3.6-35B-A3B-GGUF",
"unsloth/Qwen3.5-4B-GGUF",
"unsloth/Qwen3.5-9B-GGUF",
"unsloth/Qwen3.5-35B-A3B-GGUF",

View file

@@ -1514,12 +1514,12 @@ class LlamaCppBackend:
)
# For reasoning models, set default thinking mode.
# Qwen3.5 models below 9B (0.8B, 2B, 4B) disable thinking by default.
# Qwen3.5/3.6 models below 9B (0.8B, 2B, 4B) disable thinking by default.
# Only 9B and larger enable thinking.
if self._supports_reasoning:
thinking_default = True
mid = (model_identifier or "").lower()
if "qwen3.5" in mid:
if "qwen3.5" in mid or "qwen3.6" in mid:
size_val = _extract_model_size_b(mid)
if size_val is not None and size_val < 9:
thinking_default = False

View file

@@ -289,11 +289,11 @@ export function useChatModelRuntime() {
loadedSpeculativeType: currentSpecType,
});
// Set reasoning default for Qwen3.5 small models
// Set reasoning default for Qwen3.5/3.6 small models
if (supportsReasoning) {
let reasoningDefault = true;
const mid = statusRes.active_model.toLowerCase();
if (mid.includes("qwen3.5")) {
if (mid.includes("qwen3.5") || mid.includes("qwen3.6")) {
const sizeMatch = mid.match(/(\d+\.?\d*)\s*b/);
if (sizeMatch && parseFloat(sizeMatch[1]) < 9) {
reasoningDefault = false;
@@ -462,11 +462,11 @@ export function useChatModelRuntime() {
setParams(
mergeRecommendedInference(currentParams, loadResponse, modelId),
);
// Qwen3.5 small models (0.8B, 2B, 4B, 9B) disable thinking by default
// Qwen3.5/3.6 small models (0.8B, 2B, 4B) disable thinking by default; 9B and larger keep it enabled
let reasoningDefault = loadResponse.supports_reasoning ?? false;
if (reasoningDefault) {
const mid = modelId.toLowerCase();
if (mid.includes("qwen3.5")) {
if (mid.includes("qwen3.5") || mid.includes("qwen3.6")) {
const sizeMatch = mid.match(/(\d+\.?\d*)\s*b/);
if (sizeMatch && parseFloat(sizeMatch[1]) < 9) {
reasoningDefault = false;
@@ -509,12 +509,14 @@ export function useChatModelRuntime() {
defaultChatTemplate: loadResponse.chat_template ?? null,
chatTemplateOverride: null,
});
// Qwen3/3.5: apply thinking-mode-specific params after load
// Qwen3/3.5/3.6: apply thinking-mode-specific params after load
if (modelId.toLowerCase().includes("qwen3") && (loadResponse.supports_reasoning ?? false)) {
const store = useChatRuntimeStore.getState();
const mid = modelId.toLowerCase();
const needsPresencePenalty = mid.includes("qwen3.5") || mid.includes("qwen3.6");
const p = reasoningDefault
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0 }
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0 };
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) }
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) };
store.setParams({ ...store.params, ...p });
}
await refresh();

View file

@@ -557,13 +557,14 @@ export function SharedComposer({
if (reasoningAlwaysOn) return;
const next = !reasoningEnabled;
setReasoningEnabled(next);
// Qwen3/3.5: adjust params for thinking on/off
// Qwen3/3.5/3.6: adjust params for thinking on/off
const store = useChatRuntimeStore.getState();
const cp = store.params.checkpoint?.toLowerCase() ?? "";
if (cp.includes("qwen3")) {
const needsPresencePenalty = cp.includes("qwen3.5") || cp.includes("qwen3.6");
const p = next
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0 }
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0 };
? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) }
: { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0, ...(needsPresencePenalty ? { presencePenalty: 1.5 } : {}) };
store.setParams({ ...store.params, ...p });
}
}}