Merge branch 'main' into studio/api-keys

This commit is contained in:
Roland Tannous 2026-04-13 20:43:05 +04:00 committed by GitHub
commit 69d80ca926
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 972 additions and 193 deletions

View file

@ -94,6 +94,10 @@ class ValidateModelResponse(BaseModel):
is_gguf: bool = Field(False, description = "Whether this is a GGUF model (llama.cpp)")
is_lora: bool = Field(False, description = "Whether this is a LoRA adapter")
is_vision: bool = Field(False, description = "Whether this is a vision-capable model")
requires_trust_remote_code: bool = Field(
False,
description = "Whether the model defaults require trust_remote_code to be enabled for loading.",
)
class GenerateRequest(BaseModel):
@ -137,6 +141,10 @@ class LoadResponse(BaseModel):
inference: dict = Field(
..., description = "Inference parameters (temperature, top_p, top_k, min_p)"
)
requires_trust_remote_code: bool = Field(
False,
description = "Whether the model defaults require trust_remote_code to be enabled for loading.",
)
context_length: Optional[int] = Field(
None, description = "Model's native context length (from GGUF metadata)"
)
@ -213,6 +221,10 @@ class InferenceStatusResponse(BaseModel):
inference: Optional[Dict[str, Any]] = Field(
None, description = "Recommended inference parameters for the active model"
)
requires_trust_remote_code: bool = Field(
False,
description = "Whether the active model requires trust_remote_code to be enabled for loading.",
)
supports_reasoning: bool = Field(
False, description = "Whether the active model supports reasoning/thinking mode"
)

View file

@ -13,4 +13,4 @@ torch-c-dlpack-ext
sentence_transformers==5.2.0
transformers==4.57.6
pytorch_tokenizers
kernels
kernels==0.12.1

View file

@ -198,6 +198,9 @@ async def load_model(
if _gguf_audio
else False,
inference = inference_config,
requires_trust_remote_code = bool(
inference_config.get("trust_remote_code", False)
),
context_length = llama_backend.context_length,
max_context_length = llama_backend.max_context_length,
native_context_length = llama_backend.native_context_length,
@ -235,6 +238,9 @@ async def load_model(
audio_type = _model_info.get("audio_type"),
has_audio_input = _model_info.get("has_audio_input", False),
inference = inference_config,
requires_trust_remote_code = bool(
inference_config.get("trust_remote_code", False)
),
chat_template = _chat_template,
)
@ -341,6 +347,9 @@ async def load_model(
audio_type = _gguf_audio,
has_audio_input = is_audio_input_type(_gguf_audio),
inference = inference_config,
requires_trust_remote_code = bool(
inference_config.get("trust_remote_code", False)
),
context_length = llama_backend.context_length,
max_context_length = llama_backend.max_context_length,
native_context_length = llama_backend.native_context_length,
@ -479,6 +488,9 @@ async def load_model(
audio_type = config.audio_type,
has_audio_input = config.has_audio_input,
inference = inference_config,
requires_trust_remote_code = bool(
inference_config.get("trust_remote_code", False)
),
chat_template = _chat_template,
)
@ -534,6 +546,9 @@ async def validate_model(
is_gguf = getattr(config, "is_gguf", False),
is_lora = getattr(config, "is_lora", False),
is_vision = getattr(config, "is_vision", False),
requires_trust_remote_code = bool(
load_inference_config(config.identifier).get("trust_remote_code", False)
),
)
except HTTPException:
@ -679,6 +694,9 @@ async def get_status(
loading = [],
loaded = [_model_id],
inference = _inference_cfg,
requires_trust_remote_code = bool(
(_inference_cfg or {}).get("trust_remote_code", False)
),
supports_reasoning = llama_backend.supports_reasoning,
reasoning_always_on = llama_backend.reasoning_always_on,
supports_tools = llama_backend.supports_tools,
@ -706,6 +724,11 @@ async def get_status(
supports_reasoning = False
if backend.active_model_name and hasattr(backend, "_is_gpt_oss_model"):
supports_reasoning = backend._is_gpt_oss_model()
inference_config = (
load_inference_config(backend.active_model_name)
if backend.active_model_name
else None
)
return InferenceStatusResponse(
active_model = backend.active_model_name,
@ -716,6 +739,10 @@ async def get_status(
has_audio_input = has_audio_input,
loading = list(getattr(backend, "loading_models", set())),
loaded = list(backend.models.keys()),
inference = inference_config,
requires_trust_remote_code = bool(
(inference_config or {}).get("trust_remote_code", False)
),
supports_reasoning = supports_reasoning,
)

View file

@ -41,7 +41,7 @@ const COPY_RESET_MS = 2000;
const MERMAID_SOURCE_RE = /```mermaid\s*([\s\S]*?)```/i;
const CODE_FENCE_RE = /^```([^\r\n`]*)\r?\n([\s\S]*?)\r?\n?```$/;
const ACTION_PANEL_CLASS =
"pointer-events-auto flex shrink-0 items-center gap-2 rounded-md border border-sidebar bg-sidebar/80 px-1.5 py-1 supports-[backdrop-filter]:bg-sidebar/70 supports-[backdrop-filter]:backdrop-blur";
"pointer-events-auto flex shrink-0 items-center gap-2 rounded-md border border-sidebar bg-sidebar/80 px-1.5 py-1 supports-[backdrop-filter]:bg-sidebar/70 supports-[backdrop-filter]:backdrop-blur dark:border-white/10 dark:bg-code-block dark:supports-[backdrop-filter]:bg-code-block";
const ACTION_BUTTON_CLASS =
"cursor-pointer p-1 text-muted-foreground transition-all hover:text-foreground disabled:cursor-not-allowed disabled:opacity-50";

View file

@ -841,7 +841,7 @@ export function HubModelPicker({
(cachedGguf.length > 0 ||
(!chatOnly && cachedModels.length > 0)) ? (
<>
<ListLabel>{"\uD83E\uDDA5"} Downloaded</ListLabel>
<ListLabel>Downloaded</ListLabel>
{cachedGguf.map((c) => (
<div key={c.repo_id}>
<ModelRow
@ -1076,7 +1076,7 @@ export function HubModelPicker({
{!showHfSection && cachedReady ? (
<>
<ListLabel>{"\uD83E\uDDA5"} Recommended</ListLabel>
<ListLabel>Recommended</ListLabel>
{visibleRecommendedIds.length === 0 ? (
<div className="px-2.5 py-2 text-xs text-muted-foreground">
No default models.
@ -1128,7 +1128,7 @@ export function HubModelPicker({
{showHfSection && filteredRecommendedIds.length > 0 ? (
<>
<ListLabel>{"\uD83E\uDDA5"} Recommended</ListLabel>
<ListLabel>Recommended</ListLabel>
{filteredRecommendedIds.map((id) => {
const vram = recommendedVramMap.get(id);
return (

View file

@ -56,9 +56,12 @@ import {
RefreshCwIcon,
SquareIcon,
TerminalIcon,
Trash2Icon,
XIcon,
} from "lucide-react";
import { type FC, useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { deleteThreadMessage } from "@/features/chat/utils/delete-thread-message";
import { useChatRuntimeStore } from "@/features/chat/stores/chat-runtime-store";
export const Thread: FC<{ hideComposer?: boolean; hideWelcome?: boolean }> = ({
@ -635,6 +638,41 @@ const AssistantMessage: FC = () => {
const COPY_RESET_MS = 2000;
const DeleteMessageButton: FC = () => {
  const aui = useAui();
  const messageId = useAuiState(({ message }) => message.id);
  const isRunning = useAuiState(({ thread }) => thread.isRunning);

  // Removes this message from the thread via the shared delete helper;
  // any failure is logged and surfaced to the user as an error toast.
  const onDelete = async () => {
    const { remoteId } = aui.threadListItem().getState();
    const threadApi = aui.thread();
    try {
      await deleteThreadMessage({
        thread: {
          export: () => threadApi.export(),
          import: (data) => threadApi.import(data),
        },
        messageId,
        remoteId,
      });
    } catch (error) {
      console.error("Failed to delete message", error);
      toast.error("Failed to delete message");
    }
  };

  return (
    <TooltipIconButton
      tooltip="Delete message"
      disabled={isRunning}
      onClick={onDelete}
      className="text-muted-foreground hover:text-destructive"
    >
      <Trash2Icon className="size-4" />
    </TooltipIconButton>
  );
};
const CopyButton: FC = () => {
const aui = useAui();
const [copied, setCopied] = useState(false);
@ -673,6 +711,7 @@ const AssistantActionBar: FC = () => {
<RefreshCwIcon />
</TooltipIconButton>
</ActionBarPrimitive.Reload>
<DeleteMessageButton />
<MessageTiming side="top" />
<ActionBarMorePrimitive.Root>
<ActionBarMorePrimitive.Trigger asChild={true}>
@ -748,6 +787,7 @@ const UserActionBar: FC = () => {
<PencilIcon />
</TooltipIconButton>
</ActionBarPrimitive.Edit>
<DeleteMessageButton />
</ActionBarPrimitive.Root>
);
};

View file

@ -11,6 +11,7 @@ import {
listGgufVariants,
loadModel,
streamChatCompletions,
validateModel,
} from "./chat-api";
import { db } from "../db";
import { useChatRuntimeStore } from "../stores/chat-runtime-store";
@ -252,13 +253,39 @@ function waitForModelReady(abortSignal?: AbortSignal): Promise<void> {
* without selecting one. Prefers GGUF (picks smallest cached variant),
* falls back to smallest cached safetensors model.
*/
async function autoLoadSmallestModel(): Promise<boolean> {
const hfToken = useChatRuntimeStore.getState().hfToken || null;
async function autoLoadSmallestModel(): Promise<{
loaded: boolean;
blockedByTrustRemoteCode: boolean;
}> {
const store = useChatRuntimeStore.getState();
const hfToken = store.hfToken || null;
const trustRemoteCode = store.params.trustRemoteCode ?? false;
const toastId = toast("Loading a model…", {
description: "Auto-selecting the smallest downloaded model.",
duration: 5000,
closeButton: true,
});
let blockedByTrustRemoteCode = false;
let hadNonTrustFailure = false;
async function canAutoLoad(payload: {
model_path: string;
max_seq_length: number;
is_lora: boolean;
gguf_variant?: string | null;
}): Promise<boolean> {
const validation = await validateModel({
...payload,
hf_token: hfToken,
load_in_4bit: true,
trust_remote_code: trustRemoteCode,
});
if (validation.requires_trust_remote_code && !trustRemoteCode) {
blockedByTrustRemoteCode = true;
return false;
}
return true;
}
try {
const [ggufRepos, modelRepos] = await Promise.all([
listCachedGguf().catch(() => []),
@ -277,6 +304,16 @@ async function autoLoadSmallestModel(): Promise<boolean> {
.sort((a, b) => a.size_bytes - b.size_bytes);
if (downloaded.length > 0) {
const variant = downloaded[0];
if (
!(await canAutoLoad({
model_path: repo.repo_id,
max_seq_length: 0,
is_lora: false,
gguf_variant: variant.quant,
}))
) {
continue;
}
const loadResp = await loadModel({
model_path: repo.repo_id,
hf_token: hfToken,
@ -284,10 +321,13 @@ async function autoLoadSmallestModel(): Promise<boolean> {
load_in_4bit: true,
is_lora: false,
gguf_variant: variant.quant,
trust_remote_code: false,
trust_remote_code: trustRemoteCode,
});
useChatRuntimeStore.getState().setCheckpoint(repo.repo_id, variant.quant);
const store = useChatRuntimeStore.getState();
store.setModelRequiresTrustRemoteCode(
loadResp.requires_trust_remote_code ?? false,
);
store.setParams({ ...store.params, maxTokens: loadResp.context_length ?? 131072 });
// Add model to store so the selector shows the name
const autoModel: ChatModelSummary = {
@ -319,9 +359,10 @@ async function autoLoadSmallestModel(): Promise<boolean> {
chatTemplateOverride: null,
});
toast.success(`Loaded ${repo.repo_id} (${variant.quant})`, { id: toastId });
return true;
return { loaded: true, blockedByTrustRemoteCode: false };
}
} catch {
hadNonTrustFailure = true;
continue;
}
}
@ -332,6 +373,16 @@ async function autoLoadSmallestModel(): Promise<boolean> {
const sorted = [...modelRepos].sort((a, b) => a.size_bytes - b.size_bytes);
for (const repo of sorted) {
try {
if (
!(await canAutoLoad({
model_path: repo.repo_id,
max_seq_length: 4096,
is_lora: false,
gguf_variant: null,
}))
) {
continue;
}
const sfLoadResp = await loadModel({
model_path: repo.repo_id,
hf_token: hfToken,
@ -339,10 +390,13 @@ async function autoLoadSmallestModel(): Promise<boolean> {
load_in_4bit: true,
is_lora: false,
gguf_variant: null,
trust_remote_code: false,
trust_remote_code: trustRemoteCode,
});
useChatRuntimeStore.getState().setCheckpoint(repo.repo_id);
const store = useChatRuntimeStore.getState();
store.setModelRequiresTrustRemoteCode(
sfLoadResp.requires_trust_remote_code ?? false,
);
store.setParams({ ...store.params, maxTokens: 4096 });
const sfModel: ChatModelSummary = {
id: repo.repo_id,
@ -355,8 +409,9 @@ async function autoLoadSmallestModel(): Promise<boolean> {
store.setModels([...store.models, sfModel]);
}
toast.success(`Loaded ${repo.repo_id}`, { id: toastId });
return true;
return { loaded: true, blockedByTrustRemoteCode: false };
} catch {
hadNonTrustFailure = true;
continue;
}
}
@ -369,6 +424,17 @@ async function autoLoadSmallestModel(): Promise<boolean> {
duration: 30000,
});
try {
if (
!(await canAutoLoad({
model_path: "unsloth/Qwen3.5-4B-GGUF",
max_seq_length: 0,
is_lora: false,
gguf_variant: "UD-Q4_K_XL",
}))
) {
toast.dismiss(toastId);
return { loaded: false, blockedByTrustRemoteCode };
}
const loadResp = await loadModel({
model_path: "unsloth/Qwen3.5-4B-GGUF",
hf_token: hfToken,
@ -376,10 +442,13 @@ async function autoLoadSmallestModel(): Promise<boolean> {
load_in_4bit: true,
is_lora: false,
gguf_variant: "UD-Q4_K_XL",
trust_remote_code: false,
trust_remote_code: trustRemoteCode,
});
useChatRuntimeStore.getState().setCheckpoint("unsloth/Qwen3.5-4B-GGUF", "UD-Q4_K_XL");
const store = useChatRuntimeStore.getState();
store.setModelRequiresTrustRemoteCode(
loadResp.requires_trust_remote_code ?? false,
);
store.setParams({ ...store.params, maxTokens: loadResp.context_length ?? 131072 });
const defaultModel: ChatModelSummary = {
id: "unsloth/Qwen3.5-4B-GGUF",
@ -406,14 +475,24 @@ async function autoLoadSmallestModel(): Promise<boolean> {
chatTemplateOverride: null,
});
toast.success("Loaded Qwen3.5-4B (UD-Q4_K_XL)", { id: toastId });
return true;
return { loaded: true, blockedByTrustRemoteCode: false };
} catch {
toast.dismiss(toastId);
return false;
hadNonTrustFailure = true;
return {
loaded: false,
blockedByTrustRemoteCode:
blockedByTrustRemoteCode && !hadNonTrustFailure,
};
}
} catch {
toast.dismiss(toastId);
return false;
hadNonTrustFailure = true;
return {
loaded: false,
blockedByTrustRemoteCode:
blockedByTrustRemoteCode && !hadNonTrustFailure,
};
}
}
@ -434,11 +513,19 @@ export function createOpenAIStreamAdapter(): ChatModelAdapter {
if (!useChatRuntimeStore.getState().params.checkpoint) {
// Auto-load the smallest downloaded model
const loaded = await autoLoadSmallestModel();
const { loaded, blockedByTrustRemoteCode } =
await autoLoadSmallestModel();
if (!loaded) {
toast.error("No model loaded", {
description: "Pick a model in the top bar, then retry.",
});
toast.error(
blockedByTrustRemoteCode
? "Enable custom code to auto-load this model"
: "No model loaded",
{
description: blockedByTrustRemoteCode
? 'Turn on "Enable custom code" in Chat Settings, or pick another model in the top bar.'
: "Pick a model in the top bar, then retry.",
},
);
throw new Error("Load a model first.");
}
}

View file

@ -1,6 +1,11 @@
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
import {
Alert,
AlertDescription,
AlertTitle,
} from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import {
Dialog,
@ -10,7 +15,19 @@ import {
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { Input } from "@/components/ui/input";
import {
InputGroup,
InputGroupAddon,
InputGroupButton,
InputGroupInput,
} from "@/components/ui/input-group";
import {
Select,
SelectContent,
@ -29,6 +46,7 @@ import { Slider } from "@/components/ui/slider";
import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea";
import { useIsMobile } from "@/hooks/use-mobile";
import { cn } from "@/lib/utils";
import {
ArrowDown01Icon,
CodeIcon,
@ -43,7 +61,8 @@ import {
import { HugeiconsIcon } from "@hugeicons/react";
import { AnimatePresence, motion } from "motion/react";
import type { ReactNode } from "react";
import { useEffect, useMemo, useState } from "react";
import { useEffect, useLayoutEffect, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
import { useChatRuntimeStore } from "./stores/chat-runtime-store";
import {
DEFAULT_INFERENCE_PARAMS,
@ -58,6 +77,11 @@ export interface Preset {
params: InferenceParams;
}
// Shape of one entry in the legacy "unsloth_chat_system_prompts"
// localStorage payload (system-prompt templates from before presets
// existed); consumed only by migrateLegacySystemPromptTemplates.
interface LegacySystemPromptTemplate {
name: string;
content: string;
}
const BUILTIN_PRESETS: Preset[] = [
{ name: "Default", params: { ...defaultInferenceParams } },
{
@ -86,19 +110,134 @@ const BUILTIN_PRESETS: Preset[] = [
const CHAT_PRESETS_KEY = "unsloth_chat_custom_presets";
const CHAT_ACTIVE_PRESET_KEY = "unsloth_chat_active_preset";
const LEGACY_CHAT_SYSTEM_PROMPTS_KEY = "unsloth_chat_system_prompts";
const LEGACY_CHAT_SYSTEM_PROMPTS_MIGRATED_KEY =
"unsloth_chat_system_prompts_migrated";
// localStorage is only reachable in a browser context; during SSR there is
// no `window`, so all storage access must be skipped.
function canUseStorage(): boolean {
  return !(typeof window === "undefined");
}
// Produces a preset name not present in `usedNames` by appending an
// increasing numeric suffix ("Name", "Name 2", "Name 3", …). A blank base
// falls back to "Imported Prompt". The chosen name is added to `usedNames`
// so subsequent calls cannot reuse it (the set is mutated on purpose).
function getUniquePresetName(baseName: string, usedNames: Set<string>): string {
  const base = baseName.trim() || "Imported Prompt";
  let candidate = base;
  for (let n = 2; usedNames.has(candidate); n += 1) {
    candidate = `${base} ${n}`;
  }
  usedNames.add(candidate);
  return candidate;
}
// One-time import of legacy system-prompt templates (stored under
// LEGACY_CHAT_SYSTEM_PROMPTS_KEY) into full presets. The raw legacy payload
// string doubles as the migration marker: once it is recorded under
// LEGACY_CHAT_SYSTEM_PROMPTS_MIGRATED_KEY, the import is never re-run for
// that payload. Returns `presets` extended with any imported entries; on any
// storage/parse failure the input list is returned unchanged.
function migrateLegacySystemPromptTemplates(presets: Preset[]): Preset[] {
if (!canUseStorage()) return presets;
try {
const raw = localStorage.getItem(LEGACY_CHAT_SYSTEM_PROMPTS_KEY);
if (!raw) return presets;
// Already migrated this exact payload — nothing to do.
if (localStorage.getItem(LEGACY_CHAT_SYSTEM_PROMPTS_MIGRATED_KEY) === raw) {
return presets;
}
let parsed: unknown;
try {
parsed = JSON.parse(raw) as unknown;
} catch {
// Unparseable legacy data: discard it and mark as migrated so we never retry.
localStorage.removeItem(LEGACY_CHAT_SYSTEM_PROMPTS_KEY);
localStorage.setItem(LEGACY_CHAT_SYSTEM_PROMPTS_MIGRATED_KEY, raw);
return presets;
}
if (!Array.isArray(parsed)) {
// Wrong shape (not a template array): same discard-and-mark treatment.
localStorage.removeItem(LEGACY_CHAT_SYSTEM_PROMPTS_KEY);
localStorage.setItem(LEGACY_CHAT_SYSTEM_PROMPTS_MIGRATED_KEY, raw);
return presets;
}
// Names already claimed by built-ins or existing custom presets; imports
// must not collide with them.
const usedNames = new Set([
...BUILTIN_PRESETS.map((preset) => preset.name),
...presets.map((preset) => preset.name),
]);
// Serialized parameter fingerprints of every existing preset, used to skip
// imports that would duplicate an existing configuration.
const seenImportedConfigKeys = new Set(
[...BUILTIN_PRESETS, ...presets].map((preset) =>
JSON.stringify({
temperature: preset.params.temperature,
topP: preset.params.topP,
topK: preset.params.topK,
minP: preset.params.minP,
repetitionPenalty: preset.params.repetitionPenalty,
presencePenalty: preset.params.presencePenalty,
maxSeqLength: preset.params.maxSeqLength,
maxTokens: preset.params.maxTokens,
systemPrompt: preset.params.systemPrompt,
trustRemoteCode: preset.params.trustRemoteCode ?? false,
}),
),
);
const importedPresets = parsed
// Keep only well-formed {name, content} template entries.
.filter((item): item is LegacySystemPromptTemplate => {
if (!item || typeof item !== "object") return false;
const maybe = item as Partial<LegacySystemPromptTemplate>;
return (
typeof maybe.name === "string" && typeof maybe.content === "string"
);
})
// A legacy template becomes default params + its system prompt.
.map((template) => ({
template,
importedParams: {
...defaultInferenceParams,
systemPrompt: template.content,
},
}))
// Drop imports whose full configuration duplicates one already present
// (or one imported earlier in this same pass).
.filter(({ importedParams }) => {
const configKey = JSON.stringify({
temperature: importedParams.temperature,
topP: importedParams.topP,
topK: importedParams.topK,
minP: importedParams.minP,
repetitionPenalty: importedParams.repetitionPenalty,
presencePenalty: importedParams.presencePenalty,
maxSeqLength: importedParams.maxSeqLength,
maxTokens: importedParams.maxTokens,
systemPrompt: importedParams.systemPrompt,
trustRemoteCode: importedParams.trustRemoteCode ?? false,
});
if (seenImportedConfigKeys.has(configKey)) return false;
seenImportedConfigKeys.add(configKey);
return true;
})
// Final preset name is "<template name> Prompt", de-duplicated.
.map(({ template, importedParams }) => ({
name: getUniquePresetName(`${template.name} Prompt`, usedNames),
params: importedParams,
}));
if (importedPresets.length === 0) {
// Nothing survived the filters — clean up and mark migrated.
localStorage.removeItem(LEGACY_CHAT_SYSTEM_PROMPTS_KEY);
localStorage.setItem(LEGACY_CHAT_SYSTEM_PROMPTS_MIGRATED_KEY, raw);
return presets;
}
const mergedPresets = [...presets, ...importedPresets];
// Persist the merged list first; only then record the migration marker.
localStorage.setItem(CHAT_PRESETS_KEY, JSON.stringify(mergedPresets));
try {
localStorage.setItem(LEGACY_CHAT_SYSTEM_PROMPTS_MIGRATED_KEY, raw);
localStorage.removeItem(LEGACY_CHAT_SYSTEM_PROMPTS_KEY);
} catch {
// ignore cleanup failure after successful import write
}
return mergedPresets;
} catch {
return presets;
}
}
function loadSavedCustomPresets(): Preset[] {
if (!canUseStorage()) return [];
try {
const raw = localStorage.getItem(CHAT_PRESETS_KEY);
if (!raw) return [];
if (!raw) {
return migrateLegacySystemPromptTemplates([]);
}
const parsed = JSON.parse(raw) as unknown;
if (!Array.isArray(parsed)) return [];
return parsed
if (!Array.isArray(parsed)) {
return migrateLegacySystemPromptTemplates([]);
}
const presets = parsed
.filter((item): item is Preset => {
if (!item || typeof item !== "object") return false;
const maybe = item as Partial<Preset>;
@ -111,13 +250,10 @@ function loadSavedCustomPresets(): Preset[] {
...preset.params,
},
}))
.filter(
(preset) =>
preset.name.length > 0 &&
!BUILTIN_PRESETS.some((builtin) => builtin.name === preset.name),
);
.filter((preset) => preset.name.length > 0);
return migrateLegacySystemPromptTemplates(presets);
} catch {
return [];
return migrateLegacySystemPromptTemplates([]);
}
}
@ -130,6 +266,82 @@ function loadSavedActivePreset(): string {
}
}
// Outcome of resolving the preset-name input against existing presets:
// "disabled" — name is empty; "overwrite-active" — name matches the active
// preset; "overwrite-other" — name matches a different preset; "create" —
// name is new.
type PresetSaveMode =
| "disabled"
| "overwrite-active"
| "overwrite-other"
| "create";
interface PresetSaveState {
// Which save action the current name resolves to.
mode: PresetSaveMode;
// Whether submitting (Enter/click) is allowed at all.
canSubmit: boolean;
// NOTE(review): currently always equals canSubmit in getPresetSaveState;
// kept separate presumably for future divergence — confirm before merging.
isSaveReady: boolean;
// Label shown on the save button ("Save" / "Saved" / "Overwrite" / "Save as New").
buttonLabel: string;
// Tooltip/title text explaining the action or why it is unavailable.
title: string;
}
// True when two parameter sets would persist identically as a preset.
// Compares every saved knob field-by-field; fields not listed here (e.g.
// checkpoint) do not affect equality, and a missing trustRemoteCode flag is
// treated as false on either side.
function isSamePresetConfig(a: InferenceParams, b: InferenceParams): boolean {
  if (a.temperature !== b.temperature) return false;
  if (a.topP !== b.topP) return false;
  if (a.topK !== b.topK) return false;
  if (a.minP !== b.minP) return false;
  if (a.repetitionPenalty !== b.repetitionPenalty) return false;
  if (a.presencePenalty !== b.presencePenalty) return false;
  if (a.maxSeqLength !== b.maxSeqLength) return false;
  if (a.maxTokens !== b.maxTokens) return false;
  if (a.systemPrompt !== b.systemPrompt) return false;
  return (a.trustRemoteCode ?? false) === (b.trustRemoteCode ?? false);
}
// Resolves the preset-name input into the state of the save control:
// disabled for an empty name, create for a new name, or an overwrite mode
// when the trimmed name collides with an existing preset. Overwriting the
// active preset is only actionable while the current params differ from it.
function getPresetSaveState({
  rawName,
  activePreset,
  presets,
  activePresetDirty,
}: {
  rawName: string;
  activePreset: string;
  presets: Preset[];
  activePresetDirty: boolean;
}): PresetSaveState {
  const name = rawName.trim();

  // Empty name: nothing can be saved.
  if (name.length === 0) {
    return {
      mode: "disabled",
      canSubmit: false,
      isSaveReady: false,
      buttonLabel: "Save",
      title: "Enter a preset name",
    };
  }

  const existing = presets.find((preset) => preset.name === name);

  // No collision: saving creates a brand-new preset under this name.
  if (!existing) {
    return {
      mode: "create",
      canSubmit: true,
      isSaveReady: true,
      buttonLabel: "Save as New",
      title: `Save current settings as "${name}"`,
    };
  }

  // Collides with a preset other than the active one: plain overwrite.
  if (existing.name !== activePreset) {
    return {
      mode: "overwrite-other",
      canSubmit: true,
      isSaveReady: true,
      buttonLabel: "Overwrite",
      title: `Overwrite preset "${name}"`,
    };
  }

  // Collides with the active preset: only meaningful when params drifted.
  return {
    mode: "overwrite-active",
    canSubmit: activePresetDirty,
    isSaveReady: activePresetDirty,
    buttonLabel: activePresetDirty ? "Overwrite" : "Saved",
    title: activePresetDirty
      ? "Save current settings to this preset"
      : "No unsaved changes",
  };
}
function ParamSlider({
label,
value,
@ -286,6 +498,9 @@ export function ChatSettingsPanel({
(s) => s.loadedSpeculativeType,
);
const currentModels = useChatRuntimeStore((s) => s.models);
const modelRequiresTrustRemoteCode = useChatRuntimeStore(
(s) => s.modelRequiresTrustRemoteCode,
);
const currentCheckpoint = params.checkpoint;
const currentModelIsVision =
currentModels.find((m) => m.id === currentCheckpoint)?.isVision ?? false;
@ -316,13 +531,57 @@ export function ChatSettingsPanel({
const [activePreset, setActivePreset] = useState(() =>
loadSavedActivePreset(),
);
const [savePresetOpen, setSavePresetOpen] = useState(false);
const [presetNameDraft, setPresetNameDraft] = useState("");
const presets = useMemo(
() => [...BUILTIN_PRESETS, ...customPresets],
[customPresets],
const [presetNameInput, setPresetNameInput] = useState(() =>
loadSavedActivePreset(),
);
const isBuiltinPreset = BUILTIN_PRESETS.some((p) => p.name === activePreset);
const presetControlRowRef = useRef<HTMLDivElement>(null);
const [presetMenuWidthPx, setPresetMenuWidthPx] = useState<
number | undefined
>(undefined);
const [systemPromptEditorOpen, setSystemPromptEditorOpen] = useState(false);
const [systemPromptDraft, setSystemPromptDraft] = useState("");
const presets = useMemo(() => {
const overrides = new Set(customPresets.map((preset) => preset.name));
return [
...BUILTIN_PRESETS.filter((preset) => !overrides.has(preset.name)),
...customPresets,
];
}, [customPresets]);
const activePresetDefinition = useMemo(
() => presets.find((preset) => preset.name === activePreset) ?? null,
[activePreset, presets],
);
const activeCustomPreset = useMemo(
() => customPresets.find((preset) => preset.name === activePreset) ?? null,
[activePreset, customPresets],
);
const activeBuiltinPreset = useMemo(
() =>
BUILTIN_PRESETS.find((preset) => preset.name === activePreset) ?? null,
[activePreset],
);
const activePresetDirty = useMemo(
() =>
activePresetDefinition == null
? false
: !isSamePresetConfig(activePresetDefinition.params, params),
[activePresetDefinition, params],
);
const presetSaveState = useMemo(
() =>
getPresetSaveState({
rawName: presetNameInput,
activePreset,
presets,
activePresetDirty,
}),
[activePreset, activePresetDirty, presetNameInput, presets],
);
const systemPromptEditorDirty = systemPromptDraft !== params.systemPrompt;
const trustRemoteCodeMissing =
Boolean(currentCheckpoint) &&
modelRequiresTrustRemoteCode &&
!(params.trustRemoteCode ?? false);
function set<K extends keyof InferenceParams>(key: K) {
return (v: InferenceParams[K]) => onParamsChange({ ...params, [key]: v });
@ -331,11 +590,19 @@ export function ChatSettingsPanel({
function applyPreset(name: string) {
const p = presets.find((pr) => pr.name === name);
if (p) {
if (
modelRequiresTrustRemoteCode &&
!(p.params.trustRemoteCode ?? false)
) {
toast.warning("This configuration turns custom code off", {
description:
"The current model needs custom code enabled to load. Keep it on for this model.",
});
return;
}
onParamsChange({
...p.params,
systemPrompt: params.systemPrompt,
checkpoint: params.checkpoint,
trustRemoteCode: params.trustRemoteCode,
});
setActivePreset(name);
if (canUseStorage()) {
@ -348,32 +615,23 @@ export function ChatSettingsPanel({
}
}
function openSavePresetDialog() {
setPresetNameDraft(activePreset === "Default" ? "" : activePreset);
setSavePresetOpen(true);
}
function savePresetWithName(rawName: string) {
const trimmed = rawName.trim();
if (!trimmed) {
return;
}
if (BUILTIN_PRESETS.some((preset) => preset.name === trimmed)) {
toast.error("Enter a preset name");
return;
}
setCustomPresets((prev) => {
const next = [
...prev.filter((preset) => preset.name !== trimmed),
{ name: trimmed, params: { ...params } },
];
const next = prev.filter((p) => p.name !== trimmed);
const merged = [...next, { name: trimmed, params: { ...params } }];
if (canUseStorage()) {
try {
localStorage.setItem(CHAT_PRESETS_KEY, JSON.stringify(next));
localStorage.setItem(CHAT_PRESETS_KEY, JSON.stringify(merged));
} catch {
// ignore
}
}
return next;
return merged;
});
if (canUseStorage()) {
try {
@ -383,11 +641,31 @@ export function ChatSettingsPanel({
}
}
setActivePreset(trimmed);
setSavePresetOpen(false);
setPresetNameInput(trimmed);
}
function deletePreset(name: string) {
if (BUILTIN_PRESETS.some((p) => p.name === name)) {
const hasCustomPreset = customPresets.some(
(preset) => preset.name === name,
);
if (!hasCustomPreset) {
return;
}
const builtinPreset = BUILTIN_PRESETS.find((preset) => preset.name === name);
const fallbackPreset =
builtinPreset ??
BUILTIN_PRESETS.find((preset) => preset.name === "Default") ??
null;
if (
activePreset === name &&
fallbackPreset &&
modelRequiresTrustRemoteCode &&
!(fallbackPreset.params.trustRemoteCode ?? false)
) {
toast.warning("Reset would turn custom code off", {
description:
"The current model needs custom code enabled to load. Keep it on for this model.",
});
return;
}
setCustomPresets((prev) => {
@ -402,17 +680,33 @@ export function ChatSettingsPanel({
return next;
});
if (activePreset === name) {
setActivePreset("Default");
if (canUseStorage()) {
try {
localStorage.setItem(CHAT_ACTIVE_PRESET_KEY, "Default");
} catch {
// ignore
if (fallbackPreset) {
onParamsChange({
...fallbackPreset.params,
checkpoint: params.checkpoint,
});
setActivePreset(fallbackPreset.name);
if (canUseStorage()) {
try {
localStorage.setItem(CHAT_ACTIVE_PRESET_KEY, fallbackPreset.name);
} catch {
// ignore
}
}
}
}
}
function openSystemPromptEditor() {
setSystemPromptDraft(params.systemPrompt);
setSystemPromptEditorOpen(true);
}
function saveSystemPromptEditor() {
set("systemPrompt")(systemPromptDraft);
setSystemPromptEditorOpen(false);
}
useEffect(() => {
if (presets.some((preset) => preset.name === activePreset)) return;
setActivePreset("Default");
@ -425,6 +719,28 @@ export function ChatSettingsPanel({
}
}, [activePreset, presets]);
useEffect(() => {
setPresetNameInput(activePreset);
}, [activePreset]);
useEffect(() => {
if (!open) {
setSystemPromptEditorOpen(false);
}
}, [open]);
useLayoutEffect(() => {
const el = presetControlRowRef.current;
if (!el || !open) return;
const measure = () => {
setPresetMenuWidthPx(el.getBoundingClientRect().width);
};
measure();
const ro = new ResizeObserver(measure);
ro.observe(el);
return () => ro.disconnect();
}, [open]);
const settingsContent = (
<>
<div className="flex items-center gap-2 px-4 py-3">
@ -440,52 +756,138 @@ export function ChatSettingsPanel({
<div className="flex-1 overflow-y-auto px-1.5">
{/* mt-4 matches the Playground sidebar gap (SidebarHeader py-3 + SidebarGroup pt-1) */}
<div className="mt-4 px-2 pb-3">
<div className="flex items-center gap-2">
<Select value={activePreset} onValueChange={applyPreset}>
<SelectTrigger className="h-8 flex-1 corner-squircle text-xs">
<SelectValue />
</SelectTrigger>
<SelectContent>
{presets.map((p) => (
<SelectItem key={p.name} value={p.name}>
{p.name}
</SelectItem>
))}
</SelectContent>
</Select>
<button
type="button"
onClick={openSavePresetDialog}
className="flex h-8 items-center gap-1.5 rounded-md border px-2.5 text-xs text-muted-foreground transition-colors hover:bg-accent"
title="Save preset"
>
<HugeiconsIcon icon={FloppyDiskIcon} className="size-3.5" />
Save
</button>
<button
type="button"
onClick={() => deletePreset(activePreset)}
disabled={isBuiltinPreset}
className="flex h-8 items-center gap-1.5 rounded-md border px-2.5 text-xs text-muted-foreground transition-colors hover:bg-accent disabled:cursor-not-allowed disabled:opacity-50"
title={
isBuiltinPreset
? "Built-in presets cannot be deleted"
: "Delete selected preset"
}
>
<HugeiconsIcon icon={Delete02Icon} className="size-3.5" />
Delete
</button>
<div className="space-y-1.5">
<div ref={presetControlRowRef} className="w-full min-w-0">
<DropdownMenu>
<InputGroup className="!h-8 min-h-8 min-w-0 items-stretch gap-0 rounded-2xl pr-0 focus-within:border-input focus-within:ring-0 focus-within:shadow-none has-[[data-slot=input-group-control]:focus-visible]:border-input has-[[data-slot=input-group-control]:focus-visible]:ring-0 has-[[data-slot=input-group-control]:focus-visible]:shadow-none">
<InputGroupInput
id="inference-preset-name"
value={presetNameInput}
onChange={(e) => setPresetNameInput(e.target.value)}
onKeyDown={(e) => {
if (e.key === "Enter" && presetSaveState.canSubmit) {
e.preventDefault();
savePresetWithName(presetNameInput);
}
}}
placeholder="Preset name"
maxLength={80}
autoComplete="off"
className={cn(
"!h-8 min-h-0 min-w-0 self-stretch !pl-2.5 !pr-2 pt-1 pb-1 text-sm leading-10 md:text-sm",
presetSaveState.isSaveReady &&
"text-foreground placeholder:text-primary/45",
)}
aria-label="Inference preset name"
/>
<InputGroupAddon
align="inline-end"
className="min-h-0 shrink-0 gap-0 self-stretch border-0 py-0 pl-0 !pr-0 has-[>button]:mr-0"
>
<DropdownMenuTrigger asChild={true}>
<InputGroupButton
type="button"
variant="ghost"
size="icon-sm"
className="!h-8 min-h-8 !w-7 min-w-7 shrink-0 rounded-none rounded-r-2xl border-l border-border px-0 text-muted-foreground transition-colors hover:bg-primary/15 hover:text-primary data-[state=open]:bg-primary/20 data-[state=open]:text-primary"
title="Choose a preset"
aria-label="Open preset list"
>
<HugeiconsIcon
icon={ArrowDown01Icon}
className="size-3.5"
strokeWidth={2}
/>
</InputGroupButton>
</DropdownMenuTrigger>
</InputGroupAddon>
</InputGroup>
<DropdownMenuContent
align="end"
className="min-w-40 max-w-none"
style={
presetMenuWidthPx != null
? {
width: presetMenuWidthPx,
minWidth: presetMenuWidthPx,
}
: undefined
}
>
{presets.map((p) => (
<DropdownMenuItem
key={p.name}
onSelect={() => applyPreset(p.name)}
>
{p.name}
</DropdownMenuItem>
))}
</DropdownMenuContent>
</DropdownMenu>
</div>
<div className="grid grid-cols-2 gap-1.5">
<Button
type="button"
onClick={() => savePresetWithName(presetNameInput)}
disabled={!presetSaveState.canSubmit}
variant={presetSaveState.isSaveReady ? "default" : "outline"}
size="sm"
className={cn(
"h-8 w-full text-xs",
presetSaveState.isSaveReady &&
"bg-primary/92 text-primary-foreground hover:bg-primary",
)}
title={presetSaveState.title}
aria-label={presetSaveState.title}
>
<span className="inline-flex shrink-0 items-center pr-1.5">
<HugeiconsIcon icon={FloppyDiskIcon} className="size-3.5" />
</span>
{presetSaveState.buttonLabel}
</Button>
<Button
type="button"
onClick={() => deletePreset(activePreset)}
disabled={!activeCustomPreset}
variant="outline"
size="sm"
className="h-8 w-full text-xs text-muted-foreground"
title={
activeCustomPreset
? activeBuiltinPreset
? "Reset selected preset to built-in defaults"
: "Delete selected preset"
: "No saved override to delete"
}
>
<span className="inline-flex shrink-0 items-center pr-1.5">
<HugeiconsIcon icon={Delete02Icon} className="size-3.5" />
</span>
Delete
</Button>
</div>
</div>
</div>
<div className="px-2 pb-4">
<label
htmlFor="system-prompt"
className="mb-1.5 block text-xs font-medium"
>
System Prompt
</label>
<div className="mb-1.5 flex items-center justify-between gap-2">
<label
htmlFor="system-prompt"
className="block text-xs font-medium"
>
System Prompt
</label>
<Button
type="button"
variant="outline"
size="sm"
className="h-6 px-2 text-[11px]"
onClick={openSystemPromptEditor}
title="Open the full system prompt editor"
>
Edit
</Button>
</div>
<Textarea
id="system-prompt"
value={params.systemPrompt}
@ -561,37 +963,41 @@ export function ChatSettingsPanel({
typeof ctxDisplayValue === "number" &&
ctxDisplayValue > ggufMaxContextLength && (
<p className="text-[11px] text-amber-500">
Exceeds estimated VRAM capacity ({ggufMaxContextLength.toLocaleString()} tokens). The model may use system RAM.
Exceeds estimated VRAM capacity (
{ggufMaxContextLength.toLocaleString()} tokens). The
model may use system RAM.
</p>
)}
</div>
<div className="flex items-center justify-between gap-3">
<div className="grid grid-cols-[minmax(0,1fr)_65px] items-center gap-x-3">
<div className="min-w-0">
<div className="text-xs font-medium">KV Cache Dtype</div>
<div className="text-[11px] text-muted-foreground">
Quantize KV cache to reduce VRAM.
</div>
</div>
<Select
value={kvCacheDtype ?? "f16"}
onValueChange={(v) => {
setKvCacheDtype(v === "f16" ? null : v);
}}
>
<SelectTrigger className="h-7 w-[90px] text-xs">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value="f16">f16</SelectItem>
<SelectItem value="bf16">bf16</SelectItem>
<SelectItem value="q8_0">q8_0</SelectItem>
<SelectItem value="q5_1">q5_1</SelectItem>
<SelectItem value="q4_1">q4_1</SelectItem>
</SelectContent>
</Select>
<div className="w-full min-w-0">
<Select
value={kvCacheDtype ?? "f16"}
onValueChange={(v) => {
setKvCacheDtype(v === "f16" ? null : v);
}}
>
<SelectTrigger className="grid h-7 w-full min-w-0 grid-cols-[minmax(0,1fr)_auto] items-center gap-1 px-2 py-0 text-xs [&_[data-slot=select-value]]:min-w-0 [&_[data-slot=select-value]]:truncate [&>svg]:shrink-0">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value="f16">f16</SelectItem>
<SelectItem value="bf16">bf16</SelectItem>
<SelectItem value="q8_0">q8_0</SelectItem>
<SelectItem value="q5_1">q5_1</SelectItem>
<SelectItem value="q4_1">q4_1</SelectItem>
</SelectContent>
</Select>
</div>
</div>
{!currentModelIsVision && (
<div className="flex items-center justify-between gap-3">
<div className="grid grid-cols-[minmax(0,1fr)_65px] items-center gap-x-3">
<div className="min-w-0">
<div className="text-xs font-medium">
Speculative Decoding
@ -600,20 +1006,22 @@ export function ChatSettingsPanel({
Speed up generation with no VRAM cost.
</div>
</div>
<Select
value={speculativeType ?? "off"}
onValueChange={(v) => {
setSpeculativeType(v === "off" ? null : v);
}}
>
<SelectTrigger className="h-7 w-[120px] text-xs">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value="ngram-mod">On</SelectItem>
<SelectItem value="off">Off</SelectItem>
</SelectContent>
</Select>
<div className="w-full min-w-0">
<Select
value={speculativeType ?? "off"}
onValueChange={(v) => {
setSpeculativeType(v === "off" ? null : v);
}}
>
<SelectTrigger className="grid h-7 w-full min-w-0 grid-cols-[minmax(0,1fr)_auto] items-center gap-1 px-2 py-0 text-xs [&_[data-slot=select-value]]:min-w-0 [&_[data-slot=select-value]]:truncate [&>svg]:shrink-0">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value="ngram-mod">On</SelectItem>
<SelectItem value="off">Off</SelectItem>
</SelectContent>
</Select>
</div>
</div>
)}
{modelSettingsDirty && (
@ -641,19 +1049,33 @@ export function ChatSettingsPanel({
</>
)}
{!isGguf && params.checkpoint && (
<div className="flex items-center justify-between gap-3">
<div className="min-w-0">
<div className="text-xs font-medium">Enable custom code</div>
<div className="text-[11px] text-muted-foreground">
Allow models with custom code (e.g. Nemotron). Only enable
if sure.
<>
<div className="flex items-center justify-between gap-3">
<div className="min-w-0">
<div className="text-xs font-medium">Enable custom code</div>
<div className="text-[11px] text-muted-foreground">
Allow models with custom code (e.g. Nemotron). Only
enable if sure.
</div>
</div>
<Switch
checked={params.trustRemoteCode ?? false}
onCheckedChange={set("trustRemoteCode")}
/>
</div>
<Switch
checked={params.trustRemoteCode ?? false}
onCheckedChange={set("trustRemoteCode")}
/>
</div>
{trustRemoteCodeMissing && (
<Alert className="border-amber-200/70 bg-amber-50/70 px-3 py-2 text-amber-950 dark:border-amber-900/70 dark:bg-amber-950/35 dark:text-amber-100">
<AlertTitle className="text-[11px] font-medium">
Keep custom code enabled for this model
</AlertTitle>
<AlertDescription className="text-[11px] text-amber-800 dark:text-amber-200">
This model requires custom code to load. You can edit the
toggle, but loading will stay blocked until it is turned
back on.
</AlertDescription>
</Alert>
)}
</>
)}
</div>
</CollapsibleSection>
@ -774,51 +1196,57 @@ export function ChatSettingsPanel({
<ChatTemplateSection onReloadModel={onReloadModel} />
</div>
<Dialog
open={savePresetOpen}
open={systemPromptEditorOpen}
onOpenChange={(nextOpen) => {
setSavePresetOpen(nextOpen);
if (!nextOpen) {
setPresetNameDraft("");
}
setSystemPromptEditorOpen(nextOpen);
}}
>
<DialogContent className="corner-squircle sm:max-w-sm">
<DialogContent
className="corner-squircle border border-border/60 bg-background/98 shadow-none sm:max-w-3xl"
overlayClassName="bg-background/35 supports-backdrop-filter:backdrop-blur-[1px]"
>
<DialogHeader>
<DialogTitle>Save Preset</DialogTitle>
<DialogTitle>Edit System Prompt</DialogTitle>
<DialogDescription>
Enter a name for this inference preset.
This prompt is part of the current configuration and saves with
the preset.
</DialogDescription>
</DialogHeader>
<form
onSubmit={(event) => {
event.preventDefault();
savePresetWithName(presetNameDraft);
}}
className="space-y-4"
>
<Input
autoFocus={true}
value={presetNameDraft}
onChange={(event) => setPresetNameDraft(event.target.value)}
placeholder="Preset name"
maxLength={80}
<div className="space-y-2">
<div className="space-y-0.5 px-0.5">
<div className="text-[11px] font-medium">Prompt editor</div>
<p className="text-[11px] text-muted-foreground">
Use this for longer edits. Save writes back to the active
configuration only.
</p>
</div>
<Textarea
value={systemPromptDraft}
onChange={(event) => setSystemPromptDraft(event.target.value)}
placeholder="You are a helpful assistant..."
className="min-h-[24rem] text-sm leading-6 corner-squircle"
rows={14}
/>
<DialogFooter>
<Button
type="button"
variant="outline"
onClick={() => setSavePresetOpen(false)}
>
Cancel
</Button>
<Button
type="submit"
disabled={presetNameDraft.trim().length === 0}
>
Save
</Button>
</DialogFooter>
</form>
</div>
<DialogFooter className="flex-wrap gap-2 sm:justify-between">
<Button
type="button"
variant="ghost"
onClick={() => {
setSystemPromptDraft(params.systemPrompt);
setSystemPromptEditorOpen(false);
}}
>
Cancel
</Button>
<Button
type="button"
onClick={saveSystemPromptEditor}
disabled={!systemPromptEditorDirty}
>
Save
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
</>
@ -963,7 +1391,7 @@ function ChatTemplateSection({
<Textarea
value={displayValue}
onChange={(e) => setOverride(e.target.value)}
className="min-h-32 font-mono text-[10px] leading-relaxed corner-squircle"
className="min-h-32 font-mono text-[10px] leading-relaxed md:text-[10px] corner-squircle"
rows={6}
spellCheck={false}
/>

View file

@ -15,7 +15,7 @@ import {
validateModel,
} from "../api/chat-api";
import { useChatRuntimeStore } from "../stores/chat-runtime-store";
import type { LoadModelResponse } from "../types/api";
import type { InferenceStatusResponse, LoadModelResponse } from "../types/api";
import type {
ChatLoraSummary,
ChatModelSummary,
@ -124,9 +124,13 @@ function toFiniteNumber(value: unknown): number | undefined {
return value;
}
// Build the user-facing error shown when a model needs trust_remote_code
// but the "Enable custom code" toggle is off.
function getTrustRemoteCodeRequiredMessage(modelName: string): string {
  const instructions =
    'needs custom code enabled to load. Turn on "Enable custom code" in Chat Settings, then try again.';
  return `${modelName} ${instructions}`;
}
function mergeRecommendedInference(
current: InferenceParams,
response: LoadModelResponse,
response: LoadModelResponse | InferenceStatusResponse,
modelId: string,
): InferenceParams {
const inference = response.inference;
@ -233,7 +237,7 @@ export function useChatModelRuntime() {
if (statusRes.inference) {
const currentParams = useChatRuntimeStore.getState().params;
setParams(
mergeRecommendedInference(currentParams, statusRes as any, statusRes.active_model),
mergeRecommendedInference(currentParams, statusRes, statusRes.active_model),
);
}
@ -258,6 +262,8 @@ export function useChatModelRuntime() {
ggufContextLength: currentGgufContextLength,
ggufMaxContextLength,
ggufNativeContextLength,
modelRequiresTrustRemoteCode:
statusRes.requires_trust_remote_code ?? false,
speculativeType: currentSpecType,
loadedSpeculativeType: currentSpecType,
});
@ -274,6 +280,10 @@ export function useChatModelRuntime() {
}
useChatRuntimeStore.getState().setReasoningEnabled(reasoningDefault);
}
} else {
useChatRuntimeStore.setState({
modelRequiresTrustRemoteCode: false,
});
}
} catch (error) {
const message =
@ -347,7 +357,7 @@ export function useChatModelRuntime() {
const previousIsLora =
previousModel?.isLora ?? (previousLora ? true : false);
// Covers Unix absolute (/), relative (./ ../), tilde (~/), Windows drive (C:\), UNC (\\server)
const isLocal = /^(\/|\.{1,2}[\\\/]|~[\\\/]|[A-Za-z]:[\\\/]|\\\\)/.test(modelId);
const isLocal = /^(\/|\.{1,2}[\\/]|~[\\/]|[A-Za-z]:[\\/]|\\\\)/.test(modelId);
const isCachedLora = isLora && isLocal;
const loadingDescription = [
currentCheckpoint ? "Switching models." : null,
@ -377,12 +387,15 @@ export function useChatModelRuntime() {
const currentCheckpoint =
useChatRuntimeStore.getState().params.checkpoint;
const paramsBeforeLoad = useChatRuntimeStore.getState().params;
const trustRemoteCode = paramsBeforeLoad.trustRemoteCode ?? false;
const maxSeqLength = paramsBeforeLoad.maxSeqLength;
const hfToken = useChatRuntimeStore.getState().hfToken || null;
const previousModelRequiresTrustRemoteCode =
useChatRuntimeStore.getState().modelRequiresTrustRemoteCode;
try {
// Lightweight pre-flight validation: avoid unloading a working model
// if the new identifier is clearly invalid (e.g. bad HF id / path).
await validateModel({
const validation = await validateModel({
model_path: modelId,
hf_token: hfToken,
max_seq_length: maxSeqLength,
@ -390,6 +403,9 @@ export function useChatModelRuntime() {
is_lora: isLora,
gguf_variant: ggufVariant ?? null,
});
if (validation.requires_trust_remote_code && !trustRemoteCode) {
throw new Error(getTrustRemoteCodeRequiredMessage(displayName));
}
if (currentCheckpoint) {
await unloadModel({ model_path: currentCheckpoint });
@ -409,7 +425,7 @@ export function useChatModelRuntime() {
load_in_4bit: true,
is_lora: isLora,
gguf_variant: ggufVariant ?? null,
trust_remote_code: paramsBeforeLoad.trustRemoteCode ?? false,
trust_remote_code: trustRemoteCode,
chat_template_override: chatTemplateOverride,
cache_type_kv: kvCacheDtype,
speculative_type: speculativeType,
@ -454,6 +470,8 @@ export function useChatModelRuntime() {
ggufContextLength: nativeCtx,
ggufMaxContextLength,
ggufNativeContextLength: reportedNativeCtx,
modelRequiresTrustRemoteCode:
loadResponse.requires_trust_remote_code ?? false,
supportsReasoning: loadResponse.supports_reasoning ?? false,
reasoningAlwaysOn,
reasoningEnabled: reasoningAlwaysOn ? true : reasoningDefault,
@ -490,6 +508,8 @@ export function useChatModelRuntime() {
load_in_4bit: true,
is_lora: previousIsLora,
gguf_variant: previousVariant,
trust_remote_code:
previousModelRequiresTrustRemoteCode || trustRemoteCode,
});
await refresh();
} catch {

View file

@ -331,6 +331,9 @@ function fallbackTitleFromUserText(userText: string): string {
}
function cloneContent(content: ThreadMessage["content"]): ThreadMessage["content"] {
if (typeof content === "string") {
return content;
}
return Array.isArray(content)
? JSON.parse(JSON.stringify(content))
: [];

View file

@ -8,7 +8,7 @@ import { useAui } from "@assistant-ui/react";
import { cn } from "@/lib/utils";
import { ArrowUpIcon, GlobeIcon, HeadphonesIcon, LightbulbIcon, LightbulbOffIcon, MicIcon, PlusIcon, SquareIcon, TerminalIcon, XIcon } from "lucide-react";
import { toast } from "sonner";
import { loadModel } from "./api/chat-api";
import { loadModel, validateModel } from "./api/chat-api";
import { useChatRuntimeStore } from "./stores/chat-runtime-store";
import {
type KeyboardEvent,
@ -336,6 +336,27 @@ export function SharedComposer({
// Helper: load a model and update store checkpoint
async function ensureModelLoaded(sel: CompareModelSelection): Promise<string> {
const currentStore = useChatRuntimeStore.getState();
const isAlreadyActive =
currentStore.params.checkpoint === sel.id &&
(currentStore.activeGgufVariant ?? null) === (sel.ggufVariant ?? null);
if (!isAlreadyActive) {
const validation = await validateModel({
model_path: sel.id,
hf_token: currentStore.hfToken || null,
max_seq_length: maxSeqLength,
load_in_4bit: true,
is_lora: sel.isLora,
gguf_variant: sel.ggufVariant ?? null,
trust_remote_code: trustRemoteCode,
chat_template_override: chatTemplateOverride,
});
if (validation.requires_trust_remote_code && !trustRemoteCode) {
throw new Error(
`${modelDisplayName(sel.id)} needs custom code enabled to load. Turn on "Enable custom code" in Chat Settings, then try again.`,
);
}
}
const resp = await loadModel({
model_path: sel.id,
hf_token: useChatRuntimeStore.getState().hfToken || null,
@ -346,10 +367,14 @@ export function SharedComposer({
trust_remote_code: trustRemoteCode,
chat_template_override: chatTemplateOverride,
});
useChatRuntimeStore.getState().setCheckpoint(
const store = useChatRuntimeStore.getState();
store.setCheckpoint(
resp.model,
resp.is_gguf ? (sel.ggufVariant ?? undefined) : null,
);
store.setModelRequiresTrustRemoteCode(
resp.requires_trust_remote_code ?? false,
);
return resp.status;
}

View file

@ -132,7 +132,8 @@ function loadInferenceParams(): InferenceParams {
function saveInferenceParams(params: InferenceParams): boolean {
if (!canUseStorage()) return false;
try {
const { checkpoint: _, ...rest } = params;
const { checkpoint, ...rest } = params;
void checkpoint;
localStorage.setItem(INFERENCE_PARAMS_KEY, JSON.stringify(rest));
return true;
} catch {
@ -152,6 +153,7 @@ type ChatRuntimeStore = {
ggufContextLength: number | null;
ggufMaxContextLength: number | null;
ggufNativeContextLength: number | null;
modelRequiresTrustRemoteCode: boolean;
supportsReasoning: boolean;
reasoningAlwaysOn: boolean;
reasoningEnabled: boolean;
@ -181,6 +183,7 @@ type ChatRuntimeStore = {
} | null;
modelLoading: boolean;
setModelLoading: (loading: boolean) => void;
setModelRequiresTrustRemoteCode: (required: boolean) => void;
setParams: (params: InferenceParams) => void;
setModels: (models: ChatModelSummary[]) => void;
setLoras: (loras: ChatLoraSummary[]) => void;
@ -220,6 +223,7 @@ export const useChatRuntimeStore = create<ChatRuntimeStore>((set) => ({
ggufContextLength: null,
ggufMaxContextLength: null,
ggufNativeContextLength: null,
modelRequiresTrustRemoteCode: false,
supportsReasoning: false,
reasoningAlwaysOn: false,
reasoningEnabled: true,
@ -244,6 +248,8 @@ export const useChatRuntimeStore = create<ChatRuntimeStore>((set) => ({
contextUsage: null,
modelLoading: false,
setModelLoading: (loading) => set({ modelLoading: loading }),
setModelRequiresTrustRemoteCode: (modelRequiresTrustRemoteCode) =>
set({ modelRequiresTrustRemoteCode }),
setParams: (params) =>
set(() => {
const persisted = saveInferenceParams(params);
@ -298,6 +304,7 @@ export const useChatRuntimeStore = create<ChatRuntimeStore>((set) => ({
ggufContextLength: null,
ggufMaxContextLength: null,
ggufNativeContextLength: null,
modelRequiresTrustRemoteCode: false,
contextUsage: null,
supportsReasoning: false,
reasoningEnabled: true,

View file

@ -52,6 +52,7 @@ export interface ValidateModelResponse {
is_gguf?: boolean;
is_lora?: boolean;
is_vision?: boolean;
requires_trust_remote_code?: boolean;
}
export interface GgufVariantDetail {
@ -86,6 +87,7 @@ export interface LoadModelResponse {
presence_penalty?: number;
trust_remote_code?: boolean;
};
requires_trust_remote_code?: boolean;
context_length?: number | null;
max_context_length?: number | null;
native_context_length?: number | null;
@ -119,6 +121,7 @@ export interface InferenceStatusResponse {
presence_penalty?: number;
trust_remote_code?: boolean;
};
requires_trust_remote_code?: boolean;
supports_reasoning?: boolean;
reasoning_always_on?: boolean;
supports_tools?: boolean;

View file

@ -0,0 +1,119 @@
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
import type {
CompleteAttachment,
ExportedMessageRepository,
ThreadMessage,
} from "@assistant-ui/react";
/**
* assistant-ui does not expose a public `deleteMessage` on `ThreadRuntime` / `MessageRuntime`
* in our version, but it already implements branch-safe deletion inside `MessageRepository`.
* We import that helper from an **internal** package path (`runtime/utils/message-repository`).
*
* **Maintainability:** treat this file as the only place that imports `MessageRepository` from
* `@assistant-ui/core`. When bumping `@assistant-ui/react` / `@assistant-ui/core`, re-run chat
* delete + reload smoke tests; the path or API may change without a semver signal on public
* surface area.
*/
import { MessageRepository } from "@assistant-ui/core/runtime/utils/message-repository";
import { db } from "@/features/chat/db";
import type { MessageRecord } from "@/features/chat/types";
/**
 * Deep-copy message content. String content is immutable and returned as-is;
 * array content is cloned via a JSON round-trip (content parts here are
 * JSON-safe plain objects). Anything else collapses to an empty part list.
 */
function cloneContent(content: ThreadMessage["content"]): ThreadMessage["content"] {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return [];
  }
  return JSON.parse(JSON.stringify(content));
}
/**
 * Deep-copy a message's attachments via a JSON round-trip; non-array input
 * (typically `undefined`) becomes an empty list.
 */
function cloneAttachments(
  attachments: readonly CompleteAttachment[] | undefined,
): readonly CompleteAttachment[] {
  return Array.isArray(attachments)
    ? JSON.parse(JSON.stringify(attachments))
    : [];
}
/**
 * Convert one exported repository entry into the Dexie `MessageRecord` shape.
 * Optional fields (attachments / metadata) are included only when non-empty so
 * stored rows stay compact and round-trip cleanly.
 */
function exportedItemToRecord(
  threadId: string,
  parentId: string | null,
  message: ThreadMessage,
): MessageRecord {
  const content = cloneContent(message.content);
  const normalizedParentId = parentId ?? null;
  // Fall back to "now" when the exported message carries no timestamp.
  const createdAt = message.createdAt?.getTime?.() ?? Date.now();

  if (message.role === "user") {
    const attachments = cloneAttachments(message.attachments);
    const custom = message.metadata?.custom;
    const hasCustom = custom && Object.keys(custom).length > 0;
    return {
      id: message.id,
      threadId,
      parentId: normalizedParentId,
      role: "user",
      content: content as Extract<ThreadMessage, { role: "user" }>["content"],
      ...(attachments.length > 0 && { attachments }),
      ...(hasCustom && { metadata: custom }),
      createdAt,
    };
  }

  const custom = (message.metadata?.custom ?? {}) as Record<string, unknown>;
  return {
    id: message.id,
    threadId,
    parentId: normalizedParentId,
    role: "assistant",
    content: content as Extract<ThreadMessage, { role: "assistant" }>["content"],
    ...(Object.keys(custom).length > 0 && { metadata: custom }),
    createdAt,
  };
}
/**
 * Persist exactly the message list represented by `exp` for this thread:
 * upsert every exported row and delete Dexie rows no longer present (e.g.
 * after a delete). Runs as one read-write transaction on `db.messages`.
 */
async function syncExportedRepositoryToDexie(
  remoteId: string,
  exp: ExportedMessageRepository,
): Promise<void> {
  await db.transaction("rw", db.messages, async () => {
    const retainedIds = new Set(exp.messages.map((entry) => entry.message.id));
    const existingRows = await db.messages
      .where("threadId")
      .equals(remoteId)
      .toArray();
    // Collect ids stored for this thread that the export no longer contains.
    const staleIds: string[] = [];
    for (const row of existingRows) {
      if (!retainedIds.has(row.id)) {
        staleIds.push(row.id);
      }
    }
    if (staleIds.length > 0) {
      await db.messages.bulkDelete(staleIds);
    }
    await db.messages.bulkPut(
      exp.messages.map(({ message, parentId }) =>
        exportedItemToRecord(remoteId, parentId, message),
      ),
    );
  });
}
// Minimal slice of assistant-ui's thread runtime this module depends on:
// snapshot the message repository, and restore a (possibly edited) snapshot.
type ThreadImportExport = {
  export: () => ExportedMessageRepository;
  import: (data: ExportedMessageRepository) => void;
};
/**
 * Delete `messageId` from the live thread (branch-safely, via assistant-ui's
 * internal MessageRepository) and mirror the resulting message set to
 * IndexedDB when the thread has a persisted `remoteId`.
 */
export async function deleteThreadMessage(args: {
  thread: ThreadImportExport;
  messageId: string;
  remoteId: string | undefined;
}): Promise<void> {
  const { thread, messageId, remoteId } = args;
  // Round-trip through MessageRepository so branch bookkeeping stays intact.
  const repo = new MessageRepository();
  repo.import(thread.export());
  repo.deleteMessage(messageId);
  const next = repo.export();
  // Persist first; only swap the in-memory thread once Dexie is in sync.
  if (remoteId) {
    await syncExportedRepositoryToDexie(remoteId, next);
  }
  thread.import(next);
}

View file

@ -173,6 +173,7 @@
--color-chart-3: var(--chart-3);
--color-chart-2: var(--chart-2);
--color-chart-1: var(--chart-1);
--color-code-block: #181818;
--color-ring: var(--ring);
--color-input: var(--input);
--color-border: var(--border);
@ -395,6 +396,13 @@
width: calc(100% + 1.25rem);
max-width: calc(100% + 1.25rem);
}
.dark .aui-thread-root [data-streamdown="code-block"] {
/* Streamdown `pre` uses `dark:bg-[var(--shiki-dark-bg,...)]`; keep one surface on the outer shell. */
--shiki-dark-bg: transparent;
background: var(--color-code-block);
border: 1px solid oklch(1 0 0 / 0.07);
}
}
/* Minimal scrollbar — thumb only, no track */