maxOutputTokens -> reservedOutputTokenSpace

This commit is contained in:
Andrew Pareles 2025-05-04 19:52:23 -07:00
parent 25c12b101f
commit 4dae90e047
5 changed files with 121 additions and 121 deletions

View file

@ -6,7 +6,7 @@ import { createDecorator } from '../../../../platform/instantiation/common/insta
import { IWorkspaceContextService } from '../../../../platform/workspace/common/workspace.js';
import { IEditorService } from '../../../services/editor/common/editorService.js';
import { ChatMessage } from '../common/chatThreadServiceTypes.js';
import { getIsReasoningEnabledState, getMaxOutputTokens, getModelCapabilities } from '../common/modelCapabilities.js';
import { getIsReasoningEnabledState, getReservedOutputTokenSpace, getModelCapabilities } from '../common/modelCapabilities.js';
import { reParsedToolXMLString, chat_systemMessage, ToolName } from '../common/prompt/prompts.js';
import { AnthropicLLMChatMessage, AnthropicReasoning, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, OpenAILLMChatMessage, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
import { IVoidSettingsService } from '../common/voidSettingsService.js';
@ -259,7 +259,7 @@ const prepareOpenAIOrAnthropicMessages = ({
specialToolFormat,
supportsAnthropicReasoning,
contextWindow,
maxOutputTokens,
reservedOutputTokenSpace,
}: {
messages: SimpleLLMMessage[],
systemMessage: string,
@ -268,10 +268,10 @@ const prepareOpenAIOrAnthropicMessages = ({
specialToolFormat: 'openai-style' | 'anthropic-style' | undefined,
supportsAnthropicReasoning: boolean,
contextWindow: number,
maxOutputTokens: number | null | undefined,
reservedOutputTokenSpace: number | null | undefined,
}): { messages: AnthropicOrOpenAILLMMessage[], separateSystemMessage: string | undefined } => {
maxOutputTokens = maxOutputTokens ?? 4_096 // default to 4096
reservedOutputTokenSpace = reservedOutputTokenSpace ?? 4_096 // default to 4096
let messages: (SimpleLLMMessage | { role: 'system', content: string })[] = deepClone(messages_)
// ================ system message ================
@ -336,7 +336,7 @@ const prepareOpenAIOrAnthropicMessages = ({
let totalLen = 0
for (const m of messages) { totalLen += m.content.length }
const charsNeedToTrim = totalLen - Math.max(
(contextWindow - maxOutputTokens) * CHARS_PER_TOKEN, // can be 0, in which case charsNeedToTrim=everything, bad
(contextWindow - reservedOutputTokenSpace) * CHARS_PER_TOKEN, // can be 0, in which case charsNeedToTrim=everything, bad
4_096 // floor: never let the keep-budget drop below 4096 chars (an arbitrary small value)
)
@ -494,7 +494,7 @@ const prepareMessages = (params: {
specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined,
supportsAnthropicReasoning: boolean,
contextWindow: number,
maxOutputTokens: number | null | undefined,
reservedOutputTokenSpace: number | null | undefined,
providerName: ProviderName
}): { messages: LLMChatMessage[], separateSystemMessage: string | undefined } => {
@ -647,7 +647,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
const aiInstructions = this._getCombinedAIInstructions();
const isReasoningEnabled = getIsReasoningEnabledState(featureName, providerName, modelName, modelSelectionOptions, overridesOfModel)
const maxOutputTokens = getMaxOutputTokens(providerName, modelName, { isReasoningEnabled, overridesOfModel })
const reservedOutputTokenSpace = getReservedOutputTokenSpace(providerName, modelName, { isReasoningEnabled, overridesOfModel })
const { messages, separateSystemMessage } = prepareMessages({
messages: simpleMessages,
@ -657,7 +657,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
specialToolFormat,
supportsAnthropicReasoning: providerName === 'anthropic',
contextWindow,
maxOutputTokens,
reservedOutputTokenSpace,
providerName,
})
return { messages, separateSystemMessage };
@ -681,7 +681,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
const aiInstructions = this._getCombinedAIInstructions();
const isReasoningEnabled = getIsReasoningEnabledState('Chat', providerName, modelName, modelSelectionOptions, overridesOfModel)
const maxOutputTokens = getMaxOutputTokens(providerName, modelName, { isReasoningEnabled, overridesOfModel })
const reservedOutputTokenSpace = getReservedOutputTokenSpace(providerName, modelName, { isReasoningEnabled, overridesOfModel })
const llmMessages = this._chatMessagesToSimpleMessages(chatMessages)
const { messages, separateSystemMessage } = prepareMessages({
@ -692,7 +692,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
specialToolFormat,
supportsAnthropicReasoning: providerName === 'anthropic',
contextWindow,
maxOutputTokens,
reservedOutputTokenSpace,
providerName,
})
return { messages, separateSystemMessage };

View file

@ -364,7 +364,7 @@ const TableOfModelsForProvider = ({ providerName }: { providerName: ProviderName
contextWindow,
isUnrecognizedModel,
maxOutputTokens,
reservedOutputTokenSpace,
supportsSystemMessage,
} = capabilities

View file

@ -329,24 +329,24 @@ const ModelSettingsDialog = ({
// Initialize form state for all potential override options
const [formValues, setFormValues] = useState<{
contextWindow: string;
maxOutputTokens: string;
reservedOutputTokenSpace: string;
specialToolFormat: 'openai-style' | 'gemini-style' | 'anthropic-style' | undefined | '';
supportsSystemMessage: 'system-role' | 'developer-role' | 'separated' | false | '';
supportsFIM: boolean | null;
reasoningCapabilities: boolean | null;
canTurnOffReasoning: boolean;
reasoningMaxOutputTokens: string;
reasoningReservedOutputTokenSpace: string;
openSourceThinkTags: [string, string] | null;
}>({
// start form as default values
contextWindow: '',
maxOutputTokens: '',
reservedOutputTokenSpace: '',
specialToolFormat: '',
supportsSystemMessage: '',
supportsFIM: null,
reasoningCapabilities: null,
canTurnOffReasoning: false,
reasoningMaxOutputTokens: '',
reasoningReservedOutputTokenSpace: '',
openSourceThinkTags: null,
});
@ -370,15 +370,15 @@ const ModelSettingsDialog = ({
// to indicate default values should be used
setFormValues({
contextWindow: overrides.contextWindow !== undefined ? String(overrides.contextWindow) : '',
maxOutputTokens: overrides.maxOutputTokens !== undefined ? String(overrides.maxOutputTokens) : '',
reservedOutputTokenSpace: overrides.reservedOutputTokenSpace !== undefined ? String(overrides.reservedOutputTokenSpace) : '',
specialToolFormat: overrides.specialToolFormat !== undefined ? overrides.specialToolFormat : '',
supportsSystemMessage: overrides.supportsSystemMessage !== undefined ? overrides.supportsSystemMessage : '',
supportsFIM: overrides.supportsFIM !== undefined ? overrides.supportsFIM : null,
reasoningCapabilities: overrides.reasoningCapabilities !== undefined ?
!!overrides.reasoningCapabilities : null,
canTurnOffReasoning: typeof reasoningCapabilities === 'object' ? !!reasoningCapabilities.canTurnOffReasoning : false,
reasoningMaxOutputTokens: typeof reasoningCapabilities === 'object' && reasoningCapabilities.reasoningMaxOutputTokens ?
String(reasoningCapabilities.reasoningMaxOutputTokens) : '',
reasoningReservedOutputTokenSpace: typeof reasoningCapabilities === 'object' && reasoningCapabilities.reasoningReservedOutputTokenSpace ?
String(reasoningCapabilities.reasoningReservedOutputTokenSpace) : '',
openSourceThinkTags: thinkTags,
});
}
@ -406,11 +406,11 @@ const ModelSettingsDialog = ({
if (!isNaN(tokens)) newSettings.contextWindow = tokens;
}
if (formValues.maxOutputTokens.trim() === '') {
newSettings.maxOutputTokens = defaultModelCapabilities.maxOutputTokens;
} else if (formValues.maxOutputTokens) {
const tokens = parseInt(formValues.maxOutputTokens);
if (!isNaN(tokens)) newSettings.maxOutputTokens = tokens;
if (formValues.reservedOutputTokenSpace.trim() === '') {
newSettings.reservedOutputTokenSpace = defaultModelCapabilities.reservedOutputTokenSpace;
} else if (formValues.reservedOutputTokenSpace) {
const tokens = parseInt(formValues.reservedOutputTokenSpace);
if (!isNaN(tokens)) newSettings.reservedOutputTokenSpace = tokens;
}
// Handle dropdown fields
@ -442,8 +442,8 @@ const ModelSettingsDialog = ({
};
// Only add these if they have values
if (formValues.reasoningMaxOutputTokens) {
reasoningSettings.reasoningMaxOutputTokens = parseInt(formValues.reasoningMaxOutputTokens);
if (formValues.reasoningReservedOutputTokenSpace) {
reasoningSettings.reasoningReservedOutputTokenSpace = parseInt(formValues.reasoningReservedOutputTokenSpace);
}
if (formValues.openSourceThinkTags) {
@ -506,18 +506,18 @@ const ModelSettingsDialog = ({
<div className="flex items-center gap-2">
<VoidSwitch
size="xxs"
value={formValues.maxOutputTokens !== ''}
value={formValues.reservedOutputTokenSpace !== ''}
onChange={(enabled) => {
updateField('maxOutputTokens', enabled ? String(defaultModelCapabilities.maxOutputTokens) : '');
updateField('reservedOutputTokenSpace', enabled ? String(defaultModelCapabilities.reservedOutputTokenSpace) : '');
}}
/>
{formValues.maxOutputTokens === '' ? (
<span className="text-void-fg-3 text-xs w-24 text-right">Default ({defaultModelCapabilities.maxOutputTokens})</span>
{formValues.reservedOutputTokenSpace === '' ? (
<span className="text-void-fg-3 text-xs w-24 text-right">Default ({defaultModelCapabilities.reservedOutputTokenSpace})</span>
) : (
<VoidSimpleInputBox
value={formValues.maxOutputTokens}
onChangeValue={(value) => updateField('maxOutputTokens', value)}
placeholder={String(defaultModelCapabilities.maxOutputTokens)}
value={formValues.reservedOutputTokenSpace}
onChangeValue={(value) => updateField('reservedOutputTokenSpace', value)}
placeholder={String(defaultModelCapabilities.reservedOutputTokenSpace)}
compact={true}
className="max-w-24"
/>
@ -633,19 +633,19 @@ const ModelSettingsDialog = ({
<div className="flex items-center gap-2">
<VoidSwitch
size="xxs"
value={formValues.reasoningMaxOutputTokens !== ''}
value={formValues.reasoningReservedOutputTokenSpace !== ''}
onChange={(enabled) => {
// Use a reasonable default value when enabling
const defaultValue = defaultModelCapabilities.maxOutputTokens || 500;
updateField('reasoningMaxOutputTokens', enabled ? String(defaultValue) : '');
const defaultValue = defaultModelCapabilities.reservedOutputTokenSpace || 500;
updateField('reasoningReservedOutputTokenSpace', enabled ? String(defaultValue) : '');
}}
/>
{formValues.reasoningMaxOutputTokens === '' ? (
{formValues.reasoningReservedOutputTokenSpace === '' ? (
<span className="text-void-fg-3 text-xs w-24 text-right">Default</span>
) : (
<VoidSimpleInputBox
value={formValues.reasoningMaxOutputTokens}
onChangeValue={(value) => updateField('reasoningMaxOutputTokens', value)}
value={formValues.reasoningReservedOutputTokenSpace}
onChangeValue={(value) => updateField('reasoningReservedOutputTokenSpace', value)}
placeholder="Default"
compact={true}
className="max-w-24"

View file

@ -141,7 +141,7 @@ export const defaultModelsOfProvider = {
export type VoidStaticModelInfo = { // not stateful
contextWindow: number; // input tokens
maxOutputTokens: number | null; // output tokens, defaults to 4092
reservedOutputTokenSpace: number | null; // tokens reserved for output, defaults to 4096
cost: { // <-- UNUSED
input: number;
output: number;
@ -162,7 +162,7 @@ export type VoidStaticModelInfo = { // not stateful
// reasoning options if supports reasoning
readonly canTurnOffReasoning: boolean; // whether or not the user can disable reasoning mode (false if the model only supports reasoning)
readonly canIOReasoning: boolean; // whether or not the model actually outputs reasoning (eg o1 lets us control reasoning but not output it)
readonly reasoningMaxOutputTokens?: number; // overrides normal maxOutputTokens
readonly reasoningReservedOutputTokenSpace?: number; // overrides normal reservedOutputTokenSpace
readonly reasoningBudgetSlider?: { type: 'slider'; min: number; max: number; default: number };
// options related specifically to model output
@ -174,7 +174,7 @@ export type VoidStaticModelInfo = { // not stateful
export type ModelOverrideOptions = Partial<Pick<VoidStaticModelInfo,
'contextWindow' | 'maxOutputTokens' | 'specialToolFormat' | 'supportsSystemMessage' | 'supportsFIM' | 'reasoningCapabilities'
'contextWindow' | 'reservedOutputTokenSpace' | 'specialToolFormat' | 'supportsSystemMessage' | 'supportsFIM' | 'reasoningCapabilities'
>>
@ -199,7 +199,7 @@ type VoidStaticProviderInfo = { // doesn't change (not stateful)
const defaultModelOptions = {
contextWindow: 4_096,
maxOutputTokens: 4_096,
reservedOutputTokenSpace: 4_096,
cost: { input: 0, output: 0 },
downloadable: false,
supportsSystemMessage: false,
@ -215,57 +215,57 @@ const openSourceModelOptions_assumingOAICompat = {
supportsFIM: false,
supportsSystemMessage: false,
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: false, canIOReasoning: true, openSourceThinkTags: ['<think>', '</think>'] },
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'deepseekCoderV3': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'deepseekCoderV2': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'codestral': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'openhands-lm-32b': { // https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false, // built on qwen 2.5 32B instruct
contextWindow: 128_000, maxOutputTokens: 4_096
contextWindow: 128_000, reservedOutputTokenSpace: 4_096
},
'phi4': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 16_000, maxOutputTokens: 4_096,
contextWindow: 16_000, reservedOutputTokenSpace: 4_096,
},
'gemma': { // https://news.ycombinator.com/item?id=43451406
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
// llama 4 https://ai.meta.com/blog/llama-4-multimodal-intelligence/
'llama4-scout': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 10_000_000, maxOutputTokens: 4_096,
contextWindow: 10_000_000, reservedOutputTokenSpace: 4_096,
},
'llama4-maverick': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 10_000_000, maxOutputTokens: 4_096,
contextWindow: 10_000_000, reservedOutputTokenSpace: 4_096,
},
// llama 3
@ -273,65 +273,65 @@ const openSourceModelOptions_assumingOAICompat = {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'llama3.1': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'llama3.2': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'llama3.3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
// qwen
'qwen2.5coder': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'qwq': {
supportsFIM: false, // no FIM, yes reasoning
supportsSystemMessage: 'system-role',
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: false, canIOReasoning: true, openSourceThinkTags: ['<think>', '</think>'] },
contextWindow: 128_000, maxOutputTokens: 8_192,
contextWindow: 128_000, reservedOutputTokenSpace: 8_192,
},
'qwen3': {
supportsFIM: false, // replaces QwQ
supportsSystemMessage: 'system-role',
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: true, canIOReasoning: true, openSourceThinkTags: ['<think>', '</think>'] },
contextWindow: 32_768, maxOutputTokens: 8_192,
contextWindow: 32_768, reservedOutputTokenSpace: 8_192,
},
// FIM only
'starcoder2': {
supportsFIM: true,
supportsSystemMessage: false,
reasoningCapabilities: false,
contextWindow: 128_000, maxOutputTokens: 8_192,
contextWindow: 128_000, reservedOutputTokenSpace: 8_192,
},
'codegemma:2b': {
supportsFIM: true,
supportsSystemMessage: false,
reasoningCapabilities: false,
contextWindow: 128_000, maxOutputTokens: 8_192,
contextWindow: 128_000, reservedOutputTokenSpace: 8_192,
},
'quasar': { // openrouter/quasar-alpha
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
contextWindow: 1_000_000, maxOutputTokens: 32_000,
contextWindow: 1_000_000, reservedOutputTokenSpace: 32_000,
}
} as const satisfies { [s: string]: Partial<VoidStaticModelInfo> }
@ -416,7 +416,7 @@ const extensiveModelFallback: VoidStaticProviderInfo['modelOptionsFallback'] = (
const anthropicModelOptions = {
'claude-3-7-sonnet-20250219': { // https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
contextWindow: 200_000,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
downloadable: false,
supportsFIM: false,
@ -426,14 +426,14 @@ const anthropicModelOptions = {
supportsReasoning: true,
canTurnOffReasoning: true,
canIOReasoning: true,
reasoningMaxOutputTokens: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19
reasoningReservedOutputTokenSpace: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19
reasoningBudgetSlider: { type: 'slider', min: 1024, max: 32_000, default: 1024 }, // they recommend batching if max > 32_000
},
},
'claude-3-5-sonnet-20241022': {
contextWindow: 200_000,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
downloadable: false,
supportsFIM: false,
@ -443,7 +443,7 @@ const anthropicModelOptions = {
},
'claude-3-5-haiku-20241022': {
contextWindow: 200_000,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 },
downloadable: false,
supportsFIM: false,
@ -453,7 +453,7 @@ const anthropicModelOptions = {
},
'claude-3-opus-20240229': {
contextWindow: 200_000,
maxOutputTokens: 4_096,
reservedOutputTokenSpace: 4_096,
cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 },
downloadable: false,
supportsFIM: false,
@ -464,7 +464,7 @@ const anthropicModelOptions = {
'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in
contextWindow: 200_000, cost: { input: 3.00, output: 15.00 },
downloadable: false,
maxOutputTokens: 4_096,
reservedOutputTokenSpace: 4_096,
supportsFIM: false,
specialToolFormat: 'anthropic-style',
supportsSystemMessage: 'separated',
@ -493,7 +493,7 @@ const anthropicSettings: VoidStaticProviderInfo = {
if (lower.includes('claude-3-opus')) fallbackName = 'claude-3-opus-20240229'
if (lower.includes('claude-3-sonnet')) fallbackName = 'claude-3-sonnet-20240229'
if (fallbackName) return { modelName: fallbackName, ...anthropicModelOptions[fallbackName] }
return { modelName, ...defaultModelOptions, maxOutputTokens: 4_096 }
return { modelName, ...defaultModelOptions, reservedOutputTokenSpace: 4_096 }
},
}
@ -502,7 +502,7 @@ const anthropicSettings: VoidStaticProviderInfo = {
const openAIModelOptions = { // https://platform.openai.com/docs/pricing
'o3': {
contextWindow: 1_047_576,
maxOutputTokens: 32_768,
reservedOutputTokenSpace: 32_768,
cost: { input: 10.00, output: 40.00, cache_read: 2.50 },
downloadable: false,
supportsFIM: false,
@ -512,7 +512,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o4-mini': {
contextWindow: 1_047_576,
maxOutputTokens: 32_768,
reservedOutputTokenSpace: 32_768,
cost: { input: 1.10, output: 4.40, cache_read: 0.275 },
downloadable: false,
supportsFIM: false,
@ -522,7 +522,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4.1': {
contextWindow: 1_047_576,
maxOutputTokens: 32_768,
reservedOutputTokenSpace: 32_768,
cost: { input: 2.00, output: 8.00, cache_read: 0.50 },
downloadable: false,
supportsFIM: false,
@ -532,7 +532,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4.1-mini': {
contextWindow: 1_047_576,
maxOutputTokens: 32_768,
reservedOutputTokenSpace: 32_768,
cost: { input: 0.40, output: 1.60, cache_read: 0.10 },
downloadable: false,
supportsFIM: false,
@ -542,7 +542,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4.1-nano': {
contextWindow: 1_047_576,
maxOutputTokens: 32_768,
reservedOutputTokenSpace: 32_768,
cost: { input: 0.10, output: 0.40, cache_read: 0.03 },
downloadable: false,
supportsFIM: false,
@ -552,7 +552,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o1': {
contextWindow: 128_000,
maxOutputTokens: 100_000,
reservedOutputTokenSpace: 100_000,
cost: { input: 15.00, cache_read: 7.50, output: 60.00, },
downloadable: false,
supportsFIM: false,
@ -561,7 +561,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o3-mini': {
contextWindow: 200_000,
maxOutputTokens: 100_000,
reservedOutputTokenSpace: 100_000,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
downloadable: false,
supportsFIM: false,
@ -570,7 +570,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4o': {
contextWindow: 128_000,
maxOutputTokens: 16_384,
reservedOutputTokenSpace: 16_384,
cost: { input: 2.50, cache_read: 1.25, output: 10.00, },
downloadable: false,
supportsFIM: false,
@ -580,7 +580,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o1-mini': {
contextWindow: 128_000,
maxOutputTokens: 65_536,
reservedOutputTokenSpace: 65_536,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
downloadable: false,
supportsFIM: false,
@ -589,7 +589,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4o-mini': {
contextWindow: 128_000,
maxOutputTokens: 16_384,
reservedOutputTokenSpace: 16_384,
cost: { input: 0.15, cache_read: 0.075, output: 0.60, },
downloadable: false,
supportsFIM: false,
@ -617,7 +617,7 @@ const openAISettings: VoidStaticProviderInfo = {
const xAIModelOptions = {
'grok-2': {
contextWindow: 131_072,
maxOutputTokens: null, // 131_072,
reservedOutputTokenSpace: null, // 131_072,
cost: { input: 2.00, output: 10.00 },
downloadable: false,
supportsFIM: false,
@ -626,7 +626,7 @@ const xAIModelOptions = {
},
// 'grok-3': {
// contextWindow: 1_000_000,
// maxOutputTokens: null,
// reservedOutputTokenSpace: null,
// cost: {},
// downloadable: false,
// supportsFIM: false,
@ -651,7 +651,7 @@ const xAISettings: VoidStaticProviderInfo = {
const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
'gemini-2.5-flash-preview-04-17': {
contextWindow: 1_048_576,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 0.15, output: .60 }, // TODO $3.50 output with thinking not included
downloadable: false,
supportsFIM: false,
@ -661,7 +661,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-2.5-pro-exp-03-25': {
contextWindow: 1_048_576,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@ -671,7 +671,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-2.0-flash': {
contextWindow: 1_048_576,
maxOutputTokens: 8_192, // 8_192,
reservedOutputTokenSpace: 8_192, // 8_192,
cost: { input: 0.10, output: 0.40 },
downloadable: false,
supportsFIM: false,
@ -681,7 +681,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-2.0-flash-lite-preview-02-05': {
contextWindow: 1_048_576,
maxOutputTokens: 8_192, // 8_192,
reservedOutputTokenSpace: 8_192, // 8_192,
cost: { input: 0.075, output: 0.30 },
downloadable: false,
supportsFIM: false,
@ -691,7 +691,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-1.5-flash': {
contextWindow: 1_048_576,
maxOutputTokens: 8_192, // 8_192,
reservedOutputTokenSpace: 8_192, // 8_192,
cost: { input: 0.075, output: 0.30 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
downloadable: false,
supportsFIM: false,
@ -701,7 +701,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-1.5-pro': {
contextWindow: 2_097_152,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 1.25, output: 5.00 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
downloadable: false,
supportsFIM: false,
@ -711,7 +711,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-1.5-flash-8b': {
contextWindow: 1_048_576,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 0.0375, output: 0.15 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
downloadable: false,
supportsFIM: false,
@ -733,14 +733,14 @@ const deepseekModelOptions = {
'deepseek-chat': {
...openSourceModelOptions_assumingOAICompat.deepseekR1,
contextWindow: 64_000, // https://api-docs.deepseek.com/quick_start/pricing
maxOutputTokens: 8_000, // 8_000,
reservedOutputTokenSpace: 8_000, // 8_000,
cost: { cache_read: .07, input: .27, output: 1.10, },
downloadable: false,
},
'deepseek-reasoner': {
...openSourceModelOptions_assumingOAICompat.deepseekCoderV2,
contextWindow: 64_000,
maxOutputTokens: 8_000, // 8_000,
reservedOutputTokenSpace: 8_000, // 8_000,
cost: { cache_read: .14, input: .55, output: 2.19, },
downloadable: false,
},
@ -763,7 +763,7 @@ const deepseekSettings: VoidStaticProviderInfo = {
const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#pricing https://docs.mistral.ai/getting-started/models/models_overview/#premier-models
'mistral-large-latest': {
contextWindow: 131_000,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 2.00, output: 6.00 },
supportsFIM: false,
downloadable: { sizeGb: 73 },
@ -772,7 +772,7 @@ const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#prici
},
'codestral-latest': {
contextWindow: 256_000,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 0.30, output: 0.90 },
supportsFIM: true,
downloadable: { sizeGb: 13 },
@ -781,7 +781,7 @@ const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#prici
},
'ministral-8b-latest': { // ollama 'mistral'
contextWindow: 131_000,
maxOutputTokens: 4_096,
reservedOutputTokenSpace: 4_096,
cost: { input: 0.10, output: 0.10 },
supportsFIM: false,
downloadable: { sizeGb: 4.1 },
@ -790,7 +790,7 @@ const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#prici
},
'ministral-3b-latest': {
contextWindow: 131_000,
maxOutputTokens: 4_096,
reservedOutputTokenSpace: 4_096,
cost: { input: 0.04, output: 0.04 },
supportsFIM: false,
downloadable: { sizeGb: 'not-known' },
@ -809,7 +809,7 @@ const mistralSettings: VoidStaticProviderInfo = {
const groqModelOptions = { // https://console.groq.com/docs/models, https://groq.com/pricing/
'llama-3.3-70b-versatile': {
contextWindow: 128_000,
maxOutputTokens: 32_768, // 32_768,
reservedOutputTokenSpace: 32_768, // 32_768,
cost: { input: 0.59, output: 0.79 },
downloadable: false,
supportsFIM: false,
@ -818,7 +818,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
},
'llama-3.1-8b-instant': {
contextWindow: 128_000,
maxOutputTokens: 8_192,
reservedOutputTokenSpace: 8_192,
cost: { input: 0.05, output: 0.08 },
downloadable: false,
supportsFIM: false,
@ -827,7 +827,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
},
'qwen-2.5-coder-32b': {
contextWindow: 128_000,
maxOutputTokens: null, // not specified?
reservedOutputTokenSpace: null, // not specified?
cost: { input: 0.79, output: 0.79 },
downloadable: false,
supportsFIM: false, // unfortunately looks like no FIM support on groq
@ -836,7 +836,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
},
'qwen-qwq-32b': { // https://huggingface.co/Qwen/QwQ-32B
contextWindow: 128_000,
maxOutputTokens: null, // not specified?
reservedOutputTokenSpace: null, // not specified?
cost: { input: 0.29, output: 0.39 },
downloadable: false,
supportsFIM: false,
@ -882,7 +882,7 @@ const microsoftAzureSettings: VoidStaticProviderInfo = {
const ollamaModelOptions = {
'qwen2.5-coder:7b': {
contextWindow: 32_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 1.9 },
supportsFIM: true,
@ -891,7 +891,7 @@ const ollamaModelOptions = {
},
'qwen2.5-coder:3b': {
contextWindow: 32_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 1.9 },
supportsFIM: true,
@ -900,7 +900,7 @@ const ollamaModelOptions = {
},
'qwen2.5-coder:1.5b': {
contextWindow: 32_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: .986 },
supportsFIM: true,
@ -909,7 +909,7 @@ const ollamaModelOptions = {
},
'llama3.1': {
contextWindow: 128_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 4.9 },
supportsFIM: false,
@ -918,7 +918,7 @@ const ollamaModelOptions = {
},
'qwen2.5-coder': {
contextWindow: 128_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 4.7 },
supportsFIM: false,
@ -927,7 +927,7 @@ const ollamaModelOptions = {
},
'qwq': {
contextWindow: 128_000,
maxOutputTokens: 32_000,
reservedOutputTokenSpace: 32_000,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 20 },
supportsFIM: false,
@ -936,7 +936,7 @@ const ollamaModelOptions = {
},
'deepseek-r1': {
contextWindow: 128_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 4.7 },
supportsFIM: false,
@ -986,7 +986,7 @@ const liteLLMSettings: VoidStaticProviderInfo = { // https://docs.litellm.ai/doc
const openRouterModelOptions_assumingOpenAICompat = {
'mistralai/mistral-small-3.1-24b-instruct:free': {
contextWindow: 128_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@ -995,7 +995,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'google/gemini-2.0-flash-lite-preview-02-05:free': {
contextWindow: 1_048_576,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@ -1004,7 +1004,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'google/gemini-2.0-pro-exp-02-05:free': {
contextWindow: 1_048_576,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@ -1013,7 +1013,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'google/gemini-2.0-flash-exp:free': {
contextWindow: 1_048_576,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@ -1023,13 +1023,13 @@ const openRouterModelOptions_assumingOpenAICompat = {
'deepseek/deepseek-r1': {
...openSourceModelOptions_assumingOAICompat.deepseekR1,
contextWindow: 128_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0.8, output: 2.4 },
downloadable: false,
},
'anthropic/claude-3.7-sonnet:thinking': {
contextWindow: 200_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 3.00, output: 15.00 },
downloadable: false,
supportsFIM: false,
@ -1038,13 +1038,13 @@ const openRouterModelOptions_assumingOpenAICompat = {
supportsReasoning: true,
canTurnOffReasoning: false,
canIOReasoning: true,
reasoningMaxOutputTokens: 64_000,
reasoningReservedOutputTokenSpace: 64_000,
reasoningBudgetSlider: { type: 'slider', min: 1024, max: 32_000, default: 1024 }, // they recommend batching if max > 32_000
},
},
'anthropic/claude-3.7-sonnet': {
contextWindow: 200_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 3.00, output: 15.00 },
downloadable: false,
supportsFIM: false,
@ -1053,7 +1053,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'anthropic/claude-3.5-sonnet': {
contextWindow: 200_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 3.00, output: 15.00 },
downloadable: false,
supportsFIM: false,
@ -1063,7 +1063,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
'mistralai/codestral-2501': {
...openSourceModelOptions_assumingOAICompat.codestral,
contextWindow: 256_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0.3, output: 0.9 },
downloadable: false,
reasoningCapabilities: false,
@ -1071,14 +1071,14 @@ const openRouterModelOptions_assumingOpenAICompat = {
'qwen/qwen-2.5-coder-32b-instruct': {
...openSourceModelOptions_assumingOAICompat['qwen2.5coder'],
contextWindow: 33_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0.07, output: 0.16 },
downloadable: false,
},
'qwen/qwq-32b': {
...openSourceModelOptions_assumingOAICompat['qwq'],
contextWindow: 33_000,
maxOutputTokens: null,
reservedOutputTokenSpace: null,
cost: { input: 0.07, output: 0.16 },
downloadable: false,
}
@ -1201,12 +1201,12 @@ export const getIsReasoningEnabledState = (
}
// Resolves the output-token reservation for a given provider/model pair.
// NOTE(review): this chunk is a rendered diff of the `maxOutputTokens -> reservedOutputTokenSpace` rename,
// so each renamed line below appears twice: the pre-rename line followed by its post-rename replacement.
//
// - providerName / modelName / opts.overridesOfModel are forwarded to getModelCapabilities to look up the model.
// - opts.isReasoningEnabled selects between the reasoning-specific value and the model's base value.
// Returns the reasoning-specific value when reasoning is enabled and the model has reasoningCapabilities,
// otherwise the model's base value.
// NOTE(review): the base value is presumably nullable (model entries in this file set it to `null`) — confirm
// that every caller applies its own default.
export const getMaxOutputTokens = (providerName: ProviderName, modelName: string, opts: { isReasoningEnabled: boolean, overridesOfModel: OverridesOfModel | undefined }) => {
export const getReservedOutputTokenSpace = (providerName: ProviderName, modelName: string, opts: { isReasoningEnabled: boolean, overridesOfModel: OverridesOfModel | undefined }) => {
const {
reasoningCapabilities,
maxOutputTokens,
reservedOutputTokenSpace,
} = getModelCapabilities(providerName, modelName, opts.overridesOfModel)
// the reasoning-specific value is used only when reasoning is on AND reasoningCapabilities is truthy
return opts.isReasoningEnabled && reasoningCapabilities ? reasoningCapabilities.reasoningMaxOutputTokens : maxOutputTokens
return opts.isReasoningEnabled && reasoningCapabilities ? reasoningCapabilities.reasoningReservedOutputTokenSpace : reservedOutputTokenSpace
}
// used to force reasoning state (complex) into something simple we can just read from when sending a message

View file

@ -16,7 +16,7 @@ import { GoogleAuth } from 'google-auth-library'
import { AnthropicLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getMaxOutputTokens } from '../../common/modelCapabilities.js';
import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
import { availableTools, InternalToolInfo, isAToolName, ToolParamName, voidTools } from '../../common/prompt/prompts.js';
import { generateUuid } from '../../../../../base/common/uuid.js';
@ -430,7 +430,7 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {}
// anthropic-specific - max tokens
const maxTokens = getMaxOutputTokens(providerName, modelName_, { isReasoningEnabled: !!reasoningInfo?.isReasoningEnabled, overridesOfModel })
const maxTokens = getReservedOutputTokenSpace(providerName, modelName_, { isReasoningEnabled: !!reasoningInfo?.isReasoningEnabled, overridesOfModel })
// tools
const potentialTools = chatMode !== null ? anthropicTools(chatMode) : null