diff --git a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts
index 1ce7996e..27c054ce 100644
--- a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts
+++ b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts
@@ -6,7 +6,7 @@ import { createDecorator } from '../../../../platform/instantiation/common/insta
import { IWorkspaceContextService } from '../../../../platform/workspace/common/workspace.js';
import { IEditorService } from '../../../services/editor/common/editorService.js';
import { ChatMessage } from '../common/chatThreadServiceTypes.js';
-import { getIsReasoningEnabledState, getMaxOutputTokens, getModelCapabilities } from '../common/modelCapabilities.js';
+import { getIsReasoningEnabledState, getReservedOutputTokenSpace, getModelCapabilities } from '../common/modelCapabilities.js';
import { reParsedToolXMLString, chat_systemMessage, ToolName } from '../common/prompt/prompts.js';
import { AnthropicLLMChatMessage, AnthropicReasoning, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, OpenAILLMChatMessage, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
import { IVoidSettingsService } from '../common/voidSettingsService.js';
@@ -259,7 +259,7 @@ const prepareOpenAIOrAnthropicMessages = ({
specialToolFormat,
supportsAnthropicReasoning,
contextWindow,
- maxOutputTokens,
+ reservedOutputTokenSpace,
}: {
messages: SimpleLLMMessage[],
systemMessage: string,
@@ -268,10 +268,10 @@ const prepareOpenAIOrAnthropicMessages = ({
specialToolFormat: 'openai-style' | 'anthropic-style' | undefined,
supportsAnthropicReasoning: boolean,
contextWindow: number,
- maxOutputTokens: number | null | undefined,
+ reservedOutputTokenSpace: number | null | undefined,
}): { messages: AnthropicOrOpenAILLMMessage[], separateSystemMessage: string | undefined } => {
- maxOutputTokens = maxOutputTokens ?? 4_096 // default to 4096
+ reservedOutputTokenSpace = reservedOutputTokenSpace ?? 4_096 // default to 4096
let messages: (SimpleLLMMessage | { role: 'system', content: string })[] = deepClone(messages_)
// ================ system message ================
@@ -336,7 +336,7 @@ const prepareOpenAIOrAnthropicMessages = ({
let totalLen = 0
for (const m of messages) { totalLen += m.content.length }
const charsNeedToTrim = totalLen - Math.max(
- (contextWindow - maxOutputTokens) * CHARS_PER_TOKEN, // can be 0, in which case charsNeedToTrim=everything, bad
+ (contextWindow - reservedOutputTokenSpace) * CHARS_PER_TOKEN, // can be 0, in which case charsNeedToTrim=everything, bad
4_096 // ensure we don't trim at least 4096 chars (just a random small value)
)
@@ -494,7 +494,7 @@ const prepareMessages = (params: {
specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined,
supportsAnthropicReasoning: boolean,
contextWindow: number,
- maxOutputTokens: number | null | undefined,
+ reservedOutputTokenSpace: number | null | undefined,
providerName: ProviderName
}): { messages: LLMChatMessage[], separateSystemMessage: string | undefined } => {
@@ -647,7 +647,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
const aiInstructions = this._getCombinedAIInstructions();
const isReasoningEnabled = getIsReasoningEnabledState(featureName, providerName, modelName, modelSelectionOptions, overridesOfModel)
- const maxOutputTokens = getMaxOutputTokens(providerName, modelName, { isReasoningEnabled, overridesOfModel })
+ const reservedOutputTokenSpace = getReservedOutputTokenSpace(providerName, modelName, { isReasoningEnabled, overridesOfModel })
const { messages, separateSystemMessage } = prepareMessages({
messages: simpleMessages,
@@ -657,7 +657,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
specialToolFormat,
supportsAnthropicReasoning: providerName === 'anthropic',
contextWindow,
- maxOutputTokens,
+ reservedOutputTokenSpace,
providerName,
})
return { messages, separateSystemMessage };
@@ -681,7 +681,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
const aiInstructions = this._getCombinedAIInstructions();
const isReasoningEnabled = getIsReasoningEnabledState('Chat', providerName, modelName, modelSelectionOptions, overridesOfModel)
- const maxOutputTokens = getMaxOutputTokens(providerName, modelName, { isReasoningEnabled, overridesOfModel })
+ const reservedOutputTokenSpace = getReservedOutputTokenSpace(providerName, modelName, { isReasoningEnabled, overridesOfModel })
const llmMessages = this._chatMessagesToSimpleMessages(chatMessages)
const { messages, separateSystemMessage } = prepareMessages({
@@ -692,7 +692,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
specialToolFormat,
supportsAnthropicReasoning: providerName === 'anthropic',
contextWindow,
- maxOutputTokens,
+ reservedOutputTokenSpace,
providerName,
})
return { messages, separateSystemMessage };
diff --git a/src/vs/workbench/contrib/void/browser/react/src/void-onboarding/VoidOnboarding.tsx b/src/vs/workbench/contrib/void/browser/react/src/void-onboarding/VoidOnboarding.tsx
index 81a76547..bf8a3223 100644
--- a/src/vs/workbench/contrib/void/browser/react/src/void-onboarding/VoidOnboarding.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/void-onboarding/VoidOnboarding.tsx
@@ -364,7 +364,7 @@ const TableOfModelsForProvider = ({ providerName }: { providerName: ProviderName
contextWindow,
isUnrecognizedModel,
- maxOutputTokens,
+ reservedOutputTokenSpace,
supportsSystemMessage,
} = capabilities
diff --git a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx
index de93ce27..20a4e9d0 100644
--- a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx
@@ -329,24 +329,24 @@ const ModelSettingsDialog = ({
// Initialize form state for all potential override options
const [formValues, setFormValues] = useState<{
contextWindow: string;
- maxOutputTokens: string;
+ reservedOutputTokenSpace: string;
specialToolFormat: 'openai-style' | 'gemini-style' | 'anthropic-style' | undefined | '';
supportsSystemMessage: 'system-role' | 'developer-role' | 'separated' | false | '';
supportsFIM: boolean | null;
reasoningCapabilities: boolean | null;
canTurnOffReasoning: boolean;
- reasoningMaxOutputTokens: string;
+ reasoningReservedOutputTokenSpace: string;
openSourceThinkTags: [string, string] | null;
}>({
// start form as default values
contextWindow: '',
- maxOutputTokens: '',
+ reservedOutputTokenSpace: '',
specialToolFormat: '',
supportsSystemMessage: '',
supportsFIM: null,
reasoningCapabilities: null,
canTurnOffReasoning: false,
- reasoningMaxOutputTokens: '',
+ reasoningReservedOutputTokenSpace: '',
openSourceThinkTags: null,
});
@@ -370,15 +370,15 @@ const ModelSettingsDialog = ({
// to indicate default values should be used
setFormValues({
contextWindow: overrides.contextWindow !== undefined ? String(overrides.contextWindow) : '',
- maxOutputTokens: overrides.maxOutputTokens !== undefined ? String(overrides.maxOutputTokens) : '',
+ reservedOutputTokenSpace: overrides.reservedOutputTokenSpace !== undefined ? String(overrides.reservedOutputTokenSpace) : '',
specialToolFormat: overrides.specialToolFormat !== undefined ? overrides.specialToolFormat : '',
supportsSystemMessage: overrides.supportsSystemMessage !== undefined ? overrides.supportsSystemMessage : '',
supportsFIM: overrides.supportsFIM !== undefined ? overrides.supportsFIM : null,
reasoningCapabilities: overrides.reasoningCapabilities !== undefined ?
!!overrides.reasoningCapabilities : null,
canTurnOffReasoning: typeof reasoningCapabilities === 'object' ? !!reasoningCapabilities.canTurnOffReasoning : false,
- reasoningMaxOutputTokens: typeof reasoningCapabilities === 'object' && reasoningCapabilities.reasoningMaxOutputTokens ?
- String(reasoningCapabilities.reasoningMaxOutputTokens) : '',
+ reasoningReservedOutputTokenSpace: typeof reasoningCapabilities === 'object' && reasoningCapabilities.reasoningReservedOutputTokenSpace ?
+ String(reasoningCapabilities.reasoningReservedOutputTokenSpace) : '',
openSourceThinkTags: thinkTags,
});
}
@@ -406,11 +406,11 @@ const ModelSettingsDialog = ({
if (!isNaN(tokens)) newSettings.contextWindow = tokens;
}
- if (formValues.maxOutputTokens.trim() === '') {
- newSettings.maxOutputTokens = defaultModelCapabilities.maxOutputTokens;
- } else if (formValues.maxOutputTokens) {
- const tokens = parseInt(formValues.maxOutputTokens);
- if (!isNaN(tokens)) newSettings.maxOutputTokens = tokens;
+ if (formValues.reservedOutputTokenSpace.trim() === '') {
+ newSettings.reservedOutputTokenSpace = defaultModelCapabilities.reservedOutputTokenSpace;
+ } else if (formValues.reservedOutputTokenSpace) {
+ const tokens = parseInt(formValues.reservedOutputTokenSpace);
+ if (!isNaN(tokens)) newSettings.reservedOutputTokenSpace = tokens;
}
// Handle dropdown fields
@@ -442,8 +442,8 @@ const ModelSettingsDialog = ({
};
// Only add these if they have values
- if (formValues.reasoningMaxOutputTokens) {
- reasoningSettings.reasoningMaxOutputTokens = parseInt(formValues.reasoningMaxOutputTokens);
+ if (formValues.reasoningReservedOutputTokenSpace) {
+ reasoningSettings.reasoningReservedOutputTokenSpace = parseInt(formValues.reasoningReservedOutputTokenSpace);
}
if (formValues.openSourceThinkTags) {
@@ -506,18 +506,18 @@ const ModelSettingsDialog = ({
{
- updateField('maxOutputTokens', enabled ? String(defaultModelCapabilities.maxOutputTokens) : '');
+ updateField('reservedOutputTokenSpace', enabled ? String(defaultModelCapabilities.reservedOutputTokenSpace) : '');
}}
/>
- {formValues.maxOutputTokens === '' ? (
- Default ({defaultModelCapabilities.maxOutputTokens})
+ {formValues.reservedOutputTokenSpace === '' ? (
+ Default ({defaultModelCapabilities.reservedOutputTokenSpace})
) : (
updateField('maxOutputTokens', value)}
- placeholder={String(defaultModelCapabilities.maxOutputTokens)}
+ value={formValues.reservedOutputTokenSpace}
+ onChangeValue={(value) => updateField('reservedOutputTokenSpace', value)}
+ placeholder={String(defaultModelCapabilities.reservedOutputTokenSpace)}
compact={true}
className="max-w-24"
/>
@@ -633,19 +633,19 @@ const ModelSettingsDialog = ({
{
// Use a reasonable default value when enabling
- const defaultValue = defaultModelCapabilities.maxOutputTokens || 500;
- updateField('reasoningMaxOutputTokens', enabled ? String(defaultValue) : '');
+ const defaultValue = defaultModelCapabilities.reservedOutputTokenSpace || 500;
+ updateField('reasoningReservedOutputTokenSpace', enabled ? String(defaultValue) : '');
}}
/>
- {formValues.reasoningMaxOutputTokens === '' ? (
+ {formValues.reasoningReservedOutputTokenSpace === '' ? (
Default
) : (
updateField('reasoningMaxOutputTokens', value)}
+ value={formValues.reasoningReservedOutputTokenSpace}
+ onChangeValue={(value) => updateField('reasoningReservedOutputTokenSpace', value)}
placeholder="Default"
compact={true}
className="max-w-24"
diff --git a/src/vs/workbench/contrib/void/common/modelCapabilities.ts b/src/vs/workbench/contrib/void/common/modelCapabilities.ts
index 93cd953e..455b9e78 100644
--- a/src/vs/workbench/contrib/void/common/modelCapabilities.ts
+++ b/src/vs/workbench/contrib/void/common/modelCapabilities.ts
@@ -141,7 +141,7 @@ export const defaultModelsOfProvider = {
export type VoidStaticModelInfo = { // not stateful
contextWindow: number; // input tokens
- maxOutputTokens: number | null; // output tokens, defaults to 4092
+ reservedOutputTokenSpace: number | null; // output tokens, defaults to 4096
cost: { // <-- UNUSED
input: number;
output: number;
@@ -162,7 +162,7 @@ export type VoidStaticModelInfo = { // not stateful
// reasoning options if supports reasoning
readonly canTurnOffReasoning: boolean; // whether or not the user can disable reasoning mode (false if the model only supports reasoning)
readonly canIOReasoning: boolean; // whether or not the model actually outputs reasoning (eg o1 lets us control reasoning but not output it)
- readonly reasoningMaxOutputTokens?: number; // overrides normal maxOutputTokens
+ readonly reasoningReservedOutputTokenSpace?: number; // overrides normal reservedOutputTokenSpace
readonly reasoningBudgetSlider?: { type: 'slider'; min: number; max: number; default: number };
// options related specifically to model output
@@ -174,7 +174,7 @@ export type VoidStaticModelInfo = { // not stateful
export type ModelOverrideOptions = Partial>
@@ -199,7 +199,7 @@ type VoidStaticProviderInfo = { // doesn't change (not stateful)
const defaultModelOptions = {
contextWindow: 4_096,
- maxOutputTokens: 4_096,
+ reservedOutputTokenSpace: 4_096,
cost: { input: 0, output: 0 },
downloadable: false,
supportsSystemMessage: false,
@@ -215,57 +215,57 @@ const openSourceModelOptions_assumingOAICompat = {
supportsFIM: false,
supportsSystemMessage: false,
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: false, canIOReasoning: true, openSourceThinkTags: ['', ''] },
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'deepseekCoderV3': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'deepseekCoderV2': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'codestral': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'openhands-lm-32b': { // https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false, // built on qwen 2.5 32B instruct
- contextWindow: 128_000, maxOutputTokens: 4_096
+ contextWindow: 128_000, reservedOutputTokenSpace: 4_096
},
'phi4': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 16_000, maxOutputTokens: 4_096,
+ contextWindow: 16_000, reservedOutputTokenSpace: 4_096,
},
'gemma': { // https://news.ycombinator.com/item?id=43451406
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
// llama 4 https://ai.meta.com/blog/llama-4-multimodal-intelligence/
'llama4-scout': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 10_000_000, maxOutputTokens: 4_096,
+ contextWindow: 10_000_000, reservedOutputTokenSpace: 4_096,
},
'llama4-maverick': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 10_000_000, maxOutputTokens: 4_096,
+ contextWindow: 10_000_000, reservedOutputTokenSpace: 4_096,
},
// llama 3
@@ -273,65 +273,65 @@ const openSourceModelOptions_assumingOAICompat = {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'llama3.1': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'llama3.2': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'llama3.3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
// qwen
'qwen2.5coder': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 32_000, maxOutputTokens: 4_096,
+ contextWindow: 32_000, reservedOutputTokenSpace: 4_096,
},
'qwq': {
supportsFIM: false, // no FIM, yes reasoning
supportsSystemMessage: 'system-role',
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: false, canIOReasoning: true, openSourceThinkTags: ['', ''] },
- contextWindow: 128_000, maxOutputTokens: 8_192,
+ contextWindow: 128_000, reservedOutputTokenSpace: 8_192,
},
'qwen3': {
supportsFIM: false, // replaces QwQ
supportsSystemMessage: 'system-role',
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: true, canIOReasoning: true, openSourceThinkTags: ['', ''] },
- contextWindow: 32_768, maxOutputTokens: 8_192,
+ contextWindow: 32_768, reservedOutputTokenSpace: 8_192,
},
// FIM only
'starcoder2': {
supportsFIM: true,
supportsSystemMessage: false,
reasoningCapabilities: false,
- contextWindow: 128_000, maxOutputTokens: 8_192,
+ contextWindow: 128_000, reservedOutputTokenSpace: 8_192,
},
'codegemma:2b': {
supportsFIM: true,
supportsSystemMessage: false,
reasoningCapabilities: false,
- contextWindow: 128_000, maxOutputTokens: 8_192,
+ contextWindow: 128_000, reservedOutputTokenSpace: 8_192,
},
'quasar': { // openrouter/quasar-alpha
supportsFIM: false,
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
- contextWindow: 1_000_000, maxOutputTokens: 32_000,
+ contextWindow: 1_000_000, reservedOutputTokenSpace: 32_000,
}
} as const satisfies { [s: string]: Partial }
@@ -416,7 +416,7 @@ const extensiveModelFallback: VoidStaticProviderInfo['modelOptionsFallback'] = (
const anthropicModelOptions = {
'claude-3-7-sonnet-20250219': { // https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
contextWindow: 200_000,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
downloadable: false,
supportsFIM: false,
@@ -426,14 +426,14 @@ const anthropicModelOptions = {
supportsReasoning: true,
canTurnOffReasoning: true,
canIOReasoning: true,
- reasoningMaxOutputTokens: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19
+ reasoningReservedOutputTokenSpace: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19
reasoningBudgetSlider: { type: 'slider', min: 1024, max: 32_000, default: 1024 }, // they recommend batching if max > 32_000
},
},
'claude-3-5-sonnet-20241022': {
contextWindow: 200_000,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
downloadable: false,
supportsFIM: false,
@@ -443,7 +443,7 @@ const anthropicModelOptions = {
},
'claude-3-5-haiku-20241022': {
contextWindow: 200_000,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 },
downloadable: false,
supportsFIM: false,
@@ -453,7 +453,7 @@ const anthropicModelOptions = {
},
'claude-3-opus-20240229': {
contextWindow: 200_000,
- maxOutputTokens: 4_096,
+ reservedOutputTokenSpace: 4_096,
cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 },
downloadable: false,
supportsFIM: false,
@@ -464,7 +464,7 @@ const anthropicModelOptions = {
'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in
contextWindow: 200_000, cost: { input: 3.00, output: 15.00 },
downloadable: false,
- maxOutputTokens: 4_096,
+ reservedOutputTokenSpace: 4_096,
supportsFIM: false,
specialToolFormat: 'anthropic-style',
supportsSystemMessage: 'separated',
@@ -493,7 +493,7 @@ const anthropicSettings: VoidStaticProviderInfo = {
if (lower.includes('claude-3-opus')) fallbackName = 'claude-3-opus-20240229'
if (lower.includes('claude-3-sonnet')) fallbackName = 'claude-3-sonnet-20240229'
if (fallbackName) return { modelName: fallbackName, ...anthropicModelOptions[fallbackName] }
- return { modelName, ...defaultModelOptions, maxOutputTokens: 4_096 }
+ return { modelName, ...defaultModelOptions, reservedOutputTokenSpace: 4_096 }
},
}
@@ -502,7 +502,7 @@ const anthropicSettings: VoidStaticProviderInfo = {
const openAIModelOptions = { // https://platform.openai.com/docs/pricing
'o3': {
contextWindow: 1_047_576,
- maxOutputTokens: 32_768,
+ reservedOutputTokenSpace: 32_768,
cost: { input: 10.00, output: 40.00, cache_read: 2.50 },
downloadable: false,
supportsFIM: false,
@@ -512,7 +512,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o4-mini': {
contextWindow: 1_047_576,
- maxOutputTokens: 32_768,
+ reservedOutputTokenSpace: 32_768,
cost: { input: 1.10, output: 4.40, cache_read: 0.275 },
downloadable: false,
supportsFIM: false,
@@ -522,7 +522,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4.1': {
contextWindow: 1_047_576,
- maxOutputTokens: 32_768,
+ reservedOutputTokenSpace: 32_768,
cost: { input: 2.00, output: 8.00, cache_read: 0.50 },
downloadable: false,
supportsFIM: false,
@@ -532,7 +532,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4.1-mini': {
contextWindow: 1_047_576,
- maxOutputTokens: 32_768,
+ reservedOutputTokenSpace: 32_768,
cost: { input: 0.40, output: 1.60, cache_read: 0.10 },
downloadable: false,
supportsFIM: false,
@@ -542,7 +542,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4.1-nano': {
contextWindow: 1_047_576,
- maxOutputTokens: 32_768,
+ reservedOutputTokenSpace: 32_768,
cost: { input: 0.10, output: 0.40, cache_read: 0.03 },
downloadable: false,
supportsFIM: false,
@@ -552,7 +552,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o1': {
contextWindow: 128_000,
- maxOutputTokens: 100_000,
+ reservedOutputTokenSpace: 100_000,
cost: { input: 15.00, cache_read: 7.50, output: 60.00, },
downloadable: false,
supportsFIM: false,
@@ -561,7 +561,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o3-mini': {
contextWindow: 200_000,
- maxOutputTokens: 100_000,
+ reservedOutputTokenSpace: 100_000,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
downloadable: false,
supportsFIM: false,
@@ -570,7 +570,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4o': {
contextWindow: 128_000,
- maxOutputTokens: 16_384,
+ reservedOutputTokenSpace: 16_384,
cost: { input: 2.50, cache_read: 1.25, output: 10.00, },
downloadable: false,
supportsFIM: false,
@@ -580,7 +580,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'o1-mini': {
contextWindow: 128_000,
- maxOutputTokens: 65_536,
+ reservedOutputTokenSpace: 65_536,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
downloadable: false,
supportsFIM: false,
@@ -589,7 +589,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
},
'gpt-4o-mini': {
contextWindow: 128_000,
- maxOutputTokens: 16_384,
+ reservedOutputTokenSpace: 16_384,
cost: { input: 0.15, cache_read: 0.075, output: 0.60, },
downloadable: false,
supportsFIM: false,
@@ -617,7 +617,7 @@ const openAISettings: VoidStaticProviderInfo = {
const xAIModelOptions = {
'grok-2': {
contextWindow: 131_072,
- maxOutputTokens: null, // 131_072,
+ reservedOutputTokenSpace: null, // 131_072,
cost: { input: 2.00, output: 10.00 },
downloadable: false,
supportsFIM: false,
@@ -626,7 +626,7 @@ const xAIModelOptions = {
},
// 'grok-3': {
// contextWindow: 1_000_000,
- // maxOutputTokens: null,
+ // reservedOutputTokenSpace: null,
// cost: {},
// downloadable: false,
// supportsFIM: false,
@@ -651,7 +651,7 @@ const xAISettings: VoidStaticProviderInfo = {
const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
'gemini-2.5-flash-preview-04-17': {
contextWindow: 1_048_576,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 0.15, output: .60 }, // TODO $3.50 output with thinking not included
downloadable: false,
supportsFIM: false,
@@ -661,7 +661,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-2.5-pro-exp-03-25': {
contextWindow: 1_048_576,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@@ -671,7 +671,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-2.0-flash': {
contextWindow: 1_048_576,
- maxOutputTokens: 8_192, // 8_192,
+ reservedOutputTokenSpace: 8_192, // 8_192,
cost: { input: 0.10, output: 0.40 },
downloadable: false,
supportsFIM: false,
@@ -681,7 +681,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-2.0-flash-lite-preview-02-05': {
contextWindow: 1_048_576,
- maxOutputTokens: 8_192, // 8_192,
+ reservedOutputTokenSpace: 8_192, // 8_192,
cost: { input: 0.075, output: 0.30 },
downloadable: false,
supportsFIM: false,
@@ -691,7 +691,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-1.5-flash': {
contextWindow: 1_048_576,
- maxOutputTokens: 8_192, // 8_192,
+ reservedOutputTokenSpace: 8_192, // 8_192,
cost: { input: 0.075, output: 0.30 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
downloadable: false,
supportsFIM: false,
@@ -701,7 +701,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-1.5-pro': {
contextWindow: 2_097_152,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 1.25, output: 5.00 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
downloadable: false,
supportsFIM: false,
@@ -711,7 +711,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
},
'gemini-1.5-flash-8b': {
contextWindow: 1_048_576,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 0.0375, output: 0.15 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
downloadable: false,
supportsFIM: false,
@@ -733,14 +733,14 @@ const deepseekModelOptions = {
'deepseek-chat': {
...openSourceModelOptions_assumingOAICompat.deepseekR1,
contextWindow: 64_000, // https://api-docs.deepseek.com/quick_start/pricing
- maxOutputTokens: 8_000, // 8_000,
+ reservedOutputTokenSpace: 8_000, // 8_000,
cost: { cache_read: .07, input: .27, output: 1.10, },
downloadable: false,
},
'deepseek-reasoner': {
...openSourceModelOptions_assumingOAICompat.deepseekCoderV2,
contextWindow: 64_000,
- maxOutputTokens: 8_000, // 8_000,
+ reservedOutputTokenSpace: 8_000, // 8_000,
cost: { cache_read: .14, input: .55, output: 2.19, },
downloadable: false,
},
@@ -763,7 +763,7 @@ const deepseekSettings: VoidStaticProviderInfo = {
const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#pricing https://docs.mistral.ai/getting-started/models/models_overview/#premier-models
'mistral-large-latest': {
contextWindow: 131_000,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 2.00, output: 6.00 },
supportsFIM: false,
downloadable: { sizeGb: 73 },
@@ -772,7 +772,7 @@ const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#prici
},
'codestral-latest': {
contextWindow: 256_000,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 0.30, output: 0.90 },
supportsFIM: true,
downloadable: { sizeGb: 13 },
@@ -781,7 +781,7 @@ const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#prici
},
'ministral-8b-latest': { // ollama 'mistral'
contextWindow: 131_000,
- maxOutputTokens: 4_096,
+ reservedOutputTokenSpace: 4_096,
cost: { input: 0.10, output: 0.10 },
supportsFIM: false,
downloadable: { sizeGb: 4.1 },
@@ -790,7 +790,7 @@ const mistralModelOptions = { // https://mistral.ai/products/la-plateforme#prici
},
'ministral-3b-latest': {
contextWindow: 131_000,
- maxOutputTokens: 4_096,
+ reservedOutputTokenSpace: 4_096,
cost: { input: 0.04, output: 0.04 },
supportsFIM: false,
downloadable: { sizeGb: 'not-known' },
@@ -809,7 +809,7 @@ const mistralSettings: VoidStaticProviderInfo = {
const groqModelOptions = { // https://console.groq.com/docs/models, https://groq.com/pricing/
'llama-3.3-70b-versatile': {
contextWindow: 128_000,
- maxOutputTokens: 32_768, // 32_768,
+ reservedOutputTokenSpace: 32_768, // 32_768,
cost: { input: 0.59, output: 0.79 },
downloadable: false,
supportsFIM: false,
@@ -818,7 +818,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
},
'llama-3.1-8b-instant': {
contextWindow: 128_000,
- maxOutputTokens: 8_192,
+ reservedOutputTokenSpace: 8_192,
cost: { input: 0.05, output: 0.08 },
downloadable: false,
supportsFIM: false,
@@ -827,7 +827,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
},
'qwen-2.5-coder-32b': {
contextWindow: 128_000,
- maxOutputTokens: null, // not specified?
+ reservedOutputTokenSpace: null, // not specified?
cost: { input: 0.79, output: 0.79 },
downloadable: false,
supportsFIM: false, // unfortunately looks like no FIM support on groq
@@ -836,7 +836,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
},
'qwen-qwq-32b': { // https://huggingface.co/Qwen/QwQ-32B
contextWindow: 128_000,
- maxOutputTokens: null, // not specified?
+ reservedOutputTokenSpace: null, // not specified?
cost: { input: 0.29, output: 0.39 },
downloadable: false,
supportsFIM: false,
@@ -882,7 +882,7 @@ const microsoftAzureSettings: VoidStaticProviderInfo = {
const ollamaModelOptions = {
'qwen2.5-coder:7b': {
contextWindow: 32_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 1.9 },
supportsFIM: true,
@@ -891,7 +891,7 @@ const ollamaModelOptions = {
},
'qwen2.5-coder:3b': {
contextWindow: 32_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 1.9 },
supportsFIM: true,
@@ -900,7 +900,7 @@ const ollamaModelOptions = {
},
'qwen2.5-coder:1.5b': {
contextWindow: 32_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: .986 },
supportsFIM: true,
@@ -909,7 +909,7 @@ const ollamaModelOptions = {
},
'llama3.1': {
contextWindow: 128_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 4.9 },
supportsFIM: false,
@@ -918,7 +918,7 @@ const ollamaModelOptions = {
},
'qwen2.5-coder': {
contextWindow: 128_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 4.7 },
supportsFIM: false,
@@ -927,7 +927,7 @@ const ollamaModelOptions = {
},
'qwq': {
contextWindow: 128_000,
- maxOutputTokens: 32_000,
+ reservedOutputTokenSpace: 32_000,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 20 },
supportsFIM: false,
@@ -936,7 +936,7 @@ const ollamaModelOptions = {
},
'deepseek-r1': {
contextWindow: 128_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: { sizeGb: 4.7 },
supportsFIM: false,
@@ -986,7 +986,7 @@ const liteLLMSettings: VoidStaticProviderInfo = { // https://docs.litellm.ai/doc
const openRouterModelOptions_assumingOpenAICompat = {
'mistralai/mistral-small-3.1-24b-instruct:free': {
contextWindow: 128_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@@ -995,7 +995,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'google/gemini-2.0-flash-lite-preview-02-05:free': {
contextWindow: 1_048_576,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@@ -1004,7 +1004,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'google/gemini-2.0-pro-exp-02-05:free': {
contextWindow: 1_048_576,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@@ -1013,7 +1013,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'google/gemini-2.0-flash-exp:free': {
contextWindow: 1_048_576,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
@@ -1023,13 +1023,13 @@ const openRouterModelOptions_assumingOpenAICompat = {
'deepseek/deepseek-r1': {
...openSourceModelOptions_assumingOAICompat.deepseekR1,
contextWindow: 128_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0.8, output: 2.4 },
downloadable: false,
},
'anthropic/claude-3.7-sonnet:thinking': {
contextWindow: 200_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 3.00, output: 15.00 },
downloadable: false,
supportsFIM: false,
@@ -1038,13 +1038,13 @@ const openRouterModelOptions_assumingOpenAICompat = {
supportsReasoning: true,
canTurnOffReasoning: false,
canIOReasoning: true,
- reasoningMaxOutputTokens: 64_000,
+ reasoningReservedOutputTokenSpace: 64_000,
reasoningBudgetSlider: { type: 'slider', min: 1024, max: 32_000, default: 1024 }, // they recommend batching if max > 32_000
},
},
'anthropic/claude-3.7-sonnet': {
contextWindow: 200_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 3.00, output: 15.00 },
downloadable: false,
supportsFIM: false,
@@ -1053,7 +1053,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
},
'anthropic/claude-3.5-sonnet': {
contextWindow: 200_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 3.00, output: 15.00 },
downloadable: false,
supportsFIM: false,
@@ -1063,7 +1063,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
'mistralai/codestral-2501': {
...openSourceModelOptions_assumingOAICompat.codestral,
contextWindow: 256_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0.3, output: 0.9 },
downloadable: false,
reasoningCapabilities: false,
@@ -1071,14 +1071,14 @@ const openRouterModelOptions_assumingOpenAICompat = {
'qwen/qwen-2.5-coder-32b-instruct': {
...openSourceModelOptions_assumingOAICompat['qwen2.5coder'],
contextWindow: 33_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0.07, output: 0.16 },
downloadable: false,
},
'qwen/qwq-32b': {
...openSourceModelOptions_assumingOAICompat['qwq'],
contextWindow: 33_000,
- maxOutputTokens: null,
+ reservedOutputTokenSpace: null,
cost: { input: 0.07, output: 0.16 },
downloadable: false,
}
@@ -1201,12 +1201,12 @@ export const getIsReasoningEnabledState = (
}
-export const getMaxOutputTokens = (providerName: ProviderName, modelName: string, opts: { isReasoningEnabled: boolean, overridesOfModel: OverridesOfModel | undefined }) => {
+export const getReservedOutputTokenSpace = (providerName: ProviderName, modelName: string, opts: { isReasoningEnabled: boolean, overridesOfModel: OverridesOfModel | undefined }) => { // looks up the model's capabilities (forwarding opts.overridesOfModel) and picks which reserved-output-token value applies
const {
reasoningCapabilities,
- maxOutputTokens,
+ reservedOutputTokenSpace, // base (non-reasoning) value; the model tables in this file set it to null for many models, so the result can be null
} = getModelCapabilities(providerName, modelName, opts.overridesOfModel)
- return opts.isReasoningEnabled && reasoningCapabilities ? reasoningCapabilities.reasoningMaxOutputTokens : maxOutputTokens
+ return opts.isReasoningEnabled && reasoningCapabilities ? reasoningCapabilities.reasoningReservedOutputTokenSpace : reservedOutputTokenSpace // reasoning-specific value only when reasoning is enabled AND the model has reasoningCapabilities; otherwise the base value
}
// used to force reasoning state (complex) into something simple we can just read from when sending a message
diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
index 9b6a51cd..4cad7d20 100644
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
@@ -16,7 +16,7 @@ import { GoogleAuth } from 'google-auth-library'
import { AnthropicLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
-import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getMaxOutputTokens } from '../../common/modelCapabilities.js';
+import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
import { availableTools, InternalToolInfo, isAToolName, ToolParamName, voidTools } from '../../common/prompt/prompts.js';
import { generateUuid } from '../../../../../base/common/uuid.js';
@@ -430,7 +430,7 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {}
// anthropic-specific - max tokens
- const maxTokens = getMaxOutputTokens(providerName, modelName_, { isReasoningEnabled: !!reasoningInfo?.isReasoningEnabled, overridesOfModel })
+ const maxTokens = getReservedOutputTokenSpace(providerName, modelName_, { isReasoningEnabled: !!reasoningInfo?.isReasoningEnabled, overridesOfModel })
// tools
const potentialTools = chatMode !== null ? anthropicTools(chatMode) : null