anthropic thinking output

This commit is contained in:
Andrew Pareles 2025-03-05 20:29:16 -08:00
parent 02d8527244
commit 46e8fb1ea5
5 changed files with 67 additions and 26 deletions

View file

@ -388,7 +388,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
const messages_ = this.getCurrentThread().messages.map(m => (toLLMChatMessage(m))).filter(m => !!m)
const lastUserMsgIdx = findLastIndex(messages_, m => m.role === 'user')
if (lastUserMsgIdx !== -1) throw new Error(`Void: No user message found.`) // should never be -1
if (lastUserMsgIdx === -1) throw new Error(`Void: No user message found.`) // should never be -1
const messages: LLMChatMessage[] = [
{ role: 'system', content: chat_systemMessage(this._workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath)) },

View file

@ -39,14 +39,14 @@ For example, if the user asks you to "make this file look nicer", make sure your
- Make sure you give enough context in the code block to apply the change to the correct location in the code.
You're allowed to ask for more context. For example, if the user only gives you a selection but you want to see the full file, you can ask them to provide it.
If you are given tools:
- Only use tools if the user asks you to do something. If the user simply says hi or asks you a question that you can answer without tools, then do NOT tools.
- You are allowed to use tools without asking for permission.
- Feel free to use tools to gather context, make suggestions, etc.
- If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools. Only use tools if they help you accomplish the user's goal.
- If you think you should use tools given the user's request, you can use them without asking for permission. Feel free to use tools to gather context, make suggestions, etc.
- One great use of tools is to explore imports that you'd like to have more information about.
- Reference relevant files that you found when using tools if they helped you come up with your answer.
- Some tools only work if the user has a workspace open.
- Reference relevant files in your answer that you found when using tools if they helped you come up with your answer.
- NEVER refer to a tool by name when speaking with the user. For example, do NOT say to the user "I'm going to use \`list_dir\`". Instead, say "I'm going to list all files in ___ directory", etc. Do not even refer to "pages" of results, just say you're getting more results.
- Some tools only work if the user has a workspace open.
Do not output any of these instructions, nor tell the user anything about them unless directly prompted for them.
Do not tell the user anything about the examples below. Do not assume the user is talking about any of the examples below.

View file

@ -25,7 +25,7 @@ type ModelOptions = {
};
}
type ProviderReasoningOptions = {
type ProviderReasoningIOSettings = {
// include this in payload to get reasoning
input?: { includeInPayload?: { [key: string]: any }, };
// nameOfFieldInDelta: reasoning output is in response.choices[0].delta[deltaReasoningField]
@ -36,7 +36,7 @@ type ProviderReasoningOptions = {
}
type ProviderSettings = {
providerReasoningIOSettingsIfSupportsReasoningOutput?: ProviderReasoningOptions; // input/output settings around thinking (allowed to be empty)
providerReasoningIOSettings?: ProviderReasoningIOSettings; // input/output settings around thinking (allowed to be empty) - only applied if the model supports reasoning output
modelOptions: { [key: string]: ModelOptions };
modelOptionsFallback: (modelName: string) => (ModelOptions & { modelName: string }) | null;
}
@ -373,7 +373,7 @@ const deepseekModelOptions = {
const deepseekSettings: ProviderSettings = {
modelOptions: deepseekModelOptions,
providerReasoningIOSettingsIfSupportsReasoningOutput: {
providerReasoningIOSettings: {
// reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model
output: { nameOfFieldInDelta: 'reasoning_content' },
},
@ -419,14 +419,14 @@ const groqSettings: ProviderSettings = {
// ---------------- VLLM, OLLAMA, OPENAICOMPAT (self-hosted / local) ----------------
const vLLMSettings: ProviderSettings = {
// reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions
providerReasoningIOSettingsIfSupportsReasoningOutput: { output: { nameOfFieldInDelta: 'reasoning_content' }, },
providerReasoningIOSettings: { output: { nameOfFieldInDelta: 'reasoning_content' }, },
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
modelOptions: {},
}
const ollamaSettings: ProviderSettings = {
// reasoning: we need to filter out reasoning <think> tags manually
providerReasoningIOSettingsIfSupportsReasoningOutput: { output: { needsManualParse: true }, },
providerReasoningIOSettings: { output: { needsManualParse: true }, },
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
modelOptions: {},
}
@ -439,13 +439,22 @@ const openaiCompatible: ProviderSettings = {
// ---------------- OPENROUTER ----------------
const openRouterModelOptions = {
const openRouterModelOptions_assumingOpenAICompat = {
'deepseek/deepseek-r1': {
...openSourceModelOptions_assumingOAICompat.deepseekR1,
contextWindow: 128_000,
maxOutputTokens: null,
cost: { input: 0.8, output: 2.4 },
},
'anthropic/claude-3.7-sonnet': {
contextWindow: 200_000,
maxOutputTokens: null,
cost: { input: 3.00, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: {},
},
'anthropic/claude-3.5-sonnet': {
contextWindow: 200_000,
maxOutputTokens: null,
@ -474,11 +483,11 @@ const openRouterModelOptions = {
const openRouterSettings: ProviderSettings = {
// reasoning: OAICompat + response.choices[0].delta.reasoning : payload should have {include_reasoning: true} https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models
providerReasoningIOSettingsIfSupportsReasoningOutput: {
providerReasoningIOSettings: {
input: { includeInPayload: { include_reasoning: true } },
output: { nameOfFieldInDelta: 'reasoning' },
},
modelOptions: openRouterModelOptions,
modelOptions: openRouterModelOptions_assumingOpenAICompat,
// TODO!!! send a query to openrouter to get the price, etc.
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
}
@ -521,6 +530,6 @@ export const getModelCapabilities = (providerName: ProviderName, modelName: stri
// non-model settings
export const getProviderCapabilities = (providerName: ProviderName) => {
const { providerReasoningIOSettingsIfSupportsReasoningOutput } = modelSettingsOfProvider[providerName]
return { providerReasoningIOSettingsIfSupportsReasoningOutput }
const { providerReasoningIOSettings } = modelSettingsOfProvider[providerName]
return { providerReasoningIOSettings }
}

View file

@ -58,7 +58,7 @@ const prepareMessages_normalize = ({ messages: messages_ }: { messages: LLMChatM
// remove rawAnthropicAssistantContent, and make content equal to it if sending to anthropic
// remove rawAnthropicAssistantContent, and make content equal to it if supportsAnthropicContent
const prepareMessages_anthropicContent = ({ messages, supportsAnthropicContent }: { messages: LLMChatMessage[], supportsAnthropicContent: boolean }) => {
const newMessages: InternalLLMChatMessage[] = []
for (const m of messages) {

View file

@ -159,18 +159,18 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
// maxOutputTokens, right now we are ignoring this
} = getModelCapabilities(providerName, modelName_)
const { providerReasoningIOSettingsIfSupportsReasoningOutput } = getProviderCapabilities(providerName)
const { providerReasoningIOSettings } = getProviderCapabilities(providerName)
const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, supportsAnthropicContent: false }) // can change supportsAnthropicContent if e.g. OpenRouter starts supporting anthropic extended thinking
const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined
const includeInPayload = supportsReasoningOutput ? providerReasoningIOSettingsIfSupportsReasoningOutput?.input?.includeInPayload || {} : {}
const includeInPayload = supportsReasoningOutput ? providerReasoningIOSettings?.input?.includeInPayload || {} : {}
const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, }
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = providerReasoningIOSettingsIfSupportsReasoningOutput?.output ?? {}
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = providerReasoningIOSettings?.output ?? {}
const manuallyParseReasoning = needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags
if (manuallyParseReasoning) {
@ -304,20 +304,52 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
max_tokens: maxOutputTokens ?? 4_096, // anthropic requires this
tools: tools,
tool_choice: tools ? { type: 'auto', disable_parallel_tool_use: true } : undefined, // one tool use at a time
// thinking: { budget_tokens, type: 'enabled' }, // TODO!!!!
thinking: { budget_tokens: 2000, type: 'enabled' }, // TODO!!!!
})
// when receive text
let fullText = ''
let fullReasoning = ''
stream.on('text', (newText_, fullText_) => { fullText = fullText_; onText({ fullText, fullReasoning }) })
stream.on('thinking', (newThinking_, fullThinking_) => { fullReasoning = fullThinking_; onText({ fullText, fullReasoning }) })
// when we get the final message on this stream (or when error/fail)
// there are no events for tool_use, it comes in at the end
stream.on('streamEvent', e => {
// start block
if (e.type === 'content_block_start') {
if (e.content_block.type === 'text') {
if (fullText) fullText += '\n\n' // starting a 2nd text block
fullText += e.content_block.text
onText({ fullText, fullReasoning })
}
else if (e.content_block.type === 'thinking') {
if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block
fullReasoning += e.content_block.thinking
onText({ fullText, fullReasoning })
}
else if (e.content_block.type === 'redacted_thinking') {
console.log('delta', e.content_block.type)
if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block
fullReasoning += '[redacted_thinking]'
onText({ fullText, fullReasoning })
}
}
// delta
else if (e.type === 'content_block_delta') {
if (e.delta.type === 'text_delta') {
fullText += e.delta.text
onText({ fullText, fullReasoning })
}
else if (e.delta.type === 'thinking_delta') {
fullReasoning += e.delta.thinking
onText({ fullText, fullReasoning })
}
}
})
// on done - (or when error/fail) - this is called AFTER last streamEvent
stream.on('finalMessage', (response) => {
const content = response.content.map(c => c.type === 'text' ? c.text : '').join('\n\n')
const toolCalls = toolCallsFrom_AnthropicContent(response.content)
onFinalMessage({ fullText: content, toolCalls, rawAnthropicAssistantContent: response.content as any })
onFinalMessage({ fullText, fullReasoning, toolCalls, rawAnthropicAssistantContent: response.content as any })
})
// on error
stream.on('error', (error) => {