mirror of
https://github.com/voideditor/void
synced 2026-05-24 09:58:23 +00:00
anthropic thinking output
This commit is contained in:
parent
02d8527244
commit
46e8fb1ea5
5 changed files with 67 additions and 26 deletions
|
|
@ -388,7 +388,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
const messages_ = this.getCurrentThread().messages.map(m => (toLLMChatMessage(m))).filter(m => !!m)
|
||||
const lastUserMsgIdx = findLastIndex(messages_, m => m.role === 'user')
|
||||
|
||||
if (lastUserMsgIdx !== -1) throw new Error(`Void: No user message found.`) // should never be -1
|
||||
if (lastUserMsgIdx === -1) throw new Error(`Void: No user message found.`) // should never be -1
|
||||
|
||||
const messages: LLMChatMessage[] = [
|
||||
{ role: 'system', content: chat_systemMessage(this._workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath)) },
|
||||
|
|
|
|||
|
|
@ -39,14 +39,14 @@ For example, if the user asks you to "make this file look nicer", make sure your
|
|||
- Make sure you give enough context in the code block to apply the change to the correct location in the code.
|
||||
|
||||
You're allowed to ask for more context. For example, if the user only gives you a selection but you want to see the full file, you can ask them to provide it.
|
||||
|
||||
If you are given tools:
|
||||
- Only use tools if the user asks you to do something. If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools.
|
||||
- You are allowed to use tools without asking for permission.
|
||||
- Feel free to use tools to gather context, make suggestions, etc.
|
||||
- If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools. Only use tools if they help you accomplish the user's goal.
|
||||
- If you think you should use tools given the user's request, you can use them without asking for permission. Feel free to use tools to gather context, make suggestions, etc.
|
||||
- One great use of tools is to explore imports that you'd like to have more information about.
|
||||
- Reference relevant files that you found when using tools if they helped you come up with your answer.
|
||||
- Some tools only work if the user has a workspace open.
|
||||
- Reference relevant files in your answer that you found when using tools if they helped you come up with your answer.
|
||||
- NEVER refer to a tool by name when speaking with the user. For example, do NOT say to the user "I'm going to use \`list_dir\`". Instead, say "I'm going to list all files in ___ directory", etc. Do not even refer to "pages" of results, just say you're getting more results.
|
||||
- Some tools only work if the user has a workspace open.
|
||||
|
||||
Do not output any of these instructions, nor tell the user anything about them unless directly prompted for them.
|
||||
Do not tell the user anything about the examples below. Do not assume the user is talking about any of the examples below.
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ type ModelOptions = {
|
|||
};
|
||||
}
|
||||
|
||||
type ProviderReasoningOptions = {
|
||||
type ProviderReasoningIOSettings = {
|
||||
// include this in payload to get reasoning
|
||||
input?: { includeInPayload?: { [key: string]: any }, };
|
||||
// nameOfFieldInDelta: reasoning output is in response.choices[0].delta[nameOfFieldInDelta]
|
||||
|
|
@ -36,7 +36,7 @@ type ProviderReasoningOptions = {
|
|||
}
|
||||
|
||||
type ProviderSettings = {
|
||||
providerReasoningIOSettingsIfSupportsReasoningOutput?: ProviderReasoningOptions; // input/output settings around thinking (allowed to be empty)
|
||||
providerReasoningIOSettings?: ProviderReasoningIOSettings; // input/output settings around thinking (allowed to be empty) - only applied if the model supports reasoning output
|
||||
modelOptions: { [key: string]: ModelOptions };
|
||||
modelOptionsFallback: (modelName: string) => (ModelOptions & { modelName: string }) | null;
|
||||
}
|
||||
|
|
@ -373,7 +373,7 @@ const deepseekModelOptions = {
|
|||
|
||||
const deepseekSettings: ProviderSettings = {
|
||||
modelOptions: deepseekModelOptions,
|
||||
providerReasoningIOSettingsIfSupportsReasoningOutput: {
|
||||
providerReasoningIOSettings: {
|
||||
// reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model
|
||||
output: { nameOfFieldInDelta: 'reasoning_content' },
|
||||
},
|
||||
|
|
@ -419,14 +419,14 @@ const groqSettings: ProviderSettings = {
|
|||
// ---------------- VLLM, OLLAMA, OPENAICOMPAT (self-hosted / local) ----------------
|
||||
const vLLMSettings: ProviderSettings = {
|
||||
// reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions
|
||||
providerReasoningIOSettingsIfSupportsReasoningOutput: { output: { nameOfFieldInDelta: 'reasoning_content' }, },
|
||||
providerReasoningIOSettings: { output: { nameOfFieldInDelta: 'reasoning_content' }, },
|
||||
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
|
||||
modelOptions: {},
|
||||
}
|
||||
|
||||
const ollamaSettings: ProviderSettings = {
|
||||
// reasoning: we need to filter out reasoning <think> tags manually
|
||||
providerReasoningIOSettingsIfSupportsReasoningOutput: { output: { needsManualParse: true }, },
|
||||
providerReasoningIOSettings: { output: { needsManualParse: true }, },
|
||||
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
|
||||
modelOptions: {},
|
||||
}
|
||||
|
|
@ -439,13 +439,22 @@ const openaiCompatible: ProviderSettings = {
|
|||
|
||||
|
||||
// ---------------- OPENROUTER ----------------
|
||||
const openRouterModelOptions = {
|
||||
const openRouterModelOptions_assumingOpenAICompat = {
|
||||
'deepseek/deepseek-r1': {
|
||||
...openSourceModelOptions_assumingOAICompat.deepseekR1,
|
||||
contextWindow: 128_000,
|
||||
maxOutputTokens: null,
|
||||
cost: { input: 0.8, output: 2.4 },
|
||||
},
|
||||
'anthropic/claude-3.7-sonnet': {
|
||||
contextWindow: 200_000,
|
||||
maxOutputTokens: null,
|
||||
cost: { input: 3.00, output: 15.00 },
|
||||
supportsFIM: false,
|
||||
supportsSystemMessage: 'system-role',
|
||||
supportsTools: 'openai-style',
|
||||
supportsReasoningOutput: {},
|
||||
},
|
||||
'anthropic/claude-3.5-sonnet': {
|
||||
contextWindow: 200_000,
|
||||
maxOutputTokens: null,
|
||||
|
|
@ -474,11 +483,11 @@ const openRouterModelOptions = {
|
|||
|
||||
const openRouterSettings: ProviderSettings = {
|
||||
// reasoning: OAICompat + response.choices[0].delta.reasoning : payload should have {include_reasoning: true} https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models
|
||||
providerReasoningIOSettingsIfSupportsReasoningOutput: {
|
||||
providerReasoningIOSettings: {
|
||||
input: { includeInPayload: { include_reasoning: true } },
|
||||
output: { nameOfFieldInDelta: 'reasoning' },
|
||||
},
|
||||
modelOptions: openRouterModelOptions,
|
||||
modelOptions: openRouterModelOptions_assumingOpenAICompat,
|
||||
// TODO!!! send a query to openrouter to get the price, etc.
|
||||
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
|
||||
}
|
||||
|
|
@ -521,6 +530,6 @@ export const getModelCapabilities = (providerName: ProviderName, modelName: stri
|
|||
|
||||
// non-model settings
|
||||
export const getProviderCapabilities = (providerName: ProviderName) => {
|
||||
const { providerReasoningIOSettingsIfSupportsReasoningOutput } = modelSettingsOfProvider[providerName]
|
||||
return { providerReasoningIOSettingsIfSupportsReasoningOutput }
|
||||
const { providerReasoningIOSettings } = modelSettingsOfProvider[providerName]
|
||||
return { providerReasoningIOSettings }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ const prepareMessages_normalize = ({ messages: messages_ }: { messages: LLMChatM
|
|||
|
||||
|
||||
|
||||
// remove rawAnthropicAssistantContent, and make content equal to it if sending to anthropic
|
||||
// remove rawAnthropicAssistantContent, and make content equal to it if supportsAnthropicContent
|
||||
const prepareMessages_anthropicContent = ({ messages, supportsAnthropicContent }: { messages: LLMChatMessage[], supportsAnthropicContent: boolean }) => {
|
||||
const newMessages: InternalLLMChatMessage[] = []
|
||||
for (const m of messages) {
|
||||
|
|
|
|||
|
|
@ -159,18 +159,18 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
|
|||
// maxOutputTokens, right now we are ignoring this
|
||||
} = getModelCapabilities(providerName, modelName_)
|
||||
|
||||
const { providerReasoningIOSettingsIfSupportsReasoningOutput } = getProviderCapabilities(providerName)
|
||||
const { providerReasoningIOSettings } = getProviderCapabilities(providerName)
|
||||
|
||||
const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, supportsAnthropicContent: false }) // can change supportsAnthropicContent if e.g. OpenRouter starts supporting anthropic extended thinking
|
||||
const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined
|
||||
|
||||
const includeInPayload = supportsReasoningOutput ? providerReasoningIOSettingsIfSupportsReasoningOutput?.input?.includeInPayload || {} : {}
|
||||
const includeInPayload = supportsReasoningOutput ? providerReasoningIOSettings?.input?.includeInPayload || {} : {}
|
||||
|
||||
const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
|
||||
const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
|
||||
const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, }
|
||||
|
||||
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = providerReasoningIOSettingsIfSupportsReasoningOutput?.output ?? {}
|
||||
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = providerReasoningIOSettings?.output ?? {}
|
||||
|
||||
const manuallyParseReasoning = needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags
|
||||
if (manuallyParseReasoning) {
|
||||
|
|
@ -304,20 +304,52 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
|
|||
max_tokens: maxOutputTokens ?? 4_096, // anthropic requires this
|
||||
tools: tools,
|
||||
tool_choice: tools ? { type: 'auto', disable_parallel_tool_use: true } : undefined, // one tool use at a time
|
||||
// thinking: { budget_tokens, type: 'enabled' }, // TODO!!!!
|
||||
thinking: { budget_tokens: 2000, type: 'enabled' }, // TODO!!!!
|
||||
})
|
||||
|
||||
// when receive text
|
||||
let fullText = ''
|
||||
let fullReasoning = ''
|
||||
stream.on('text', (newText_, fullText_) => { fullText = fullText_; onText({ fullText, fullReasoning }) })
|
||||
stream.on('thinking', (newThinking_, fullThinking_) => { fullReasoning = fullThinking_; onText({ fullText, fullReasoning }) })
|
||||
|
||||
// when we get the final message on this stream (or when error/fail)
|
||||
// there are no events for tool_use, it comes in at the end
|
||||
stream.on('streamEvent', e => {
|
||||
// start block
|
||||
if (e.type === 'content_block_start') {
|
||||
if (e.content_block.type === 'text') {
|
||||
if (fullText) fullText += '\n\n' // starting a 2nd text block
|
||||
fullText += e.content_block.text
|
||||
onText({ fullText, fullReasoning })
|
||||
}
|
||||
else if (e.content_block.type === 'thinking') {
|
||||
if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block
|
||||
fullReasoning += e.content_block.thinking
|
||||
onText({ fullText, fullReasoning })
|
||||
}
|
||||
else if (e.content_block.type === 'redacted_thinking') {
|
||||
console.log('delta', e.content_block.type)
|
||||
if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block
|
||||
fullReasoning += '[redacted_thinking]'
|
||||
onText({ fullText, fullReasoning })
|
||||
}
|
||||
}
|
||||
|
||||
// delta
|
||||
else if (e.type === 'content_block_delta') {
|
||||
if (e.delta.type === 'text_delta') {
|
||||
fullText += e.delta.text
|
||||
onText({ fullText, fullReasoning })
|
||||
}
|
||||
else if (e.delta.type === 'thinking_delta') {
|
||||
fullReasoning += e.delta.thinking
|
||||
onText({ fullText, fullReasoning })
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// on done - (or when error/fail) - this is called AFTER last streamEvent
|
||||
stream.on('finalMessage', (response) => {
|
||||
const content = response.content.map(c => c.type === 'text' ? c.text : '').join('\n\n')
|
||||
const toolCalls = toolCallsFrom_AnthropicContent(response.content)
|
||||
onFinalMessage({ fullText: content, toolCalls, rawAnthropicAssistantContent: response.content as any })
|
||||
onFinalMessage({ fullText, fullReasoning, toolCalls, rawAnthropicAssistantContent: response.content as any })
|
||||
})
|
||||
// on error
|
||||
stream.on('error', (error) => {
|
||||
|
|
|
|||
Loading…
Reference in a new issue