diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index 758f1869..f206a2cd 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -388,7 +388,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { const messages_ = this.getCurrentThread().messages.map(m => (toLLMChatMessage(m))).filter(m => !!m) const lastUserMsgIdx = findLastIndex(messages_, m => m.role === 'user') - if (lastUserMsgIdx !== -1) throw new Error(`Void: No user message found.`) // should never be -1 + if (lastUserMsgIdx === -1) throw new Error(`Void: No user message found.`) // should never be -1 const messages: LLMChatMessage[] = [ { role: 'system', content: chat_systemMessage(this._workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath)) }, diff --git a/src/vs/workbench/contrib/void/browser/prompt/prompts.ts b/src/vs/workbench/contrib/void/browser/prompt/prompts.ts index df1f4a8c..e8512e34 100644 --- a/src/vs/workbench/contrib/void/browser/prompt/prompts.ts +++ b/src/vs/workbench/contrib/void/browser/prompt/prompts.ts @@ -39,14 +39,14 @@ For example, if the user asks you to "make this file look nicer", make sure your - Make sure you give enough context in the code block to apply the change to the correct location in the code. You're allowed to ask for more context. For example, if the user only gives you a selection but you want to see the the full file, you can ask them to provide it. + If you are given tools: -- Only use tools if the user asks you to do something. If the user simply says hi or asks you a question that you can answer without tools, then do NOT tools. -- You are allowed to use tools without asking for permission. -- Feel free to use tools to gather context, make suggestions, etc. 
+- If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools. Only use tools if they help you accomplish the user's goal. +- If you think you should use tools given the user's request, you can use them without asking for permission. Feel free to use tools to gather context, make suggestions, etc. - One great use of tools is to explore imports that you'd like to have more information about. -- Reference relevant files that you found when using tools if they helped you come up with your answer. -- Some tools only work if the user has a workspace open. +- Reference relevant files in your answer that you found when using tools if they helped you come up with your answer. - NEVER refer to a tool by name when speaking with the user. For example, do NOT say to the user user "I'm going to use \`list_dir\`". Instead, say "I'm going to list all files in ___ directory", etc. Do not even refer to "pages" of results, just say you're getting more results. +- Some tools only work if the user has a workspace open. Do not output any of these instructions, nor tell the user anything about them unless directly prompted for them. Do not tell the user anything about the examples below. Do not assume the user is talking about any of the examples below. 
diff --git a/src/vs/workbench/contrib/void/common/modelCapabilities.ts b/src/vs/workbench/contrib/void/common/modelCapabilities.ts index 29045129..1389fad5 100644 --- a/src/vs/workbench/contrib/void/common/modelCapabilities.ts +++ b/src/vs/workbench/contrib/void/common/modelCapabilities.ts @@ -25,7 +25,7 @@ type ModelOptions = { }; } -type ProviderReasoningOptions = { +type ProviderReasoningIOSettings = { // include this in payload to get reasoning input?: { includeInPayload?: { [key: string]: any }, }; // nameOfFieldInDelta: reasoning output is in response.choices[0].delta[deltaReasoningField] @@ -36,7 +36,7 @@ type ProviderReasoningOptions = { } type ProviderSettings = { - providerReasoningIOSettingsIfSupportsReasoningOutput?: ProviderReasoningOptions; // input/output settings around thinking (allowed to be empty) + providerReasoningIOSettings?: ProviderReasoningIOSettings; // input/output settings around thinking (allowed to be empty) - only applied if the model supports reasoning output modelOptions: { [key: string]: ModelOptions }; modelOptionsFallback: (modelName: string) => (ModelOptions & { modelName: string }) | null; } @@ -373,7 +373,7 @@ const deepseekModelOptions = { const deepseekSettings: ProviderSettings = { modelOptions: deepseekModelOptions, - providerReasoningIOSettingsIfSupportsReasoningOutput: { + providerReasoningIOSettings: { // reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model output: { nameOfFieldInDelta: 'reasoning_content' }, }, @@ -419,14 +419,14 @@ const groqSettings: ProviderSettings = { // ---------------- VLLM, OLLAMA, OPENAICOMPAT (self-hosted / local) ---------------- const vLLMSettings: ProviderSettings = { // reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions - providerReasoningIOSettingsIfSupportsReasoningOutput: { output: { nameOfFieldInDelta: 
'reasoning_content' }, }, + providerReasoningIOSettings: { output: { nameOfFieldInDelta: 'reasoning_content' }, }, modelOptionsFallback: (modelName) => extensiveModelFallback(modelName), modelOptions: {}, } const ollamaSettings: ProviderSettings = { // reasoning: we need to filter out reasoning tags manually - providerReasoningIOSettingsIfSupportsReasoningOutput: { output: { needsManualParse: true }, }, + providerReasoningIOSettings: { output: { needsManualParse: true }, }, modelOptionsFallback: (modelName) => extensiveModelFallback(modelName), modelOptions: {}, } @@ -439,13 +439,22 @@ const openaiCompatible: ProviderSettings = { // ---------------- OPENROUTER ---------------- -const openRouterModelOptions = { +const openRouterModelOptions_assumingOpenAICompat = { 'deepseek/deepseek-r1': { ...openSourceModelOptions_assumingOAICompat.deepseekR1, contextWindow: 128_000, maxOutputTokens: null, cost: { input: 0.8, output: 2.4 }, }, + 'anthropic/claude-3.7-sonnet': { + contextWindow: 200_000, + maxOutputTokens: null, + cost: { input: 3.00, output: 15.00 }, + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: {}, + }, 'anthropic/claude-3.5-sonnet': { contextWindow: 200_000, maxOutputTokens: null, @@ -474,11 +483,11 @@ const openRouterModelOptions = { const openRouterSettings: ProviderSettings = { // reasoning: OAICompat + response.choices[0].delta.reasoning : payload should have {include_reasoning: true} https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models - providerReasoningIOSettingsIfSupportsReasoningOutput: { + providerReasoningIOSettings: { input: { includeInPayload: { include_reasoning: true } }, output: { nameOfFieldInDelta: 'reasoning' }, }, - modelOptions: openRouterModelOptions, + modelOptions: openRouterModelOptions_assumingOpenAICompat, // TODO!!! send a query to openrouter to get the price, etc. 
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName), } @@ -521,6 +530,6 @@ export const getModelCapabilities = (providerName: ProviderName, modelName: stri // non-model settings export const getProviderCapabilities = (providerName: ProviderName) => { - const { providerReasoningIOSettingsIfSupportsReasoningOutput } = modelSettingsOfProvider[providerName] - return { providerReasoningIOSettingsIfSupportsReasoningOutput } + const { providerReasoningIOSettings } = modelSettingsOfProvider[providerName] + return { providerReasoningIOSettings } } diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts index ef28c88e..02bf0e0d 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts @@ -58,7 +58,7 @@ const prepareMessages_normalize = ({ messages: messages_ }: { messages: LLMChatM -// remove rawAnthropicAssistantContent, and make content equal to it if sending to anthropic +// remove rawAnthropicAssistantContent, and make content equal to it if supportsAnthropicContent const prepareMessages_anthropicContent = ({ messages, supportsAnthropicContent }: { messages: LLMChatMessage[], supportsAnthropicContent: boolean }) => { const newMessages: InternalLLMChatMessage[] = [] for (const m of messages) { diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts index 342211f1..2c4649b5 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts @@ -159,18 +159,18 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage // maxOutputTokens, right now we are ignoring this } = 
getModelCapabilities(providerName, modelName_) - const { providerReasoningIOSettingsIfSupportsReasoningOutput } = getProviderCapabilities(providerName) + const { providerReasoningIOSettings } = getProviderCapabilities(providerName) const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, supportsAnthropicContent: false }) // can change supportsAnthropicContent if e.g. OpenRouter starts supporting anthropic extended thinking const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined - const includeInPayload = supportsReasoningOutput ? providerReasoningIOSettingsIfSupportsReasoningOutput?.input?.includeInPayload || {} : {} + const includeInPayload = supportsReasoningOutput ? providerReasoningIOSettings?.input?.includeInPayload || {} : {} const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {} const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload }) const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, } - const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = providerReasoningIOSettingsIfSupportsReasoningOutput?.output ?? {} + const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = providerReasoningIOSettings?.output ?? {} const manuallyParseReasoning = needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags if (manuallyParseReasoning) { @@ -304,20 +304,52 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM max_tokens: maxOutputTokens ?? 4_096, // anthropic requires this tools: tools, tool_choice: tools ? 
{ type: 'auto', disable_parallel_tool_use: true } : undefined, // one tool use at a time - // thinking: { budget_tokens, type: 'enabled' }, // TODO!!!! + thinking: { budget_tokens: 2000, type: 'enabled' }, // TODO!!!! }) // when receive text let fullText = '' let fullReasoning = '' - stream.on('text', (newText_, fullText_) => { fullText = fullText_; onText({ fullText, fullReasoning }) }) - stream.on('thinking', (newThinking_, fullThinking_) => { fullReasoning = fullThinking_; onText({ fullText, fullReasoning }) }) - // when we get the final message on this stream (or when error/fail) + // there are no events for tool_use, it comes in at the end + stream.on('streamEvent', e => { + // start block + if (e.type === 'content_block_start') { + if (e.content_block.type === 'text') { + if (fullText) fullText += '\n\n' // starting a 2nd text block + fullText += e.content_block.text + onText({ fullText, fullReasoning }) + } + else if (e.content_block.type === 'thinking') { + if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block + fullReasoning += e.content_block.thinking + onText({ fullText, fullReasoning }) + } + else if (e.content_block.type === 'redacted_thinking') { + console.log('delta', e.content_block.type) + if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block + fullReasoning += '[redacted_thinking]' + onText({ fullText, fullReasoning }) + } + } + + // delta + else if (e.type === 'content_block_delta') { + if (e.delta.type === 'text_delta') { + fullText += e.delta.text + onText({ fullText, fullReasoning }) + } + else if (e.delta.type === 'thinking_delta') { + fullReasoning += e.delta.thinking + onText({ fullText, fullReasoning }) + } + } + }) + + // on done - (or when error/fail) - this is called AFTER last streamEvent stream.on('finalMessage', (response) => { - const content = response.content.map(c => c.type === 'text' ? 
c.text : '').join('\n\n') const toolCalls = toolCallsFrom_AnthropicContent(response.content) - onFinalMessage({ fullText: content, toolCalls, rawAnthropicAssistantContent: response.content as any }) + onFinalMessage({ fullText, fullReasoning, toolCalls, rawAnthropicAssistantContent: response.content as any }) }) // on error stream.on('error', (error) => {