This commit is contained in:
Andrew Pareles 2025-04-07 20:28:39 -07:00
parent 3aa0bf11e6
commit 8f8fa8548d
13 changed files with 443 additions and 671 deletions

27
package-lock.json generated
View file

@ -62,6 +62,7 @@
"posthog-node": "^4.8.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"sax": "^1.4.1",
"tas-client-umd": "0.2.0",
"v8-inspect-profiler": "^0.1.1",
"vscode-html-languageservice": "^5.3.1",
@ -88,6 +89,7 @@
"@types/node": "20.x",
"@types/react": "^19.0.10",
"@types/react-dom": "^19.0.4",
"@types/sax": "^1.2.7",
"@types/sinon": "^10.0.2",
"@types/sinon-test": "^2.4.2",
"@types/trusted-types": "^1.0.6",
@ -3939,6 +3941,16 @@
"@types/node": "*"
}
},
"node_modules/@types/sax": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/@types/sax/-/sax-1.2.7.tgz",
"integrity": "sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/semver": {
"version": "7.5.8",
"resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.8.tgz",
@ -15094,10 +15106,11 @@
}
},
"node_modules/nan": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==",
"version": "2.22.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.22.2.tgz",
"integrity": "sha512-DANghxFkS1plDdRsX0X9pm0Z6SJNN6gBdtXfanwoZ8hooC5gosGFSBGRYHUVPz1asKA/kMRqDRdHrluZ61SpBQ==",
"dev": true,
"license": "MIT",
"optional": true
},
"node_modules/nanoid": {
@ -18807,10 +18820,10 @@
}
},
"node_modules/sax": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
"integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==",
"dev": true
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/sax/-/sax-1.4.1.tgz",
"integrity": "sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==",
"license": "ISC"
},
"node_modules/scheduler": {
"version": "0.25.0",

View file

@ -123,6 +123,7 @@
"posthog-node": "^4.8.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"sax": "^1.4.1",
"tas-client-umd": "0.2.0",
"v8-inspect-profiler": "^0.1.1",
"vscode-html-languageservice": "^5.3.1",
@ -149,6 +150,7 @@
"@types/node": "20.x",
"@types/react": "^19.0.10",
"@types/react-dom": "^19.0.4",
"@types/sax": "^1.2.7",
"@types/sinon": "^10.0.2",
"@types/sinon-test": "^2.4.2",
"@types/trusted-types": "^1.0.6",

View file

@ -11,13 +11,13 @@ import { IStorageService, StorageScope, StorageTarget } from '../../../../platfo
import { URI } from '../../../../base/common/uri.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { ILLMMessageService } from '../common/sendLLMMessageService.js';
import { chat_userMessageContent, chat_systemMessage, voidTools } from '../common/prompt/prompts.js';
import { chat_userMessageContent, chat_systemMessage, } from '../common/prompt/prompts.js';
import { getErrorMessage, LLMChatMessage, ToolCallType } from '../common/sendLLMMessageTypes.js';
import { IWorkspaceContextService } from '../../../../platform/workspace/common/workspace.js';
import { generateUuid } from '../../../../base/common/uuid.js';
import { ChatMode, FeatureName, ModelSelection, ModelSelectionOptions } from '../common/voidSettingsTypes.js';
import { FeatureName, ModelSelection, ModelSelectionOptions } from '../common/voidSettingsTypes.js';
import { IVoidSettingsService } from '../common/voidSettingsService.js';
import { ToolName, ToolCallParams, ToolResultType, toolNamesThatRequireApproval, InternalToolInfo } from '../common/toolsServiceTypes.js';
import { ToolName, ToolCallParams, ToolResultType, toolNamesThatRequireApproval } from '../common/toolsServiceTypes.js';
import { IToolsService } from './toolsService.js';
import { CancellationToken } from '../../../../base/common/cancellation.js';
import { ILanguageFeaturesService } from '../../../../editor/common/services/languageFeatures.js';
@ -63,6 +63,9 @@ A checkpoint appears before every LLM message, and before every user message (be
const toLLMChatMessages = (chatMessages: ChatMessage[]): LLMChatMessage[] => {
const llmChatMessages: LLMChatMessage[] = []
// merge tools into user message
for (const c of chatMessages) {
if (c.role === 'user') {
llmChatMessages.push({ role: c.role, content: c.content })
@ -551,18 +554,6 @@ class ChatThreadService extends Disposable implements IChatThreadService {
// Selects the tools offered to the LLM for a chat mode:
// - 'gather': every tool that does not require user approval
// - 'agent': every tool
// - anything else (e.g. 'normal'): no tools (undefined)
private _tools = (chatMode: ChatMode) => {
	if (chatMode !== 'gather' && chatMode !== 'agent') return undefined

	const allToolNames = Object.keys(voidTools) as ToolName[]
	const toolNames = chatMode === 'agent'
		? allToolNames
		: allToolNames.filter(name => !toolNamesThatRequireApproval.has(name))

	const tools: InternalToolInfo[] = toolNames.map(name => voidTools[name])
	return tools
}
private readonly errMsgs = {
rejected: 'Tool call was rejected by the user.',
errWhenStringifying: (error: any) => `Tool call succeeded, but there was an error stringifying the output.\n${getErrorMessage(error)}`
@ -704,7 +695,6 @@ class ChatThreadService extends Disposable implements IChatThreadService {
// above just defines helpers, below starts the actual function
const { chatMode } = this._settingsService.state.globalSettings // should not change as we loop even if user changes it, so it goes here
const tools = this._tools(chatMode)
// clear any previous error
this._setStreamState(threadId, { error: undefined }, 'set')
@ -736,7 +726,6 @@ class ChatThreadService extends Disposable implements IChatThreadService {
const llmCancelToken = this._llmMessageService.sendLLMMessage({
messagesType: 'chatMessages',
messages,
tools: tools,
modelSelection,
modelSelectionOptions,
logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } },

View file

@ -287,6 +287,8 @@ class DirectoryStrService extends Disposable implements IDirectoryStrService {
let str: string = '';
let cutOff = false;
const folders = this.workspaceContextService.getWorkspace().folders;
if (folders.length === 0)
return { str: '(NO WORKSPACE OPEN)', wasCutOff: false };
for (let i = 0; i < folders.length; i += 1) {
if (i > 0) str += '\n';

View file

@ -3,9 +3,7 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
import { OnText } from '../sendLLMMessageTypes.js'
import { DIVIDER, FINAL, ORIGINAL } from '../prompt/prompts.js'
class SurroundingsRemover {
readonly originalS: string
i: number
@ -174,7 +172,7 @@ export type ExtractedSearchReplaceBlock = {
// String.prototype.substring swaps its arguments when end < start, so "ab".substring(1, 0) is 'a', NOT ''.
// This helper returns '' for such an inverted range and otherwise behaves like substring(start, end).
const voidSubstr = (str: string, start: number, end: number) => {
	if (end < start) return ''
	return str.substring(start, end)
}
const endsWithAnyPrefixOf = (str: string, anyPrefix: string) => {
export const endsWithAnyPrefixOf = (str: string, anyPrefix: string) => {
// for each prefix
for (let i = anyPrefix.length; i >= 1; i--) { // i >= 1 because must not be empty string
const prefix = anyPrefix.slice(0, i)
@ -250,122 +248,6 @@ export const extractSearchReplaceBlocks = (str: string) => {
// could simplify this - this assumes we can never add a tag without committing it to the user's screen, but that's not true
/**
 * Wraps an `onText` callback so that reasoning enclosed in think tags is separated from the streamed text.
 *
 * The returned callback must be called with the cumulative text streamed so far. It forwards to `onText`:
 * - `fullText`: the text outside the think tags (before the opening tag and after the closing tag)
 * - `fullReasoning`: the text between the opening and closing tags
 *
 * While the incoming text ends with a prefix of the tag currently being looked for (including the whole tag),
 * the call is held back and nothing is forwarded until more text arrives. Only the first opening tag and the
 * first closing tag after it are handled.
 *
 * @param onText callback that receives the separated text and reasoning
 * @param thinkTags `[openingTag, closingTag]`, e.g. `['<think>', '</think>']`
 * @returns an `OnText` to use in place of `onText`
 */
export const extractReasoningOnTextWrapper = (onText: OnText, thinkTags: [string, string]): OnText => {
	let latestAddIdx = 0 // exclusive index into the incoming text; everything before it has been consumed
	let foundTag1 = false
	let foundTag2 = false

	let fullTextSoFar = ''
	let fullReasoningSoFar = ''

	const newOnText: OnText = ({ fullText: fullText_, ...p }) => {

		// phase 1: opening tag not seen yet - everything is normal text
		if (!foundTag1) {
			const endsWithTag1 = endsWithAnyPrefixOf(fullText_, thinkTags[0])
			if (endsWithTag1) {
				// might be in the middle of the opening tag - wait until we get the full tag or know more
				return
			}

			const tag1Index = fullText_.indexOf(thinkTags[0])
			if (tag1Index !== -1) {
				foundTag1 = true
				// The text before the tag IS the normal text so far. Assign (don't append): fullTextSoFar
				// already holds the earlier text from previous calls, so appending would duplicate it.
				fullTextSoFar = fullText_.substring(0, tag1Index)
				// whatever follows the opening tag gets consumed as reasoning on the next call
				latestAddIdx = tag1Index + thinkTags[0].length
				onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
				return
			}

			// no tag anywhere - the whole text is normal text
			fullTextSoFar = fullText_
			latestAddIdx = fullText_.length
			onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
			return
		}

		// phase 2: opening tag seen, closing tag not seen yet - new text is reasoning
		if (!foundTag2) {
			const endsWithTag2 = endsWithAnyPrefixOf(fullText_, thinkTags[1])
			if (endsWithTag2) {
				// might be in the middle of the closing tag - wait until we get the full tag or know more
				return
			}

			const tag2Index = fullText_.indexOf(thinkTags[1], latestAddIdx)
			if (tag2Index !== -1) {
				foundTag2 = true
				// everything not yet consumed up to the closing tag is reasoning
				fullReasoningSoFar += fullText_.substring(latestAddIdx, tag2Index)
				// whatever follows the closing tag gets consumed as normal text on the next call
				latestAddIdx = tag2Index + thinkTags[1].length
				onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
				return
			}

			// still inside the reasoning - consume any text we have not processed yet
			if (fullText_.length > latestAddIdx) {
				fullReasoningSoFar += fullText_.substring(latestAddIdx)
				latestAddIdx = fullText_.length
			}

			onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
			return
		}

		// phase 3: closing tag seen - new text is normal text again
		if (fullText_.length > latestAddIdx) {
			fullTextSoFar += fullText_.substring(latestAddIdx)
			latestAddIdx = fullText_.length
		}

		onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
	}

	return newOnText
}
/**
 * Splits a completed message into its normal text and its reasoning, based on the think tags.
 *
 * - No opening tag: the whole message is normal text and the reasoning is empty.
 * - Opening tag but no closing tag after it: the text before the opening tag is normal text and
 *   everything after the opening tag is reasoning (the tag itself is not included in either).
 * - Both tags: the text between them is reasoning; the text before the opening tag and after the
 *   closing tag is concatenated into the normal text.
 *
 * Only the first opening tag and the first closing tag after it are considered.
 *
 * @param fullText_ the complete raw message
 * @param thinkTags `[openingTag, closingTag]`, e.g. `['<think>', '</think>']`
 * @returns the message with the reasoning removed (`fullText`) and the reasoning itself (`fullReasoning`)
 */
export const extractReasoningOnFinalMessage = (fullText_: string, thinkTags: [string, string]): { fullText: string, fullReasoning: string } => {
	const [openTag, closeTag] = thinkTags

	const openIdx = fullText_.indexOf(openTag)
	if (openIdx === -1) return { fullText: fullText_, fullReasoning: '' } // never started reasoning

	const textBefore = fullText_.substring(0, openIdx)
	const reasoningStart = openIdx + openTag.length

	// Look for the closing tag only AFTER the opening tag. A stray closing tag earlier in the text would
	// otherwise give an inverted range, and substring() silently swaps its arguments in that case.
	const closeIdx = fullText_.indexOf(closeTag, reasoningStart)
	if (closeIdx === -1) return { fullText: textBefore, fullReasoning: fullText_.substring(reasoningStart) } // never stopped reasoning

	const fullReasoning = fullText_.substring(reasoningStart, closeIdx)
	const fullText = textBefore + fullText_.substring(closeIdx + closeTag.length)

	return { fullText, fullReasoning }
}

View file

@ -81,7 +81,6 @@ type ModelOptions = {
cache_write?: number;
}
supportsSystemMessage: false | 'system-role' | 'developer-role' | 'separated';
supportsTools: false | 'anthropic-style' | 'openai-style';
supportsFIM: boolean;
reasoningCapabilities: false | {
@ -122,7 +121,6 @@ const modelOptionsDefaults: ModelOptions = {
maxOutputTokens: 4_096,
cost: { input: 0, output: 0 },
supportsSystemMessage: false,
supportsTools: false,
supportsFIM: false,
reasoningCapabilities: false,
}
@ -137,42 +135,36 @@ const openSourceModelOptions_assumingOAICompat = {
'deepseekR1': {
supportsFIM: false,
supportsSystemMessage: false,
supportsTools: false,
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: false, canIOReasoning: true, openSourceThinkTags: ['<think>', '</think>'] },
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'deepseekCoderV3': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
supportsTools: false,
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'deepseekCoderV2': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
supportsTools: false,
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'codestral': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'openhands-lm-32b': { // https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false, // built on qwen 2.5 32B instruct
contextWindow: 128_000, maxOutputTokens: 4_096
},
'phi4': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: false,
reasoningCapabilities: false,
contextWindow: 16_000, maxOutputTokens: 4_096,
},
@ -180,7 +172,6 @@ const openSourceModelOptions_assumingOAICompat = {
'gemma': { // https://news.ycombinator.com/item?id=43451406
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: false,
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
@ -188,14 +179,12 @@ const openSourceModelOptions_assumingOAICompat = {
'llama4-scout': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 10_000_000, maxOutputTokens: 4_096,
},
'llama4-maverick': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 10_000_000, maxOutputTokens: 4_096,
},
@ -204,28 +193,24 @@ const openSourceModelOptions_assumingOAICompat = {
'llama3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'llama3.1': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'llama3.2': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'llama3.3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
@ -233,14 +218,12 @@ const openSourceModelOptions_assumingOAICompat = {
'qwen2.5coder': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
contextWindow: 32_000, maxOutputTokens: 4_096,
},
'qwq': {
supportsFIM: false, // no FIM, yes reasoning
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: { supportsReasoning: true, canTurnOffReasoning: false, canIOReasoning: true, openSourceThinkTags: ['<think>', '</think>'] },
contextWindow: 128_000, maxOutputTokens: 8_192,
},
@ -248,7 +231,6 @@ const openSourceModelOptions_assumingOAICompat = {
'starcoder2': {
supportsFIM: true,
supportsSystemMessage: false,
supportsTools: false,
reasoningCapabilities: false,
contextWindow: 128_000, maxOutputTokens: 8_192,
@ -256,7 +238,6 @@ const openSourceModelOptions_assumingOAICompat = {
'codegemma:2b': {
supportsFIM: true,
supportsSystemMessage: false,
supportsTools: false,
reasoningCapabilities: false,
contextWindow: 128_000, maxOutputTokens: 8_192,
@ -334,7 +315,6 @@ const anthropicModelOptions = {
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
reasoningCapabilities: {
supportsReasoning: true,
canTurnOffReasoning: true,
@ -349,7 +329,6 @@ const anthropicModelOptions = {
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
reasoningCapabilities: false,
},
'claude-3-5-haiku-20241022': {
@ -358,7 +337,6 @@ const anthropicModelOptions = {
cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
reasoningCapabilities: false,
},
'claude-3-opus-20240229': {
@ -367,7 +345,6 @@ const anthropicModelOptions = {
cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
reasoningCapabilities: false,
},
'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in
@ -375,7 +352,6 @@ const anthropicModelOptions = {
maxOutputTokens: 4_096,
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
reasoningCapabilities: false,
}
} as const satisfies { [s: string]: ModelOptions }
@ -413,7 +389,6 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
maxOutputTokens: 100_000,
cost: { input: 15.00, cache_read: 7.50, output: 60.00, },
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: 'developer-role',
reasoningCapabilities: { supportsReasoning: true, canIOReasoning: false, canTurnOffReasoning: false }, // it doesn't actually output reasoning, but our logic is fine with it
},
@ -422,7 +397,6 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
maxOutputTokens: 100_000,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: 'developer-role',
reasoningCapabilities: { supportsReasoning: true, canIOReasoning: false, canTurnOffReasoning: false },
},
@ -431,7 +405,6 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
maxOutputTokens: 16_384,
cost: { input: 2.50, cache_read: 1.25, output: 10.00, },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
},
@ -440,7 +413,6 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
maxOutputTokens: 65_536,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: false, // does not support any system
reasoningCapabilities: { supportsReasoning: true, canIOReasoning: false, canTurnOffReasoning: false },
},
@ -449,7 +421,6 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
maxOutputTokens: 16_384,
cost: { input: 0.15, cache_read: 0.075, output: 0.60, },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role', // ??
reasoningCapabilities: false,
},
@ -477,7 +448,6 @@ const xAIModelOptions = {
cost: { input: 2.00, output: 10.00 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
} as const satisfies { [s: string]: ModelOptions }
@ -502,7 +472,6 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
cost: { input: 0, output: 0 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style', // we are assuming OpenAI SDK when calling gemini
reasoningCapabilities: false,
},
'gemini-2.0-flash': {
@ -511,7 +480,6 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
cost: { input: 0.10, output: 0.40 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style', // we are assuming OpenAI SDK when calling gemini
reasoningCapabilities: false,
},
'gemini-2.0-flash-lite-preview-02-05': {
@ -520,7 +488,6 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
cost: { input: 0.075, output: 0.30 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'gemini-1.5-flash': {
@ -529,7 +496,6 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
cost: { input: 0.075, output: 0.30 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'gemini-1.5-pro': {
@ -538,7 +504,6 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
cost: { input: 1.25, output: 5.00 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'gemini-1.5-flash-8b': {
@ -547,7 +512,6 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
cost: { input: 0.0375, output: 0.15 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
} as const satisfies { [s: string]: ModelOptions }
@ -593,7 +557,6 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
cost: { input: 0.59, output: 0.79 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'llama-3.1-8b-instant': {
@ -602,7 +565,6 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
cost: { input: 0.05, output: 0.08 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'qwen-2.5-coder-32b': {
@ -611,7 +573,6 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
cost: { input: 0.79, output: 0.79 },
supportsFIM: false, // unfortunately looks like no FIM support on groq
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'qwen-qwq-32b': { // https://huggingface.co/Qwen/QwQ-32B
@ -620,7 +581,6 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
cost: { input: 0.29, output: 0.39 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: { supportsReasoning: true, canIOReasoning: true, canTurnOffReasoning: false, openSourceThinkTags: ['<think>', '</think>'] }, // we're using reasoning_format:parsed so really don't need to know openSourceThinkTags
},
} as const satisfies { [s: string]: ModelOptions }
@ -670,7 +630,6 @@ const openRouterModelOptions_assumingOpenAICompat = {
maxOutputTokens: null,
cost: { input: 0, output: 0 },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
},
@ -679,7 +638,6 @@ const openRouterModelOptions_assumingOpenAICompat = {
maxOutputTokens: null,
cost: { input: 0, output: 0 },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
},
@ -688,7 +646,6 @@ const openRouterModelOptions_assumingOpenAICompat = {
maxOutputTokens: null,
cost: { input: 0, output: 0 },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
},
@ -697,7 +654,6 @@ const openRouterModelOptions_assumingOpenAICompat = {
maxOutputTokens: null,
cost: { input: 0, output: 0 },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role',
reasoningCapabilities: false,
},
@ -713,7 +669,6 @@ const openRouterModelOptions_assumingOpenAICompat = {
cost: { input: 3.00, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: { // same as anthropic, see above
supportsReasoning: true,
canTurnOffReasoning: false,
@ -728,7 +683,6 @@ const openRouterModelOptions_assumingOpenAICompat = {
cost: { input: 3.00, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false, // stupidly, openrouter separates thinking from non-thinking
},
'anthropic/claude-3.5-sonnet': {
@ -737,7 +691,6 @@ const openRouterModelOptions_assumingOpenAICompat = {
cost: { input: 3.00, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'mistralai/codestral-2501': {
@ -745,21 +698,18 @@ const openRouterModelOptions_assumingOpenAICompat = {
contextWindow: 256_000,
maxOutputTokens: null,
cost: { input: 0.3, output: 0.9 },
supportsTools: 'openai-style',
reasoningCapabilities: false,
},
'qwen/qwen-2.5-coder-32b-instruct': {
...openSourceModelOptions_assumingOAICompat['qwen2.5coder'],
contextWindow: 33_000,
maxOutputTokens: null,
supportsTools: false, // openrouter qwen doesn't seem to support tools...?
cost: { input: 0.07, output: 0.16 },
},
'qwen/qwq-32b': {
...openSourceModelOptions_assumingOAICompat['qwq'],
contextWindow: 33_000,
maxOutputTokens: null,
supportsTools: false, // openrouter qwen doesn't seem to support tools...?
cost: { input: 0.07, output: 0.16 },
}
} as const satisfies { [s: string]: ModelOptions }

View file

@ -6,7 +6,7 @@
import { os } from '../helpers/systemInfo.js';
import { StagingSelectionItem } from '../chatThreadServiceTypes.js';
import { ChatMode } from '../voidSettingsTypes.js';
import { InternalToolInfo } from '../toolsServiceTypes.js';
import { ToolName, toolNamesThatRequireApproval } from '../toolsServiceTypes.js';
import { IVoidModelService } from '../voidModelService.js';
import { EndOfLinePreference } from '../../../../../editor/common/model.js';
@ -22,7 +22,7 @@ const changesExampleContent = `\
// {{change 3}}
// ... existing code ...`
const editToolDescription = `\
const editToolDescriptionExample = `\
${tripleTick[0]}
${changesExampleContent}
${tripleTick[1]}`
@ -34,23 +34,32 @@ ${tripleTick[1]}`
// ======================================================== tools ========================================================
// Description of a single tool: its name, a description of what it does,
// and a map from each parameter name to a description of that parameter.
export type InternalToolInfo = {
name: string,
description: string,
params: {
// keyed by parameter name; only a textual description is kept per parameter
[paramName: string]: { description: string }
},
}
// Shared pagination pieces for tool definitions:
// - desc: sentence about pagination, appended to a tool's description
// - param: the `pageNumber` parameter, spread into a tool's params
const paginationHelper = {
desc: `Very large results may be paginated (a note will always be included if pagination took place). Pagination fails gracefully if out of bounds or invalid page number.`,
param: { pageNumber: { type: 'number', description: 'The page number (default is the first page = 1).' }, }
} as const
const uriParam = (object: string) => ({
uri: { type: 'string', description: `The FULL path to the ${object}.` }
uri: { description: `The FULL path to the ${object} from the root of the file system.` }
})
const searchParams = {
searchInFolder: { type: 'string', description: 'Only search files in this given folder. Leave as empty to search all available files.' },
isRegex: { type: 'string', description: 'Whether to treat the query as a regular expression. Default is "false".' },
searchInFolder: { description: 'Only search files in this given folder. Leave as empty to search all available files.' },
isRegex: { description: 'Whether to treat the query as a regular expression. Default is "false".' },
} as const
@ -62,8 +71,8 @@ export const voidTools = {
description: `Returns file contents of a given URI. ${paginationHelper.desc}`,
params: {
...uriParam('file'),
startLine: { type: 'string', description: 'Line to start reading from. Default is "null", treated as 1.' },
endLine: { type: 'string', description: 'Line to stop reading from (inclusive). Default is "null", treated as Infinity.' },
startLine: { description: 'Line to start reading from. Default is "null", treated as 1.' },
endLine: { description: 'Line to stop reading from (inclusive). Default is "null", treated as Infinity.' },
...paginationHelper.param,
},
},
@ -89,7 +98,7 @@ export const voidTools = {
name: 'search_pathnames_only',
description: `Returns all pathnames that match a given query (searches ONLY file names). You should use this when looking for a file with a specific name or path. ${paginationHelper.desc}`,
params: {
query: { type: 'string', description: undefined },
query: { description: `Your query for the search.` },
...searchParams,
...paginationHelper.param,
},
@ -97,9 +106,9 @@ export const voidTools = {
search_files: {
name: 'search_files',
description: `Returns all pathnames that match a given \`grep\`-style query (searches ONLY file contents). The query can be any regex. This is often followed by the \`read_file\` tool to view the full file contents of results. ${paginationHelper.desc}`,
description: `Returns all pathnames that match a given query (searches ONLY file contents). The query can be any substring or glob. This is often followed by the \`read_file\` tool to view the full file contents of results. ${paginationHelper.desc}`,
params: {
query: { type: 'string', description: undefined },
query: { description: `Your query for the search.` },
...searchParams,
...paginationHelper.param,
},
@ -120,7 +129,7 @@ export const voidTools = {
description: `Delete a file or folder at the given path. Fails gracefully if the file or folder does not exist.`,
params: {
...uriParam('file or folder'),
params: { type: 'string', description: 'Return -r here to delete recursively (if applicable). Default is the empty string.' }
params: { description: 'Return -r here to delete recursively (if applicable). Default is the empty string.' }
},
},
@ -130,12 +139,12 @@ export const voidTools = {
params: {
...uriParam('file'),
changeDescription: {
type: 'string', description: `\
description: `\
- Your changeDescription should be a brief code description of the change you want to make, with comments like "// ... existing code ..." to condense your writing.
- NEVER re-write the whole file, and ALWAYS use comments like "// ... existing code ...". Bias towards writing as little as possible.
- Your description will be handed to a dumber, faster model that will quickly apply the change, so it should be clear and concise.
- You must output your description in triple backticks.
Here's an example of a good description:\n${editToolDescription}.`
Here's an example of a good description:\n${editToolDescriptionExample}.`
}
},
},
@ -144,9 +153,9 @@ Here's an example of a good description:\n${editToolDescription}.`
name: 'run_terminal_command',
description: `Executes a terminal command.`,
params: {
command: { type: 'string', description: 'The terminal command to execute. Typically you should pipe to cat to avoid pagination.' },
waitForCompletion: { type: 'string', description: `Whether or not to await the command to complete and get the final result. Default is true. Make this value false when you want a command to run indefinitely without waiting for it.` },
terminalId: { type: 'string', description: 'Optional (value must be an integer >= 1, or empty which will go with the default). This is the ID of the terminal instance to execute the command in. The primary purpose of this is to start a new terminal for background processes or tasks that run indefinitely (e.g. if you want to run a server locally). Fails gracefully if a terminal ID does not exist, by creating a new terminal instance. Defaults to the preferred terminal ID.' },
command: { description: 'The terminal command to execute. Typically you should pipe to cat to avoid pagination.' },
waitForCompletion: { description: `Whether or not to await the command to complete and get the final result. Default is true. Make this value false when you want a command to run indefinitely without waiting for it.` },
terminalId: { description: 'Optional (value must be an integer >= 1, or empty which will go with the default). This is the ID of the terminal instance to execute the command in. The primary purpose of this is to start a new terminal for background processes or tasks that run indefinitely (e.g. if you want to run a server locally). Fails gracefully if a terminal ID does not exist, by creating a new terminal instance. Defaults to the preferred terminal ID.' },
},
},
@ -157,7 +166,61 @@ Here's an example of a good description:\n${editToolDescription}.`
} satisfies { [name: string]: InternalToolInfo }
/**
 * Resolves the tools the model is allowed to call in the given chat mode.
 *
 * - 'gather': every tool in `voidTools` except those listed in `toolNamesThatRequireApproval`
 * - 'agent': every tool in `voidTools`
 * - 'normal' (or anything else): no tools
 *
 * @returns the tool definitions, or `undefined` when the mode exposes no tools
 */
export const availableTools = (chatMode: ChatMode) => {
	let toolNames: ToolName[] | undefined
	if (chatMode === 'gather') {
		const allToolNames = Object.keys(voidTools) as ToolName[]
		toolNames = allToolNames.filter(toolName => !toolNamesThatRequireApproval.has(toolName))
	}
	else if (chatMode === 'agent') {
		toolNames = Object.keys(voidTools) as ToolName[]
	}
	else {
		toolNames = undefined
	}

	const tools: InternalToolInfo[] | undefined = toolNames?.map(toolName => voidTools[toolName])
	return tools
}
/**
 * Renders the tool list that is embedded in the system prompt.
 *
 * Each tool becomes an entry of the form
 *   `<index>. <name>: <description>` followed by an XML skeleton `<name> ... </name>`
 * whose children are one `<param>` element per declared parameter, with the parameter's
 * description as its content. Entries are numbered from 0 and separated by a blank line.
 */
const availableToolsStr = (tools: InternalToolInfo[]) => {
	const entries: string[] = []

	tools.forEach((tool, idx) => {
		const paramBlocks: string[] = []
		for (const paramName of Object.keys(tool.params)) {
			paramBlocks.push(`<${paramName}>\n${tool.params[paramName].description}\n</${paramName}>`)
		}
		const paramsStr = paramBlocks.join('\n')

		// a tool without params renders as an empty element: <name>\n</name>
		const inner = paramsStr ? `\n${paramsStr}` : ''
		entries.push(`${idx}. ${tool.name}: ${tool.description}\n<${tool.name}>${inner}\n</${tool.name}>`)
	})

	return entries.join('\n\n')
}
/**
 * Builds the tool-calling section of the system prompt for the given chat mode.
 *
 * Returns '' when `availableTools(chatMode)` yields no tools. Otherwise the text contains:
 *   1. general tool-calling guidelines, with extra bullets for 'agent' and for 'gather' mode,
 *   2. the list of available tools rendered by `availableToolsStr`,
 *   3. the XML calling convention (tool name as the element, one child element per parameter).
 *
 * NOTE(review): a `\` directly before a line break inside a template literal is a line
 * continuation and removes that line break. Confirm that "Available tools:" still starts
 * on its own line in the rendered prompt after the workspace bullet(s).
 */
const systemToolsPrompt = (chatMode: ChatMode) => {
const tools = availableTools(chatMode)
// no tools for this mode -> contribute nothing to the system prompt
if (!tools || tools.length === 0) return ''
return `\
You are allowed to call tools in your response.
Tool calling guidelines:
${chatMode === 'agent' ? `\
- Only call tools if they help you accomplish the user's goal. If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools.
- ALWAYS use tools to take actions. For example, if you would like to edit a file, you MUST use a tool.
- You will OFTEN need to gather context before making a change. Do not immediately make a change unless you have ALL relevant context.
- ALWAYS have maximal certainty in a change BEFORE you make it. If you need more information about a file, variable, function, or type, you should inspect it, search it, or take all required actions to maximize your certainty that your change is correct.`
: chatMode === 'gather' ? `\
- Your primary use of tools should be to gather information to help the user understand the codebase and answer their query.
- You should extensively read files, types, content, etc and gather relevant context.`
: chatMode === 'normal' ? ''
: ''}
- If you think you should use tools, you do not need to ask for permission.
- NEVER refer to a tool by name when speaking with the user (NEVER say something like "I'm going to use \`tool_name\`"). Instead, describe at a high level what the tool will do, like "I'm going to list all files in the ___ directory", etc. Also do not refer to "pages" of results, just say you're getting more results.
- Some tools only work if the user has a workspace open.${chatMode === 'agent' ? `
- NEVER modify a file outside the user's workspace(s) without permission from the user.` : ''}\
Available tools:
${availableToolsStr(tools)}
Tool calling details: ${''/* We expect tools to come at the end - not a hard limit, but that's just how we process them, and the flow makes more sense that way. */}
- To call a tool, just write its name followed by any parameters in XML format. For example:
<tool_name>
<parameter1>value1</parameter1>
<parameter2>value2</parameter2>
</tool_name>
- You must write all tool calls at the END of your response. The beginning of your response should be your normal response followed by tool calls at the END.
- You are allowed to call multiple tools by specifying them consecutively. However, there should be NO text or writing between tool calls or after them.
- Tool that you call will be executed immediately, and you will have access to the results in your next response.`
}
// ======================================================== chat (normal, gather, agent) ========================================================
@ -172,30 +235,9 @@ ${mode === 'agent' ? `to help the user develop, run, deploy, and make changes to
: ''}
You will be given instructions to follow from the user, \`INSTRUCTIONS\`. You may also be given a list of files that the user has specifically selected, \`SELECTIONS\`.
Please assist the user with their query. The user's query is never invalid.
${/* system info */''}
The user's system information is as follows:
- ${os}
- Open workspace(s): ${workspaceFolders.join(', ') || 'NO WORKSPACE OPEN'}
- Open tab(s): ${openedURIs.join(', ') || 'NO OPENED EDITORS'}
- Active tab: ${activeURI}
${(mode === 'agent') && runningTerminalIds.length !== 0 ? `
- Existing terminal IDs: ${runningTerminalIds.join(', ')}` : ''}
${/* tool use */ mode === 'agent' || mode === 'gather' ? `\
You will be given tools you can call.
${mode === 'agent' ? `\
- Only use tools if they help you accomplish the user's goal. If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools.
- ALWAYS use tools to take actions. For example, if you would like to edit a file, you MUST use a tool.
- You will OFTEN need to gather context before making a change. Do not immediately make a change unless you have ALL relevant context.
- ALWAYS have maximal certainty in a change BEFORE you make it. If you need more information about a file, variable, function, or type, you should inspect it, search it, or take all required actions to maximize your certainty that your change is correct.`
: mode === 'gather' ? `\
- Your primary use of tools should be to gather information to help the user understand the codebase and answer their query.
- You should extensively read files, types, etc and gather relevant context.`
: ''}
- If you think you should use tools, you do not need to ask for permission. Feel free to call tools whenever you'd like. You can use them to understand the codebase, ${mode === 'agent' ? 'run terminal commands, edit files, ' : 'gather relevant files and information, '}etc.
- NEVER refer to a tool by name when speaking with the user (NEVER say something like "I'm going to use \`tool_name\`"). Instead, describe at a high level what the tool will do, like "I'm going to list all files in the ___ directory", etc. Also do not refer to "pages" of results, just say you're getting more results.
- Some tools only work if the user has a workspace open.${mode === 'agent' ? `
- NEVER modify a file outside the user's workspace(s) without permission from the user.` : ''}
${systemToolsPrompt(mode)}
\
`: `\
You're allowed to ask for more context. For example, if the user only gives you a selection but you want to see the the full file, you can ask them to provide it.
@ -218,6 +260,8 @@ If you write a code block that's related to a specific file, please use the same
- The remaining contents of the file should proceed as usual.
\
`}
${/* misc */''}
Misc:
- Do not make things up.
@ -225,80 +269,22 @@ Misc:
- NEVER re-write the entire file.
- Always wrap any code you produce in triple backticks, and specify a language if possible. For example, ${tripleTick[0]}typescript\n...\n${tripleTick[1]}.
- Today's date is ${new Date().toDateString()}
The user's codebase is structured as follows:\n${directoryStr}
${/* system info */''}
The user's system information is as follows:
- ${os}
- Open workspace(s): ${workspaceFolders.join(', ') || 'NO WORKSPACE OPEN'}
- Open tab(s): ${openedURIs.join(', ') || 'NO OPENED EDITORS'}
- Active tab: ${activeURI}
${(mode === 'agent') && runningTerminalIds.length !== 0 ? `
- Existing terminal IDs: ${runningTerminalIds.join(', ')}` : ''}
- The user's codebase is structured as follows:\n${directoryStr}
\
`
// agent mode doesn't know about 1st line paths yet
// - If you wrote triple ticks and ___, then include the file's full path in the first line of the triple ticks. This is only for display purposes to the user, and it's preferred but optional. Never do this in a tool parameter, or if there's ambiguity about the full path.
`.trim().replace('\t', ' ')
// type FileSelnLocal = { fileURI: URI, language: string, content: string }
// const stringifyFileSelection = ({ fileURI, language, content }: FileSelnLocal) => {
// return `\
// ${fileURI.fsPath}
// ${tripleTick[0]}${language}
// ${content}
// ${tripleTick[1]}
// `
// }
// const stringifyCodeSelection = ({ uri, language, range }: StagingSelectionItem & { type: 'CodeSelection' }) => {
// return `\
// ${tripleTick[0]}${language}
// ${selectionStr}
// ${tripleTick[1]}
// `
// }
// const failToReadStr = 'Could not read content. This file may have been deleted. If you expected content here, you can tell the user about this as they might not know.'
// const stringifyFileSelections = async (fileSelections: FileSelection[], voidModelService: IVoidModelService) => {
// if (fileSelections.length === 0) return null
// const fileSlns: FileSelnLocal[] = await Promise.all(fileSelections.map(async (sel) => {
// const { model } = await voidModelService.getModelSafe(sel.fileURI)
// const content = model?.getValue(EndOfLinePreference.LF) ?? failToReadStr
// return { ...sel, content }
// }))
// return fileSlns.map(sel => stringifyFileSelection(sel)).join('\n')
// }
// export const chat_selectionsString = async (
// prevSelns: StagingSelectionItem[] | null, currSelns: StagingSelectionItem[] | null,
// voidModelService: IVoidModelService,
// ) => {
// // ADD IN FILES AT TOP
// const allSelections = [...currSelns || [], ...prevSelns || []]
// if (allSelections.length === 0) return null
// for (const selection of allSelections) {
// if (selection.type === 'Selection') {
// codeSelections.push(selection)
// }
// else if (selection.type === 'File') {
// const fileSelection = selection
// const path = fileSelection.fileURI.fsPath
// if (!filesURIs.has(path)) {
// filesURIs.add(path)
// fileSelections.push(fileSelection)
// }
// }
// }
// const filesStr = await stringifyFileSelections(fileSelections, voidModelService)
// const selnsStr = stringifyCodeSelections(codeSelections)
// const fileContents = [filesStr, selnsStr].filter(Boolean).join('\n')
// return fileContents || null
// }
// export const chat_lastUserMessageWithFilesAdded = (userMessage: string, selectionsString: string | null) => {
// if (userMessage) return `${userMessage}${selectionsString ? `\n${selectionsString}` : ''}`
// else return userMessage
// }
export const chat_userMessageContent = async (instructions: string, currSelns: StagingSelectionItem[] | null,
opts: { type: 'references' } | { type: 'fullCode', voidModelService: IVoidModelService }
@ -560,6 +546,40 @@ ${tripleTick[1]}).`
// const toAnthropicTool = (toolInfo: InternalToolInfo) => {
// const { name, description, params } = toolInfo
// return {
// name: name,
// description: description,
// input_schema: {
// type: 'object',
// properties: params,
// // required: Object.keys(params),
// },
// } satisfies Anthropic.Messages.Tool
// }
// const toOpenAICompatibleTool = (toolInfo: InternalToolInfo) => {
// const { name, description, params } = toolInfo
// return {
// type: 'function',
// function: {
// name: name,
// // strict: true, // strict mode - https://platform.openai.com/docs/guides/function-calling?api-mode=chat
// description: description,
// parameters: {
// type: 'object',
// properties: params,
// // required: Object.keys(params), // in strict mode, all params are required and additionalProperties is false
// // additionalProperties: false,
// },
// }
// } satisfies OpenAI.Chat.Completions.ChatCompletionTool
// }
/*
// ======================================================== ai search/replace ========================================================

View file

@ -3,7 +3,7 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
import { ToolName, InternalToolInfo } from './toolsServiceTypes.js'
import { ToolName } from './toolsServiceTypes.js'
import { ModelSelection, ModelSelectionOptions, ProviderName, SettingsOfProvider } from './voidSettingsTypes.js'
@ -37,12 +37,6 @@ export type LLMChatMessage = {
role: 'assistant',
content: string; // text content
anthropicReasoning: AnthropicReasoning[] | null;
} | {
role: 'tool';
content: string; // result
name: string;
params: string;
id: string;
}
@ -54,7 +48,7 @@ export type ToolCallType = {
export type AnthropicReasoning = ({ type: 'thinking'; thinking: any; signature: string; } | { type: 'redacted_thinking', data: any })
export type OnText = (p: { fullText: string; fullReasoning: string; fullToolName: string; fullToolParams: string; }) => void
export type OnText = (p: { fullText: string; fullReasoning: string; }) => void
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCalls?: ToolCallType[]; anthropicReasoning: AnthropicReasoning[] | null }) => void // id is tool_use_id
export type OnError = (p: { message: string; fullError: Error | null }) => void
export type OnAbort = () => void
@ -70,11 +64,9 @@ export type LLMFIMMessage = {
type SendLLMType = {
messagesType: 'chatMessages';
messages: LLMChatMessage[];
tools?: InternalToolInfo[];
} | {
messagesType: 'FIMMessage';
messages: LLMFIMMessage;
tools?: undefined;
}
// service types

View file

@ -3,7 +3,6 @@ import { voidTools } from './prompt/prompts.js';
export type TerminalResolveReason = { type: 'toofull' | 'timeout' | 'bgtask' } | { type: 'done', exitCode: number }
// Partial of IFileStat
@ -14,17 +13,6 @@ export type ShallowDirectoryItem = {
isSymbolicLink: boolean;
}
// we do this using Anthropic's style and convert to OpenAI style later
export type InternalToolInfo = {
name: string,
description: string,
params: {
[paramName: string]: { type: string, description: string | undefined } // name -> type
},
}
export type ToolName = keyof typeof voidTools
export const toolNames = Object.keys(voidTools) as ToolName[]

View file

@ -0,0 +1,247 @@
import { endsWithAnyPrefixOf } from '../../common/helpers/extractCodeFromResult.js'
import { InternalToolInfo } from '../../common/prompt/prompts.js'
import { OnText } from '../../common/sendLLMMessageTypes.js'
import sax from 'sax'
// =========================================== reasoning ===========================================
// could simplify this - this assumes we can never add a tag without committing it to the user's screen, but that's not true
/**
 * Wraps a streaming `onText` callback so that a reasoning section delimited by `thinkTags`
 * (opening tag, closing tag) is routed to `fullReasoning` instead of `fullText`.
 *
 * The returned callback expects `fullText` to be the cumulative raw model output. It goes
 * through three phases:
 *   1. before the opening tag: raw text is forwarded as `fullText`
 *   2. between the tags: new raw text is appended to `fullReasoning`
 *   3. after the closing tag: new raw text is appended to `fullText`
 *
 * While the raw text ends with what may be the start of the next expected tag, nothing is
 * forwarded, so a half-received tag is never shown to the user.
 *
 * @param onText    downstream callback that receives the separated text and reasoning
 * @param thinkTags `[openingTag, closingTag]`
 * @returns the wrapped callback
 */
export const extractReasoningOnTextWrapper = (onText: OnText, thinkTags: [string, string]): OnText => {
	let latestAddIdx = 0 // exclusive index into the raw text; everything before it is already classified
	let foundTag1 = false
	let foundTag2 = false

	let fullTextSoFar = ''
	let fullReasoningSoFar = ''

	const newOnText: OnText = ({ fullText: fullText_, ...p }) => {

		// phase 1: opening tag not seen yet
		if (!foundTag1) {
			// presumably truthy when the text ends with a non-empty prefix of the tag - verify against the helper
			const endsWithTag1 = endsWithAnyPrefixOf(fullText_, thinkTags[0])
			if (endsWithTag1) {
				// wait until we get the full tag or know more
				return
			}

			const tag1Index = fullText_.indexOf(thinkTags[0])
			if (tag1Index !== -1) {
				foundTag1 = true
				// fullText_ is cumulative and fullTextSoFar may already hold part of this prefix,
				// so assign (do not append) to avoid duplicating text that precedes the tag
				fullTextSoFar = fullText_.substring(0, tag1Index)
				// continue classifying right after the opening tag
				latestAddIdx = tag1Index + thinkTags[0].length
				onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
				return
			}

			// no tag in sight: everything so far is normal text
			fullTextSoFar = fullText_
			latestAddIdx = fullText_.length
			onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
			return
		}

		// phase 2: inside the reasoning section, closing tag not seen yet
		if (!foundTag2) {
			const endsWithTag2 = endsWithAnyPrefixOf(fullText_, thinkTags[1])
			if (endsWithTag2) {
				// wait until we get the full tag or know more
				return
			}

			const tag2Index = fullText_.indexOf(thinkTags[1], latestAddIdx)
			if (tag2Index !== -1) {
				foundTag2 = true
				// everything not yet classified up to the closing tag is reasoning
				fullReasoningSoFar += fullText_.substring(latestAddIdx, tag2Index)
				// continue classifying right after the closing tag
				latestAddIdx = tag2Index + thinkTags[1].length
				onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
				return
			}

			// still reasoning: append whatever is new
			if (fullText_.length > latestAddIdx) {
				fullReasoningSoFar += fullText_.substring(latestAddIdx)
				latestAddIdx = fullText_.length
			}
			onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
			return
		}

		// phase 3: reasoning is closed, so anything new is normal text again
		if (fullText_.length > latestAddIdx) {
			fullTextSoFar += fullText_.substring(latestAddIdx)
			latestAddIdx = fullText_.length
		}
		onText({ ...p, fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
	}

	return newOnText
}
/**
 * Splits a complete model response into visible text and reasoning, using the first
 * reasoning section delimited by `thinkTags` (opening tag, closing tag).
 *
 * - No opening tag: the whole input is text and reasoning is ''.
 * - Opening tag without a later closing tag: text is what precedes the opening tag,
 *   reasoning is everything after it (the tag itself is not included in either).
 * - Both tags: reasoning is the content between them; text is everything outside them.
 *
 * The closing tag is only searched for after the opening tag, so a stray closing tag
 * earlier in the text cannot produce inverted slices.
 *
 * @param fullText_ the raw, complete model output
 * @param thinkTags `[openingTag, closingTag]`
 */
export const extractReasoningOnFinalMessage = (fullText_: string, thinkTags: [string, string]): { fullText: string, fullReasoning: string } => {
	const [openTag, closeTag] = thinkTags

	const tag1Idx = fullText_.indexOf(openTag)
	if (tag1Idx === -1) return { fullText: fullText_, fullReasoning: '' } // never started reasoning

	const reasoningStart = tag1Idx + openTag.length
	const textBeforeReasoning = fullText_.substring(0, tag1Idx)

	const tag2Idx = fullText_.indexOf(closeTag, reasoningStart)
	if (tag2Idx === -1) return { fullText: textBeforeReasoning, fullReasoning: fullText_.substring(reasoningStart) } // never stopped reasoning

	const fullReasoning = fullText_.substring(reasoningStart, tag2Idx)
	const fullText = textBeforeReasoning + fullText_.substring(tag2Idx + closeTag.length)
	return { fullText, fullReasoning }
}
// =========================================== tools ===========================================
/**
 * Position of the streaming tool-call parser within the XML seen so far:
 * - 'normal': outside any tool element
 * - 'tool':   inside a tool element, between its parameters
 * - 'param':  inside one parameter element of a tool
 */
type ToolsState =
	| {
		level: 'normal',
	}
	| {
		level: 'tool',
		toolName: string, // tool element currently open
		currentToolCall: ToolCall, // call being assembled for that tool
	}
	| {
		level: 'param',
		toolName: string, // enclosing tool element
		paramName: string, // parameter element currently open
		currentToolCall: ToolCall, // call being assembled for the enclosing tool
	}
/**
 * Wraps a streaming `onText` callback so that XML-style tool calls written by the model
 * (`<tool_name><param>value</param></tool_name>`) are removed from the visible text and
 * reported as structured tool calls instead.
 *
 * The returned callback expects `fullText` to be the cumulative raw model output. Only the
 * part that has not been fed yet is written to an incremental sax parser, and the parser
 * events drive a three-level state machine (see `ToolsState`):
 *   - 'normal': text and unknown tags are visible text; a tag naming an available tool opens a call
 *   - 'tool':   text is ignored; a tag naming one of the tool's params opens that param
 *   - 'param':  text is appended to the param's value
 *
 * @param onText         downstream callback; receives the filtered text and the calls completed so far
 * @param availableTools tools whose names and params are recognized
 * @returns the wrapped callback
 */
export const extractToolsOnTextWrapper = (onText: OnText, availableTools: InternalToolInfo[]) => {

	// sax case-folds tag names in loose mode, so tool/param names are matched case-insensitively
	// and mapped back to the names declared in `availableTools`
	const normalizeName = (name: string) => name.toUpperCase()

	const toolOfNormalizedName = new Map<string, InternalToolInfo>()
	for (const t of availableTools) { toolOfNormalizedName.set(normalizeName(t.name), t) }

	// returns the declared param name of `toolName` that matches `tagName`, if any
	const findParamName = (toolName: string, tagName: string): string | undefined => {
		const tool = toolOfNormalizedName.get(normalizeName(toolName))
		if (!tool) return undefined
		const normalizedTag = normalizeName(tagName)
		return Object.keys(tool.params).find(paramName => normalizeName(paramName) === normalizedTag)
	}

	let fullText = ''; // filtered text that is shown to the user
	let trueFullText = '' // raw model output received so far (everything already fed to the parser)
	const currentToolCalls: ToolCall[] = []; // the answer

	let state: ToolsState = { level: 'normal' }

	// loose mode - presumably chosen because model output is prose, not well-formed XML
	const parser = sax.parser(false);

	// when see open tag <tagName>
	parser.onopentag = (node) => {
		// NOTE(review): sax appears to record startTagPosition just past the '<' - verify whether
		// this slice needs to start one character earlier to include it
		const rawNewText = trueFullText.substring(parser.startTagPosition, parser.position)
		const tagName = node.name;

		if (state.level === 'normal') {
			const tool = toolOfNormalizedName.get(normalizeName(tagName))
			if (tool) { // valid toolName
				state = {
					level: 'tool',
					toolName: tool.name,
					currentToolCall: { name: tool.name, parameters: {} }
				}
			}
			else {
				fullText += rawNewText // count as plaintext
			}
		}
		else if (state.level === 'tool') {
			const paramName = findParamName(state.toolName, tagName)
			if (paramName !== undefined) { // valid param
				state = {
					level: 'param',
					toolName: state.toolName,
					paramName,
					currentToolCall: state.currentToolCall,
				}
			}
			else {
				// would normally be rawNewText, but we ignore all text inside tools
			}
		}
		else if (state.level === 'param') {
			// NOTE(review): markup nested in a param goes to the visible text, not the param value - confirm intended
			fullText += rawNewText // count as plaintext
		}
	};

	parser.ontext = (text) => {
		if (state.level === 'normal') {
			fullText += text
		}
		else if (state.level === 'tool') {
			// ignore all text in a tool, all text should go in the param tags inside it
		}
		else if (state.level === 'param') {
			// accumulate under the param's own name, starting from '' rather than undefined
			const parameters = state.currentToolCall.parameters
			parameters[state.paramName] = (parameters[state.paramName] ?? '') + text
		}
	}

	parser.onclosetag = (tagName) => {
		const rawNewText = trueFullText.substring(parser.startTagPosition, parser.position)

		if (state.level === 'normal') {
			fullText += rawNewText
		}
		else if (state.level === 'tool') {
			if (normalizeName(tagName) === normalizeName(state.toolName)) { // closed the tool
				currentToolCalls.push(state.currentToolCall)
				state = {
					level: 'normal',
				}
			}
			else { // add as text
				fullText += rawNewText
			}
		}
		else if (state.level === 'param') {
			if (normalizeName(tagName) === normalizeName(state.paramName)) { // closed the param
				state = {
					level: 'tool',
					toolName: state.toolName,
					currentToolCall: state.currentToolCall,
				}
			}
		}
	};

	const newOnText: OnText = (params) => {
		// feed only what the parser has not seen yet; the offset is the length of the RAW text
		// fed so far (the filtered text is shorter once any tool markup has been removed)
		const newText = params.fullText.substring(trueFullText.length);
		// update before writing: the handlers above slice trueFullText using parser positions
		trueFullText = params.fullText
		parser.write(newText)

		onText({
			...params,
			fullText,
			toolCalls: currentToolCalls.length > 0 ? [...currentToolCalls] : undefined
		});
	};

	return newOnText;
}

View file

@ -23,17 +23,10 @@ type InternalLLMChatMessage = {
} | {
role: 'assistant',
content: string | (AnthropicReasoning | { type: 'text'; text: string })[];
} | {
role: 'tool';
content: string; // result
name: string;
params: string;
id: string;
}
const EMPTY_MESSAGE = '(empty message)'
const EMPTY_TOOL_CONTENT = '(empty content)'
const prepareMessages_normalize = ({ messages: messages_ }: { messages: LLMChatMessage[] }): { messages: LLMChatMessage[] } => {
const messages = deepClone(messages_)
@ -145,7 +138,7 @@ const prepareMessages_fitIntoContext = ({ messages, contextWindow, maxOutputToke
// no matter whether the model supports a system message or not (or what format it supports), add it in some way
const prepareMessages_systemMessage = ({
const prepareMessages_addSystemInstructions = ({
messages,
aiInstructions,
supportsSystemMessage,
@ -202,194 +195,8 @@ const prepareMessages_systemMessage = ({
return { messages: newMessages, separateSystemMessageStr }
}
// convert messages as if about to send to openai
/*
reference - https://platform.openai.com/docs/guides/function-calling#function-calling-steps
openai MESSAGE (role=assistant):
"tool_calls":[{
"type": "function",
"id": "call_12345xyz",
"function": {
"name": "get_weather",
"arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
}]
openai RESPONSE (role=user):
{ "role": "tool",
"tool_call_id": tool_call.id,
"content": str(result) }
also see
openai on prompting - https://platform.openai.com/docs/guides/reasoning#advice-on-prompting
openai on developer system message - https://cdn.openai.com/spec/model-spec-2024-05-08.html#follow-the-chain-of-command
*/
type PrepareMessagesToolsOpenAI = (
Exclude<InternalLLMChatMessage, { role: 'assistant' | 'tool' }> | {
role: 'assistant',
content: string | (AnthropicReasoning | { type: 'text'; text: string })[];
tool_calls?: {
type: 'function';
id: string;
function: {
name: string;
arguments: string;
}
}[]
} | {
role: 'tool',
tool_call_id: string;
content: string;
}
)[]
/**
 * Converts internal messages to the OpenAI tool-calling layout.
 *
 * Non-tool messages are copied through unchanged. For every `tool` message:
 *   - if the message directly before it is an assistant message, that message gets a
 *     `tool_calls` entry describing the call (id, name, arguments), and
 *   - a `{ role: 'tool', tool_call_id, content }` message carrying the result is appended
 *     (empty results are replaced by `EMPTY_TOOL_CONTENT`).
 */
const prepareMessages_tools_openai = ({ messages }: { messages: InternalLLMChatMessage[], }) => {

	const newMessages: PrepareMessagesToolsOpenAI = [];

	messages.forEach((currMsg, i) => {

		if (currMsg.role !== 'tool') {
			newMessages.push(currMsg)
			return
		}

		// the assistant message right before this tool result is the one that made the call
		const prevIdx = i - 1
		const prevMsg = 0 <= prevIdx && prevIdx <= newMessages.length ? newMessages[prevIdx] : undefined

		if (prevMsg?.role === 'assistant') {
			const toolCall = {
				type: 'function' as const,
				id: currMsg.id,
				function: {
					name: currMsg.name,
					arguments: JSON.stringify(currMsg.params)
				}
			}
			prevMsg.tool_calls = [toolCall]
		}

		// the result itself
		newMessages.push({
			role: 'tool',
			tool_call_id: currMsg.id,
			content: currMsg.content || EMPTY_TOOL_CONTENT,
		})
	})

	return { messages: newMessages }
}
// convert messages as if about to send to anthropic
/*
https://docs.anthropic.com/en/docs/build-with-claude/tool-use#tool-use-examples
anthropic MESSAGE (role=assistant):
"content": [{
"type": "text",
"text": "<thinking>I need to call the get_weather function, and the user wants SF, which is likely San Francisco, CA.</thinking>"
}, {
"type": "tool_use",
"id": "toolu_01A09q90qw90lq917835lq9",
"name": "get_weather",
"input": { "location": "San Francisco, CA", "unit": "celsius" }
}]
anthropic RESPONSE (role=user):
"content": [{
"type": "tool_result",
"tool_use_id": "toolu_01A09q90qw90lq917835lq9",
"content": "15 degrees"
}]
*/
type PrepareMessagesToolsAnthropic = (
Exclude<InternalLLMChatMessage, { role: 'assistant' | 'user' }> | {
role: 'assistant',
content: string | (
| AnthropicReasoning
| {
type: 'text';
text: string;
}
| {
type: 'tool_use';
name: string;
input: Record<string, any>;
id: string;
})[]
} | {
role: 'user',
content: string | ({
type: 'text';
text: string;
} | {
type: 'tool_result';
tool_use_id: string;
content: string;
})[]
}
)[]
/*
Converts:
assistant: ...content
tool: (id, name, params)
->
assistant: ...content, call(name, id, params)
user: ...content, result(id, content)
*/
// Rewrites `tool` messages into Anthropic's layout: the call becomes a `tool_use` entry on the
// preceding assistant message, and the result becomes a `user` message holding a `tool_result`.
// Returns `{ messages }` where `messages` is the same array instance that was passed in.
const prepareMessages_tools_anthropic = ({ messages }: { messages: InternalLLMChatMessage[], }) => {
// NOTE: this is an alias, not a copy - the input array and its message objects are modified in place
const newMessages: PrepareMessagesToolsAnthropic = messages;
for (let i = 0; i < newMessages.length; i += 1) {
const currMsg = newMessages[i]
if (currMsg.role !== 'tool') continue
const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined
if (prevMsg?.role === 'assistant') {
// string content is promoted to a one-element array so the tool_use entry can be appended to it
if (typeof prevMsg.content === 'string') prevMsg.content = [{ type: 'text', text: prevMsg.content }]
// presumably parseObject turns the serialized params string into an object - verify against its definition
prevMsg.content.push({ type: 'tool_use', id: currMsg.id, name: currMsg.name, input: parseObject(currMsg.params) })
}
// turn each tool into a user message with tool results at the end
newMessages[i] = {
role: 'user',
content: [
// empty results are replaced by EMPTY_TOOL_CONTENT
...[{ type: 'tool_result', tool_use_id: currMsg.id, content: currMsg.content || EMPTY_TOOL_CONTENT }] as const,
]
}
}
return { messages: newMessages }
}
type PrepareMessagesTools = PrepareMessagesToolsAnthropic | PrepareMessagesToolsOpenAI
/**
 * Dispatches to the provider-specific tool-message conversion.
 *
 * @param messages      internal chat messages
 * @param supportsTools `false` leaves the messages untouched; otherwise selects the Anthropic or OpenAI layout
 * @throws Error when `supportsTools` is a truthy value other than the two known styles
 */
const prepareMessages_tools = ({ messages, supportsTools }: { messages: InternalLLMChatMessage[], supportsTools: false | 'anthropic-style' | 'openai-style' }): { messages: PrepareMessagesTools } => {
	// no tool support: pass through as-is
	if (!supportsTools) return { messages: messages }

	switch (supportsTools) {
		case 'anthropic-style':
			return prepareMessages_tools_anthropic({ messages })
		case 'openai-style':
			return prepareMessages_tools_openai({ messages })
		default:
			throw new Error(`supportsTools type not recognized`)
	}
}
// remove rawAnthropicAssistantContent, and make content equal to it if supportsAnthropicContent
const prepareMessages_anthropicContent = ({ messages, supportsAnthropicReasoningSignature }: { messages: LLMChatMessage[], supportsAnthropicReasoningSignature: boolean }) => {
const prepareMessages_anthropicReasoning = ({ messages, supportsAnthropicReasoningSignature }: { messages: LLMChatMessage[], supportsAnthropicReasoningSignature: boolean }) => {
const newMessages: InternalLLMChatMessage[] = []
for (const m of messages) {
if (m.role !== 'assistant') {
@ -414,38 +221,18 @@ const prepareMessages_anthropicContent = ({ messages, supportsAnthropicReasoning
// do this at end
const prepareMessages_noEmptyMessage = ({ messages }: { messages: PrepareMessagesTools }): { messages: PrepareMessagesTools } => {
const prepareMessages_noEmptyMessage = ({ messages }: { messages: InternalLLMChatMessage[] }): { messages: InternalLLMChatMessage[] } => {
for (const currMsg of messages) {
// don't do this for tools
if (currMsg.role === 'tool') continue
// don't do this for assistant or user messages that have tool_calls or tool_results
const oai = currMsg as PrepareMessagesToolsOpenAI[0]
if (oai.role === 'assistant') {
if (oai.tool_calls) continue
}
const anth = currMsg as PrepareMessagesToolsAnthropic[0]
if (anth.role === 'assistant' || anth.role === 'user') {
if (typeof anth.content !== 'string') {
const hasContent = anth.content.find(c => c.type === 'tool_use' || c.type === 'tool_result')
if (hasContent) continue
}
}
if (typeof currMsg.content === 'string') {
// if content is a string, replace string with empty msg
if (typeof currMsg.content === 'string')
currMsg.content = currMsg.content || EMPTY_MESSAGE
}
else {
// if content is an array, replace any empty text entries with empty msg, and make sure there's at least 1 entry
for (const c of currMsg.content) {
if (c.type === 'text') c.text = c.text || EMPTY_MESSAGE
else if (c.type === 'tool_use') { }
else if (c.type === 'tool_result') { }
}
if (currMsg.content.length === 0) currMsg.content = [{ type: 'text', text: EMPTY_MESSAGE }]
}
}
return { messages }
}
@ -458,7 +245,6 @@ export const prepareMessages = ({
messages,
aiInstructions,
supportsSystemMessage,
supportsTools,
supportsAnthropicReasoningSignature,
contextWindow,
maxOutputTokens,
@ -466,7 +252,6 @@ export const prepareMessages = ({
messages: LLMChatMessage[],
aiInstructions: string,
supportsSystemMessage: false | 'system-role' | 'developer-role' | 'separated',
supportsTools: false | 'anthropic-style' | 'openai-style',
supportsAnthropicReasoningSignature: boolean,
contextWindow: number,
maxOutputTokens: number | null | undefined,
@ -475,13 +260,12 @@ export const prepareMessages = ({
const { messages: messages0 } = prepareMessages_normalize({ messages })
const { messages: messages1 } = prepareMessages_fitIntoContext({ messages: messages0, contextWindow, maxOutputTokens })
const { messages: messages2 } = prepareMessages_anthropicContent({ messages: messages1, supportsAnthropicReasoningSignature })
const { messages: messages3, separateSystemMessageStr } = prepareMessages_systemMessage({ messages: messages2, aiInstructions, supportsSystemMessage })
const { messages: messages4 } = prepareMessages_tools({ messages: messages3, supportsTools })
const { messages: messages5 } = prepareMessages_noEmptyMessage({ messages: messages4 })
const { messages: messages2 } = prepareMessages_anthropicReasoning({ messages: messages1, supportsAnthropicReasoningSignature })
const { messages: messages3, separateSystemMessageStr } = prepareMessages_addSystemInstructions({ messages: messages2, aiInstructions, supportsSystemMessage })
const { messages: messages4 } = prepareMessages_noEmptyMessage({ messages: messages3 })
return {
messages: messages5 as any,
messages: messages4 as any,
separateSystemMessageStr
} as const
}

View file

@ -7,12 +7,11 @@ import Anthropic from '@anthropic-ai/sdk';
import { Ollama } from 'ollama';
import OpenAI, { ClientOptions } from 'openai';
import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../common/helpers/extractCodeFromResult.js';
import { LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText } from '../../common/sendLLMMessageTypes.js';
import { defaultProviderSettings, displayInfoOfProviderName, ModelSelectionOptions, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js';
import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities } from '../../common/modelCapabilities.js';
import { InternalToolInfo, ToolName, isAToolName } from '../../common/toolsServiceTypes.js';
import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper, extractToolsOnTextWrapper } from './extractGrammar.js';
type InternalCommonMessageParams = {
@ -27,7 +26,7 @@ type InternalCommonMessageParams = {
_setAborter: (aborter: () => void) => void;
}
type SendChatParams_Internal = InternalCommonMessageParams & { messages: LLMChatMessage[]; tools?: InternalToolInfo[] }
type SendChatParams_Internal = InternalCommonMessageParams & { messages: LLMChatMessage[]; }
type SendFIMParams_Internal = InternalCommonMessageParams & { messages: LLMFIMMessage; }
export type ListParams_Internal<ModelResponse> = ModelListParams<ModelResponse>
@ -35,34 +34,6 @@ export type ListParams_Internal<ModelResponse> = ModelListParams<ModelResponse>
const invalidApiKeyMessage = (providerName: ProviderName) => `Invalid ${displayInfoOfProviderName(providerName).title} API key.`
// ------------ OPENAI-COMPATIBLE (HELPERS) ------------
/**
 * Converts an internal tool description into a `function`-type tool entry
 * that satisfies `OpenAI.Chat.Completions.ChatCompletionTool`.
 *
 * The tool is emitted in strict mode, which means every key of
 * `toolInfo.params` is listed as required and `additionalProperties` is false.
 * See https://platform.openai.com/docs/guides/function-calling?api-mode=chat
 *
 * @param toolInfo name, description and parameter map of the tool
 * @returns the tool object in the shape sent in the `tools` array of a chat request
 */
const toOpenAICompatibleTool = (toolInfo: InternalToolInfo) => {
	// strict mode: all params are required and no additional properties are allowed
	const requiredParamNames = Object.keys(toolInfo.params)

	return {
		type: 'function',
		function: {
			name: toolInfo.name,
			strict: true,
			description: toolInfo.description,
			parameters: {
				type: 'object',
				properties: toolInfo.params,
				required: requiredParamNames,
				additionalProperties: false,
			},
		},
	} satisfies OpenAI.Chat.Completions.ChatCompletionTool
}
/**
 * Type used to stream tool calls as they come in.
 * Keys are the `index` of a streamed tool-call delta; each entry holds the
 * name, id and raw params string concatenated from the deltas seen so far.
 */
type ToolCallOfIndex = { [index: string]: { name: string, paramsStr: string, id: string } }
/** Return type of toolCallsFrom_<PROVIDER>: one entry per tool call whose name is a known ToolName. */
type ToolCallsFrom_ReturnType = { name: ToolName, id: string, paramsStr: string }[]
/**
 * Turns the tool calls accumulated while streaming into the final list of tool calls.
 *
 * Entries whose `name` does not pass `isAToolName` are dropped; the remaining
 * entries keep the iteration order of `toolCallOfIndex`.
 *
 * @param toolCallOfIndex accumulated tool calls, keyed by stream index
 * @returns the recognized tool calls as `{ name, id, paramsStr }` objects
 */
const toolCallsFrom_OpenAICompat = (toolCallOfIndex: ToolCallOfIndex): ToolCallsFrom_ReturnType => {
	const recognizedCalls: ToolCallsFrom_ReturnType = []
	for (const { name, id, paramsStr } of Object.values(toolCallOfIndex)) {
		if (!isAToolName(name)) continue // skip tools we don't know about
		recognizedCalls.push({ name, id, paramsStr })
	}
	return recognizedCalls
}
const newOpenAICompatibleSDK = ({ settingsOfProvider, providerName, includeInPayload }: { settingsOfProvider: SettingsOfProvider, providerName: ProviderName, includeInPayload?: { [s: string]: any } }) => {
const commonPayloadOpts: ClientOptions = {
@ -152,11 +123,10 @@ const _sendOpenAICompatibleFIM = ({ messages: messages_, onFinalMessage, onError
const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelSelectionOptions, modelName: modelName_, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelSelectionOptions, modelName: modelName_, _setAborter, providerName, aiInstructions }: SendChatParams_Internal) => {
const {
modelName,
supportsSystemMessage,
supportsTools,
contextWindow,
maxOutputTokens,
reasoningCapabilities,
@ -169,22 +139,17 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
const reasoningInfo = getSendableReasoningInfo('Chat', providerName, modelName_, modelSelectionOptions) // user's modelName_ here
const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {}
// tools
const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined
const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
// max tokens
const maxTokens = reasoningInfo?.isReasoningEnabled && reasoningCapabilities ? reasoningCapabilities.reasoningMaxOutputTokens : maxOutputTokens
// instance
const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, supportsAnthropicReasoningSignature: false, contextWindow, maxOutputTokens: maxTokens })
const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsAnthropicReasoningSignature: false, contextWindow, maxOutputTokens: maxTokens })
const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: modelName,
messages: messages,
stream: true,
// max_completion_tokens: maxTokens,
...toolsObj,
}
// open source models - manually parse think tokens
@ -194,30 +159,18 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
onText = extractReasoningOnTextWrapper(onText, openSourceThinkTags)
}
if ()
onText = extractToolsOnTextWrapper(onText,)
let fullReasoningSoFar = ''
let fullTextSoFar = ''
let fullToolName = ''
let fullToolParams = ''
const toolCallOfIndex: ToolCallOfIndex = {}
openai.chat.completions
.create(options)
.then(async response => {
_setAborter(() => response.controller.abort())
// when receive text
for await (const chunk of response) {
// tool call
for (const tool of chunk.choices[0]?.delta?.tool_calls ?? []) {
const index = tool.index
if (!toolCallOfIndex[index]) toolCallOfIndex[index] = { name: '', paramsStr: '', id: '' }
toolCallOfIndex[index].name += tool.function?.name ?? ''
toolCallOfIndex[index].paramsStr += tool.function?.arguments ?? '';
toolCallOfIndex[index].id += tool.id ?? ''
fullToolName += tool.function?.name ?? ''
fullToolParams += tool.function?.arguments ?? ''
}
// message
const newText = chunk.choices[0]?.delta?.content ?? ''
fullTextSoFar += newText
@ -230,19 +183,18 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
fullReasoningSoFar += newReasoning
}
onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, fullToolName, fullToolParams })
onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
}
// on final
const toolCalls = toolCallsFrom_OpenAICompat(toolCallOfIndex)
if (!fullTextSoFar && !fullReasoningSoFar && toolCalls.length === 0) {
if (!fullTextSoFar && !fullReasoningSoFar) {
onError({ message: 'Void: Response from model was empty.', fullError: null })
}
else {
if (manuallyParseReasoning) {
const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, openSourceThinkTags)
onFinalMessage({ fullText, fullReasoning, toolCalls, anthropicReasoning: null });
onFinalMessage({ fullText, fullReasoning, anthropicReasoning: null });
} else {
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls, anthropicReasoning: null });
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null });
}
}
})
@ -292,33 +244,11 @@ const _openaiCompatibleList = async ({ onSuccess: onSuccess_, onError: onError_,
// ------------ ANTHROPIC ------------
/**
 * Converts an internal tool description into an object that satisfies
 * `Anthropic.Messages.Tool`.
 *
 * Every key of `toolInfo.params` is listed as required in the input schema.
 *
 * @param toolInfo name, description and parameter map of the tool
 * @returns the tool object with `name`, `description` and `input_schema`
 */
const toAnthropicTool = (toolInfo: InternalToolInfo) => {
	const requiredParamNames = Object.keys(toolInfo.params)

	return {
		name: toolInfo.name,
		description: toolInfo.description,
		input_schema: {
			type: 'object',
			properties: toolInfo.params,
			required: requiredParamNames,
		},
	} satisfies Anthropic.Messages.Tool
}
/**
 * Extracts the tool calls from the content blocks of an Anthropic message.
 *
 * Only `tool_use` blocks whose name passes `isAToolName` are kept, in the
 * order they appear in `content`. The block's `input` is serialized with
 * `JSON.stringify` so every provider reports params as a string.
 *
 * @param content content blocks of the final Anthropic message
 * @returns the recognized tool calls as `{ name, paramsStr, id }` objects
 */
const toolCallsFrom_Anthropic = (content: Anthropic.Messages.ContentBlock[]): ToolCallsFrom_ReturnType => {
	const toolCalls: ToolCallsFrom_ReturnType = []
	for (const block of content) {
		if (block.type !== 'tool_use') continue // text / thinking blocks carry no tool call
		if (!isAToolName(block.name)) continue // skip tools we don't know about
		// the type was already checked above, so no second `tool_use` test is needed here
		toolCalls.push({ name: block.name, paramsStr: JSON.stringify(block.input), id: block.id })
	}
	return toolCalls
}
const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalMessage, onError, settingsOfProvider, modelSelectionOptions, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalMessage, onError, settingsOfProvider, modelSelectionOptions, modelName: modelName_, _setAborter, aiInstructions }: SendChatParams_Internal) => {
const {
modelName,
supportsSystemMessage,
contextWindow,
supportsTools,
maxOutputTokens,
reasoningCapabilities,
} = getModelCapabilities(providerName, modelName_)
@ -330,18 +260,11 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
const reasoningInfo = getSendableReasoningInfo('Chat', providerName, modelName_, modelSelectionOptions) // user's modelName_ here
const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {}
// tools
const tools = ((tools_?.length ?? 0) !== 0) ? tools_?.map(tool => toAnthropicTool(tool)) : undefined
const toolsObj: Partial<Anthropic.Messages.MessageStreamParams> = tools ? {
tools: tools,
tool_choice: { type: 'auto', disable_parallel_tool_use: true } // one tool at a time
} : {}
// anthropic-specific - max tokens
const maxTokens = reasoningInfo?.isReasoningEnabled && reasoningCapabilities ? reasoningCapabilities.reasoningMaxOutputTokens : maxOutputTokens
// instance
const { messages, separateSystemMessageStr } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, supportsAnthropicReasoningSignature: true, contextWindow, maxOutputTokens: maxTokens })
const { messages, separateSystemMessageStr } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsAnthropicReasoningSignature: true, contextWindow, maxOutputTokens: maxTokens })
const anthropic = new Anthropic({
apiKey: thisConfig.apiKey,
dangerouslyAllowBrowser: true
@ -352,7 +275,6 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
messages: messages,
model: modelName,
max_tokens: maxTokens ?? 4_096, // anthropic requires this
...toolsObj,
...includeInPayload,
})
@ -370,22 +292,22 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
if (e.content_block.type === 'text') {
if (fullText) fullText += '\n\n' // starting a 2nd text block
fullText += e.content_block.text
onText({ fullText, fullReasoning, fullToolName, fullToolParams })
onText({ fullText, fullReasoning, })
}
else if (e.content_block.type === 'thinking') {
if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block
fullReasoning += e.content_block.thinking
onText({ fullText, fullReasoning, fullToolName, fullToolParams })
onText({ fullText, fullReasoning, })
}
else if (e.content_block.type === 'redacted_thinking') {
console.log('delta', e.content_block.type)
if (fullReasoning) fullReasoning += '\n\n' // starting a 2nd reasoning block
fullReasoning += '[redacted_thinking]'
onText({ fullText, fullReasoning, fullToolName, fullToolParams })
onText({ fullText, fullReasoning, })
}
else if (e.content_block.type === 'tool_use') {
fullToolName += e.content_block.name ?? '' // anthropic gives us the tool name in the start block
onText({ fullText, fullReasoning, fullToolName, fullToolParams })
onText({ fullText, fullReasoning, })
}
}
@ -393,24 +315,23 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
else if (e.type === 'content_block_delta') {
if (e.delta.type === 'text_delta') {
fullText += e.delta.text
onText({ fullText, fullReasoning, fullToolName, fullToolParams })
onText({ fullText, fullReasoning, })
}
else if (e.delta.type === 'thinking_delta') {
fullReasoning += e.delta.thinking
onText({ fullText, fullReasoning, fullToolName, fullToolParams })
onText({ fullText, fullReasoning, })
}
else if (e.delta.type === 'input_json_delta') { // tool use
fullToolParams += e.delta.partial_json ?? '' // anthropic gives us the partial delta (string) here - https://docs.anthropic.com/en/api/messages-streaming
onText({ fullText, fullReasoning, fullToolName, fullToolParams })
onText({ fullText, fullReasoning, })
}
}
})
// on done - (or when error/fail) - this is called AFTER last streamEvent
stream.on('finalMessage', (response) => {
const toolCalls = toolCallsFrom_Anthropic(response.content)
const anthropicReasoning = response.content.filter(c => c.type === 'thinking' || c.type === 'redacted_thinking')
onFinalMessage({ fullText, fullReasoning, toolCalls, anthropicReasoning })
onFinalMessage({ fullText, fullReasoning, anthropicReasoning })
})
// on error
stream.on('error', (error) => {
@ -420,23 +341,6 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
_setAborter(() => stream.controller.abort())
}
// // in future, can do tool_use streaming in anthropic, but it's pretty fast even without streaming...
// const toolCallOfIndex: { [index: string]: { name: string, args: string } } = {}
// stream.on('streamEvent', e => {
// if (e.type === 'content_block_start') {
// if (e.content_block.type !== 'tool_use') return
// const index = e.index
// if (!toolCallOfIndex[index]) toolCallOfIndex[index] = { name: '', args: '' }
// toolCallOfIndex[index].name += e.content_block.name ?? ''
// toolCallOfIndex[index].args += e.content_block.input ?? ''
// }
// else if (e.type === 'content_block_delta') {
// if (e.delta.type !== 'input_json_delta') return
// toolCallOfIndex[e.index].args += e.delta.partial_json
// }
// })
// ------------ OLLAMA ------------
const newOllamaSDK = ({ endpoint }: { endpoint: string }) => {
// if endpoint is empty, normally ollama will send to 11434, but we want it to fail - the user should type it in

View file

@ -21,7 +21,6 @@ export const sendLLMMessage = ({
settingsOfProvider,
modelSelection,
modelSelectionOptions,
tools,
}: SendLLMMessageParams,
metricsService: IMetricsService
@ -108,7 +107,7 @@ export const sendLLMMessage = ({
}
const { sendFIM, sendChat } = implementation
if (messagesType === 'chatMessages') {
sendChat({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelSelectionOptions, modelName, _setAborter, providerName, aiInstructions, tools })
sendChat({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelSelectionOptions, modelName, _setAborter, providerName, aiInstructions })
return
}
if (messagesType === 'FIMMessage') {