From e726c3c5de3df46c7634e3d58711432c4ccf0cbc Mon Sep 17 00:00:00 2001 From: Andrew Pareles Date: Sun, 4 May 2025 19:33:27 -0700 Subject: [PATCH] add system message trimming because of very small models with 4096 window len --- .../browser/convertToLLMMessageService.ts | 74 ++++++++++++------- .../contrib/void/browser/editCodeService.ts | 9 +-- .../contrib/void/common/modelCapabilities.ts | 2 +- .../llmMessage/extractGrammar.ts | 3 + 4 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts index 2a57ea7b..1ce7996e 100644 --- a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts +++ b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts @@ -39,7 +39,7 @@ type SimpleLLMMessage = { const EMPTY_MESSAGE = '(empty message)' const CHARS_PER_TOKEN = 4 -const TRIM_TO_LEN = 60 +const TRIM_TO_LEN = 120 @@ -252,7 +252,7 @@ export type GeminiMessage = { // --- CHAT --- const prepareOpenAIOrAnthropicMessages = ({ - messages, + messages: messages_, systemMessage, aiInstructions, supportsSystemMessage, @@ -270,18 +270,30 @@ const prepareOpenAIOrAnthropicMessages = ({ contextWindow: number, maxOutputTokens: number | null | undefined, }): { messages: AnthropicOrOpenAILLMMessage[], separateSystemMessage: string | undefined } => { + maxOutputTokens = maxOutputTokens ?? 
4_096 // default to 4096 + let messages: (SimpleLLMMessage | { role: 'system', content: string })[] = deepClone(messages_) + + // ================ system message ================ + // A COMPLETE HACK: first message is the system message, so it takes part in context trimming + + const sysMsgParts: string[] = [] + if (aiInstructions) sysMsgParts.push(`GUIDELINES (from the user's .voidrules file):\n${aiInstructions}`) + if (systemMessage) sysMsgParts.push(systemMessage) + const combinedSystemMessage = sysMsgParts.join('\n\n') + + messages.unshift({ role: 'system', content: combinedSystemMessage }) // ================ trim ================ - - messages = deepClone(messages) messages = messages.map(m => ({ ...m, content: m.role !== 'tool' ? m.content.trim() : m.content })) + type MesType = (typeof messages)[0] + // ================ fit into context ================ // the higher the weight, the higher the desire to truncate - TRIM HIGHEST WEIGHT MESSAGES const alreadyTrimmedIdxes = new Set() - const weight = (message: SimpleLLMMessage, messages: SimpleLLMMessage[], idx: number) => { + const weight = (message: MesType, messages: MesType[], idx: number) => { const base = message.content.length let multiplier: number @@ -289,22 +301,30 @@ const prepareOpenAIOrAnthropicMessages = ({ if (message.role === 'user') { multiplier *= 1 } + else if (message.role === 'system') { + multiplier *= .01 // very low weight + } else { multiplier *= 10 // llm tokens are far less valuable than user tokens } - // 1st message, last 3 msgs, any already modified message should be low in weight - if (idx === 0 || idx >= messages.length - 1 - 3 || alreadyTrimmedIdxes.has(idx)) { + + // any already modified message should not be trimmed again + if (alreadyTrimmedIdxes.has(idx)) { + multiplier = 0 + } + // first 2 messages (incl. system message) and last 4 messages should be very low weight + if (idx <= 1 || idx >= messages.length - 1 - 3) { multiplier *= .05 } return base * multiplier } - const _findLargestByWeight = (messages: SimpleLLMMessage[]) => { + 
const _findLargestByWeight = (messages_: MesType[]) => { let largestIndex = -1 let largestWeight = -Infinity for (let i = 0; i < messages.length; i += 1) { const m = messages[i] - const w = weight(m, messages, i) + const w = weight(m, messages_, i) if (w > largestWeight) { largestWeight = w largestIndex = i @@ -315,7 +335,11 @@ const prepareOpenAIOrAnthropicMessages = ({ let totalLen = 0 for (const m of messages) { totalLen += m.content.length } - const charsNeedToTrim = totalLen - (contextWindow - maxOutputTokens) * CHARS_PER_TOKEN + const charsNeedToTrim = totalLen - Math.max( + (contextWindow - maxOutputTokens) * CHARS_PER_TOKEN, // can be 0, in which case charsNeedToTrim=everything, bad + 4_096 // always keep at least 4096 chars untrimmed (just an arbitrary small value) + ) + // <-----------------------------------------> // 0 | | | @@ -335,53 +359,53 @@ const prepareOpenAIOrAnthropicMessages = ({ // if can finish here, do const numCharsWillTrim = m.content.length - TRIM_TO_LEN if (numCharsWillTrim > remainingCharsToTrim) { - m.content = m.content.slice(0, m.content.length - remainingCharsToTrim).trim() + m.content = m.content.slice(0, m.content.length - remainingCharsToTrim - '...'.length).trim() + '...' break } remainingCharsToTrim -= numCharsWillTrim - m.content = m.content.substring(0, TRIM_TO_LEN - 3) + '...' + m.content = m.content.substring(0, TRIM_TO_LEN - '...'.length) + '...' 
alreadyTrimmedIdxes.add(trimIdx) } + // ================ system message hack ================ + const newSysMsg = messages.shift()!.content + + // ================ tools and anthropicReasoning ================ + // SYSTEM MESSAGE HACK: we shifted (removed) the system message role, so now SimpleLLMMessage[] is valid let llmChatMessages: AnthropicOrOpenAILLMMessage[] = [] if (!specialToolFormat) { // XML tool behavior - llmChatMessages = prepareMessages_XML_tools(messages, supportsAnthropicReasoning) + llmChatMessages = prepareMessages_XML_tools(messages as SimpleLLMMessage[], supportsAnthropicReasoning) } else if (specialToolFormat === 'anthropic-style') { - llmChatMessages = prepareMessages_anthropic_tools(messages, supportsAnthropicReasoning) + llmChatMessages = prepareMessages_anthropic_tools(messages as SimpleLLMMessage[], supportsAnthropicReasoning) } else if (specialToolFormat === 'openai-style') { - llmChatMessages = prepareMessages_openai_tools(messages) + llmChatMessages = prepareMessages_openai_tools(messages as SimpleLLMMessage[]) } const llmMessages = llmChatMessages - // ================ system message concat ================ - - // find system messages and concatenate them - const newSystemMessage = aiInstructions ? - `${(systemMessage ? 
`${systemMessage}\n\n` : '')}GUIDELINES (from the user's .voidrules file):\n${aiInstructions}` - : systemMessage + // ================ system message add as first llmMessage ================ let separateSystemMessageStr: string | undefined = undefined // if supports system message if (supportsSystemMessage) { if (supportsSystemMessage === 'separated') - separateSystemMessageStr = newSystemMessage + separateSystemMessageStr = newSysMsg else if (supportsSystemMessage === 'system-role') - llmMessages.unshift({ role: 'system', content: newSystemMessage }) // add new first message + llmMessages.unshift({ role: 'system', content: newSysMsg }) // add new first message else if (supportsSystemMessage === 'developer-role') - llmMessages.unshift({ role: 'developer', content: newSystemMessage }) // add new first message + llmMessages.unshift({ role: 'developer', content: newSysMsg }) // add new first message } // if does not support system message else { const newFirstMessage = { role: 'user', - content: `\n${newSystemMessage}\n\n${llmMessages[0].content}` + content: `\n${newSysMsg}\n\n${llmMessages[0].content}` } as const llmMessages.splice(0, 1) // delete first message llmMessages.unshift(newFirstMessage) // add new first message diff --git a/src/vs/workbench/contrib/void/browser/editCodeService.ts b/src/vs/workbench/contrib/void/browser/editCodeService.ts index 41f45dce..85921a69 100644 --- a/src/vs/workbench/contrib/void/browser/editCodeService.ts +++ b/src/vs/workbench/contrib/void/browser/editCodeService.ts @@ -1614,15 +1614,12 @@ class EditCodeService extends Disposable implements IEditCodeService { endLine -= 1 // including newline before start - const contentBeforeStart = startLine !== 0 ? + const origStart = (startLine !== 0 ? 
modelStrLines.slice(0, startLine).join('\n') + '\n' - : '' + : '').length // including endline at end - const contentUpToEnd = modelStrLines.slice(0, endLine + 1).join('\n') - - const origStart = contentBeforeStart.length; - const origEnd = contentUpToEnd.length; + const origEnd = modelStrLines.slice(0, endLine + 1).join('\n').length - 1 replacements.push({ origStart, origEnd, block: b }); } diff --git a/src/vs/workbench/contrib/void/common/modelCapabilities.ts b/src/vs/workbench/contrib/void/common/modelCapabilities.ts index c30df73a..93cd953e 100644 --- a/src/vs/workbench/contrib/void/common/modelCapabilities.ts +++ b/src/vs/workbench/contrib/void/common/modelCapabilities.ts @@ -958,7 +958,7 @@ const vLLMSettings: VoidStaticProviderInfo = { const lmStudioSettings: VoidStaticProviderInfo = { providerReasoningIOSettings: { output: { needsManualParse: true }, }, - modelOptionsFallback: (modelName) => extensiveModelFallback(modelName, { downloadable: { sizeGb: 'not-known' } }), + modelOptionsFallback: (modelName) => extensiveModelFallback(modelName, { downloadable: { sizeGb: 'not-known' }, contextWindow: 4_096 }), modelOptions: {}, // TODO } diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts index b90dbafa..e96117b2 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts @@ -23,6 +23,9 @@ export const extractReasoningWrapper = ( let fullTextSoFar = '' let fullReasoningSoFar = '' + + if (!thinkTags[0] || !thinkTags[1]) throw new Error(`thinkTags must not be empty if provided. Got ${JSON.stringify(thinkTags)}.`) + let onText_ = onText onText = (params) => { onText_(params)