mirror of
https://github.com/voideditor/void
synced 2026-05-24 09:58:23 +00:00
add system message trimming because of very small models with 4096 window len
This commit is contained in:
parent
d826052444
commit
e726c3c5de
4 changed files with 56 additions and 32 deletions
|
|
@ -39,7 +39,7 @@ type SimpleLLMMessage = {
|
|||
const EMPTY_MESSAGE = '(empty message)'
|
||||
|
||||
const CHARS_PER_TOKEN = 4
|
||||
const TRIM_TO_LEN = 60
|
||||
const TRIM_TO_LEN = 120
|
||||
|
||||
|
||||
|
||||
|
|
@ -252,7 +252,7 @@ export type GeminiMessage = {
|
|||
// --- CHAT ---
|
||||
|
||||
const prepareOpenAIOrAnthropicMessages = ({
|
||||
messages,
|
||||
messages: messages_,
|
||||
systemMessage,
|
||||
aiInstructions,
|
||||
supportsSystemMessage,
|
||||
|
|
@ -270,18 +270,30 @@ const prepareOpenAIOrAnthropicMessages = ({
|
|||
contextWindow: number,
|
||||
maxOutputTokens: number | null | undefined,
|
||||
}): { messages: AnthropicOrOpenAILLMMessage[], separateSystemMessage: string | undefined } => {
|
||||
|
||||
maxOutputTokens = maxOutputTokens ?? 4_096 // default to 4096
|
||||
let messages: (SimpleLLMMessage | { role: 'system', content: string })[] = deepClone(messages_)
|
||||
|
||||
// ================ system message ================
|
||||
// A COMPLETE HACK: last message is system message for context purposes
|
||||
|
||||
const sysMsgParts: string[] = []
|
||||
if (aiInstructions) sysMsgParts.push(`GUIDELINES (from the user's .voidrules file):\n${aiInstructions}`)
|
||||
if (systemMessage) sysMsgParts.push(systemMessage)
|
||||
const combinedSystemMessage = sysMsgParts.join('\n\n')
|
||||
|
||||
messages.unshift({ role: 'system', content: combinedSystemMessage })
|
||||
|
||||
// ================ trim ================
|
||||
|
||||
messages = deepClone(messages)
|
||||
messages = messages.map(m => ({ ...m, content: m.role !== 'tool' ? m.content.trim() : m.content }))
|
||||
|
||||
type MesType = (typeof messages)[0]
|
||||
|
||||
// ================ fit into context ================
|
||||
|
||||
// the higher the weight, the higher the desire to truncate - TRIM HIGHEST WEIGHT MESSAGES
|
||||
const alreadyTrimmedIdxes = new Set<number>()
|
||||
const weight = (message: SimpleLLMMessage, messages: SimpleLLMMessage[], idx: number) => {
|
||||
const weight = (message: MesType, messages: MesType[], idx: number) => {
|
||||
const base = message.content.length
|
||||
|
||||
let multiplier: number
|
||||
|
|
@ -289,22 +301,30 @@ const prepareOpenAIOrAnthropicMessages = ({
|
|||
if (message.role === 'user') {
|
||||
multiplier *= 1
|
||||
}
|
||||
else if (message.role === 'system') {
|
||||
multiplier *= .01 // very low weight
|
||||
}
|
||||
else {
|
||||
multiplier *= 10 // llm tokens are far less valuable than user tokens
|
||||
}
|
||||
// 1st message, last 3 msgs, any already modified message should be low in weight
|
||||
if (idx === 0 || idx >= messages.length - 1 - 3 || alreadyTrimmedIdxes.has(idx)) {
|
||||
|
||||
// any already modified message should not be trimmed again
|
||||
if (alreadyTrimmedIdxes.has(idx)) {
|
||||
multiplier = 0
|
||||
}
|
||||
// 1st and last messages should be very low weight
|
||||
if (idx <= 1 || idx >= messages.length - 1 - 3) {
|
||||
multiplier *= .05
|
||||
}
|
||||
return base * multiplier
|
||||
}
|
||||
|
||||
const _findLargestByWeight = (messages: SimpleLLMMessage[]) => {
|
||||
const _findLargestByWeight = (messages_: MesType[]) => {
|
||||
let largestIndex = -1
|
||||
let largestWeight = -Infinity
|
||||
for (let i = 0; i < messages.length; i += 1) {
|
||||
const m = messages[i]
|
||||
const w = weight(m, messages, i)
|
||||
const w = weight(m, messages_, i)
|
||||
if (w > largestWeight) {
|
||||
largestWeight = w
|
||||
largestIndex = i
|
||||
|
|
@ -315,7 +335,11 @@ const prepareOpenAIOrAnthropicMessages = ({
|
|||
|
||||
let totalLen = 0
|
||||
for (const m of messages) { totalLen += m.content.length }
|
||||
const charsNeedToTrim = totalLen - (contextWindow - maxOutputTokens) * CHARS_PER_TOKEN
|
||||
const charsNeedToTrim = totalLen - Math.max(
|
||||
(contextWindow - maxOutputTokens) * CHARS_PER_TOKEN, // can be 0, in which case charsNeedToTrim=everything, bad
|
||||
4_096 // ensure we don't trim at least 4096 chars (just a random small value)
|
||||
)
|
||||
|
||||
|
||||
// <----------------------------------------->
|
||||
// 0 | | |
|
||||
|
|
@ -335,53 +359,53 @@ const prepareOpenAIOrAnthropicMessages = ({
|
|||
// if can finish here, do
|
||||
const numCharsWillTrim = m.content.length - TRIM_TO_LEN
|
||||
if (numCharsWillTrim > remainingCharsToTrim) {
|
||||
m.content = m.content.slice(0, m.content.length - remainingCharsToTrim).trim()
|
||||
m.content = m.content.slice(0, m.content.length - remainingCharsToTrim - '...'.length).trim() + '...'
|
||||
break
|
||||
}
|
||||
|
||||
remainingCharsToTrim -= numCharsWillTrim
|
||||
m.content = m.content.substring(0, TRIM_TO_LEN - 3) + '...'
|
||||
m.content = m.content.substring(0, TRIM_TO_LEN - '...'.length) + '...'
|
||||
alreadyTrimmedIdxes.add(trimIdx)
|
||||
}
|
||||
|
||||
// ================ system message hack ================
|
||||
const newSysMsg = messages.shift()!.content
|
||||
|
||||
|
||||
// ================ tools and anthropicReasoning ================
|
||||
// SYSTEM MESSAGE HACK: we shifted (removed) the system message role, so now SimpleLLMMessage[] is valid
|
||||
|
||||
let llmChatMessages: AnthropicOrOpenAILLMMessage[] = []
|
||||
if (!specialToolFormat) { // XML tool behavior
|
||||
llmChatMessages = prepareMessages_XML_tools(messages, supportsAnthropicReasoning)
|
||||
llmChatMessages = prepareMessages_XML_tools(messages as SimpleLLMMessage[], supportsAnthropicReasoning)
|
||||
}
|
||||
else if (specialToolFormat === 'anthropic-style') {
|
||||
llmChatMessages = prepareMessages_anthropic_tools(messages, supportsAnthropicReasoning)
|
||||
llmChatMessages = prepareMessages_anthropic_tools(messages as SimpleLLMMessage[], supportsAnthropicReasoning)
|
||||
}
|
||||
else if (specialToolFormat === 'openai-style') {
|
||||
llmChatMessages = prepareMessages_openai_tools(messages)
|
||||
llmChatMessages = prepareMessages_openai_tools(messages as SimpleLLMMessage[])
|
||||
}
|
||||
const llmMessages = llmChatMessages
|
||||
|
||||
|
||||
// ================ system message concat ================
|
||||
|
||||
// find system messages and concatenate them
|
||||
const newSystemMessage = aiInstructions ?
|
||||
`${(systemMessage ? `${systemMessage}\n\n` : '')}GUIDELINES (from the user's .voidrules file):\n${aiInstructions}`
|
||||
: systemMessage
|
||||
// ================ system message add as first llmMessage ================
|
||||
|
||||
let separateSystemMessageStr: string | undefined = undefined
|
||||
|
||||
// if supports system message
|
||||
if (supportsSystemMessage) {
|
||||
if (supportsSystemMessage === 'separated')
|
||||
separateSystemMessageStr = newSystemMessage
|
||||
separateSystemMessageStr = newSysMsg
|
||||
else if (supportsSystemMessage === 'system-role')
|
||||
llmMessages.unshift({ role: 'system', content: newSystemMessage }) // add new first message
|
||||
llmMessages.unshift({ role: 'system', content: newSysMsg }) // add new first message
|
||||
else if (supportsSystemMessage === 'developer-role')
|
||||
llmMessages.unshift({ role: 'developer', content: newSystemMessage }) // add new first message
|
||||
llmMessages.unshift({ role: 'developer', content: newSysMsg }) // add new first message
|
||||
}
|
||||
// if does not support system message
|
||||
else {
|
||||
const newFirstMessage = {
|
||||
role: 'user',
|
||||
content: `<SYSTEM_MESSAGE>\n${newSystemMessage}\n</SYSTEM_MESSAGE>\n${llmMessages[0].content}`
|
||||
content: `<SYSTEM_MESSAGE>\n${newSysMsg}\n</SYSTEM_MESSAGE>\n${llmMessages[0].content}`
|
||||
} as const
|
||||
llmMessages.splice(0, 1) // delete first message
|
||||
llmMessages.unshift(newFirstMessage) // add new first message
|
||||
|
|
|
|||
|
|
@ -1614,15 +1614,12 @@ class EditCodeService extends Disposable implements IEditCodeService {
|
|||
endLine -= 1
|
||||
|
||||
// including newline before start
|
||||
const contentBeforeStart = startLine !== 0 ?
|
||||
const origStart = (startLine !== 0 ?
|
||||
modelStrLines.slice(0, startLine).join('\n') + '\n'
|
||||
: ''
|
||||
: '').length
|
||||
|
||||
// including endline at end
|
||||
const contentUpToEnd = modelStrLines.slice(0, endLine + 1).join('\n')
|
||||
|
||||
const origStart = contentBeforeStart.length;
|
||||
const origEnd = contentUpToEnd.length;
|
||||
const origEnd = modelStrLines.slice(0, endLine + 1).join('\n').length - 1
|
||||
|
||||
replacements.push({ origStart, origEnd, block: b });
|
||||
}
|
||||
|
|
|
|||
|
|
@ -958,7 +958,7 @@ const vLLMSettings: VoidStaticProviderInfo = {
|
|||
|
||||
const lmStudioSettings: VoidStaticProviderInfo = {
|
||||
providerReasoningIOSettings: { output: { needsManualParse: true }, },
|
||||
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName, { downloadable: { sizeGb: 'not-known' } }),
|
||||
modelOptionsFallback: (modelName) => extensiveModelFallback(modelName, { downloadable: { sizeGb: 'not-known' }, contextWindow: 4_096 }),
|
||||
modelOptions: {}, // TODO
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ export const extractReasoningWrapper = (
|
|||
let fullTextSoFar = ''
|
||||
let fullReasoningSoFar = ''
|
||||
|
||||
|
||||
if (!thinkTags[0] || !thinkTags[1]) throw new Error(`thinkTags must not be empty if provided. Got ${JSON.stringify(thinkTags)}.`)
|
||||
|
||||
let onText_ = onText
|
||||
onText = (params) => {
|
||||
onText_(params)
|
||||
|
|
|
|||
Loading…
Reference in a new issue