mirror of
https://github.com/voideditor/void
synced 2026-05-24 09:58:23 +00:00
fix extractCode
This commit is contained in:
parent
4445ff28ce
commit
fa9a3db86f
4 changed files with 167 additions and 286 deletions
|
|
@ -1500,9 +1500,9 @@ We only need to do it for files that were edited since `from`, ie files between
|
|||
}
|
||||
}, true)
|
||||
|
||||
// when change focused message idx, jump
|
||||
if (messageIdx !== undefined)
|
||||
this.jumpToCheckpointBeforeMessageIdx({ threadId, messageIdx, jumpToUserModified: true })
|
||||
// // when change focused message idx, jump - do not jump back when click edit, too confusing.
|
||||
// if (messageIdx !== undefined)
|
||||
// this.jumpToCheckpointBeforeMessageIdx({ threadId, messageIdx, jumpToUserModified: true })
|
||||
}
|
||||
|
||||
// set message.state
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
*--------------------------------------------------------------------------------------*/
|
||||
|
||||
import { DIVIDER, FINAL, ORIGINAL } from '../prompt/prompts.js'
|
||||
class SurroundingsRemover {
|
||||
export class SurroundingsRemover {
|
||||
readonly originalS: string
|
||||
i: number
|
||||
j: number
|
||||
|
|
@ -58,12 +58,13 @@ class SurroundingsRemover {
|
|||
// return offset === suffix.length
|
||||
// }
|
||||
|
||||
// either removes all or nothing
|
||||
removeFromStartUntilFullMatch = (until: string, alsoRemoveUntilStr: boolean) => {
|
||||
const index = this.originalS.indexOf(until, this.i)
|
||||
|
||||
if (index === -1) {
|
||||
this.i = this.j + 1
|
||||
return null
|
||||
// this.i = this.j + 1
|
||||
return false
|
||||
}
|
||||
// console.log('index', index, until.length)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,11 +3,10 @@
|
|||
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
|
||||
*--------------------------------------------------------------------------------------*/
|
||||
|
||||
import { endsWithAnyPrefixOf } from '../../common/helpers/extractCodeFromResult.js'
|
||||
import { endsWithAnyPrefixOf, SurroundingsRemover } from '../../common/helpers/extractCodeFromResult.js'
|
||||
import { availableTools, InternalToolInfo, ToolName, ToolParamName } from '../../common/prompt/prompts.js'
|
||||
import { OnFinalMessage, OnText, RawToolCallObj } from '../../common/sendLLMMessageTypes.js'
|
||||
import { OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js'
|
||||
import { ChatMode } from '../../common/voidSettingsTypes.js'
|
||||
import { createSaxParser } from './sax.js'
|
||||
|
||||
|
||||
// =============== reasoning ===============
|
||||
|
|
@ -137,17 +136,113 @@ export const extractReasoningWrapper = (
|
|||
|
||||
// =============== tools ===============
|
||||
|
||||
type ToolsState = {
|
||||
level: 'normal',
|
||||
} | {
|
||||
level: 'tool',
|
||||
toolName: ToolName,
|
||||
currentToolCall: RawToolCallObj,
|
||||
} | {
|
||||
level: 'param',
|
||||
toolName: ToolName,
|
||||
paramName: ToolParamName,
|
||||
currentToolCall: RawToolCallObj,
|
||||
|
||||
|
||||
/**
 * Checks whether `fullText` currently ends with the beginning of one of `toolTags`
 * (i.e. a tool tag is in the middle of being streamed).
 *
 * Tags are tested in list order via `endsWithAnyPrefixOf`; the first tag for which that
 * helper returns a truthy value wins.
 *
 * @param fullText The text received so far.
 * @param toolTags Candidate tags to test, e.g. `<tool_name>`.
 * @returns `[foundPrefix, toolTag]` for the first tag that matches (where `foundPrefix` is
 *          whatever `endsWithAnyPrefixOf` returned), or `false` when no tag matches.
 */
const findPartiallyWrittenToolTagAtEnd = (fullText: string, toolTags: string[]) => {
	for (const candidateTag of toolTags) {
		const partialMatch = endsWithAnyPrefixOf(fullText, candidateTag)
		// Not a partial match for this tag; try the next one.
		if (!partialMatch) continue
		return [partialMatch, candidateTag] as const
	}
	return false
}
|
||||
|
||||
/**
 * Finds the earliest occurrence in `fullText` of any of the strings in `matches`.
 *
 * Every candidate is searched and the one with the smallest index is returned, so the
 * result does not depend on the order of `matches` (when two candidates start at the
 * same index, the one listed first wins).
 *
 * @param fullText The text to search.
 * @param matches Candidate substrings to look for.
 * @returns `[index, matchedString]` for the earliest occurrence, or `null` if none of the
 *          candidates occurs in `fullText`.
 */
const findIndexOfAny = (fullText: string, matches: string[]) => {
	let earliest: readonly [number, string] | null = null
	for (const str of matches) {
		const idx = fullText.indexOf(str)
		if (idx === -1) continue
		// Strict `<` keeps the first-listed candidate on ties.
		if (earliest === null || idx < earliest[0]) {
			earliest = [idx, str] as const
		}
	}
	return earliest
}
|
||||
|
||||
|
||||
type ToolOfToolName = { [toolName: string]: InternalToolInfo | undefined }
|
||||
/**
 * Parses a (possibly still-streaming) XML-style tool call of the form
 * `<toolName><param1>...</param1><param2>...</param2></toolName>` into a `RawToolCallObj`.
 *
 * The input may be a prefix of the final text: a missing `</toolName>` leaves `isDone`
 * false, and a param whose closing tag has not arrived yet is reported with the text seen
 * so far and is not listed in `doneParams`.
 *
 * @param toolName Name of the tool whose tags are being parsed.
 * @param str Text containing the tool call.
 * @param toolOfToolName Lookup of tool info by name; the keys of `params` on the entry for
 *        `toolName` are the only param tags that are recognized.
 * @returns The tool call parsed so far. If `<toolName>` is absent, or the tool has no
 *          known params, `rawParams` and `doneParams` are empty.
 */
const parseXMLPrefixToToolCall = (toolName: ToolName, str: string, toolOfToolName: ToolOfToolName): RawToolCallObj => {
	const paramsObj: RawToolParamsObj = {}
	const doneParams: ToolParamName[] = []
	let isDone = false

	// Snapshot of the state accumulated so far (called at every exit point).
	const getAnswer = (): RawToolCallObj => {
		const ans: RawToolCallObj = {
			name: toolName,
			rawParams: paramsObj,
			doneParams: doneParams,
			isDone: isDone
		}
		return ans
	}

	// find first toolName tag
	const openToolTag = `<${toolName}>`
	const openIdx = str.indexOf(openToolTag)
	if (openIdx === -1) return getAnswer()
	const bodyStart = openIdx + openToolTag.length

	// The LAST closing tag is used so param contents may themselves contain `</toolName>`.
	// A closing tag located before the body start (including "not found", -1) does not close
	// this call; without this check `substring` would silently swap its arguments.
	let closeIdx = str.lastIndexOf(`</${toolName}>`)
	if (closeIdx < bodyStart) closeIdx = Infinity
	else isDone = true

	const body = str.substring(bodyStart, closeIdx)

	const pm = new SurroundingsRemover(body)

	const allowedParams = Object.keys(toolOfToolName[toolName]?.params ?? {}) as ToolParamName[]
	if (allowedParams.length === 0) return getAnswer()

	let latestMatchedOpenParam: null | ToolParamName = null
	let n = 0
	while (true) {
		n += 1
		if (n > 10) return getAnswer() // just for good measure as this code is early

		// Find the param opening tag that appears EARLIEST in the remaining text, so that
		// params are consumed in the order they were written rather than in the order of
		// `allowedParams` (which would skip over, and lose, any param written before it).
		let matchedOpenParam: null | ToolParamName = null
		let earliestOpenIdx = Infinity
		for (const paramName of allowedParams) {
			const idx = pm.originalS.indexOf(`<${paramName}>`, pm.i)
			if (idx !== -1 && idx < earliestOpenIdx) {
				earliestOpenIdx = idx
				matchedOpenParam = paramName
			}
		}

		// if did not find a new param, stop
		if (matchedOpenParam === null || !pm.removeFromStartUntilFullMatch(`<${matchedOpenParam}>`, true)) {
			// NOTE(review): this appends whatever `pm.value()` returns to the most recent
			// param even though that param was already closed — kept as in the original;
			// confirm this is intended.
			if (latestMatchedOpenParam !== null) {
				paramsObj[latestMatchedOpenParam] += pm.value()
			}
			return getAnswer()
		}
		latestMatchedOpenParam = matchedOpenParam

		paramsObj[latestMatchedOpenParam] = ''

		// Find the closing tag of THIS param only. Accepting any param's closing tag would
		// end the param early whenever its contents mention another param's closing tag.
		const contentStart = pm.i
		const closeTag = `</${latestMatchedOpenParam}>`
		const closed = pm.removeFromStartUntilFullMatch(closeTag, true)

		// if did not find the close tag, the param is still streaming: report what we have and stop
		if (!closed) {
			paramsObj[latestMatchedOpenParam] += pm.value()
			return getAnswer()
		}

		doneParams.push(latestMatchedOpenParam)
		// `pm.i` now sits just past the closing tag, so back up over it to get the contents.
		paramsObj[latestMatchedOpenParam] += pm.originalS.substring(contentStart, pm.i - closeTag.length)
	}
}
|
||||
|
||||
export const extractToolsWrapper = (
|
||||
|
|
@ -156,125 +251,18 @@ export const extractToolsWrapper = (
|
|||
const tools = availableTools(chatMode)
|
||||
if (!tools) return { newOnText: onText, newOnFinalMessage: onFinalMessage }
|
||||
|
||||
const toolOfToolName: { [toolName: string]: InternalToolInfo | undefined } = {}
|
||||
const toolOfToolName: ToolOfToolName = {}
|
||||
const toolOpenTags = tools.map(t => `<${t.name}>`)
|
||||
for (const t of tools) { toolOfToolName[t.name] = t }
|
||||
|
||||
// detect <availableTools[0]></availableTools[0]>, etc
|
||||
let fullText = '';
|
||||
let trueFullText = ''
|
||||
const firstToolCallRef: { current: RawToolCallObj | undefined } = { current: undefined }
|
||||
|
||||
let state: ToolsState = { level: 'normal' }
|
||||
|
||||
|
||||
const getRawNewText = () => {
|
||||
return trueFullText.substring(parser.startTagPosition, parser.position + 1)
|
||||
}
|
||||
const parser = createSaxParser()
|
||||
|
||||
// when see open tag <tagName>
|
||||
parser.onopentag = (node) => {
|
||||
const rawNewText = getRawNewText()
|
||||
const tagName = node.name;
|
||||
console.log('OPENING', tagName)
|
||||
console.log('state0:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
|
||||
|
||||
if (state.level === 'normal') {
|
||||
if (tagName in toolOfToolName) { // valid toolName
|
||||
state = {
|
||||
level: 'tool',
|
||||
toolName: tagName as ToolName,
|
||||
currentToolCall: { name: tagName as ToolName, rawParams: {}, doneParams: [], isDone: false }
|
||||
}
|
||||
firstToolCallRef.current = state.currentToolCall
|
||||
}
|
||||
else {
|
||||
fullText += rawNewText // count as plaintext
|
||||
console.log('adding raw a', rawNewText)
|
||||
|
||||
}
|
||||
}
|
||||
else if (state.level === 'tool') {
|
||||
if (tagName in (toolOfToolName[state.toolName]?.params ?? {})) { // valid param
|
||||
state = {
|
||||
level: 'param',
|
||||
toolName: state.toolName,
|
||||
paramName: tagName as ToolParamName,
|
||||
currentToolCall: state.currentToolCall,
|
||||
}
|
||||
}
|
||||
else {
|
||||
// would normally be rawNewText, but we ignore all text inside tools
|
||||
}
|
||||
}
|
||||
else if (state.level === 'param') { // cannot double nest
|
||||
fullText += rawNewText // count as plaintext
|
||||
console.log('adding raw b', rawNewText)
|
||||
|
||||
}
|
||||
|
||||
console.log('state1:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
|
||||
|
||||
};
|
||||
|
||||
parser.onclosetag = (tagName) => {
|
||||
const rawNewText = getRawNewText()
|
||||
console.log('CLOSING', tagName)
|
||||
console.log('state0:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
|
||||
|
||||
|
||||
if (state.level === 'normal') {
|
||||
fullText += rawNewText
|
||||
console.log('adding raw A', rawNewText)
|
||||
}
|
||||
else if (state.level === 'tool') {
|
||||
if (tagName === state.toolName) { // closed the tool
|
||||
state.currentToolCall.isDone = true
|
||||
state = {
|
||||
level: 'normal',
|
||||
}
|
||||
}
|
||||
else { // add as text
|
||||
fullText += rawNewText
|
||||
console.log('adding raw B', rawNewText)
|
||||
}
|
||||
}
|
||||
else if (state.level === 'param') {
|
||||
if (tagName === state.paramName) { // closed the param
|
||||
state.currentToolCall.doneParams.push(state.paramName)
|
||||
state = {
|
||||
level: 'tool',
|
||||
toolName: state.toolName,
|
||||
currentToolCall: state.currentToolCall,
|
||||
}
|
||||
}
|
||||
else {
|
||||
fullText += rawNewText
|
||||
console.log('adding raw C', rawNewText)
|
||||
|
||||
}
|
||||
}
|
||||
console.log('state1:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
parser.ontext = (text) => {
|
||||
if (state.level === 'normal') {
|
||||
fullText += text
|
||||
}
|
||||
// start param
|
||||
else if (state.level === 'tool') {
|
||||
// ignore all text in a tool, all text should go in the param tags inside it
|
||||
}
|
||||
else if (state.level === 'param') {
|
||||
if (!(state.paramName in state.currentToolCall.rawParams)) state.currentToolCall.rawParams[state.paramName] = ''
|
||||
state.currentToolCall.rawParams[state.paramName] += text
|
||||
}
|
||||
}
|
||||
let latestToolCall: RawToolCallObj | undefined = undefined
|
||||
|
||||
|
||||
let foundOpenTag: { idx: number, toolName: ToolName } | null = null
|
||||
let openToolTagBuffer = '' // the characters we've seen so far that come after a < with no space afterwards, not yet added to fullText
|
||||
|
||||
let prevFullTextLen = 0
|
||||
const newOnText: OnText = (params) => {
|
||||
|
|
@ -282,13 +270,55 @@ export const extractToolsWrapper = (
|
|||
prevFullTextLen = params.fullText.length
|
||||
trueFullText = params.fullText
|
||||
|
||||
parser.write(newText)
|
||||
console.log('NEWTEXT', JSON.stringify(newText))
|
||||
|
||||
|
||||
if (foundOpenTag === null) {
|
||||
const newFullText = openToolTagBuffer + newText
|
||||
// ensure the code below doesn't run if only half a tag has been written
|
||||
const isPartial = findPartiallyWrittenToolTagAtEnd(newFullText, toolOpenTags)
|
||||
if (isPartial) {
|
||||
console.log('--- partial!!!')
|
||||
openToolTagBuffer += newText
|
||||
}
|
||||
// if no tooltag is partially written at the end, attempt to get the index
|
||||
else {
|
||||
// we will instantly retroactively remove this if it's a tag match
|
||||
fullText += openToolTagBuffer
|
||||
openToolTagBuffer = ''
|
||||
fullText += newText
|
||||
|
||||
const i = findIndexOfAny(fullText, toolOpenTags)
|
||||
if (i !== null) {
|
||||
const [idx, toolTag] = i
|
||||
const toolName = toolTag.substring(1, toolTag.length - 1) as ToolName
|
||||
console.log('found ', toolName)
|
||||
foundOpenTag = { idx, toolName }
|
||||
|
||||
// do not count anything at or after i in fullText
|
||||
fullText = fullText.substring(0, idx)
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// toolTagIdx is not null, so parse the XML
|
||||
if (foundOpenTag !== null) {
|
||||
latestToolCall = parseXMLPrefixToToolCall(
|
||||
foundOpenTag.toolName,
|
||||
trueFullText.substring(foundOpenTag.idx, Infinity),
|
||||
toolOfToolName,
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
// firstToolCallRef.current === state.currentToolCall is always true
|
||||
onText({
|
||||
...params,
|
||||
fullText,
|
||||
toolCall: firstToolCallRef.current,
|
||||
toolCall: latestToolCall,
|
||||
});
|
||||
};
|
||||
|
||||
|
|
@ -298,7 +328,7 @@ export const extractToolsWrapper = (
|
|||
newOnText({ ...params })
|
||||
|
||||
fullText = fullText.trimEnd()
|
||||
const toolCall = firstToolCallRef.current
|
||||
const toolCall = latestToolCall
|
||||
if (toolCall) {
|
||||
// trim off all whitespace at and before first \n and after last \n for each param
|
||||
for (const p in toolCall.rawParams) {
|
||||
|
|
|
|||
|
|
@ -1,150 +0,0 @@
|
|||
/*--------------------------------------------------------------------------------------
|
||||
* Copyright 2025 Glass Devtools, Inc. All rights reserved.
|
||||
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
|
||||
*--------------------------------------------------------------------------------------*/
|
||||
|
||||
/** Options accepted by `createSaxParser`. */
export interface SaxParserOptions {
	/** When true, tag names are lower-cased before being passed to the handlers. */
	lowercase?: boolean;
}

/** Node passed to `onopentag`. */
export interface SaxNode {
	/** Tag name (first whitespace-delimited word inside `<...>`). */
	name: string;
	/** Attributes are not parsed by this parser; always an empty object. */
	attributes: { [key: string]: string };
}

/** A minimal streaming, event-driven SAX-like parser. */
export interface SaxParser {
	// Event handlers that can be set by the consumer.
	onopentag: ((node: SaxNode) => void) | null;
	ontext: ((text: string) => void) | null;
	onclosetag: ((tagName: string) => void) | null;
	/**
	 * Offset, within the concatenation of all chunks written so far, of the `<` of the tag
	 * whose open/close event is currently being (or was most recently) emitted.
	 */
	startTagPosition: number;
	/**
	 * Offset of the `>` of that same tag, so that
	 * `allText.substring(startTagPosition, position + 1)` is the raw tag text.
	 */
	position: number;
	/** Processes a new chunk of text. */
	write(chunk: string): void;
}

/**
 * Creates a minimal, event-driven SAX-like parser.
 *
 * Text between tags is reported through `ontext`, `<name ...>` through `onopentag`,
 * `</name>` through `onclosetag`, and `<name/>` through `onopentag` immediately followed by
 * `onclosetag`. A tag that is split across chunks is held back until its `>` arrives.
 * Tags that are empty after trimming (e.g. `<>`) are dropped without any event.
 *
 * @param options An object of type `SaxParserOptions`. Passing `{ lowercase: true }` will force all tag names to be lower-cased.
 * @returns A parser object implementing the `SaxParser` interface.
 */
export function createSaxParser(options: SaxParserOptions = {}): SaxParser {
	// Unprocessed text: either empty or starting at the `<` of an incomplete tag.
	let buffer = '';
	// Offset of `buffer[0]` within the whole stream written so far.
	let bufferOffset = 0;

	const anyWhitespace = /\s/;
	const normalizeName = (name: string): string =>
		options.lowercase && name ? name.toLowerCase() : name;

	const parser: SaxParser = {
		onopentag: null,
		ontext: null,
		onclosetag: null,
		startTagPosition: 0,
		position: 0,

		write(chunk: string): void {
			buffer += chunk;

			let cursor = 0;
			while (cursor < buffer.length) {
				// Look for the next opening '<' character.
				const ltIndex = buffer.indexOf('<', cursor);
				if (ltIndex === -1) {
					// No more tags: everything left is text.
					parser.ontext?.(buffer.substring(cursor));
					cursor = buffer.length;
					break;
				}

				// Emit any text between the current cursor and the opening tag.
				if (ltIndex > cursor) {
					parser.ontext?.(buffer.substring(cursor, ltIndex));
				}

				// Look for the closing '>' character starting from the found '<'.
				const gtIndex = buffer.indexOf('>', ltIndex);
				if (gtIndex === -1) {
					// Incomplete tag: keep it (from its '<') for the next write.
					cursor = ltIndex;
					break;
				}

				// Extract the tag content (excluding the '<' and '>').
				let tagContent = buffer.substring(ltIndex + 1, gtIndex).trim();
				if (!tagContent) {
					cursor = gtIndex + 1;
					continue;
				}

				// Publish the stream positions of THIS tag before firing its events, so that
				// handlers can slice the raw tag text out of the full stream even when the
				// tag started in an earlier chunk.
				parser.startTagPosition = bufferOffset + ltIndex;
				parser.position = bufferOffset + gtIndex;

				if (tagContent[0] === '/') {
					// Closing tag.
					const tagName = normalizeName(tagContent.substring(1).trim());
					parser.onclosetag?.(tagName);
				} else {
					// Handle self-closing tags (ending with '/').
					let selfClosing = false;
					if (tagContent[tagContent.length - 1] === '/') {
						selfClosing = true;
						tagContent = tagContent.slice(0, -1).trim();
					}
					// The tag name is the first word before ANY whitespace (space, tab, newline).
					const wsIndex = tagContent.search(anyWhitespace);
					const tagName = normalizeName(
						wsIndex !== -1 ? tagContent.substring(0, wsIndex) : tagContent
					);
					if (parser.onopentag) {
						const node: SaxNode = { name: tagName, attributes: {} };
						parser.onopentag(node);
					}
					// If it's a self-closing tag, immediately emit a close tag event.
					if (selfClosing) {
						parser.onclosetag?.(tagName);
					}
				}

				// Move the cursor past the current tag.
				cursor = gtIndex + 1;
			}

			// Drop everything that was processed; what remains is an incomplete tag (or nothing).
			bufferOffset += cursor;
			buffer = buffer.substring(cursor);
		},
	};

	return parser;
}
|
||||
Loading…
Reference in a new issue