fix extractCode

This commit is contained in:
Andrew Pareles 2025-04-10 18:03:15 -07:00
parent 4445ff28ce
commit fa9a3db86f
4 changed files with 167 additions and 286 deletions

View file

@ -1500,9 +1500,9 @@ We only need to do it for files that were edited since `from`, ie files between
}
}, true)
// when change focused message idx, jump
if (messageIdx !== undefined)
this.jumpToCheckpointBeforeMessageIdx({ threadId, messageIdx, jumpToUserModified: true })
// // when change focused message idx, jump - do not jump back when click edit, too confusing.
// if (messageIdx !== undefined)
// this.jumpToCheckpointBeforeMessageIdx({ threadId, messageIdx, jumpToUserModified: true })
}
// set message.state

View file

@ -4,7 +4,7 @@
*--------------------------------------------------------------------------------------*/
import { DIVIDER, FINAL, ORIGINAL } from '../prompt/prompts.js'
class SurroundingsRemover {
export class SurroundingsRemover {
readonly originalS: string
i: number
j: number
@ -58,12 +58,13 @@ class SurroundingsRemover {
// return offset === suffix.length
// }
// either removes all or nothing
removeFromStartUntilFullMatch = (until: string, alsoRemoveUntilStr: boolean) => {
const index = this.originalS.indexOf(until, this.i)
if (index === -1) {
this.i = this.j + 1
return null
// this.i = this.j + 1
return false
}
// console.log('index', index, until.length)

View file

@ -3,11 +3,10 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
import { endsWithAnyPrefixOf } from '../../common/helpers/extractCodeFromResult.js'
import { endsWithAnyPrefixOf, SurroundingsRemover } from '../../common/helpers/extractCodeFromResult.js'
import { availableTools, InternalToolInfo, ToolName, ToolParamName } from '../../common/prompt/prompts.js'
import { OnFinalMessage, OnText, RawToolCallObj } from '../../common/sendLLMMessageTypes.js'
import { OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js'
import { ChatMode } from '../../common/voidSettingsTypes.js'
import { createSaxParser } from './sax.js'
// =============== reasoning ===============
@ -137,17 +136,113 @@ export const extractReasoningWrapper = (
// =============== tools ===============
type ToolsState = {
level: 'normal',
} | {
level: 'tool',
toolName: ToolName,
currentToolCall: RawToolCallObj,
} | {
level: 'param',
toolName: ToolName,
paramName: ToolParamName,
currentToolCall: RawToolCallObj,
/**
 * Checks whether `fullText` currently ends with the beginning of one of `toolTags`,
 * i.e. whether a tool tag might be in the middle of being written.
 *
 * Tags are tried in array order; the first tag for which `endsWithAnyPrefixOf`
 * yields a truthy value wins.
 *
 * @param fullText the text received so far
 * @param toolTags candidate tags, e.g. `<tool_name>`
 * @returns `[foundPrefix, toolTag]` (whatever truthy value `endsWithAnyPrefixOf`
 *          produced, plus the tag it was produced for), or `false` if no tag matched.
 */
const findPartiallyWrittenToolTagAtEnd = (fullText: string, toolTags: string[]) => {
	for (const candidateTag of toolTags) {
		const partial = endsWithAnyPrefixOf(fullText, candidateTag)
		// nothing partially written for this tag, try the next one
		if (!partial) continue
		return [partial, candidateTag] as const
	}
	return false
}
/**
 * Finds the EARLIEST occurrence in `fullText` of any of the strings in `matches`.
 *
 * The position in the text decides the winner, not the order of `matches`:
 * if several candidates occur, the one that starts first in `fullText` is returned.
 * (Returning the first candidate in list order could pick a later tag and leave an
 * earlier one untouched in the surrounding text.) When two candidates start at the
 * same index, the one listed first in `matches` is kept.
 *
 * @param fullText text to search
 * @param matches candidate substrings
 * @returns `[index, matchedString]` for the earliest hit, or `null` if none occur.
 */
const findIndexOfAny = (fullText: string, matches: string[]) => {
	let earliest: readonly [number, string] | null = null
	for (const str of matches) {
		const idx = fullText.indexOf(str)
		if (idx === -1) continue
		// strict `<` so that ties keep the candidate listed first
		if (earliest === null || idx < earliest[0]) {
			earliest = [idx, str] as const
		}
	}
	return earliest
}
// Lookup table keyed by tool name; a name with no entry reads as `undefined`.
type ToolOfToolName = { [toolName: string]: InternalToolInfo | undefined }
/**
 * Parses one XML-style tool call out of `str`. `str` may be an incomplete prefix
 * (for example text that is still streaming in), so every stage tolerates missing
 * closing tags.
 *
 * Expected shape:
 *   `<toolName><paramA>...</paramA><paramB>...</paramB></toolName>`
 *
 * Behavior:
 * - If `<toolName>` does not occur in `str`, an empty, not-done call is returned.
 * - `isDone` is true only if `</toolName>` occurs (its LAST occurrence bounds the body).
 * - Only params declared under `toolOfToolName[toolName].params` are recognized.
 * - Params are consumed in the order they appear in the text. A param is added to
 *   `doneParams` only once its OWN closing tag has been seen; if that closing tag is
 *   missing, the rest of the text becomes the param's (partial) value and parsing stops.
 * - Text between or after completed params is ignored; it is never appended to a
 *   param that has already been closed.
 * - At most 10 params are parsed per call (safety bound).
 *
 * @param toolName name of the tool whose tag should be parsed
 * @param str text starting at or before the `<toolName>` tag
 * @param toolOfToolName lookup used to find the params allowed for `toolName`
 * @returns the tool call as parsed so far
 */
const parseXMLPrefixToToolCall = (toolName: ToolName, str: string, toolOfToolName: ToolOfToolName): RawToolCallObj => {
	const paramsObj: RawToolParamsObj = {}
	const doneParams: ToolParamName[] = []
	let isDone = false

	// snapshot of everything parsed so far
	const getAnswer = (): RawToolCallObj => {
		const ans: RawToolCallObj = {
			name: toolName,
			rawParams: paramsObj,
			doneParams: doneParams,
			isDone: isDone
		}
		return ans
	}

	// find first toolName tag
	const openToolTag = `<${toolName}>`
	const openIdx = str.indexOf(openToolTag)
	if (openIdx === -1) return getAnswer()

	// the tool is done only if its closing tag is present; otherwise read to the end
	let closeIdx = str.lastIndexOf(`</${toolName}>`)
	if (closeIdx === -1) closeIdx = Infinity
	else isDone = true

	const inner = str.substring(openIdx + openToolTag.length, closeIdx)
	const pm = new SurroundingsRemover(inner)

	const allowedParams = Object.keys(toolOfToolName[toolName]?.params ?? {}) as ToolParamName[]
	if (allowedParams.length === 0) return getAnswer()

	// just for good measure as this code is early: never loop more than this many times
	const maxParamsToParse = 10

	for (let n = 0; n < maxParamsToParse; n += 1) {

		// pick the param whose opening tag appears EARLIEST in the unconsumed text,
		// so that params are read in document order regardless of declaration order
		let matchedOpenParam: ToolParamName | null = null
		let earliestIdx = Infinity
		for (const paramName of allowedParams) {
			const idx = pm.originalS.indexOf(`<${paramName}>`, pm.i)
			if (idx !== -1 && idx < earliestIdx) {
				earliestIdx = idx
				matchedOpenParam = paramName
			}
		}

		// no further param starts here; leftover text is not part of any param
		if (matchedOpenParam === null) return getAnswer()

		// consume everything up to and including the opening tag
		// (on success the helper leaves pm.i just past the matched tag)
		const consumedOpen = pm.removeFromStartUntilFullMatch(`<${matchedOpenParam}>`, true)
		if (!consumedOpen) return getAnswer()

		paramsObj[matchedOpenParam] = ''

		// a param can only be terminated by its own closing tag
		const contentStart = pm.i
		const closeTag = `</${matchedOpenParam}>`
		const consumedClose = pm.removeFromStartUntilFullMatch(closeTag, true)

		if (!consumedClose) {
			// closing tag not written yet: the remainder is the param's value so far
			// (pm.value() is presumably the not-yet-consumed text — confirm against SurroundingsRemover)
			paramsObj[matchedOpenParam] += pm.value()
			return getAnswer()
		}

		// pm.i is now just past the closing tag; the contents end where that tag begins
		paramsObj[matchedOpenParam] += pm.originalS.substring(contentStart, pm.i - closeTag.length)
		doneParams.push(matchedOpenParam)
	}

	return getAnswer()
}
export const extractToolsWrapper = (
@ -156,125 +251,18 @@ export const extractToolsWrapper = (
const tools = availableTools(chatMode)
if (!tools) return { newOnText: onText, newOnFinalMessage: onFinalMessage }
const toolOfToolName: { [toolName: string]: InternalToolInfo | undefined } = {}
const toolOfToolName: ToolOfToolName = {}
const toolOpenTags = tools.map(t => `<${t.name}>`)
for (const t of tools) { toolOfToolName[t.name] = t }
// detect <availableTools[0]></availableTools[0]>, etc
let fullText = '';
let trueFullText = ''
const firstToolCallRef: { current: RawToolCallObj | undefined } = { current: undefined }
let state: ToolsState = { level: 'normal' }
const getRawNewText = () => {
return trueFullText.substring(parser.startTagPosition, parser.position + 1)
}
const parser = createSaxParser()
// when see open tag <tagName>
parser.onopentag = (node) => {
const rawNewText = getRawNewText()
const tagName = node.name;
console.log('OPENING', tagName)
console.log('state0:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
if (state.level === 'normal') {
if (tagName in toolOfToolName) { // valid toolName
state = {
level: 'tool',
toolName: tagName as ToolName,
currentToolCall: { name: tagName as ToolName, rawParams: {}, doneParams: [], isDone: false }
}
firstToolCallRef.current = state.currentToolCall
}
else {
fullText += rawNewText // count as plaintext
console.log('adding raw a', rawNewText)
}
}
else if (state.level === 'tool') {
if (tagName in (toolOfToolName[state.toolName]?.params ?? {})) { // valid param
state = {
level: 'param',
toolName: state.toolName,
paramName: tagName as ToolParamName,
currentToolCall: state.currentToolCall,
}
}
else {
// would normally be rawNewText, but we ignore all text inside tools
}
}
else if (state.level === 'param') { // cannot double nest
fullText += rawNewText // count as plaintext
console.log('adding raw b', rawNewText)
}
console.log('state1:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
};
parser.onclosetag = (tagName) => {
const rawNewText = getRawNewText()
console.log('CLOSING', tagName)
console.log('state0:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
if (state.level === 'normal') {
fullText += rawNewText
console.log('adding raw A', rawNewText)
}
else if (state.level === 'tool') {
if (tagName === state.toolName) { // closed the tool
state.currentToolCall.isDone = true
state = {
level: 'normal',
}
}
else { // add as text
fullText += rawNewText
console.log('adding raw B', rawNewText)
}
}
else if (state.level === 'param') {
if (tagName === state.paramName) { // closed the param
state.currentToolCall.doneParams.push(state.paramName)
state = {
level: 'tool',
toolName: state.toolName,
currentToolCall: state.currentToolCall,
}
}
else {
fullText += rawNewText
console.log('adding raw C', rawNewText)
}
}
console.log('state1:', state.level, { toolName: (state as any).toolName, paramName: (state as any).paramName })
};
parser.ontext = (text) => {
if (state.level === 'normal') {
fullText += text
}
// start param
else if (state.level === 'tool') {
// ignore all text in a tool, all text should go in the param tags inside it
}
else if (state.level === 'param') {
if (!(state.paramName in state.currentToolCall.rawParams)) state.currentToolCall.rawParams[state.paramName] = ''
state.currentToolCall.rawParams[state.paramName] += text
}
}
let latestToolCall: RawToolCallObj | undefined = undefined
let foundOpenTag: { idx: number, toolName: ToolName } | null = null
let openToolTagBuffer = '' // the characters we've seen so far that come after a < with no space afterwards, not yet added to fullText
let prevFullTextLen = 0
const newOnText: OnText = (params) => {
@ -282,13 +270,55 @@ export const extractToolsWrapper = (
prevFullTextLen = params.fullText.length
trueFullText = params.fullText
parser.write(newText)
console.log('NEWTEXT', JSON.stringify(newText))
if (foundOpenTag === null) {
const newFullText = openToolTagBuffer + newText
// ensure the code below doesn't run if only half a tag has been written
const isPartial = findPartiallyWrittenToolTagAtEnd(newFullText, toolOpenTags)
if (isPartial) {
console.log('--- partial!!!')
openToolTagBuffer += newText
}
// if no tooltag is partially written at the end, attempt to get the index
else {
// we will instantly retroactively remove this if it's a tag match
fullText += openToolTagBuffer
openToolTagBuffer = ''
fullText += newText
const i = findIndexOfAny(fullText, toolOpenTags)
if (i !== null) {
const [idx, toolTag] = i
const toolName = toolTag.substring(1, toolTag.length - 1) as ToolName
console.log('found ', toolName)
foundOpenTag = { idx, toolName }
// do not count anything at or after i in fullText
fullText = fullText.substring(0, idx)
}
}
}
// toolTagIdx is not null, so parse the XML
if (foundOpenTag !== null) {
latestToolCall = parseXMLPrefixToToolCall(
foundOpenTag.toolName,
trueFullText.substring(foundOpenTag.idx, Infinity),
toolOfToolName,
)
}
// firstToolCallRef.current === state.currentToolCall is always true
onText({
...params,
fullText,
toolCall: firstToolCallRef.current,
toolCall: latestToolCall,
});
};
@ -298,7 +328,7 @@ export const extractToolsWrapper = (
newOnText({ ...params })
fullText = fullText.trimEnd()
const toolCall = firstToolCallRef.current
const toolCall = latestToolCall
if (toolCall) {
// trim off all whitespace at and before first \n and after last \n for each param
for (const p in toolCall.rawParams) {

View file

@ -1,150 +0,0 @@
/*--------------------------------------------------------------------------------------
* Copyright 2025 Glass Devtools, Inc. All rights reserved.
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
// Options accepted by `createSaxParser`.
export interface SaxParserOptions {
	// When true, tag names are lower-cased before being passed to the handlers.
	lowercase?: boolean;
}

// An opening tag as reported to `onopentag`.
export interface SaxNode {
	name: string;
	// Attributes are not parsed by this implementation; this is always an empty object.
	attributes: { [key: string]: string };
}

// A minimal, event-driven, SAX-like parser that is fed text chunk by chunk.
export interface SaxParser {
	// Event handlers that can be set by the consumer.
	onopentag: ((node: SaxNode) => void) | null;
	ontext: ((text: string) => void) | null;
	onclosetag: ((tagName: string) => void) | null;

	// Offset, counted over everything written so far, at which the most recent chunk starts.
	startTagPosition: number;
	// Offset, counted over everything written so far, of the last character of the most recent chunk.
	position: number;

	// Processes a new chunk of text.
	write(chunk: string): void;
}

/**
 * Creates a minimal, event-driven SAX-like parser.
 *
 * Text outside of tags is reported through `ontext`, `<name ...>` through `onopentag`,
 * `</name>` through `onclosetag`, and `<name ... />` as an open event immediately followed
 * by a close event. A tag that has been started (`<`) but not yet finished (`>`) is held
 * back until a later `write` completes it. Empty tags (`<>`) are dropped without an event.
 *
 * The tag name is everything up to the first whitespace character of any kind (space,
 * tab, newline, ...), so `<tool\n attr="x">` is reported as `tool`.
 *
 * @param options An object of type `SaxParserOptions`. Passing `{ lowercase: true }` will force all tag names to be lower-cased.
 * @returns A parser object implementing the `SaxParser` interface.
 */
export function createSaxParser(options: SaxParserOptions = {}): SaxParser {
	// Leftover text from previous writes (the start of an incomplete tag).
	let buffer = '';
	// Total number of characters passed to `write` so far.
	let globalPos = 0;

	// Hoisted so the regex is not re-created for every tag.
	const anyWhitespace = /\s/;

	// Applies the `lowercase` option to a tag name.
	const normalizeName = (name: string): string =>
		options.lowercase && name ? name.toLowerCase() : name;

	const parser: SaxParser = {
		onopentag: null,
		ontext: null,
		onclosetag: null,
		startTagPosition: 0,
		position: 0,

		write(chunk: string): void {
			// Record where this chunk starts and ends in the overall stream.
			this.startTagPosition = globalPos;
			buffer += chunk;
			globalPos += chunk.length;
			this.position = globalPos - 1;

			// Everything before `cursor` has been fully handled.
			let cursor = 0;

			while (cursor < buffer.length) {
				const ltIndex = buffer.indexOf('<', cursor);
				if (ltIndex === -1) {
					// No more tags: the rest is plain text.
					this.ontext?.(buffer.substring(cursor));
					cursor = buffer.length;
					break;
				}

				// Emit any text between the cursor and the tag.
				if (ltIndex > cursor) {
					this.ontext?.(buffer.substring(cursor, ltIndex));
				}

				const gtIndex = buffer.indexOf('>', ltIndex);
				if (gtIndex === -1) {
					// Incomplete tag: keep it (from the '<') for the next write.
					cursor = ltIndex;
					break;
				}

				// Tag content without the surrounding '<' and '>'.
				let tagContent = buffer.substring(ltIndex + 1, gtIndex).trim();

				if (tagContent) {
					if (tagContent[0] === '/') {
						// Closing tag.
						this.onclosetag?.(normalizeName(tagContent.substring(1).trim()));
					} else {
						// Self-closing tags end with '/'.
						let selfClosing = false;
						if (tagContent[tagContent.length - 1] === '/') {
							selfClosing = true;
							tagContent = tagContent.slice(0, -1).trim();
						}

						// The name ends at the first whitespace of ANY kind, not only at a space.
						const wsIndex = tagContent.search(anyWhitespace);
						const tagName = normalizeName(
							wsIndex === -1 ? tagContent : tagContent.substring(0, wsIndex)
						);

						const node: SaxNode = { name: tagName, attributes: {} };
						this.onopentag?.(node);

						// A self-closing tag is immediately closed again.
						if (selfClosing) {
							this.onclosetag?.(tagName);
						}
					}
				}

				// Move past the current tag (empty tags are skipped silently).
				cursor = gtIndex + 1;
			}

			// Drop what has been handled; an incomplete tag (if any) stays buffered.
			buffer = buffer.substring(cursor);
		},
	};

	return parser;
}