From cd77542a9e632c381aed9618ee093135528f30f6 Mon Sep 17 00:00:00 2001 From: mp Date: Tue, 19 Nov 2024 03:57:06 -0800 Subject: [PATCH 1/2] Autocomplete (max number of pending requests, filter for matchup with user's text, better prompt) --- extensions/void/src/common/SimpleLruCache.ts | 32 ------ extensions/void/src/common/getPrompt.ts | 17 ++- extensions/void/src/common/sendLLMMessage.ts | 5 +- .../void/src/extension/AutcompleteProvider.ts | 107 +++++++++++------- .../src/webviews/common/contextForConfig.tsx | 5 +- 5 files changed, 86 insertions(+), 80 deletions(-) delete mode 100644 extensions/void/src/common/SimpleLruCache.ts diff --git a/extensions/void/src/common/SimpleLruCache.ts b/extensions/void/src/common/SimpleLruCache.ts deleted file mode 100644 index 7118bc8f..00000000 --- a/extensions/void/src/common/SimpleLruCache.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { LRUCache } from 'lru-cache'; - -const DEFAULT_MAX_SIZE = 20 - - -export class SimpleLRUCache { - private cache: LRUCache; - private maxSize: number - public length: number - - constructor(maxSize?: number) { - - maxSize = maxSize ?? DEFAULT_MAX_SIZE - - this.cache = new LRUCache({ max: maxSize }); - this.length = 0 - this.maxSize = maxSize - } - - push(value: T): void { - const key = this.cache.size; - this.cache.set(key, value); - this.length++ - this.length = Math.min(this.length, this.maxSize) - } - - values() { - return this.cache.values() - } - - -} \ No newline at end of file diff --git a/extensions/void/src/common/getPrompt.ts b/extensions/void/src/common/getPrompt.ts index f1c7567b..6fb07f4c 100644 --- a/extensions/void/src/common/getPrompt.ts +++ b/extensions/void/src/common/getPrompt.ts @@ -22,9 +22,10 @@ export const getFIMSystem: GetFIMPrompt = ({ voidConfig, fimInfo }) => { Instruction summary: 1. Return the MIDDLE of the code between the START and END. 2. Do not give an explanation, description, or any other code besides the middle. -2. Do not return duplicate code from either START or END. -3. Make sure the MIDDLE piece of code has balanced brackets that match the START and END. -4. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line. +3. Do not return duplicate code from either START or END. +4. Make sure the MIDDLE piece of code has balanced brackets that match the START and END. +5. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line. +6. Around 90% of the time, you should return just one or a few lines of code. You should keep your outputs short unless you are confident the user is trying to write boilderplate code. # EXAMPLE @@ -75,11 +76,19 @@ export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => { // if no prefix or suffix, return empty string if (!fimInfo.prefix.trim() && !fimInfo.suffix.trim()) return '' + // instruct model to generate a single line if there is text immediately after the cursor + const suffixLines = fimInfo.suffix.split('\n'); + const afterCursor = suffixLines[0] || ''; + const generateSingleLine = afterCursor.trim().length > 0; + const singleLinePrompt = generateSingleLine ? `Please produce a single line of code that fills in the middle.` : '' + // TODO may want to trim the prefix and suffix switch (voidConfig.default.whichApi) { case 'ollama': if (voidConfig.ollama.model === 'codestral') { - return `[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}` + return `${singleLinePrompt}[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}` + } else if (voidConfig.ollama.model.includes('qwen')) { + return `${singleLinePrompt}<|fim_prefix|>${fimInfo.prefix}<|fim_suffix|>${fimInfo.suffix}<|fim_middle|>` } return '' case 'anthropic': diff --git a/extensions/void/src/common/sendLLMMessage.ts b/extensions/void/src/common/sendLLMMessage.ts index a86f5c49..2ff1769f 100644 --- a/extensions/void/src/common/sendLLMMessage.ts +++ b/extensions/void/src/common/sendLLMMessage.ts @@ -247,12 +247,12 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on let didAbort = false let fullText = "" + const ollama = new Ollama({ host: voidConfig.ollama.endpoint }) + abortRef.current = () => { didAbort = true; }; - const ollama = new Ollama({ host: voidConfig.ollama.endpoint }) - type GenerateResponse = Awaited> type ChatResponse = Awaited> @@ -271,7 +271,6 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on } if (mode === 'fim') { - // the fim prompt is the last message let prompt = messages[messages.length - 1].content return ollama.generate({ diff --git a/extensions/void/src/extension/AutcompleteProvider.ts b/extensions/void/src/extension/AutcompleteProvider.ts index 2aad378a..587c5da9 100644 --- a/extensions/void/src/extension/AutcompleteProvider.ts +++ b/extensions/void/src/extension/AutcompleteProvider.ts @@ -2,22 +2,24 @@ import * as vscode from 'vscode'; import { AbortRef, LLMMessage, sendLLMMessage } from '../common/sendLLMMessage'; import { getVoidConfigFromPartial, VoidConfig } from '../webviews/common/contextForConfig'; import { LRUCache } from 'lru-cache'; -import { SimpleLRUCache } from '../common/SimpleLruCache'; type AutocompletionStatus = 'pending' | 'finished' | 'error'; type Autocompletion = { + id: number, prefix: string, suffix: string, startTime: number, endTime: number | undefined, abortRef: AbortRef, status: AutocompletionStatus, - promise: Promise | undefined, + llmPromise: Promise | undefined, result: string, } -const DEBOUNCE_TIME = 300 +const DEBOUNCE_TIME = 500 const TIMEOUT_TIME = 60000 +const MAX_CACHE_SIZE = 20 +const MAX_PENDING_REQUESTS = 2 // postprocesses the result const postprocessResult = (result: string) => { @@ -72,10 +74,6 @@ const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: stri const lastMatchupIndex = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length - console.log('generatedMiddle ', generatedMiddle) - console.log('trimmedOriginalPrefix ', trimmedOriginalPrefix) - console.log('trimmedCurrentPrefix ', trimmedCurrentPrefix) - console.log('index: ', lastMatchupIndex) if (lastMatchupIndex < 0) { return new vscode.InlineCompletionItem('') } @@ -90,19 +88,19 @@ const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: stri } -// returns whether we can use this autocompletion to complete the prefix +// returns whether this autocompletion is in the cache const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: string, autocompletion: Autocompletion }): boolean => { const originalPrefix = autocompletion.prefix const generatedMiddle = autocompletion.result - const trimmedOriginalPrefix = trimPrefix(originalPrefix) - const trimmedCurrentPrefix = trimPrefix(prefix) + const originalPrefixTrimmed = trimPrefix(originalPrefix) + const currentPrefixTrimmed = trimPrefix(prefix) - if (trimmedCurrentPrefix.length < trimmedOriginalPrefix.length) { + if (currentPrefixTrimmed.length < originalPrefixTrimmed.length) { return false } - const isMatch = (trimmedOriginalPrefix + generatedMiddle).startsWith(trimmedCurrentPrefix) + const isMatch = (originalPrefixTrimmed + generatedMiddle).startsWith(currentPrefixTrimmed) return isMatch } @@ -111,11 +109,14 @@ const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: str export class AutocompleteProvider implements vscode.InlineCompletionItemProvider { + private _extensionContext: vscode.ExtensionContext; - private _autocompletionsOfDocument: { [docUriStr: string]: SimpleLRUCache } = {} + private _autocompletionId: number = 0; + private _autocompletionsOfDocument: { [docUriStr: string]: LRUCache } = {} - private _lastTime = 0 + private _lastCompletionTime = 0 + private _lastPrefix: string = '' constructor(context: vscode.ExtensionContext) { this._extensionContext = context @@ -130,7 +131,7 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider token: vscode.CancellationToken, ): Promise { - const disabled = true + const disabled = false if (disabled) { return []; } const docUriStr = document.uri.toString() @@ -139,20 +140,26 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider const cursorOffset = document.offsetAt(position); const prefix = fullText.substring(0, cursorOffset) const suffix = fullText.substring(cursorOffset) - - if (!this._autocompletionsOfDocument[docUriStr]) { - this._autocompletionsOfDocument[docUriStr] = new SimpleLRUCache() - } - const voidConfig = getVoidConfigFromPartial(this._extensionContext.globalState.get('partialVoidConfig') ?? {}) + // initialize cache and other variables + // note that whenever an autocompletion is rejected, it is removed from cache + if (!this._autocompletionsOfDocument[docUriStr]) { + this._autocompletionsOfDocument[docUriStr] = new LRUCache({ + max: MAX_CACHE_SIZE, + dispose: (autocompletion) => { autocompletion.abortRef.current() } + }) + } + this._lastPrefix = prefix + console.log('cache size: ', this._autocompletionsOfDocument[docUriStr].size) + // get autocompletion from cache let cachedAutocompletion: Autocompletion | undefined = undefined - loop: for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { + for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { // if the user's change matches up with the generated text if (doesPrefixMatchAutocompletion({ prefix, autocompletion })) { cachedAutocompletion = autocompletion - break loop; + break } } @@ -169,11 +176,12 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider console.log('AAA2') try { - await cachedAutocompletion.promise; + await cachedAutocompletion.llmPromise; const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position }) return [inlineCompletion] } catch (e) { + this._autocompletionsOfDocument[docUriStr].delete(cachedAutocompletion.id) console.error('Error creating autocompletion (1): ' + e) } @@ -184,15 +192,13 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider return [] } - - // if there is no cached autocompletion, create it and add it to cache - + // else if no more typing happens, then go forwards with the request // wait DEBOUNCE_TIME for the user to stop typing const thisTime = Date.now() - this._lastTime = thisTime + this._lastCompletionTime = thisTime const didTypingHappenDuringDebounce = await new Promise((resolve, reject) => setTimeout(() => { - if (this._lastTime === thisTime) { + if (this._lastCompletionTime === thisTime) { resolve(false) } else { resolve(true) @@ -207,27 +213,50 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider console.log('BBB') - // else if no more typing happens, then go forwards with the request + // if there are too many pending requests, cancel the oldest one + let numPending = 0 + let oldestPending: Autocompletion | undefined = undefined + for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { + if (autocompletion.status === 'pending') { + numPending += 1 + if (oldestPending === undefined) { + oldestPending = autocompletion + } + if (numPending >= MAX_PENDING_REQUESTS) { + // cancel the oldest pending request and remove it from cache + this._autocompletionsOfDocument[docUriStr].delete(oldestPending.id) + break + } + } + } + + // create a new autocompletion and add it to cache const newAutocompletion: Autocompletion = { + id: this._autocompletionId++, prefix: prefix, suffix: suffix, startTime: Date.now(), endTime: undefined, abortRef: { current: () => { } }, status: 'pending', - promise: undefined, + llmPromise: undefined, result: '', } // set parameters of `newAutocompletion` appropriately - newAutocompletion.promise = new Promise((resolve, reject) => { + newAutocompletion.llmPromise = new Promise((resolve, reject) => { sendLLMMessage({ mode: 'fim', fimInfo: { prefix, suffix }, onText: async (tokenStr, completionStr) => { - // TODO filter out bad responses here + newAutocompletion.result = completionStr + + // if generation doesn't match the prefix for the first few tokens generated, reject it + if (completionStr.length < 20 && !doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { + reject('LLM response did not match user\'s text.') + } }, onFinalMessage: (finalMessage) => { @@ -252,24 +281,28 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider abortRef: newAutocompletion.abortRef, }) - setTimeout(() => { // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it + // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it + setTimeout(() => { if (newAutocompletion.status === 'pending') { - reject('Timeout') + reject('Timeout receiving message to LLM.') } }, TIMEOUT_TIME) + + }) // add autocompletion to cache - this._autocompletionsOfDocument[docUriStr].push(newAutocompletion) + this._autocompletionsOfDocument[docUriStr].set(newAutocompletion.id, newAutocompletion) // show autocompletion try { - await newAutocompletion.promise; + await newAutocompletion.llmPromise; const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, position }) return [inlineCompletion] } catch (e) { + this._autocompletionsOfDocument[docUriStr].delete(newAutocompletion.id) console.error('Error creating autocompletion (2): ' + e) return [] } @@ -277,6 +310,4 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider } - - } diff --git a/extensions/void/src/webviews/common/contextForConfig.tsx b/extensions/void/src/webviews/common/contextForConfig.tsx index 47e96e60..ad665a84 100644 --- a/extensions/void/src/webviews/common/contextForConfig.tsx +++ b/extensions/void/src/webviews/common/contextForConfig.tsx @@ -1,5 +1,6 @@ import React, { ReactNode, createContext, useCallback, useContext, useEffect, useRef, useState, } from "react" import { awaitVSCodeResponse, getVSCodeAPI, useOnVSCodeMessage } from "./getVscodeApi" +import { Ollama } from "ollama/browser" const configEnum = (description: string, defaultVal: EnumArr[number], enumArr: EnumArr) => { return { @@ -29,8 +30,6 @@ export const configFields = [ 'azure', ] as const - - const voidConfigInfo: Record< typeof configFields[number] | 'default', { [prop: string]: { @@ -122,7 +121,7 @@ const voidConfigInfo: Record< model: configEnum( 'Ollama model to use.', 'codestral', - ["codestral", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", "codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", "llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const + ["codestral", "qwen2.5-coder", "qwen2.5-coder:0.5B", "qwen2.5-coder:1.5B", "qwen2.5-coder:3B", "qwen2.5-coder:7B", "qwen2.5-coder:14B", "qwen2.5-coder:32B", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", "codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", "llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const ), }, openRouter: { From 53d19d819c0b62bf64d9542792e0d53968d7d8d3 Mon Sep 17 00:00:00 2001 From: mp Date: Thu, 21 Nov 2024 05:58:43 -0800 Subject: [PATCH 2/2] autocomplete UX --- extensions/void/src/common/getPrompt.ts | 24 +-- extensions/void/src/common/sendLLMMessage.ts | 28 ++- .../void/src/extension/AutcompleteProvider.ts | 204 +++++++++++++++--- .../void/src/extension/applyDiffLazily.ts | 1 - extensions/void/src/extension/extension.ts | 5 +- .../src/webviews/common/contextForConfig.tsx | 2 +- .../void/src/webviews/sidebar/SidebarChat.tsx | 2 +- 7 files changed, 212 insertions(+), 54 deletions(-) diff --git a/extensions/void/src/common/getPrompt.ts b/extensions/void/src/common/getPrompt.ts index 6fb07f4c..586148f3 100644 --- a/extensions/void/src/common/getPrompt.ts +++ b/extensions/void/src/common/getPrompt.ts @@ -73,22 +73,23 @@ const z = 3 export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => { - // if no prefix or suffix, return empty string - if (!fimInfo.prefix.trim() && !fimInfo.suffix.trim()) return '' + const { prefix: fullPrefix, suffix: fullSuffix } = fimInfo + const prefix = fullPrefix.split('\n').slice(-20).join('\n') + const suffix = fullSuffix.split('\n').slice(0, 20).join('\n') - // instruct model to generate a single line if there is text immediately after the cursor - const suffixLines = fimInfo.suffix.split('\n'); - const afterCursor = suffixLines[0] || ''; - const generateSingleLine = afterCursor.trim().length > 0; - const singleLinePrompt = generateSingleLine ? `Please produce a single line of code that fills in the middle.` : '' + + console.log('prefix', JSON.stringify(prefix)) + console.log('suffix', JSON.stringify(suffix)) + + if (!prefix.trim() && !suffix.trim()) return '' // TODO may want to trim the prefix and suffix switch (voidConfig.default.whichApi) { case 'ollama': if (voidConfig.ollama.model === 'codestral') { - return `${singleLinePrompt}[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}` + return `[SUFFIX]${suffix}[PREFIX] ${prefix}` } else if (voidConfig.ollama.model.includes('qwen')) { - return `${singleLinePrompt}<|fim_prefix|>${fimInfo.prefix}<|fim_suffix|>${fimInfo.suffix}<|fim_middle|>` + return `<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>` } return '' case 'anthropic': @@ -101,14 +102,13 @@ export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => { default: return `## START: \`\`\` -${fimInfo.prefix} +${prefix} \`\`\` ## END: \`\`\` -${fimInfo.suffix} +${suffix} \`\`\` ` - } } diff --git a/extensions/void/src/common/sendLLMMessage.ts b/extensions/void/src/common/sendLLMMessage.ts index 2ff1769f..395486e6 100644 --- a/extensions/void/src/common/sendLLMMessage.ts +++ b/extensions/void/src/common/sendLLMMessage.ts @@ -5,7 +5,7 @@ import { Content, GoogleGenerativeAI, GoogleGenerativeAIError, GoogleGenerativeA import { VoidConfig } from '../webviews/common/contextForConfig' import { getFIMPrompt, getFIMSystem } from './getPrompt'; -export type AbortRef = { current: (() => void) | null } +export type AbortRef = { current: (() => void) } export type OnText = (newText: string, fullText: string) => void @@ -21,9 +21,12 @@ export type LLMMessage = { content: string, } +type LLMMessageOptions = { stopTokens?: string[] } + type SendLLMMessageFnTypeInternal = (params: { mode: 'chat' | 'fim', messages: LLMMessage[], + options?: LLMMessageOptions, onText: OnText, onFinalMessage: OnFinalMessage, onError: (error: string) => void, @@ -34,8 +37,9 @@ type SendLLMMessageFnTypeInternal = (params: { type SendLLMMessageFnTypeExternal = (params: ( | { mode?: 'chat', messages: LLMMessage[], fimInfo?: undefined, } - | { mode: 'fim', fimInfo: FimInfo, messages?: undefined, } + | { mode: 'fim', messages?: undefined, fimInfo: FimInfo, } ) & { + options?: LLMMessageOptions, onText: OnText, onFinalMessage: OnFinalMessage, onError: (error: string) => void, @@ -242,7 +246,7 @@ const sendOpenAIMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinal }; // Ollama -export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => { +export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => { let didAbort = false let fullText = "" @@ -278,6 +282,7 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on prompt: prompt, stream: true, raw: true, + options: { stop: options?.stopTokens } }) } @@ -293,6 +298,7 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on abortRef.current = () => { didAbort = true + stream.abort() } for await (const chunk of stream) { if (didAbort) return; @@ -386,7 +392,7 @@ const sendGreptileMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFin } -export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ mode, messages, fimInfo, onText, onFinalMessage, onError, voidConfig, abortRef }) => { +export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ options, mode, messages, fimInfo, onText, onFinalMessage, onError, voidConfig, abortRef }) => { if (!voidConfig) return onError('No config file found for LLM.'); @@ -406,27 +412,29 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ mode, messages, f { role: 'system', content: system }, { role: 'user', content: prompt } ] as const) - .filter(m => m.content.trim() !== '') + } // trim message content (Anthropic and other providers give an error if there is trailing whitespace) messages = messages.map(m => ({ ...m, content: m.content.trim() })) + .filter(m => m.content !== '') + if (messages.length === 0) return onError('No messages provided to LLM.'); switch (voidConfig.default.whichApi) { case 'anthropic': - return sendAnthropicMsg({ mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); + return sendAnthropicMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); case 'openAI': case 'openRouter': case 'openAICompatible': - return sendOpenAIMsg({ mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); + return sendOpenAIMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); case 'gemini': - return sendGeminiMsg({ mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); + return sendGeminiMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); case 'ollama': - return sendOllamaMsg({ mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); + return sendOllamaMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); case 'greptile': - return sendGreptileMsg({ mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); + return sendGreptileMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); default: onError(`Error: whichApi was ${voidConfig.default.whichApi}, which is not recognized!`) } diff --git a/extensions/void/src/extension/AutcompleteProvider.ts b/extensions/void/src/extension/AutcompleteProvider.ts index 587c5da9..dd4b044e 100644 --- a/extensions/void/src/extension/AutcompleteProvider.ts +++ b/extensions/void/src/extension/AutcompleteProvider.ts @@ -3,6 +3,47 @@ import { AbortRef, LLMMessage, sendLLMMessage } from '../common/sendLLMMessage'; import { getVoidConfigFromPartial, VoidConfig } from '../webviews/common/contextForConfig'; import { LRUCache } from 'lru-cache'; + +/* +A summary of autotab: + +Postprocessing +-one common problem for all models is outputting unbalanced parentheses +we solve this by trimming all extra closing parentheses from the generated string +in future, should make sure parentheses are always balanced + +-another problem is completing the middle of a string, eg. "const [x, CURSOR] = useState()" +we complete up to first matchup character +but should instead complete the whole line / block (difficult because of parenthesis accuracy) + +-too much info is bad. usually we want to show the user 1 line, and have a preloaded response afterwards +this should happen automatically with caching system +should break preloaded responses into \n\n chunks + +Preprocessing +- we don't generate if cursor is at end / beginning of a line (no spaces) +- we generate 1 line if there is text to the right of cursor +- we generate 1 line if variable declaration +- (in many cases want to show 1 line but generate multiple) + +State +- cache based on prefix (and do some trimming first) +- when press tab on one line, should have an immediate followup response +to do this, show autocompletes before they're fully finished +- [todo] remove each autotab when accepted +- [todo] treat windows \r\n separately from \n +!- [todo] provide type information + +Details +-generated results are trimmed up to 1 leading/trailing space +-prefixes are cached up to 1 trailing newline +- +*/ + + + + + type AutocompletionStatus = 'pending' | 'finished' | 'error'; type Autocompletion = { id: number, @@ -24,15 +65,21 @@ const MAX_PENDING_REQUESTS = 2 // postprocesses the result const postprocessResult = (result: string) => { - // remove leading whitespace from result - return result.trimStart() + console.log('result: ', JSON.stringify(result)) + + // trim all whitespace except for a single leading/trailing space + const hasLeadingSpace = result.startsWith(' '); + const hasTrailingSpace = result.endsWith(' '); + return (hasLeadingSpace ? ' ' : '') + + result.trim() + + (hasTrailingSpace ? ' ' : ''); } const extractCodeFromResult = (result: string) => { // extract the code between triple backticks - const parts = result.split(/```/); + const parts = result.split(/```(?:\s*\w+)?\n?/); // if there is no ``` then return the raw result if (parts.length === 1) { @@ -58,6 +105,28 @@ const trimPrefix = (prefix: string) => { return trimmedPrefix } +function getStringUpToUnbalancedParenthesis(s: string, prefixToTheLeft: string): string { + + const pairs: Record = { ')': '(', '}': '{', ']': '[' }; + + // todo find first open bracket in prefix and get all brackets beyond it in prefix + // get all bracets in prefix + let stack: string[] = [] + const firstOpenIdx = prefixToTheLeft.search(/[[({]/); + if (firstOpenIdx !== -1) stack = prefixToTheLeft.slice(firstOpenIdx).split('').filter(c => '()[]{}'.includes(c)) + + // Iterate through each character + for (let i = 0; i < s.length; i++) { + const char = s[i]; + + if (char === '(' || char === '{' || char === '[') { stack.push(char); } + else if (char === ')' || char === '}' || char === ']') { + if (stack.length === 0 || stack.pop() !== pairs[char]) { return s.substring(0, i); } + } + } + return s; +} + // finds the text in the autocompletion to display, assuming the prefix is already matched // example: // originalPrefix = abcd @@ -65,26 +134,69 @@ const trimPrefix = (prefix: string) => { // originalSuffix = ijkl // the user has typed "ef" so prefix = abcdef // we want to return the rest of the generatedMiddle, which is "gh" -const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => { +const toInlineCompletion = ({ prefix, suffix, autocompletion, position }: { prefix: string, suffix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => { const originalPrefix = autocompletion.prefix const generatedMiddle = autocompletion.result const trimmedOriginalPrefix = trimPrefix(originalPrefix) const trimmedCurrentPrefix = trimPrefix(prefix) - const lastMatchupIndex = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length + const suffixLines = suffix.split('\n') + const prefixLines = trimmedCurrentPrefix.split('\n') + const suffixToTheRightOfCursor = suffixLines[0].trim() + const prefixToTheLeftOfCursor = prefixLines[prefixLines.length - 1].trim() - if (lastMatchupIndex < 0) { + const generatedLines = generatedMiddle.split('\n') + + // compute startIdx + let startIdx = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length + if (startIdx < 0) { return new vscode.InlineCompletionItem('') } - const completionStr = generatedMiddle.substring(lastMatchupIndex) - console.log('completionStr: ', completionStr) + // compute endIdx + // hacks to get the suffix to render properly with lower quality models + // if the generated text matches with the suffix on the current line, stop + let endIdx: number | undefined = generatedMiddle.length // exclusive bounds - return new vscode.InlineCompletionItem( - completionStr, - new vscode.Range(position, position) - ) + if (suffixToTheRightOfCursor !== '') { // completing in the middle of a line + console.log('1') + // complete until there is a match + const matchIndex = generatedMiddle.lastIndexOf(suffixToTheRightOfCursor[0]) + if (matchIndex > 0) { endIdx = matchIndex } + } + + if (prefixToTheLeftOfCursor !== '') { // completing the end of a line + console.log('2') + // show a single line + const newlineIdx = generatedMiddle.indexOf('\n') + if (newlineIdx > -1) { endIdx = newlineIdx } + } + + // // if a generated line matches with a suffix line, stop + // if (suffixLines.length > 1) { + // console.log('3') + // const lines = [] + // for (const generatedLine of generatedLines) { + // if (suffixLines.slice(0, 10).some(suffixLine => + // generatedLine.trim() !== '' && suffixLine.trim() !== '' + // && generatedLine.trim().startsWith(suffixLine.trim()) + // )) break; + // lines.push(generatedLine) + // } + // endIdx = lines.join('\n').length // this is hacky, remove or refactor in future + // } + + let completionStr = generatedMiddle.slice(startIdx, endIdx) + + // filter out unbalanced parentheses + console.log('completionStrBeforeParens: ', JSON.stringify(completionStr)) + completionStr = getStringUpToUnbalancedParenthesis(completionStr, prefixLines.slice(-2).join('\n')) + + console.log('originalCompletionStr: ', JSON.stringify(generatedMiddle.slice(startIdx))) + console.log('finalCompletionStr: ', JSON.stringify(completionStr)) + + return new vscode.InlineCompletionItem(completionStr, new vscode.Range(position, position)) } @@ -105,11 +217,39 @@ const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: str } +const getCompletionOptions = ({ prefix, suffix }: { prefix: string, suffix: string }) => { + const prefixLines = prefix.split('\n') + const suffixLines = suffix.split('\n') + + const prefixToLeftOfCursor = prefixLines.slice(-1)[0] ?? '' + const suffixToRightOfCursor = suffixLines[0] + + // default parameters + let shouldGenerate = true + let stopTokens: string[] = ['\n\n', '\r\n\r\n'] + + // specific cases + if (suffixToRightOfCursor.trim() !== '') { // typing between something + stopTokens = ['\n', '\r\n'] + } + + // if (prefixToLeftOfCursor.trim() === '' && suffixToRightOfCursor.trim() === '') { // at an empty line + // stopTokens = ['\n\n', '\r\n\r\n'] + // } + + if (prefixToLeftOfCursor === '' || suffixToRightOfCursor === '') { // at beginning or end of line + shouldGenerate = false + } + + console.log('shouldGenerate:', shouldGenerate, stopTokens) + + return { shouldGenerate, stopTokens } + +} export class AutocompleteProvider implements vscode.InlineCompletionItemProvider { - private _extensionContext: vscode.ExtensionContext; private _autocompletionId: number = 0; @@ -123,7 +263,7 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider } // used internally by vscode - // fires after every keystroke + // fires after every keystroke and returns the completion to show async provideInlineCompletionItems( document: vscode.TextDocument, position: vscode.Position, @@ -136,6 +276,7 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider const docUriStr = document.uri.toString() + const fullText = document.getText(); const cursorOffset = document.offsetAt(position); const prefix = fullText.substring(0, cursorOffset) @@ -147,11 +288,17 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider if (!this._autocompletionsOfDocument[docUriStr]) { this._autocompletionsOfDocument[docUriStr] = new LRUCache({ max: MAX_CACHE_SIZE, - dispose: (autocompletion) => { autocompletion.abortRef.current() } + dispose: (autocompletion) => { + autocompletion.abortRef.current() + } }) } this._lastPrefix = prefix - console.log('cache size: ', this._autocompletionsOfDocument[docUriStr].size) + + // get all pending autocompletions + let __c = 0 + this._autocompletionsOfDocument[docUriStr].forEach(a => { if (a.status === 'pending') __c += 1 }) + console.log('pending: ' + __c) // get autocompletion from cache let cachedAutocompletion: Autocompletion | undefined = undefined @@ -167,17 +314,18 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider if (cachedAutocompletion) { if (cachedAutocompletion.status === 'finished') { - console.log('AAA1') + console.log('A1') - const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position }) + const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position }) return [inlineCompletion] } else if (cachedAutocompletion.status === 'pending') { - console.log('AAA2') + console.log('A2') try { await cachedAutocompletion.llmPromise; - const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position }) + console.log('id: ' + cachedAutocompletion.id) + const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position }) return [inlineCompletion] } catch (e) { @@ -186,7 +334,7 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider } } else if (cachedAutocompletion.status === 'error') { - console.log('AAA3') + console.log('A3') } return [] @@ -211,7 +359,7 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider return [] } - console.log('BBB') + console.log('B') // if there are too many pending requests, cancel the oldest one let numPending = 0 @@ -230,6 +378,10 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider } } + const { shouldGenerate, stopTokens } = getCompletionOptions({ prefix, suffix }) + + if (!shouldGenerate) return [] + // create a new autocompletion and add it to cache const newAutocompletion: Autocompletion = { id: this._autocompletionId++, @@ -249,12 +401,13 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider sendLLMMessage({ mode: 'fim', fimInfo: { prefix, suffix }, + options: { stopTokens }, onText: async (tokenStr, completionStr) => { newAutocompletion.result = completionStr // if generation doesn't match the prefix for the first few tokens generated, reject it - if (completionStr.length < 20 && !doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { + if (!doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { reject('LLM response did not match user\'s text.') } }, @@ -296,9 +449,10 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider // show autocompletion try { - await newAutocompletion.llmPromise; + await newAutocompletion.llmPromise + console.log('id: ' + newAutocompletion.id) - const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, position }) + const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, suffix, position }) return [inlineCompletion] } catch (e) { diff --git a/extensions/void/src/extension/applyDiffLazily.ts b/extensions/void/src/extension/applyDiffLazily.ts index 9b350082..c355fc5c 100644 --- a/extensions/void/src/extension/applyDiffLazily.ts +++ b/extensions/void/src/extension/applyDiffLazily.ts @@ -50,7 +50,6 @@ ${completedStr} isAnyChangeSoFar = true } - const isRecentMatchup = false // the final NUM_MATCHUP_TOKENS characters of fullCompletedStr are the same as the final NUM_MATCHUP_TOKENS characters of the last item in the diffs of oldFileStr that had 0 changes diff --git a/extensions/void/src/extension/extension.ts b/extensions/void/src/extension/extension.ts index 82de53b9..f8788874 100644 --- a/extensions/void/src/extension/extension.ts +++ b/extensions/void/src/extension/extension.ts @@ -114,7 +114,7 @@ export function activate(context: vscode.ExtensionContext) { // Receive messages in the extension from the sidebar webview (messages are sent using `postMessage`) webview.onDidReceiveMessage(async (m: MessageFromSidebar) => { - const abortRef: AbortRef = { current: null } + const abortRef: AbortRef = { current: () => { } } if (m.type === 'requestFiles') { @@ -187,15 +187,12 @@ export function activate(context: vscode.ExtensionContext) { context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider('*', autocompleteProvider)); const voidConfig = getVoidConfigFromPartial(context.globalState.get('partialVoidConfig') ?? {}) - const abortRef: AbortRef = { current: null } // setupAutocomplete({ voidConfig, abortRef }) - // 7. Language Server console.log('run lsp') let disposable = vscode.commands.registerCommand('typeInspector.inspect', runTreeSitter); - context.subscriptions.push(disposable); diff --git a/extensions/void/src/webviews/common/contextForConfig.tsx b/extensions/void/src/webviews/common/contextForConfig.tsx index ad665a84..a2d20f18 100644 --- a/extensions/void/src/webviews/common/contextForConfig.tsx +++ b/extensions/void/src/webviews/common/contextForConfig.tsx @@ -121,7 +121,7 @@ const voidConfigInfo: Record< model: configEnum( 'Ollama model to use.', 'codestral', - ["codestral", "qwen2.5-coder", "qwen2.5-coder:0.5B", "qwen2.5-coder:1.5B", "qwen2.5-coder:3B", "qwen2.5-coder:7B", "qwen2.5-coder:14B", "qwen2.5-coder:32B", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", "codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", "llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const + ["codestral", "qwen2.5-coder", "qwen2.5-coder:0.5b", "qwen2.5-coder:1.5b", "qwen2.5-coder:3b", "qwen2.5-coder:7b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", "codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", "llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const ), }, openRouter: { diff --git a/extensions/void/src/webviews/sidebar/SidebarChat.tsx b/extensions/void/src/webviews/sidebar/SidebarChat.tsx index 038fedf7..95921959 100644 --- a/extensions/void/src/webviews/sidebar/SidebarChat.tsx +++ b/extensions/void/src/webviews/sidebar/SidebarChat.tsx @@ -156,7 +156,7 @@ export const SidebarChat = ({ chatInputRef }: { chatInputRef: React.RefObject void) | null>(null) + const abortFnRef = useRef<(() => void)>(() => { }) const [latestError, setLatestError] = useState('')