From cd77542a9e632c381aed9618ee093135528f30f6 Mon Sep 17 00:00:00 2001 From: mp Date: Tue, 19 Nov 2024 03:57:06 -0800 Subject: [PATCH] Autocomplete (max number of pending requests, filter for matchup with user's text, better prompt) --- extensions/void/src/common/SimpleLruCache.ts | 32 ------ extensions/void/src/common/getPrompt.ts | 17 ++- extensions/void/src/common/sendLLMMessage.ts | 5 +- .../void/src/extension/AutcompleteProvider.ts | 107 +++++++++++------- .../src/webviews/common/contextForConfig.tsx | 5 +- 5 files changed, 86 insertions(+), 80 deletions(-) delete mode 100644 extensions/void/src/common/SimpleLruCache.ts diff --git a/extensions/void/src/common/SimpleLruCache.ts b/extensions/void/src/common/SimpleLruCache.ts deleted file mode 100644 index 7118bc8f..00000000 --- a/extensions/void/src/common/SimpleLruCache.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { LRUCache } from 'lru-cache'; - -const DEFAULT_MAX_SIZE = 20 - - -export class SimpleLRUCache { - private cache: LRUCache; - private maxSize: number - public length: number - - constructor(maxSize?: number) { - - maxSize = maxSize ?? DEFAULT_MAX_SIZE - - this.cache = new LRUCache({ max: maxSize }); - this.length = 0 - this.maxSize = maxSize - } - - push(value: T): void { - const key = this.cache.size; - this.cache.set(key, value); - this.length++ - this.length = Math.min(this.length, this.maxSize) - } - - values() { - return this.cache.values() - } - - -} \ No newline at end of file diff --git a/extensions/void/src/common/getPrompt.ts b/extensions/void/src/common/getPrompt.ts index f1c7567b..6fb07f4c 100644 --- a/extensions/void/src/common/getPrompt.ts +++ b/extensions/void/src/common/getPrompt.ts @@ -22,9 +22,10 @@ export const getFIMSystem: GetFIMPrompt = ({ voidConfig, fimInfo }) => { Instruction summary: 1. Return the MIDDLE of the code between the START and END. 2. Do not give an explanation, description, or any other code besides the middle. -2. 
Do not return duplicate code from either START or END. -3. Make sure the MIDDLE piece of code has balanced brackets that match the START and END. -4. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line. +3. Do not return duplicate code from either START or END. +4. Make sure the MIDDLE piece of code has balanced brackets that match the START and END. +5. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line. +6. Around 90% of the time, you should return just one or a few lines of code. You should keep your outputs short unless you are confident the user is trying to write boilerplate code. # EXAMPLE @@ -75,11 +76,19 @@ export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => { // if no prefix or suffix, return empty string if (!fimInfo.prefix.trim() && !fimInfo.suffix.trim()) return '' + // instruct model to generate a single line if there is text immediately after the cursor + const suffixLines = fimInfo.suffix.split('\n'); + const afterCursor = suffixLines[0] || ''; + const generateSingleLine = afterCursor.trim().length > 0; + const singleLinePrompt = generateSingleLine ? 
`Please produce a single line of code that fills in the middle.` : '' + // TODO may want to trim the prefix and suffix switch (voidConfig.default.whichApi) { case 'ollama': if (voidConfig.ollama.model === 'codestral') { - return `[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}` + return `${singleLinePrompt}[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}` + } else if (voidConfig.ollama.model.includes('qwen')) { + return `${singleLinePrompt}<|fim_prefix|>${fimInfo.prefix}<|fim_suffix|>${fimInfo.suffix}<|fim_middle|>` } return '' case 'anthropic': diff --git a/extensions/void/src/common/sendLLMMessage.ts b/extensions/void/src/common/sendLLMMessage.ts index a86f5c49..2ff1769f 100644 --- a/extensions/void/src/common/sendLLMMessage.ts +++ b/extensions/void/src/common/sendLLMMessage.ts @@ -247,12 +247,12 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on let didAbort = false let fullText = "" + const ollama = new Ollama({ host: voidConfig.ollama.endpoint }) + abortRef.current = () => { didAbort = true; }; - const ollama = new Ollama({ host: voidConfig.ollama.endpoint }) - type GenerateResponse = Awaited> type ChatResponse = Awaited> @@ -271,7 +271,6 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on } if (mode === 'fim') { - // the fim prompt is the last message let prompt = messages[messages.length - 1].content return ollama.generate({ diff --git a/extensions/void/src/extension/AutcompleteProvider.ts b/extensions/void/src/extension/AutcompleteProvider.ts index 2aad378a..587c5da9 100644 --- a/extensions/void/src/extension/AutcompleteProvider.ts +++ b/extensions/void/src/extension/AutcompleteProvider.ts @@ -2,22 +2,24 @@ import * as vscode from 'vscode'; import { AbortRef, LLMMessage, sendLLMMessage } from '../common/sendLLMMessage'; import { getVoidConfigFromPartial, VoidConfig } from '../webviews/common/contextForConfig'; import { LRUCache } from 'lru-cache'; -import { SimpleLRUCache } from 
'../common/SimpleLruCache'; type AutocompletionStatus = 'pending' | 'finished' | 'error'; type Autocompletion = { + id: number, prefix: string, suffix: string, startTime: number, endTime: number | undefined, abortRef: AbortRef, status: AutocompletionStatus, - promise: Promise | undefined, + llmPromise: Promise | undefined, result: string, } -const DEBOUNCE_TIME = 300 +const DEBOUNCE_TIME = 500 const TIMEOUT_TIME = 60000 +const MAX_CACHE_SIZE = 20 +const MAX_PENDING_REQUESTS = 2 // postprocesses the result const postprocessResult = (result: string) => { @@ -72,10 +74,6 @@ const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: stri const lastMatchupIndex = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length - console.log('generatedMiddle ', generatedMiddle) - console.log('trimmedOriginalPrefix ', trimmedOriginalPrefix) - console.log('trimmedCurrentPrefix ', trimmedCurrentPrefix) - console.log('index: ', lastMatchupIndex) if (lastMatchupIndex < 0) { return new vscode.InlineCompletionItem('') } @@ -90,19 +88,19 @@ const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: stri } -// returns whether we can use this autocompletion to complete the prefix +// returns whether this autocompletion is in the cache const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: string, autocompletion: Autocompletion }): boolean => { const originalPrefix = autocompletion.prefix const generatedMiddle = autocompletion.result - const trimmedOriginalPrefix = trimPrefix(originalPrefix) - const trimmedCurrentPrefix = trimPrefix(prefix) + const originalPrefixTrimmed = trimPrefix(originalPrefix) + const currentPrefixTrimmed = trimPrefix(prefix) - if (trimmedCurrentPrefix.length < trimmedOriginalPrefix.length) { + if (currentPrefixTrimmed.length < originalPrefixTrimmed.length) { return false } - const isMatch = (trimmedOriginalPrefix + generatedMiddle).startsWith(trimmedCurrentPrefix) + const isMatch = (originalPrefixTrimmed + 
generatedMiddle).startsWith(currentPrefixTrimmed) return isMatch } @@ -111,11 +109,14 @@ const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: str export class AutocompleteProvider implements vscode.InlineCompletionItemProvider { + private _extensionContext: vscode.ExtensionContext; - private _autocompletionsOfDocument: { [docUriStr: string]: SimpleLRUCache } = {} + private _autocompletionId: number = 0; + private _autocompletionsOfDocument: { [docUriStr: string]: LRUCache } = {} - private _lastTime = 0 + private _lastCompletionTime = 0 + private _lastPrefix: string = '' constructor(context: vscode.ExtensionContext) { this._extensionContext = context @@ -130,7 +131,7 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider token: vscode.CancellationToken, ): Promise { - const disabled = true + const disabled = false if (disabled) { return []; } const docUriStr = document.uri.toString() @@ -139,20 +140,26 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider const cursorOffset = document.offsetAt(position); const prefix = fullText.substring(0, cursorOffset) const suffix = fullText.substring(cursorOffset) - - if (!this._autocompletionsOfDocument[docUriStr]) { - this._autocompletionsOfDocument[docUriStr] = new SimpleLRUCache() - } - const voidConfig = getVoidConfigFromPartial(this._extensionContext.globalState.get('partialVoidConfig') ?? 
{}) + // initialize cache and other variables + // note that whenever an autocompletion is rejected, it is removed from cache + if (!this._autocompletionsOfDocument[docUriStr]) { + this._autocompletionsOfDocument[docUriStr] = new LRUCache({ + max: MAX_CACHE_SIZE, + dispose: (autocompletion) => { autocompletion.abortRef.current() } + }) + } + this._lastPrefix = prefix + console.log('cache size: ', this._autocompletionsOfDocument[docUriStr].size) + // get autocompletion from cache let cachedAutocompletion: Autocompletion | undefined = undefined - loop: for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { + for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { // if the user's change matches up with the generated text if (doesPrefixMatchAutocompletion({ prefix, autocompletion })) { cachedAutocompletion = autocompletion - break loop; + break } } @@ -169,11 +176,12 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider console.log('AAA2') try { - await cachedAutocompletion.promise; + await cachedAutocompletion.llmPromise; const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position }) return [inlineCompletion] } catch (e) { + this._autocompletionsOfDocument[docUriStr].delete(cachedAutocompletion.id) console.error('Error creating autocompletion (1): ' + e) } @@ -184,15 +192,13 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider return [] } - - // if there is no cached autocompletion, create it and add it to cache - + // else if no more typing happens, then go forwards with the request // wait DEBOUNCE_TIME for the user to stop typing const thisTime = Date.now() - this._lastTime = thisTime + this._lastCompletionTime = thisTime const didTypingHappenDuringDebounce = await new Promise((resolve, reject) => setTimeout(() => { - if (this._lastTime === thisTime) { + if (this._lastCompletionTime === thisTime) { 
resolve(false) } else { resolve(true) @@ -207,27 +213,50 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider console.log('BBB') - // else if no more typing happens, then go forwards with the request + // if there are too many pending requests, cancel the oldest one + let numPending = 0 + let oldestPending: Autocompletion | undefined = undefined + for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { + if (autocompletion.status === 'pending') { + numPending += 1 + if (oldestPending === undefined) { + oldestPending = autocompletion + } + if (numPending >= MAX_PENDING_REQUESTS) { + // cancel the oldest pending request and remove it from cache + this._autocompletionsOfDocument[docUriStr].delete(oldestPending.id) + break + } + } + } + + // create a new autocompletion and add it to cache const newAutocompletion: Autocompletion = { + id: this._autocompletionId++, prefix: prefix, suffix: suffix, startTime: Date.now(), endTime: undefined, abortRef: { current: () => { } }, status: 'pending', - promise: undefined, + llmPromise: undefined, result: '', } // set parameters of `newAutocompletion` appropriately - newAutocompletion.promise = new Promise((resolve, reject) => { + newAutocompletion.llmPromise = new Promise((resolve, reject) => { sendLLMMessage({ mode: 'fim', fimInfo: { prefix, suffix }, onText: async (tokenStr, completionStr) => { - // TODO filter out bad responses here + newAutocompletion.result = completionStr + + // if generation doesn't match the prefix for the first few tokens generated, reject it + if (completionStr.length < 20 && !doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { + reject('LLM response did not match user\'s text.') + } }, onFinalMessage: (finalMessage) => { @@ -252,24 +281,28 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider abortRef: newAutocompletion.abortRef, }) - setTimeout(() => { // if the request 
hasnt resolved in TIMEOUT_TIME seconds, reject it + // if the request hasn't resolved within TIMEOUT_TIME milliseconds, reject it + setTimeout(() => { if (newAutocompletion.status === 'pending') { - reject('Timeout') + reject('Timeout receiving message to LLM.') } }, TIMEOUT_TIME) + + }) // add autocompletion to cache - this._autocompletionsOfDocument[docUriStr].push(newAutocompletion) + this._autocompletionsOfDocument[docUriStr].set(newAutocompletion.id, newAutocompletion) // show autocompletion try { - await newAutocompletion.promise; + await newAutocompletion.llmPromise; const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, position }) return [inlineCompletion] } catch (e) { + this._autocompletionsOfDocument[docUriStr].delete(newAutocompletion.id) console.error('Error creating autocompletion (2): ' + e) return [] } @@ -277,6 +310,4 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider } - - } diff --git a/extensions/void/src/webviews/common/contextForConfig.tsx b/extensions/void/src/webviews/common/contextForConfig.tsx index 47e96e60..ad665a84 100644 --- a/extensions/void/src/webviews/common/contextForConfig.tsx +++ b/extensions/void/src/webviews/common/contextForConfig.tsx @@ -1,5 +1,6 @@ import React, { ReactNode, createContext, useCallback, useContext, useEffect, useRef, useState, } from "react" import { awaitVSCodeResponse, getVSCodeAPI, useOnVSCodeMessage } from "./getVscodeApi" +import { Ollama } from "ollama/browser" const configEnum = (description: string, defaultVal: EnumArr[number], enumArr: EnumArr) => { return { @@ -29,8 +30,6 @@ export const configFields = [ 'azure', ] as const - - const voidConfigInfo: Record< typeof configFields[number] | 'default', { [prop: string]: { @@ -122,7 +121,7 @@ const voidConfigInfo: Record< model: configEnum( 'Ollama model to use.', 'codestral', - ["codestral", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", 
"codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", "llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const + ["codestral", "qwen2.5-coder", "qwen2.5-coder:0.5B", "qwen2.5-coder:1.5B", "qwen2.5-coder:3B", "qwen2.5-coder:7B", "qwen2.5-coder:14B", "qwen2.5-coder:32B", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", "codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", 
"llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const ), }, openRouter: {