mirror of
https://github.com/voideditor/void
synced 2026-05-23 09:28:23 +00:00
Autocomplete (max number of pending requests, filter for matchup with user's text, better prompt)
This commit is contained in:
parent
6df0093786
commit
cd77542a9e
5 changed files with 86 additions and 80 deletions
|
|
@ -1,32 +0,0 @@
|
|||
import { LRUCache } from 'lru-cache';
|
||||
|
||||
const DEFAULT_MAX_SIZE = 20
|
||||
|
||||
|
||||
/**
 * Minimal append-only wrapper around `LRUCache` that stores values under
 * auto-generated numeric keys, so callers can `push` values without managing
 * keys themselves.
 *
 * `length` mirrors the number of pushes, capped at the maximum size passed to
 * the constructor (or `DEFAULT_MAX_SIZE` when omitted).
 */
export class SimpleLRUCache<T extends {}> {
	private cache: LRUCache<number, T>;
	private maxSize: number
	// Key handed to the next pushed value. It only ever increases, so a new
	// value can never collide with (and silently overwrite) an existing entry.
	// Using `cache.size` as the key breaks once the cache is full: the size
	// stops growing, so every later push would reuse the same key.
	private nextKey: number = 0
	public length: number

	/**
	 * @param maxSize Maximum number of values to retain; defaults to `DEFAULT_MAX_SIZE`.
	 */
	constructor(maxSize?: number) {
		maxSize = maxSize ?? DEFAULT_MAX_SIZE
		this.cache = new LRUCache<number, T>({ max: maxSize });
		this.length = 0
		this.maxSize = maxSize
	}

	/**
	 * Stores `value` under a fresh key. Eviction of older entries once the
	 * cache is full is delegated to the underlying `LRUCache`.
	 */
	push(value: T): void {
		this.cache.set(this.nextKey, value);
		this.nextKey += 1
		this.length = Math.min(this.length + 1, this.maxSize)
	}

	/** Returns the underlying cache's iterator over the stored values. */
	values() {
		return this.cache.values()
	}
}
|
||||
|
|
@ -22,9 +22,10 @@ export const getFIMSystem: GetFIMPrompt = ({ voidConfig, fimInfo }) => {
|
|||
Instruction summary:
|
||||
1. Return the MIDDLE of the code between the START and END.
|
||||
2. Do not give an explanation, description, or any other code besides the middle.
|
||||
2. Do not return duplicate code from either START or END.
|
||||
3. Make sure the MIDDLE piece of code has balanced brackets that match the START and END.
|
||||
4. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line.
|
||||
3. Do not return duplicate code from either START or END.
|
||||
4. Make sure the MIDDLE piece of code has balanced brackets that match the START and END.
|
||||
5. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line.
|
||||
6. Around 90% of the time, you should return just one or a few lines of code. You should keep your outputs short unless you are confident the user is trying to write boilderplate code.
|
||||
|
||||
# EXAMPLE
|
||||
|
||||
|
|
@ -75,11 +76,19 @@ export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => {
|
|||
// if no prefix or suffix, return empty string
|
||||
if (!fimInfo.prefix.trim() && !fimInfo.suffix.trim()) return ''
|
||||
|
||||
// instruct model to generate a single line if there is text immediately after the cursor
|
||||
const suffixLines = fimInfo.suffix.split('\n');
|
||||
const afterCursor = suffixLines[0] || '';
|
||||
const generateSingleLine = afterCursor.trim().length > 0;
|
||||
const singleLinePrompt = generateSingleLine ? `Please produce a single line of code that fills in the middle.` : ''
|
||||
|
||||
// TODO may want to trim the prefix and suffix
|
||||
switch (voidConfig.default.whichApi) {
|
||||
case 'ollama':
|
||||
if (voidConfig.ollama.model === 'codestral') {
|
||||
return `[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}`
|
||||
return `${singleLinePrompt}[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}`
|
||||
} else if (voidConfig.ollama.model.includes('qwen')) {
|
||||
return `${singleLinePrompt}<|fim_prefix|>${fimInfo.prefix}<|fim_suffix|>${fimInfo.suffix}<|fim_middle|>`
|
||||
}
|
||||
return ''
|
||||
case 'anthropic':
|
||||
|
|
|
|||
|
|
@ -247,12 +247,12 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on
|
|||
let didAbort = false
|
||||
let fullText = ""
|
||||
|
||||
const ollama = new Ollama({ host: voidConfig.ollama.endpoint })
|
||||
|
||||
abortRef.current = () => {
|
||||
didAbort = true;
|
||||
};
|
||||
|
||||
const ollama = new Ollama({ host: voidConfig.ollama.endpoint })
|
||||
|
||||
type GenerateResponse = Awaited<ReturnType<(typeof ollama.generate)>>
|
||||
type ChatResponse = Awaited<ReturnType<(typeof ollama.chat)>>
|
||||
|
||||
|
|
@ -271,7 +271,6 @@ export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ mode, messages, on
|
|||
}
|
||||
|
||||
if (mode === 'fim') {
|
||||
|
||||
// the fim prompt is the last message
|
||||
let prompt = messages[messages.length - 1].content
|
||||
return ollama.generate({
|
||||
|
|
|
|||
|
|
@ -2,22 +2,24 @@ import * as vscode from 'vscode';
|
|||
import { AbortRef, LLMMessage, sendLLMMessage } from '../common/sendLLMMessage';
|
||||
import { getVoidConfigFromPartial, VoidConfig } from '../webviews/common/contextForConfig';
|
||||
import { LRUCache } from 'lru-cache';
|
||||
import { SimpleLRUCache } from '../common/SimpleLruCache';
|
||||
|
||||
type AutocompletionStatus = 'pending' | 'finished' | 'error';
|
||||
type Autocompletion = {
|
||||
id: number,
|
||||
prefix: string,
|
||||
suffix: string,
|
||||
startTime: number,
|
||||
endTime: number | undefined,
|
||||
abortRef: AbortRef,
|
||||
status: AutocompletionStatus,
|
||||
promise: Promise<string> | undefined,
|
||||
llmPromise: Promise<string> | undefined,
|
||||
result: string,
|
||||
}
|
||||
|
||||
const DEBOUNCE_TIME = 300
|
||||
const DEBOUNCE_TIME = 500
|
||||
const TIMEOUT_TIME = 60000
|
||||
const MAX_CACHE_SIZE = 20
|
||||
const MAX_PENDING_REQUESTS = 2
|
||||
|
||||
// postprocesses the result
|
||||
const postprocessResult = (result: string) => {
|
||||
|
|
@ -72,10 +74,6 @@ const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: stri
|
|||
|
||||
const lastMatchupIndex = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length
|
||||
|
||||
console.log('generatedMiddle ', generatedMiddle)
|
||||
console.log('trimmedOriginalPrefix ', trimmedOriginalPrefix)
|
||||
console.log('trimmedCurrentPrefix ', trimmedCurrentPrefix)
|
||||
console.log('index: ', lastMatchupIndex)
|
||||
if (lastMatchupIndex < 0) {
|
||||
return new vscode.InlineCompletionItem('')
|
||||
}
|
||||
|
|
@ -90,19 +88,19 @@ const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: stri
|
|||
|
||||
}
|
||||
|
||||
/**
 * Returns whether a previously generated autocompletion can still be used for
 * the text the user has typed so far.
 *
 * @param prefix The current text before the cursor.
 * @param autocompletion The autocompletion whose original `prefix` and generated `result` are checked.
 * @returns `true` when the trimmed current prefix is at least as long as the
 *   trimmed original prefix and is a prefix of (trimmed original prefix + generated result).
 */
const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: string, autocompletion: Autocompletion }): boolean => {

	const originalPrefixTrimmed = trimPrefix(autocompletion.prefix)
	const currentPrefixTrimmed = trimPrefix(prefix)

	// the user deleted text past the point where the completion was requested,
	// so the completion no longer lines up with the document
	if (currentPrefixTrimmed.length < originalPrefixTrimmed.length) {
		return false
	}

	// whatever the user typed since the request must agree with the generated text
	return (originalPrefixTrimmed + autocompletion.result).startsWith(currentPrefixTrimmed)

}
|
||||
|
|
@ -111,11 +109,14 @@ const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: str
|
|||
|
||||
export class AutocompleteProvider implements vscode.InlineCompletionItemProvider {
|
||||
|
||||
|
||||
private _extensionContext: vscode.ExtensionContext;
|
||||
|
||||
private _autocompletionsOfDocument: { [docUriStr: string]: SimpleLRUCache<Autocompletion> } = {}
|
||||
private _autocompletionId: number = 0;
|
||||
private _autocompletionsOfDocument: { [docUriStr: string]: LRUCache<number, Autocompletion> } = {}
|
||||
|
||||
private _lastTime = 0
|
||||
private _lastCompletionTime = 0
|
||||
private _lastPrefix: string = ''
|
||||
|
||||
constructor(context: vscode.ExtensionContext) {
|
||||
this._extensionContext = context
|
||||
|
|
@ -130,7 +131,7 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
|
|||
token: vscode.CancellationToken,
|
||||
): Promise<vscode.InlineCompletionItem[]> {
|
||||
|
||||
const disabled = true
|
||||
const disabled = false
|
||||
if (disabled) { return []; }
|
||||
|
||||
const docUriStr = document.uri.toString()
|
||||
|
|
@ -139,20 +140,26 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
|
|||
const cursorOffset = document.offsetAt(position);
|
||||
const prefix = fullText.substring(0, cursorOffset)
|
||||
const suffix = fullText.substring(cursorOffset)
|
||||
|
||||
if (!this._autocompletionsOfDocument[docUriStr]) {
|
||||
this._autocompletionsOfDocument[docUriStr] = new SimpleLRUCache()
|
||||
}
|
||||
|
||||
const voidConfig = getVoidConfigFromPartial(this._extensionContext.globalState.get('partialVoidConfig') ?? {})
|
||||
|
||||
// initialize cache and other variables
|
||||
// note that whenever an autocompletion is rejected, it is removed from cache
|
||||
if (!this._autocompletionsOfDocument[docUriStr]) {
|
||||
this._autocompletionsOfDocument[docUriStr] = new LRUCache<number, Autocompletion>({
|
||||
max: MAX_CACHE_SIZE,
|
||||
dispose: (autocompletion) => { autocompletion.abortRef.current() }
|
||||
})
|
||||
}
|
||||
this._lastPrefix = prefix
|
||||
console.log('cache size: ', this._autocompletionsOfDocument[docUriStr].size)
|
||||
|
||||
// get autocompletion from cache
|
||||
let cachedAutocompletion: Autocompletion | undefined = undefined
|
||||
loop: for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
|
||||
for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
|
||||
// if the user's change matches up with the generated text
|
||||
if (doesPrefixMatchAutocompletion({ prefix, autocompletion })) {
|
||||
cachedAutocompletion = autocompletion
|
||||
break loop;
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -169,11 +176,12 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
|
|||
console.log('AAA2')
|
||||
|
||||
try {
|
||||
await cachedAutocompletion.promise;
|
||||
await cachedAutocompletion.llmPromise;
|
||||
const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position })
|
||||
return [inlineCompletion]
|
||||
|
||||
} catch (e) {
|
||||
this._autocompletionsOfDocument[docUriStr].delete(cachedAutocompletion.id)
|
||||
console.error('Error creating autocompletion (1): ' + e)
|
||||
}
|
||||
|
||||
|
|
@ -184,15 +192,13 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
|
|||
return []
|
||||
}
|
||||
|
||||
|
||||
// if there is no cached autocompletion, create it and add it to cache
|
||||
|
||||
// else if no more typing happens, then go forwards with the request
|
||||
// wait DEBOUNCE_TIME for the user to stop typing
|
||||
const thisTime = Date.now()
|
||||
this._lastTime = thisTime
|
||||
this._lastCompletionTime = thisTime
|
||||
const didTypingHappenDuringDebounce = await new Promise((resolve, reject) =>
|
||||
setTimeout(() => {
|
||||
if (this._lastTime === thisTime) {
|
||||
if (this._lastCompletionTime === thisTime) {
|
||||
resolve(false)
|
||||
} else {
|
||||
resolve(true)
|
||||
|
|
@ -207,27 +213,50 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
|
|||
|
||||
console.log('BBB')
|
||||
|
||||
// else if no more typing happens, then go forwards with the request
|
||||
// if there are too many pending requests, cancel the oldest one
|
||||
let numPending = 0
|
||||
let oldestPending: Autocompletion | undefined = undefined
|
||||
for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
|
||||
if (autocompletion.status === 'pending') {
|
||||
numPending += 1
|
||||
if (oldestPending === undefined) {
|
||||
oldestPending = autocompletion
|
||||
}
|
||||
if (numPending >= MAX_PENDING_REQUESTS) {
|
||||
// cancel the oldest pending request and remove it from cache
|
||||
this._autocompletionsOfDocument[docUriStr].delete(oldestPending.id)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create a new autocompletion and add it to cache
|
||||
const newAutocompletion: Autocompletion = {
|
||||
id: this._autocompletionId++,
|
||||
prefix: prefix,
|
||||
suffix: suffix,
|
||||
startTime: Date.now(),
|
||||
endTime: undefined,
|
||||
abortRef: { current: () => { } },
|
||||
status: 'pending',
|
||||
promise: undefined,
|
||||
llmPromise: undefined,
|
||||
result: '',
|
||||
}
|
||||
|
||||
// set parameters of `newAutocompletion` appropriately
|
||||
newAutocompletion.promise = new Promise((resolve, reject) => {
|
||||
newAutocompletion.llmPromise = new Promise((resolve, reject) => {
|
||||
|
||||
sendLLMMessage({
|
||||
mode: 'fim',
|
||||
fimInfo: { prefix, suffix },
|
||||
onText: async (tokenStr, completionStr) => {
|
||||
// TODO filter out bad responses here
|
||||
|
||||
newAutocompletion.result = completionStr
|
||||
|
||||
// if generation doesn't match the prefix for the first few tokens generated, reject it
|
||||
if (completionStr.length < 20 && !doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) {
|
||||
reject('LLM response did not match user\'s text.')
|
||||
}
|
||||
},
|
||||
onFinalMessage: (finalMessage) => {
|
||||
|
||||
|
|
@ -252,24 +281,28 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
|
|||
abortRef: newAutocompletion.abortRef,
|
||||
})
|
||||
|
||||
setTimeout(() => { // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it
|
||||
// if the request hasn't resolved within TIMEOUT_TIME milliseconds, reject it
|
||||
setTimeout(() => {
|
||||
if (newAutocompletion.status === 'pending') {
|
||||
reject('Timeout')
|
||||
reject('Timeout receiving message to LLM.')
|
||||
}
|
||||
}, TIMEOUT_TIME)
|
||||
|
||||
|
||||
})
|
||||
|
||||
// add autocompletion to cache
|
||||
this._autocompletionsOfDocument[docUriStr].push(newAutocompletion)
|
||||
this._autocompletionsOfDocument[docUriStr].set(newAutocompletion.id, newAutocompletion)
|
||||
|
||||
// show autocompletion
|
||||
try {
|
||||
await newAutocompletion.promise;
|
||||
await newAutocompletion.llmPromise;
|
||||
|
||||
const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, position })
|
||||
return [inlineCompletion]
|
||||
|
||||
} catch (e) {
|
||||
this._autocompletionsOfDocument[docUriStr].delete(newAutocompletion.id)
|
||||
console.error('Error creating autocompletion (2): ' + e)
|
||||
return []
|
||||
}
|
||||
|
|
@ -277,6 +310,4 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import React, { ReactNode, createContext, useCallback, useContext, useEffect, useRef, useState, } from "react"
|
||||
import { awaitVSCodeResponse, getVSCodeAPI, useOnVSCodeMessage } from "./getVscodeApi"
|
||||
import { Ollama } from "ollama/browser"
|
||||
|
||||
const configEnum = <EnumArr extends readonly string[]>(description: string, defaultVal: EnumArr[number], enumArr: EnumArr) => {
|
||||
return {
|
||||
|
|
@ -29,8 +30,6 @@ export const configFields = [
|
|||
'azure',
|
||||
] as const
|
||||
|
||||
|
||||
|
||||
const voidConfigInfo: Record<
|
||||
typeof configFields[number] | 'default', {
|
||||
[prop: string]: {
|
||||
|
|
@ -122,7 +121,7 @@ const voidConfigInfo: Record<
|
|||
model: configEnum(
|
||||
'Ollama model to use.',
|
||||
'codestral',
|
||||
["codestral", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", "codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", "llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const
|
||||
["codestral", "qwen2.5-coder", "qwen2.5-coder:0.5B", "qwen2.5-coder:1.5B", "qwen2.5-coder:3B", "qwen2.5-coder:7B", "qwen2.5-coder:14B", "qwen2.5-coder:32B", "codegemma", "codegemma:2b", "codegemma:7b", "codellama", "codellama:7b", "codellama:13b", "codellama:34b", "codellama:70b", "codellama:code", "codellama:python", "command-r", "command-r:35b", "command-r-plus", "command-r-plus:104b", "deepseek-coder-v2", "deepseek-coder-v2:16b", "deepseek-coder-v2:236b", "falcon2", "falcon2:11b", "firefunction-v2", "firefunction-v2:70b", "gemma", "gemma:2b", "gemma:7b", "gemma2", "gemma2:2b", "gemma2:9b", "gemma2:27b", "llama2", "llama2:7b", "llama2:13b", "llama2:70b", "llama3", "llama3:8b", "llama3:70b", "llama3-chatqa", "llama3-chatqa:8b", "llama3-chatqa:70b", "llama3-gradient", "llama3-gradient:8b", "llama3-gradient:70b", "llama3.1", "llama3.2", "llama3.1:8b", "llama3.1:70b", "llama3.1:405b", "llava", "llava:7b", "llava:13b", "llava:34b", "llava-llama3", "llava-llama3:8b", "llava-phi3", "llava-phi3:3.8b", "mistral", "mistral:7b", "mistral-large", "mistral-large:123b", "mistral-nemo", "mistral-nemo:12b", "mixtral", "mixtral:8x7b", "mixtral:8x22b", "moondream", "moondream:1.8b", "openhermes", "openhermes:v2.5", "phi3", "phi3:3.8b", "phi3:14b", "phi3.5", "phi3.5:3.8b", "qwen", "qwen:7b", "qwen:14b", "qwen:32b", "qwen:72b", "qwen:110b", "qwen2", "qwen2:0.5b", "qwen2:1.5b", "qwen2:7b", "qwen2:72b", "smollm", "smollm:135m", "smollm:360m", "smollm:1.7b"] as const
|
||||
),
|
||||
},
|
||||
openRouter: {
|
||||
|
|
|
|||
Loading…
Reference in a new issue