Merge branch 'autocomplete' into actual-editor-insets

This commit is contained in:
Andrew Pareles 2024-11-21 18:12:28 -08:00
commit a7cbed395b
5 changed files with 745 additions and 119 deletions

View file

@ -1,32 +0,0 @@
import { LRUCache } from 'lru-cache';
const DEFAULT_MAX_SIZE = 20
/**
 * Append-only wrapper around `LRUCache` that stores values under
 * automatically assigned numeric keys.
 *
 * Keys come from a monotonically increasing counter rather than from the
 * cache's current size: once the cache is full its size stays pinned at
 * `maxSize`, so a size-derived key would be reused for every later push and
 * new values would overwrite the previous push instead of displacing the
 * oldest entry.
 */
export class SimpleLRUCache<T extends {}> {
	private cache: LRUCache<number, T>;
	private maxSize: number
	// key that will be assigned to the next pushed value; never reused
	private nextKey: number
	// number of values currently held, capped at `maxSize`
	public length: number

	/**
	 * @param maxSize maximum number of values retained; defaults to `DEFAULT_MAX_SIZE`
	 */
	constructor(maxSize?: number) {
		maxSize = maxSize ?? DEFAULT_MAX_SIZE
		this.cache = new LRUCache<number, T>({ max: maxSize });
		this.length = 0
		this.nextKey = 0
		this.maxSize = maxSize
	}

	/**
	 * Stores `value` under a fresh key. When the cache is already at capacity,
	 * the underlying `LRUCache` decides which existing entry to drop.
	 */
	push(value: T): void {
		const key = this.nextKey++
		this.cache.set(key, value);
		this.length = Math.min(this.length + 1, this.maxSize)
	}

	/**
	 * @returns the iterator produced by the underlying cache's `values()`
	 */
	values() {
		return this.cache.values()
	}
}

View file

@ -2,35 +2,84 @@ import * as vscode from 'vscode';
import { AbortRef, LLMMessage, sendLLMMessage } from '../common/sendLLMMessage';
import { getVoidConfigFromPartial, VoidConfig } from '../webviews/common/contextForConfig';
import { LRUCache } from 'lru-cache';
import { SimpleLRUCache } from '../common/SimpleLruCache';
/*
A summary of autotab:
Postprocessing
-one common problem for all models is outputting unbalanced parentheses
we solve this by trimming all extra closing parentheses from the generated string
in future, should make sure parentheses are always balanced
-another problem is completing the middle of a string, eg. "const [x, CURSOR] = useState()"
we complete up to first matchup character
but should instead complete the whole line / block (difficult because of parenthesis accuracy)
-too much info is bad. usually we want to show the user 1 line, and have a preloaded response afterwards
this should happen automatically with caching system
should break preloaded responses into \n\n chunks
Preprocessing
- we don't generate if cursor is at end / beginning of a line (no spaces)
- we generate 1 line if there is text to the right of cursor
- we generate 1 line if variable declaration
- (in many cases want to show 1 line but generate multiple)
State
- cache based on prefix (and do some trimming first)
- when press tab on one line, should have an immediate followup response
to do this, show autocompletes before they're fully finished
- [todo] remove each autotab when accepted
- [todo] treat windows \r\n separately from \n
!- [todo] provide type information
Details
-generated results are trimmed up to 1 leading/trailing space
-prefixes are cached up to 1 trailing newline
-
*/
type AutocompletionStatus = 'pending' | 'finished' | 'error';
type Autocompletion = {
id: number,
prefix: string,
suffix: string,
startTime: number,
endTime: number | undefined,
abortRef: AbortRef,
status: AutocompletionStatus,
promise: Promise<string> | undefined,
llmPromise: Promise<string> | undefined,
result: string,
}
const DEBOUNCE_TIME = 300
const DEBOUNCE_TIME = 500
const TIMEOUT_TIME = 60000
const MAX_CACHE_SIZE = 20
const MAX_PENDING_REQUESTS = 2
// postprocesses the result
const postprocessResult = (result: string) => {
// remove leading whitespace from result
return result.trimStart()
console.log('result: ', JSON.stringify(result))
// trim all whitespace except for a single leading/trailing space
const hasLeadingSpace = result.startsWith(' ');
const hasTrailingSpace = result.endsWith(' ');
return (hasLeadingSpace ? ' ' : '')
+ result.trim()
+ (hasTrailingSpace ? ' ' : '');
}
const extractCodeFromResult = (result: string) => {
// extract the code between triple backticks
const parts = result.split(/```/);
const parts = result.split(/```(?:\s*\w+)?\n?/);
// if there is no ``` then return the raw result
if (parts.length === 1) {
@ -56,6 +105,28 @@ const trimPrefix = (prefix: string) => {
return trimmedPrefix
}
/**
 * Returns `s` truncated just before its first unbalanced closing bracket.
 *
 * Brackets that are still open at the end of `prefixToTheLeft` count as
 * available openers, so a closer in `s` that completes one of them is kept.
 * Bracket characters inside string literals or comments are not treated
 * specially.
 *
 * @param s the generated text to filter
 * @param prefixToTheLeft text preceding `s`, used to find brackets that are already open
 * @returns `s` unchanged if every closer is matched, otherwise the part of `s`
 *          before the first closer that is unmatched or of the wrong kind
 */
function getStringUpToUnbalancedParenthesis(s: string, prefixToTheLeft: string): string {
	const pairs: Record<string, string> = { ')': '(', '}': '{', ']': '[' };
	const openers = '({['

	// Seed the stack with only the brackets that remain open at the end of the
	// prefix. Pairs that open and close inside the prefix cancel out; pushing the
	// prefix's closers verbatim would leave a closer on top of the stack and make
	// a perfectly valid closer in `s` look unbalanced.
	const stack: string[] = []
	for (const char of prefixToTheLeft) {
		if (openers.includes(char)) {
			stack.push(char)
			continue
		}
		const expectedOpener = pairs[char]
		if (expectedOpener !== undefined && stack[stack.length - 1] === expectedOpener) {
			stack.pop()
		}
		// a closer in the prefix with no matching opener is ignored: we only see
		// part of the text to the left, so it says nothing about `s`
	}

	// Walk the generated text, cutting at the first closer that cannot be matched
	for (let i = 0; i < s.length; i++) {
		const char = s[i];
		if (char === '(' || char === '{' || char === '[') {
			stack.push(char)
		}
		else if (char === ')' || char === '}' || char === ']') {
			if (stack.length === 0 || stack.pop() !== pairs[char]) {
				return s.substring(0, i)
			}
		}
	}
	return s;
}
// finds the text in the autocompletion to display, assuming the prefix is already matched
// example:
// originalPrefix = abcd
@ -63,66 +134,136 @@ const trimPrefix = (prefix: string) => {
// originalSuffix = ijkl
// the user has typed "ef" so prefix = abcdef
// we want to return the rest of the generatedMiddle, which is "gh"
const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => {
const toInlineCompletion = ({ prefix, suffix, autocompletion, position }: { prefix: string, suffix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => {
const originalPrefix = autocompletion.prefix
const generatedMiddle = autocompletion.result
const trimmedOriginalPrefix = trimPrefix(originalPrefix)
const trimmedCurrentPrefix = trimPrefix(prefix)
const lastMatchupIndex = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length
const suffixLines = suffix.split('\n')
const prefixLines = trimmedCurrentPrefix.split('\n')
const suffixToTheRightOfCursor = suffixLines[0].trim()
const prefixToTheLeftOfCursor = prefixLines[prefixLines.length - 1].trim()
console.log('generatedMiddle ', generatedMiddle)
console.log('trimmedOriginalPrefix ', trimmedOriginalPrefix)
console.log('trimmedCurrentPrefix ', trimmedCurrentPrefix)
console.log('index: ', lastMatchupIndex)
if (lastMatchupIndex < 0) {
const generatedLines = generatedMiddle.split('\n')
// compute startIdx
let startIdx = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length
if (startIdx < 0) {
return new vscode.InlineCompletionItem('')
}
const completionStr = generatedMiddle.substring(lastMatchupIndex)
console.log('completionStr: ', completionStr)
// compute endIdx
// hacks to get the suffix to render properly with lower quality models
// if the generated text matches with the suffix on the current line, stop
let endIdx: number | undefined = generatedMiddle.length // exclusive bounds
return new vscode.InlineCompletionItem(
completionStr,
new vscode.Range(position, position)
)
if (suffixToTheRightOfCursor !== '') { // completing in the middle of a line
console.log('1')
// complete until there is a match
const matchIndex = generatedMiddle.lastIndexOf(suffixToTheRightOfCursor[0])
if (matchIndex > 0) { endIdx = matchIndex }
}
if (prefixToTheLeftOfCursor !== '') { // completing the end of a line
console.log('2')
// show a single line
const newlineIdx = generatedMiddle.indexOf('\n')
if (newlineIdx > -1) { endIdx = newlineIdx }
}
// // if a generated line matches with a suffix line, stop
// if (suffixLines.length > 1) {
// console.log('3')
// const lines = []
// for (const generatedLine of generatedLines) {
// if (suffixLines.slice(0, 10).some(suffixLine =>
// generatedLine.trim() !== '' && suffixLine.trim() !== ''
// && generatedLine.trim().startsWith(suffixLine.trim())
// )) break;
// lines.push(generatedLine)
// }
// endIdx = lines.join('\n').length // this is hacky, remove or refactor in future
// }
let completionStr = generatedMiddle.slice(startIdx, endIdx)
// filter out unbalanced parentheses
console.log('completionStrBeforeParens: ', JSON.stringify(completionStr))
completionStr = getStringUpToUnbalancedParenthesis(completionStr, prefixLines.slice(-2).join('\n'))
console.log('originalCompletionStr: ', JSON.stringify(generatedMiddle.slice(startIdx)))
console.log('finalCompletionStr: ', JSON.stringify(completionStr))
return new vscode.InlineCompletionItem(completionStr, new vscode.Range(position, position))
}
// returns whether we can use this autocompletion to complete the prefix
// returns whether this autocompletion is in the cache
const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: string, autocompletion: Autocompletion }): boolean => {
const originalPrefix = autocompletion.prefix
const generatedMiddle = autocompletion.result
const trimmedOriginalPrefix = trimPrefix(originalPrefix)
const trimmedCurrentPrefix = trimPrefix(prefix)
const originalPrefixTrimmed = trimPrefix(originalPrefix)
const currentPrefixTrimmed = trimPrefix(prefix)
if (trimmedCurrentPrefix.length < trimmedOriginalPrefix.length) {
if (currentPrefixTrimmed.length < originalPrefixTrimmed.length) {
return false
}
const isMatch = (trimmedOriginalPrefix + generatedMiddle).startsWith(trimmedCurrentPrefix)
const isMatch = (originalPrefixTrimmed + generatedMiddle).startsWith(currentPrefixTrimmed)
return isMatch
}
/**
 * Decides whether a completion should be requested at the cursor and which
 * stop tokens to send along with the request.
 *
 * @param prefix all document text before the cursor
 * @param suffix all document text after the cursor
 * @returns `shouldGenerate` - false when the cursor has nothing at all (not even
 *          whitespace) to its left or to its right on the current line;
 *          `stopTokens` - single-newline stops when there is non-whitespace text
 *          to the right of the cursor, blank-line stops otherwise
 */
const getCompletionOptions = ({ prefix, suffix }: { prefix: string, suffix: string }) => {
	// text on the cursor's line, to the left of the cursor
	// (lastIndexOf yields -1 when there is no newline, so this slices from 0)
	const textBeforeCursor = prefix.slice(prefix.lastIndexOf('\n') + 1)

	// text on the cursor's line, to the right of the cursor
	const firstNewlineIdx = suffix.indexOf('\n')
	const textAfterCursor = firstNewlineIdx === -1 ? suffix : suffix.slice(0, firstNewlineIdx)

	// typing between something -> stop at the end of the line; otherwise stop at a blank line
	const isTypingBetweenText = textAfterCursor.trim() !== ''
	const stopTokens: string[] = isTypingBetweenText
		? ['\n', '\r\n']
		: ['\n\n', '\r\n\r\n']

	// do not generate at the very beginning or very end of a line
	const shouldGenerate = textBeforeCursor !== '' && textAfterCursor !== ''

	console.log('shouldGenerate:', shouldGenerate, stopTokens)

	return { shouldGenerate, stopTokens }
}
export class AutocompleteProvider implements vscode.InlineCompletionItemProvider {
private _extensionContext: vscode.ExtensionContext;
private _autocompletionsOfDocument: { [docUriStr: string]: SimpleLRUCache<Autocompletion> } = {}
private _autocompletionId: number = 0;
private _autocompletionsOfDocument: { [docUriStr: string]: LRUCache<number, Autocompletion> } = {}
private _lastTime = 0
private _lastCompletionTime = 0
private _lastPrefix: string = ''
constructor(context: vscode.ExtensionContext) {
this._extensionContext = context
}
// used internally by vscode
// fires after every keystroke
// fires after every keystroke and returns the completion to show
async provideInlineCompletionItems(
document: vscode.TextDocument,
position: vscode.Position,
@ -130,29 +271,42 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
token: vscode.CancellationToken,
): Promise<vscode.InlineCompletionItem[]> {
const disabled = true
const disabled = false
if (disabled) { return []; }
const docUriStr = document.uri.toString()
const fullText = document.getText();
const cursorOffset = document.offsetAt(position);
const prefix = fullText.substring(0, cursorOffset)
const suffix = fullText.substring(cursorOffset)
if (!this._autocompletionsOfDocument[docUriStr]) {
this._autocompletionsOfDocument[docUriStr] = new SimpleLRUCache()
}
const voidConfig = getVoidConfigFromPartial(this._extensionContext.globalState.get('partialVoidConfig') ?? {})
// initialize cache and other variables
// note that whenever an autocompletion is rejected, it is removed from cache
if (!this._autocompletionsOfDocument[docUriStr]) {
this._autocompletionsOfDocument[docUriStr] = new LRUCache<number, Autocompletion>({
max: MAX_CACHE_SIZE,
dispose: (autocompletion) => {
autocompletion.abortRef.current()
}
})
}
this._lastPrefix = prefix
// get all pending autocompletions
let __c = 0
this._autocompletionsOfDocument[docUriStr].forEach(a => { if (a.status === 'pending') __c += 1 })
console.log('pending: ' + __c)
// get autocompletion from cache
let cachedAutocompletion: Autocompletion | undefined = undefined
loop: for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
// if the user's change matches up with the generated text
if (doesPrefixMatchAutocompletion({ prefix, autocompletion })) {
cachedAutocompletion = autocompletion
break loop;
break
}
}
@ -160,39 +314,39 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
if (cachedAutocompletion) {
if (cachedAutocompletion.status === 'finished') {
console.log('AAA1')
console.log('A1')
const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position })
const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position })
return [inlineCompletion]
} else if (cachedAutocompletion.status === 'pending') {
console.log('AAA2')
console.log('A2')
try {
await cachedAutocompletion.promise;
const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position })
await cachedAutocompletion.llmPromise;
console.log('id: ' + cachedAutocompletion.id)
const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position })
return [inlineCompletion]
} catch (e) {
this._autocompletionsOfDocument[docUriStr].delete(cachedAutocompletion.id)
console.error('Error creating autocompletion (1): ' + e)
}
} else if (cachedAutocompletion.status === 'error') {
console.log('AAA3')
console.log('A3')
}
return []
}
// if there is no cached autocompletion, create it and add it to cache
// else if no more typing happens, then go forwards with the request
// wait DEBOUNCE_TIME for the user to stop typing
const thisTime = Date.now()
this._lastTime = thisTime
this._lastCompletionTime = thisTime
const didTypingHappenDuringDebounce = await new Promise((resolve, reject) =>
setTimeout(() => {
if (this._lastTime === thisTime) {
if (this._lastCompletionTime === thisTime) {
resolve(false)
} else {
resolve(true)
@ -205,29 +359,57 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
return []
}
console.log('BBB')
console.log('B')
// else if no more typing happens, then go forwards with the request
// if there are too many pending requests, cancel the oldest one
let numPending = 0
let oldestPending: Autocompletion | undefined = undefined
for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
if (autocompletion.status === 'pending') {
numPending += 1
if (oldestPending === undefined) {
oldestPending = autocompletion
}
if (numPending >= MAX_PENDING_REQUESTS) {
// cancel the oldest pending request and remove it from cache
this._autocompletionsOfDocument[docUriStr].delete(oldestPending.id)
break
}
}
}
const { shouldGenerate, stopTokens } = getCompletionOptions({ prefix, suffix })
if (!shouldGenerate) return []
// create a new autocompletion and add it to cache
const newAutocompletion: Autocompletion = {
id: this._autocompletionId++,
prefix: prefix,
suffix: suffix,
startTime: Date.now(),
endTime: undefined,
abortRef: { current: () => { } },
status: 'pending',
promise: undefined,
llmPromise: undefined,
result: '',
}
// set parameters of `newAutocompletion` appropriately
newAutocompletion.promise = new Promise((resolve, reject) => {
newAutocompletion.llmPromise = new Promise((resolve, reject) => {
sendLLMMessage({
mode: 'fim',
fimInfo: { prefix, suffix },
options: { stopTokens },
onText: async (tokenStr, completionStr) => {
// TODO filter out bad responses here
newAutocompletion.result = completionStr
// if generation doesn't match the prefix for the first few tokens generated, reject it
if (!doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) {
reject('LLM response did not match user\'s text.')
}
},
onFinalMessage: (finalMessage) => {
@ -252,24 +434,29 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
abortRef: newAutocompletion.abortRef,
})
setTimeout(() => { // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it
// if the request hasn't resolved within TIMEOUT_TIME milliseconds, reject it
setTimeout(() => {
if (newAutocompletion.status === 'pending') {
reject('Timeout')
reject('Timeout receiving message to LLM.')
}
}, TIMEOUT_TIME)
})
// add autocompletion to cache
this._autocompletionsOfDocument[docUriStr].push(newAutocompletion)
this._autocompletionsOfDocument[docUriStr].set(newAutocompletion.id, newAutocompletion)
// show autocompletion
try {
await newAutocompletion.promise;
await newAutocompletion.llmPromise
console.log('id: ' + newAutocompletion.id)
const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, position })
const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, suffix, position })
return [inlineCompletion]
} catch (e) {
this._autocompletionsOfDocument[docUriStr].delete(newAutocompletion.id)
console.error('Error creating autocompletion (2): ' + e)
return []
}
@ -277,6 +464,4 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
}
}

View file

@ -417,7 +417,30 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
// // 6. Autocomplete
// const autocompleteProvider = new AutocompleteProvider(context);
// context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider('*', autocompleteProvider));
// const voidConfig = getVoidConfigFromPartial(context.globalState.get('partialVoidConfig') ?? {})
// // setupAutocomplete({ voidConfig, abortRef })
// // 7. Language Server
// console.log('run lsp')
// let disposable = vscode.commands.registerCommand('typeInspector.inspect', runTreeSitter);
// context.subscriptions.push(disposable);
// import { configFields, VoidConfig } from "../webviews/common/contextForConfig"
// import { FimInfo } from "./sendLLMMessage"
// type GetFIMPrompt = ({ voidConfig, fimInfo }: { voidConfig: VoidConfig, fimInfo: FimInfo, }) => string
@ -440,9 +463,10 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
// Instruction summary:
// 1. Return the MIDDLE of the code between the START and END.
// 2. Do not give an explanation, description, or any other code besides the middle.
// 2. Do not return duplicate code from either START or END.
// 3. Make sure the MIDDLE piece of code has balanced brackets that match the START and END.
// 4. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line.
// 3. Do not return duplicate code from either START or END.
// 4. Make sure the MIDDLE piece of code has balanced brackets that match the START and END.
// 5. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line.
// 6. Around 90% of the time, you should return just one or a few lines of code. You should keep your outputs short unless you are confident the user is trying to write boilerplate code.
// # EXAMPLE
@ -490,14 +514,23 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
// export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => {
// // if no prefix or suffix, return empty string
// if (!fimInfo.prefix.trim() && !fimInfo.suffix.trim()) return ''
// const { prefix: fullPrefix, suffix: fullSuffix } = fimInfo
// const prefix = fullPrefix.split('\n').slice(-20).join('\n')
// const suffix = fullSuffix.split('\n').slice(0, 20).join('\n')
// console.log('prefix', JSON.stringify(prefix))
// console.log('suffix', JSON.stringify(suffix))
// if (!prefix.trim() && !suffix.trim()) return ''
// // TODO may want to trim the prefix and suffix
// switch (voidConfig.default.whichApi) {
// case 'ollama':
// if (voidConfig.ollama.model === 'codestral') {
// return `[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}`
// return `[SUFFIX]${suffix}[PREFIX] ${prefix}`
// } else if (voidConfig.ollama.model.includes('qwen')) {
// return `<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>`
// }
// return ''
// case 'anthropic':
@ -510,14 +543,473 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
// default:
// return `## START:
// \`\`\`
// ${fimInfo.prefix}
// ${prefix}
// \`\`\`
// ## END:
// \`\`\`
// ${fimInfo.suffix}
// ${suffix}
// \`\`\`
// `
// }
// }
// Mathew - sendLLMMessage
// import Anthropic from '@anthropic-ai/sdk';
// import OpenAI from 'openai';
// import { Ollama } from 'ollama/browser'
// import { Content, GoogleGenerativeAI, GoogleGenerativeAIError, GoogleGenerativeAIFetchError } from '@google/generative-ai';
// import { VoidConfig } from '../webviews/common/contextForConfig'
// import { getFIMPrompt, getFIMSystem } from './getPrompt';
// export type AbortRef = { current: (() => void) }
// export type OnText = (newText: string, fullText: string) => void
// export type OnFinalMessage = (input: string) => void
// export type LLMMessageAnthropic = {
// role: 'user' | 'assistant',
// content: string,
// }
// export type LLMMessage = {
// role: 'system' | 'user' | 'assistant',
// content: string,
// }
// type LLMMessageOptions = { stopTokens?: string[] }
// type SendLLMMessageFnTypeInternal = (params: {
// mode: 'chat' | 'fim',
// messages: LLMMessage[],
// options?: LLMMessageOptions,
// onText: OnText,
// onFinalMessage: OnFinalMessage,
// onError: (error: string) => void,
// abortRef: AbortRef,
// voidConfig: VoidConfig,
// }) => void
// type SendLLMMessageFnTypeExternal = (params: (
// | { mode?: 'chat', messages: LLMMessage[], fimInfo?: undefined, }
// | { mode: 'fim', messages?: undefined, fimInfo: FimInfo, }
// ) & {
// options?: LLMMessageOptions,
// onText: OnText,
// onFinalMessage: OnFinalMessage,
// onError: (error: string) => void,
// abortRef: AbortRef,
// voidConfig: VoidConfig | null, // these may be absent
// }) => void
// export type FimInfo = {
// prefix: string,
// suffix: string,
// }
// const parseMaxTokensStr = (maxTokensStr: string) => {
// // parse the string but only if the full string is a valid number, eg parseInt('100abc') should return NaN
// let int = isNaN(Number(maxTokensStr)) ? undefined : parseInt(maxTokensStr)
// if (Number.isNaN(int))
// return undefined
// return int
// }
// // Anthropic
// const sendAnthropicMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig }) => {
// const anthropic = new Anthropic({ apiKey: voidConfig.anthropic.apikey, dangerouslyAllowBrowser: true }); // defaults to process.env["ANTHROPIC_API_KEY"]
// // find system messages and concatenate them
// const systemMessage = messages
// .filter(msg => msg.role === 'system')
// .map(msg => msg.content)
// .join('\n');
// // remove system messages for Anthropic
// const anthropicMessages = messages.filter(msg => msg.role !== 'system') as LLMMessageAnthropic[]
// const stream = anthropic.messages.stream({
// system: systemMessage,
// messages: anthropicMessages,
// model: voidConfig.anthropic.model,
// max_tokens: parseMaxTokensStr(voidConfig.default.maxTokens)!, // this might be undefined, but it will just throw an error for the user
// });
// let did_abort = false
// // when receive text
// stream.on('text', (newText, fullText) => {
// if (did_abort) return
// onText(newText, fullText)
// })
// // when we get the final message on this stream (or when error/fail)
// stream.on('finalMessage', (claude_response) => {
// if (did_abort) return
// // stringify the response's content
// let content = claude_response.content.map(c => { if (c.type === 'text') { return c.text } }).join('\n');
// onFinalMessage(content)
// })
// stream.on('error', (error) => {
// // the most common error will be invalid API key (401), so we handle this with a nice message
// if (error instanceof Anthropic.APIError && error.status === 401) {
// onError('Invalid API key.')
// }
// else {
// onError(error.message)
// }
// })
// // if abort is called, onFinalMessage is NOT called, and no later onTexts are called either
// const abort = () => {
// did_abort = true
// stream.controller.abort() // TODO need to test this to make sure it works, it might throw an error
// }
// return { abort }
// };
// // Gemini
// const sendGeminiMsg: SendLLMMessageFnTypeInternal = async ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
// let didAbort = false
// let fullText = ''
// abortRef.current = () => {
// didAbort = true
// }
// const genAI = new GoogleGenerativeAI(voidConfig.gemini.apikey);
// const model = genAI.getGenerativeModel({ model: voidConfig.gemini.model });
// // remove system messages that get sent to Gemini
// // str of all system messages
// let systemMessage = messages
// .filter(msg => msg.role === 'system')
// .map(msg => msg.content)
// .join('\n');
// // Convert messages to Gemini format
// const geminiMessages: Content[] = messages
// .filter(msg => msg.role !== 'system')
// .map((msg, i) => ({
// parts: [{ text: msg.content }],
// role: msg.role === 'assistant' ? 'model' : 'user'
// }))
// model.generateContentStream({ contents: geminiMessages, systemInstruction: systemMessage, })
// .then(async response => {
// abortRef.current = () => {
// // response.stream.return(fullText)
// didAbort = true;
// }
// for await (const chunk of response.stream) {
// if (didAbort) return;
// const newText = chunk.text();
// fullText += newText;
// onText(newText, fullText);
// }
// onFinalMessage(fullText);
// })
// .catch((error) => {
// if (error instanceof GoogleGenerativeAIFetchError) {
// if (error.status === 400) {
// onError('Invalid API key.');
// }
// else {
// onError(`${error.name}:\n${error.message}`);
// }
// }
// else {
// onError(error);
// }
// })
// }
// // OpenAI, OpenRouter, OpenAICompatible
// const sendOpenAIMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
// let didAbort = false
// let fullText = ''
// // if abort is called, onFinalMessage is NOT called, and no later onTexts are called either
// abortRef.current = () => {
// didAbort = true;
// };
// let openai: OpenAI
// let options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming
// let maxTokens = parseMaxTokensStr(voidConfig.default.maxTokens)
// if (voidConfig.default.whichApi === 'openAI') {
// openai = new OpenAI({ apiKey: voidConfig.openAI.apikey, dangerouslyAllowBrowser: true });
// options = { model: voidConfig.openAI.model, messages: messages, stream: true, max_completion_tokens: maxTokens }
// }
// else if (voidConfig.default.whichApi === 'openRouter') {
// openai = new OpenAI({
// baseURL: "https://openrouter.ai/api/v1", apiKey: voidConfig.openRouter.apikey, dangerouslyAllowBrowser: true,
// defaultHeaders: {
// "HTTP-Referer": 'https://voideditor.com', // Optional, for including your app on openrouter.ai rankings.
// "X-Title": 'Void Editor', // Optional. Shows in rankings on openrouter.ai.
// },
// });
// options = { model: voidConfig.openRouter.model, messages: messages, stream: true, max_completion_tokens: maxTokens }
// }
// else if (voidConfig.default.whichApi === 'openAICompatible') {
// openai = new OpenAI({ baseURL: voidConfig.openAICompatible.endpoint, apiKey: voidConfig.openAICompatible.apikey, dangerouslyAllowBrowser: true })
// options = { model: voidConfig.openAICompatible.model, messages: messages, stream: true, max_completion_tokens: maxTokens }
// }
// else {
// console.error(`sendOpenAIMsg: invalid whichApi: ${voidConfig.default.whichApi}`)
// throw new Error(`voidConfig.whichAPI was invalid: ${voidConfig.default.whichApi}`)
// }
// openai.chat.completions
// .create(options)
// .then(async response => {
// abortRef.current = () => {
// // response.controller.abort()
// didAbort = true;
// }
// // when receive text
// for await (const chunk of response) {
// if (didAbort) return;
// const newText = chunk.choices[0]?.delta?.content || '';
// fullText += newText;
// onText(newText, fullText);
// }
// onFinalMessage(fullText);
// })
// // when error/fail - this catches errors of both .create() and .then(for await)
// .catch(error => {
// if (error instanceof OpenAI.APIError) {
// if (error.status === 401) {
// onError('Invalid API key.');
// }
// else {
// onError(`${error.name}:\n${error.message}`);
// }
// }
// else {
// onError(error);
// }
// })
// };
// // Ollama
// export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
// let didAbort = false
// let fullText = ""
// const ollama = new Ollama({ host: voidConfig.ollama.endpoint })
// abortRef.current = () => {
// didAbort = true;
// };
// type GenerateResponse = Awaited<ReturnType<(typeof ollama.generate)>>
// type ChatResponse = Awaited<ReturnType<(typeof ollama.chat)>>
// // First check if model exists
// ollama.list()
// .then(async models => {
// const installedModels = models.models.map(m => m.name.replace(/:latest$/, ''))
// const modelExists = installedModels.some(m => m.startsWith(voidConfig.ollama.model));
// if (!modelExists) {
// const errorMessage = `The model "${voidConfig.ollama.model}" is not available locally. Please run 'ollama pull ${voidConfig.ollama.model}' to download it first or
// try selecting one from the Installed models: ${installedModels.join(', ')}`;
// onText(errorMessage, errorMessage);
// onFinalMessage(errorMessage);
// return Promise.reject();
// }
// if (mode === 'fim') {
// // the fim prompt is the last message
// let prompt = messages[messages.length - 1].content
// return ollama.generate({
// model: voidConfig.ollama.model,
// prompt: prompt,
// stream: true,
// raw: true,
// options: { stop: options?.stopTokens }
// })
// }
// return ollama.chat({
// model: voidConfig.ollama.model,
// messages: messages,
// stream: true,
// options: { num_predict: parseMaxTokensStr(voidConfig.default.maxTokens) }
// });
// })
// .then(async stream => {
// if (!stream) return;
// abortRef.current = () => {
// didAbort = true
// stream.abort()
// }
// for await (const chunk of stream) {
// if (didAbort) return;
// const newText = (mode === 'fim'
// ? (chunk as GenerateResponse).response
// : (chunk as ChatResponse).message.content
// )
// fullText += newText;
// onText(newText, fullText);
// }
// onFinalMessage(fullText);
// })
// .catch(error => {
// // Check if the error is a connection error
// if (error instanceof Error && error.message.includes('Failed to fetch')) {
// const errorMessage = 'Ollama service is not running. Please start the Ollama service and try again.';
// onText(errorMessage, errorMessage);
// onFinalMessage(errorMessage);
// } else if (error) {
// onError(error);
// }
// });
// };
// // Greptile
// // https://docs.greptile.com/api-reference/query
// // https://docs.greptile.com/quickstart#sample-response-streamed
// const sendGreptileMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
// let didAbort = false
// let fullText = ''
// // if abort is called, onFinalMessage is NOT called, and no later onTexts are called either
// abortRef.current = () => {
// didAbort = true
// }
// fetch('https://api.greptile.com/v2/query', {
// method: 'POST',
// headers: {
// "Authorization": `Bearer ${voidConfig.greptile.apikey}`,
// "X-Github-Token": `${voidConfig.greptile.githubPAT}`,
// "Content-Type": `application/json`,
// },
// body: JSON.stringify({
// messages,
// stream: true,
// repositories: [voidConfig.greptile.repoinfo],
// }),
// })
// // this is {message}\n{message}\n{message}...\n
// .then(async response => {
// const text = await response.text()
// console.log('got greptile', text)
// return JSON.parse(`[${text.trim().split('\n').join(',')}]`)
// })
// // TODO make this actually stream, right now it just sends one message at the end
// .then(async responseArr => {
// if (didAbort)
// return
// for (let response of responseArr) {
// const type: string = response['type']
// const message = response['message']
// // when receive text
// if (type === 'message') {
// fullText += message
// onText(message, fullText)
// }
// else if (type === 'sources') {
// const { filepath, linestart, lineend } = message as { filepath: string, linestart: number | null, lineend: number | null }
// fullText += filepath
// onText(filepath, fullText)
// }
// // an entry with type 'status' and an empty 'message' marks the last message
// else if (type === 'status') {
// if (!message) {
// onFinalMessage(fullText)
// }
// }
// }
// })
// .catch(e => {
// onError(e)
// });
// }
// export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ options, mode, messages, fimInfo, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
// if (!voidConfig)
// return onError('No config file found for LLM.');
// // handle defaults
// if (!mode) mode = 'chat'
// if (!messages) messages = []
// // build messages
// if (mode === 'chat') {
// // nothing needed
// } else if (mode === 'fim') {
// fimInfo = fimInfo!
// const system = getFIMSystem({ voidConfig, fimInfo })
// const prompt = getFIMPrompt({ voidConfig, fimInfo })
// messages = ([
// { role: 'system', content: system },
// { role: 'user', content: prompt }
// ] as const)
// }
// // trim message content (Anthropic and other providers give an error if there is trailing whitespace)
// messages = messages.map(m => ({ ...m, content: m.content.trim() }))
// .filter(m => m.content !== '')
// if (messages.length === 0)
// return onError('No messages provided to LLM.');
// switch (voidConfig.default.whichApi) {
// case 'anthropic':
// return sendAnthropicMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
// case 'openAI':
// case 'openRouter':
// case 'openAICompatible':
// return sendOpenAIMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
// case 'gemini':
// return sendGeminiMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
// case 'ollama':
// return sendOllamaMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
// case 'greptile':
// return sendGreptileMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
// default:
// onError(`Error: whichApi was ${voidConfig.default.whichApi}, which is not recognized!`)
// }
// }

View file

@ -130,7 +130,7 @@ const voidConfigInfo: Record<
model: configEnum(
'Ollama model to use.',
'codestral',
['codestral', 'codegemma', 'codegemma:2b', 'codegemma:7b', 'codellama', 'codellama:7b', 'codellama:13b', 'codellama:34b', 'codellama:70b', 'codellama:code', 'codellama:python', 'command-r', 'command-r:35b', 'command-r-plus', 'command-r-plus:104b', 'deepseek-coder-v2', 'deepseek-coder-v2:16b', 'deepseek-coder-v2:236b', 'falcon2', 'falcon2:11b', 'firefunction-v2', 'firefunction-v2:70b', 'gemma', 'gemma:2b', 'gemma:7b', 'gemma2', 'gemma2:2b', 'gemma2:9b', 'gemma2:27b', 'llama2', 'llama2:7b', 'llama2:13b', 'llama2:70b', 'llama3', 'llama3:8b', 'llama3:70b', 'llama3-chatqa', 'llama3-chatqa:8b', 'llama3-chatqa:70b', 'llama3-gradient', 'llama3-gradient:8b', 'llama3-gradient:70b', 'llama3.1', 'llama3.1:8b', 'llama3.1:70b', 'llama3.1:405b', 'llava', 'llava:7b', 'llava:13b', 'llava:34b', 'llava-llama3', 'llava-llama3:8b', 'llava-phi3', 'llava-phi3:3.8b', 'mistral', 'mistral:7b', 'mistral-large', 'mistral-large:123b', 'mistral-nemo', 'mistral-nemo:12b', 'mixtral', 'mixtral:8x7b', 'mixtral:8x22b', 'moondream', 'moondream:1.8b', 'openhermes', 'openhermes:v2.5', 'phi3', 'phi3:3.8b', 'phi3:14b', 'phi3.5', 'phi3.5:3.8b', 'qwen', 'qwen:7b', 'qwen:14b', 'qwen:32b', 'qwen:72b', 'qwen:110b', 'qwen2', 'qwen2:0.5b', 'qwen2:1.5b', 'qwen2:7b', 'qwen2:72b', 'smollm', 'smollm:135m', 'smollm:360m', 'smollm:1.7b'] as const
['codestral', 'qwen2.5-coder', 'qwen2.5-coder:0.5b', 'qwen2.5-coder:1.5b', 'qwen2.5-coder:3b', 'qwen2.5-coder:7b', 'qwen2.5-coder:14b', 'qwen2.5-coder:32b', 'codegemma', 'codegemma:2b', 'codegemma:7b', 'codellama', 'codellama:7b', 'codellama:13b', 'codellama:34b', 'codellama:70b', 'codellama:code', 'codellama:python', 'command-r', 'command-r:35b', 'command-r-plus', 'command-r-plus:104b', 'deepseek-coder-v2', 'deepseek-coder-v2:16b', 'deepseek-coder-v2:236b', 'falcon2', 'falcon2:11b', 'firefunction-v2', 'firefunction-v2:70b', 'gemma', 'gemma:2b', 'gemma:7b', 'gemma2', 'gemma2:2b', 'gemma2:9b', 'gemma2:27b', 'llama2', 'llama2:7b', 'llama2:13b', 'llama2:70b', 'llama3', 'llama3:8b', 'llama3:70b', 'llama3-chatqa', 'llama3-chatqa:8b', 'llama3-chatqa:70b', 'llama3-gradient', 'llama3-gradient:8b', 'llama3-gradient:70b', 'llama3.1', 'llama3.1:8b', 'llama3.1:70b', 'llama3.1:405b', 'llava', 'llava:7b', 'llava:13b', 'llava:34b', 'llava-llama3', 'llava-llama3:8b', 'llava-phi3', 'llava-phi3:3.8b', 'mistral', 'mistral:7b', 'mistral-large', 'mistral-large:123b', 'mistral-nemo', 'mistral-nemo:12b', 'mixtral', 'mixtral:8x7b', 'mixtral:8x22b', 'moondream', 'moondream:1.8b', 'openhermes', 'openhermes:v2.5', 'phi3', 'phi3:3.8b', 'phi3:14b', 'phi3.5', 'phi3.5:3.8b', 'qwen', 'qwen:7b', 'qwen:14b', 'qwen:32b', 'qwen:72b', 'qwen:110b', 'qwen2', 'qwen2:0.5b', 'qwen2:1.5b', 'qwen2:7b', 'qwen2:72b', 'smollm', 'smollm:135m', 'smollm:360m', 'smollm:1.7b'] as const
),
},
openRouter: {

View file

@ -1027,22 +1027,3 @@ class AcceptRejectWidget extends Widget implements IOverlayWidget {
// // 6. Autocomplete
// const autocompleteProvider = new AutocompleteProvider(context);
// context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider('*', autocompleteProvider));
// const voidConfig = getVoidConfigFromPartial(context.globalState.get('partialVoidConfig') ?? {})
// const abortRef: AbortRef = { current: null }
// // setupAutocomplete({ voidConfig, abortRef })
// // 7. Language Server
// console.log('run lsp')
// let disposable = vscode.commands.registerCommand('typeInspector.inspect', runTreeSitter);
// context.subscriptions.push(disposable);