diff --git a/extensions/void/src/common/SimpleLruCache.ts b/extensions/void/src/common/SimpleLruCache.ts deleted file mode 100644 index 7118bc8f..00000000 --- a/extensions/void/src/common/SimpleLruCache.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { LRUCache } from 'lru-cache'; - -const DEFAULT_MAX_SIZE = 20 - - -export class SimpleLRUCache { - private cache: LRUCache; - private maxSize: number - public length: number - - constructor(maxSize?: number) { - - maxSize = maxSize ?? DEFAULT_MAX_SIZE - - this.cache = new LRUCache({ max: maxSize }); - this.length = 0 - this.maxSize = maxSize - } - - push(value: T): void { - const key = this.cache.size; - this.cache.set(key, value); - this.length++ - this.length = Math.min(this.length, this.maxSize) - } - - values() { - return this.cache.values() - } - - -} \ No newline at end of file diff --git a/extensions/void/src/extension/AutcompleteProvider.ts b/extensions/void/src/extension/AutcompleteProvider.ts index 2aad378a..dd4b044e 100644 --- a/extensions/void/src/extension/AutcompleteProvider.ts +++ b/extensions/void/src/extension/AutcompleteProvider.ts @@ -2,35 +2,84 @@ import * as vscode from 'vscode'; import { AbortRef, LLMMessage, sendLLMMessage } from '../common/sendLLMMessage'; import { getVoidConfigFromPartial, VoidConfig } from '../webviews/common/contextForConfig'; import { LRUCache } from 'lru-cache'; -import { SimpleLRUCache } from '../common/SimpleLruCache'; + + +/* +A summary of autotab: + +Postprocessing +-one common problem for all models is outputting unbalanced parentheses +we solve this by trimming all extra closing parentheses from the generated string +in future, should make sure parentheses are always balanced + +-another problem is completing the middle of a string, eg. "const [x, CURSOR] = useState()" +we complete up to first matchup character +but should instead complete the whole line / block (difficult because of parenthesis accuracy) + +-too much info is bad. usually we want to show the user 1 line, and have a preloaded response afterwards +this should happen automatically with caching system +should break preloaded responses into \n\n chunks + +Preprocessing +- we don't generate if cursor is at end / beginning of a line (no spaces) +- we generate 1 line if there is text to the right of cursor +- we generate 1 line if variable declaration +- (in many cases want to show 1 line but generate multiple) + +State +- cache based on prefix (and do some trimming first) +- when press tab on one line, should have an immediate followup response +to do this, show autocompletes before they're fully finished +- [todo] remove each autotab when accepted +- [todo] treat windows \r\n separately from \n +!- [todo] provide type information + +Details +-generated results are trimmed up to 1 leading/trailing space +-prefixes are cached up to 1 trailing newline +- +*/ + + + + type AutocompletionStatus = 'pending' | 'finished' | 'error'; type Autocompletion = { + id: number, prefix: string, suffix: string, startTime: number, endTime: number | undefined, abortRef: AbortRef, status: AutocompletionStatus, - promise: Promise | undefined, + llmPromise: Promise | undefined, result: string, } -const DEBOUNCE_TIME = 300 +const DEBOUNCE_TIME = 500 const TIMEOUT_TIME = 60000 +const MAX_CACHE_SIZE = 20 +const MAX_PENDING_REQUESTS = 2 // postprocesses the result const postprocessResult = (result: string) => { - // remove leading whitespace from result - return result.trimStart() + console.log('result: ', JSON.stringify(result)) + + // trim all whitespace except for a single leading/trailing space + const hasLeadingSpace = result.startsWith(' '); + const hasTrailingSpace = result.endsWith(' '); + return (hasLeadingSpace ? ' ' : '') + + result.trim() + + (hasTrailingSpace ? ' ' : ''); } const extractCodeFromResult = (result: string) => { // extract the code between triple backticks - const parts = result.split(/```/); + const parts = result.split(/```(?:\s*\w+)?\n?/); // if there is no ``` then return the raw result if (parts.length === 1) { @@ -56,6 +105,28 @@ const trimPrefix = (prefix: string) => { return trimmedPrefix } +function getStringUpToUnbalancedParenthesis(s: string, prefixToTheLeft: string): string { + + const pairs: Record = { ')': '(', '}': '{', ']': '[' }; + + // todo find first open bracket in prefix and get all brackets beyond it in prefix + // get all bracets in prefix + let stack: string[] = [] + const firstOpenIdx = prefixToTheLeft.search(/[[({]/); + if (firstOpenIdx !== -1) stack = prefixToTheLeft.slice(firstOpenIdx).split('').filter(c => '()[]{}'.includes(c)) + + // Iterate through each character + for (let i = 0; i < s.length; i++) { + const char = s[i]; + + if (char === '(' || char === '{' || char === '[') { stack.push(char); } + else if (char === ')' || char === '}' || char === ']') { + if (stack.length === 0 || stack.pop() !== pairs[char]) { return s.substring(0, i); } + } + } + return s; +} + // finds the text in the autocompletion to display, assuming the prefix is already matched // example: // originalPrefix = abcd @@ -63,66 +134,136 @@ const trimPrefix = (prefix: string) => { // originalSuffix = ijkl // the user has typed "ef" so prefix = abcdef // we want to return the rest of the generatedMiddle, which is "gh" -const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => { +const toInlineCompletion = ({ prefix, suffix, autocompletion, position }: { prefix: string, suffix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => { const originalPrefix = autocompletion.prefix const generatedMiddle = autocompletion.result const trimmedOriginalPrefix = trimPrefix(originalPrefix) const trimmedCurrentPrefix = trimPrefix(prefix) - const lastMatchupIndex = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length + const suffixLines = suffix.split('\n') + const prefixLines = trimmedCurrentPrefix.split('\n') + const suffixToTheRightOfCursor = suffixLines[0].trim() + const prefixToTheLeftOfCursor = prefixLines[prefixLines.length - 1].trim() - console.log('generatedMiddle ', generatedMiddle) - console.log('trimmedOriginalPrefix ', trimmedOriginalPrefix) - console.log('trimmedCurrentPrefix ', trimmedCurrentPrefix) - console.log('index: ', lastMatchupIndex) - if (lastMatchupIndex < 0) { + const generatedLines = generatedMiddle.split('\n') + + // compute startIdx + let startIdx = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length + if (startIdx < 0) { return new vscode.InlineCompletionItem('') } - const completionStr = generatedMiddle.substring(lastMatchupIndex) - console.log('completionStr: ', completionStr) + // compute endIdx + // hacks to get the suffix to render properly with lower quality models + // if the generated text matches with the suffix on the current line, stop + let endIdx: number | undefined = generatedMiddle.length // exclusive bounds - return new vscode.InlineCompletionItem( - completionStr, - new vscode.Range(position, position) - ) + if (suffixToTheRightOfCursor !== '') { // completing in the middle of a line + console.log('1') + // complete until there is a match + const matchIndex = generatedMiddle.lastIndexOf(suffixToTheRightOfCursor[0]) + if (matchIndex > 0) { endIdx = matchIndex } + } + + if (prefixToTheLeftOfCursor !== '') { // completing the end of a line + console.log('2') + // show a single line + const newlineIdx = generatedMiddle.indexOf('\n') + if (newlineIdx > -1) { endIdx = newlineIdx } + } + + // // if a generated line matches with a suffix line, stop + // if (suffixLines.length > 1) { + // console.log('3') + // const lines = [] + // for (const generatedLine of generatedLines) { + // if (suffixLines.slice(0, 10).some(suffixLine => + // generatedLine.trim() !== '' && suffixLine.trim() !== '' + // && generatedLine.trim().startsWith(suffixLine.trim()) + // )) break; + // lines.push(generatedLine) + // } + // endIdx = lines.join('\n').length // this is hacky, remove or refactor in future + // } + + let completionStr = generatedMiddle.slice(startIdx, endIdx) + + // filter out unbalanced parentheses + console.log('completionStrBeforeParens: ', JSON.stringify(completionStr)) + completionStr = getStringUpToUnbalancedParenthesis(completionStr, prefixLines.slice(-2).join('\n')) + + console.log('originalCompletionStr: ', JSON.stringify(generatedMiddle.slice(startIdx))) + console.log('finalCompletionStr: ', JSON.stringify(completionStr)) + + return new vscode.InlineCompletionItem(completionStr, new vscode.Range(position, position)) } -// returns whether we can use this autocompletion to complete the prefix +// returns whether this autocompletion is in the cache const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: string, autocompletion: Autocompletion }): boolean => { const originalPrefix = autocompletion.prefix const generatedMiddle = autocompletion.result - const trimmedOriginalPrefix = trimPrefix(originalPrefix) - const trimmedCurrentPrefix = trimPrefix(prefix) + const originalPrefixTrimmed = trimPrefix(originalPrefix) + const currentPrefixTrimmed = trimPrefix(prefix) - if (trimmedCurrentPrefix.length < trimmedOriginalPrefix.length) { + if (currentPrefixTrimmed.length < originalPrefixTrimmed.length) { return false } - const isMatch = (trimmedOriginalPrefix + generatedMiddle).startsWith(trimmedCurrentPrefix) + const isMatch = (originalPrefixTrimmed + generatedMiddle).startsWith(currentPrefixTrimmed) return isMatch } +const getCompletionOptions = ({ prefix, suffix }: { prefix: string, suffix: string }) => { + const prefixLines = prefix.split('\n') + const suffixLines = suffix.split('\n') + + const prefixToLeftOfCursor = prefixLines.slice(-1)[0] ?? '' + const suffixToRightOfCursor = suffixLines[0] + + // default parameters + let shouldGenerate = true + let stopTokens: string[] = ['\n\n', '\r\n\r\n'] + + // specific cases + if (suffixToRightOfCursor.trim() !== '') { // typing between something + stopTokens = ['\n', '\r\n'] + } + + // if (prefixToLeftOfCursor.trim() === '' && suffixToRightOfCursor.trim() === '') { // at an empty line + // stopTokens = ['\n\n', '\r\n\r\n'] + // } + + if (prefixToLeftOfCursor === '' || suffixToRightOfCursor === '') { // at beginning or end of line + shouldGenerate = false + } + + console.log('shouldGenerate:', shouldGenerate, stopTokens) + + return { shouldGenerate, stopTokens } + +} export class AutocompleteProvider implements vscode.InlineCompletionItemProvider { private _extensionContext: vscode.ExtensionContext; - private _autocompletionsOfDocument: { [docUriStr: string]: SimpleLRUCache } = {} + private _autocompletionId: number = 0; + private _autocompletionsOfDocument: { [docUriStr: string]: LRUCache } = {} - private _lastTime = 0 + private _lastCompletionTime = 0 + private _lastPrefix: string = '' constructor(context: vscode.ExtensionContext) { this._extensionContext = context } // used internally by vscode - // fires after every keystroke + // fires after every keystroke and returns the completion to show async provideInlineCompletionItems( document: vscode.TextDocument, position: vscode.Position, @@ -130,29 +271,42 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider token: vscode.CancellationToken, ): Promise { - const disabled = true + const disabled = false if (disabled) { return []; } const docUriStr = document.uri.toString() + const fullText = document.getText(); const cursorOffset = document.offsetAt(position); const prefix = fullText.substring(0, cursorOffset) const suffix = fullText.substring(cursorOffset) - - if (!this._autocompletionsOfDocument[docUriStr]) { - this._autocompletionsOfDocument[docUriStr] = new SimpleLRUCache() - } - const voidConfig = getVoidConfigFromPartial(this._extensionContext.globalState.get('partialVoidConfig') ?? {}) + // initialize cache and other variables + // note that whenever an autocompletion is rejected, it is removed from cache + if (!this._autocompletionsOfDocument[docUriStr]) { + this._autocompletionsOfDocument[docUriStr] = new LRUCache({ + max: MAX_CACHE_SIZE, + dispose: (autocompletion) => { + autocompletion.abortRef.current() + } + }) + } + this._lastPrefix = prefix + + // get all pending autocompletions + let __c = 0 + this._autocompletionsOfDocument[docUriStr].forEach(a => { if (a.status === 'pending') __c += 1 }) + console.log('pending: ' + __c) + // get autocompletion from cache let cachedAutocompletion: Autocompletion | undefined = undefined - loop: for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { + for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { // if the user's change matches up with the generated text if (doesPrefixMatchAutocompletion({ prefix, autocompletion })) { cachedAutocompletion = autocompletion - break loop; + break } } @@ -160,39 +314,39 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider if (cachedAutocompletion) { if (cachedAutocompletion.status === 'finished') { - console.log('AAA1') + console.log('A1') - const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position }) + const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position }) return [inlineCompletion] } else if (cachedAutocompletion.status === 'pending') { - console.log('AAA2') + console.log('A2') try { - await cachedAutocompletion.promise; - const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position }) + await cachedAutocompletion.llmPromise; + console.log('id: ' + cachedAutocompletion.id) + const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position }) return [inlineCompletion] } catch (e) { + this._autocompletionsOfDocument[docUriStr].delete(cachedAutocompletion.id) console.error('Error creating autocompletion (1): ' + e) } } else if (cachedAutocompletion.status === 'error') { - console.log('AAA3') + console.log('A3') } return [] } - - // if there is no cached autocompletion, create it and add it to cache - + // else if no more typing happens, then go forwards with the request // wait DEBOUNCE_TIME for the user to stop typing const thisTime = Date.now() - this._lastTime = thisTime + this._lastCompletionTime = thisTime const didTypingHappenDuringDebounce = await new Promise((resolve, reject) => setTimeout(() => { - if (this._lastTime === thisTime) { + if (this._lastCompletionTime === thisTime) { resolve(false) } else { resolve(true) @@ -205,29 +359,57 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider return [] } - console.log('BBB') + console.log('B') - // else if no more typing happens, then go forwards with the request + // if there are too many pending requests, cancel the oldest one + let numPending = 0 + let oldestPending: Autocompletion | undefined = undefined + for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) { + if (autocompletion.status === 'pending') { + numPending += 1 + if (oldestPending === undefined) { + oldestPending = autocompletion + } + if (numPending >= MAX_PENDING_REQUESTS) { + // cancel the oldest pending request and remove it from cache + this._autocompletionsOfDocument[docUriStr].delete(oldestPending.id) + break + } + } + } + + const { shouldGenerate, stopTokens } = getCompletionOptions({ prefix, suffix }) + + if (!shouldGenerate) return [] + + // create a new autocompletion and add it to cache const newAutocompletion: Autocompletion = { + id: this._autocompletionId++, prefix: prefix, suffix: suffix, startTime: Date.now(), endTime: undefined, abortRef: { current: () => { } }, status: 'pending', - promise: undefined, + llmPromise: undefined, result: '', } // set parameters of `newAutocompletion` appropriately - newAutocompletion.promise = new Promise((resolve, reject) => { + newAutocompletion.llmPromise = new Promise((resolve, reject) => { sendLLMMessage({ mode: 'fim', fimInfo: { prefix, suffix }, + options: { stopTokens }, onText: async (tokenStr, completionStr) => { - // TODO filter out bad responses here + newAutocompletion.result = completionStr + + // if generation doesn't match the prefix for the first few tokens generated, reject it + if (!doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { + reject('LLM response did not match user\'s text.') + } }, onFinalMessage: (finalMessage) => { @@ -252,24 +434,29 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider abortRef: newAutocompletion.abortRef, }) - setTimeout(() => { // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it + // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it + setTimeout(() => { if (newAutocompletion.status === 'pending') { - reject('Timeout') + reject('Timeout receiving message to LLM.') } }, TIMEOUT_TIME) + + }) // add autocompletion to cache - this._autocompletionsOfDocument[docUriStr].push(newAutocompletion) + this._autocompletionsOfDocument[docUriStr].set(newAutocompletion.id, newAutocompletion) // show autocompletion try { - await newAutocompletion.promise; + await newAutocompletion.llmPromise + console.log('id: ' + newAutocompletion.id) - const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, position }) + const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, suffix, position }) return [inlineCompletion] } catch (e) { + this._autocompletionsOfDocument[docUriStr].delete(newAutocompletion.id) console.error('Error creating autocompletion (2): ' + e) return [] } @@ -277,6 +464,4 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider } - - } diff --git a/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx b/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx index cda6bb5c..039c16c9 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx @@ -417,7 +417,30 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ +// // 6. Autocomplete +// const autocompleteProvider = new AutocompleteProvider(context); +// context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider('*', autocompleteProvider)); +// const voidConfig = getVoidConfigFromPartial(context.globalState.get('partialVoidConfig') ?? {}) + +// // setupAutocomplete({ voidConfig, abortRef }) + +// // 7. Language Server +// console.log('run lsp') +// let disposable = vscode.commands.registerCommand('typeInspector.inspect', runTreeSitter); +// context.subscriptions.push(disposable); + + + + + + + + + + +// import { configFields, VoidConfig } from "../webviews/common/contextForConfig" +// import { FimInfo } from "./sendLLMMessage" // type GetFIMPrompt = ({ voidConfig, fimInfo }: { voidConfig: VoidConfig, fimInfo: FimInfo, }) => string @@ -440,9 +463,10 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ // Instruction summary: // 1. Return the MIDDLE of the code between the START and END. // 2. Do not give an explanation, description, or any other code besides the middle. -// 2. Do not return duplicate code from either START or END. -// 3. Make sure the MIDDLE piece of code has balanced brackets that match the START and END. -// 4. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line. +// 3. Do not return duplicate code from either START or END. +// 4. Make sure the MIDDLE piece of code has balanced brackets that match the START and END. +// 5. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line. +// 6. Around 90% of the time, you should return just one or a few lines of code. You should keep your outputs short unless you are confident the user is trying to write boilderplate code. // # EXAMPLE @@ -490,14 +514,23 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ // export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => { -// // if no prefix or suffix, return empty string -// if (!fimInfo.prefix.trim() && !fimInfo.suffix.trim()) return '' +// const { prefix: fullPrefix, suffix: fullSuffix } = fimInfo +// const prefix = fullPrefix.split('\n').slice(-20).join('\n') +// const suffix = fullSuffix.split('\n').slice(0, 20).join('\n') + + +// console.log('prefix', JSON.stringify(prefix)) +// console.log('suffix', JSON.stringify(suffix)) + +// if (!prefix.trim() && !suffix.trim()) return '' // // TODO may want to trim the prefix and suffix // switch (voidConfig.default.whichApi) { // case 'ollama': // if (voidConfig.ollama.model === 'codestral') { -// return `[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}` +// return `[SUFFIX]${suffix}[PREFIX] ${prefix}` +// } else if (voidConfig.ollama.model.includes('qwen')) { +// return `<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>` // } // return '' // case 'anthropic': @@ -510,14 +543,473 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ // default: // return `## START: // \`\`\` -// ${fimInfo.prefix} +// ${prefix} // \`\`\` // ## END: // \`\`\` -// ${fimInfo.suffix} +// ${suffix} // \`\`\` // ` - // } // } + + + + + + + + + + + + + + + + +// Mathew - sendLLMMessage + +// import Anthropic from '@anthropic-ai/sdk'; +// import OpenAI from 'openai'; +// import { Ollama } from 'ollama/browser' +// import { Content, GoogleGenerativeAI, GoogleGenerativeAIError, GoogleGenerativeAIFetchError } from '@google/generative-ai'; +// import { VoidConfig } from '../webviews/common/contextForConfig' +// import { getFIMPrompt, getFIMSystem } from './getPrompt'; + +// export type AbortRef = { current: (() => void) } + +// export type OnText = (newText: string, fullText: string) => void + +// export type OnFinalMessage = (input: string) => void + +// export type LLMMessageAnthropic = { +// role: 'user' | 'assistant', +// content: string, +// } + +// export type LLMMessage = { +// role: 'system' | 'user' | 'assistant', +// content: string, +// } + +// type LLMMessageOptions = { stopTokens?: string[] } + +// type SendLLMMessageFnTypeInternal = (params: { +// mode: 'chat' | 'fim', +// messages: LLMMessage[], +// options?: LLMMessageOptions, +// onText: OnText, +// onFinalMessage: OnFinalMessage, +// onError: (error: string) => void, +// abortRef: AbortRef, +// voidConfig: VoidConfig, +// }) => void + + +// type SendLLMMessageFnTypeExternal = (params: ( +// | { mode?: 'chat', messages: LLMMessage[], fimInfo?: undefined, } +// | { mode: 'fim', messages?: undefined, fimInfo: FimInfo, } +// ) & { +// options?: LLMMessageOptions, +// onText: OnText, +// onFinalMessage: OnFinalMessage, +// onError: (error: string) => void, +// abortRef: AbortRef, +// voidConfig: VoidConfig | null, // these may be absent +// }) => void + +// export type FimInfo = { +// prefix: string, +// suffix: string, +// } + +// const parseMaxTokensStr = (maxTokensStr: string) => { +// // parse the string but only if the full string is a valid number, eg parseInt('100abc') should return NaN +// let int = isNaN(Number(maxTokensStr)) ? undefined : parseInt(maxTokensStr) +// if (Number.isNaN(int)) +// return undefined +// return int +// } + +// // Anthropic +// const sendAnthropicMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig }) => { + +// const anthropic = new Anthropic({ apiKey: voidConfig.anthropic.apikey, dangerouslyAllowBrowser: true }); // defaults to process.env["ANTHROPIC_API_KEY"] + +// // find system messages and concatenate them +// const systemMessage = messages +// .filter(msg => msg.role === 'system') +// .map(msg => msg.content) +// .join('\n'); + +// // remove system messages for Anthropic +// const anthropicMessages = messages.filter(msg => msg.role !== 'system') as LLMMessageAnthropic[] + +// const stream = anthropic.messages.stream({ +// system: systemMessage, +// messages: anthropicMessages, +// model: voidConfig.anthropic.model, +// max_tokens: parseMaxTokensStr(voidConfig.default.maxTokens)!, // this might be undefined, but it will just throw an error for the user +// }); + +// let did_abort = false + +// // when receive text +// stream.on('text', (newText, fullText) => { +// if (did_abort) return +// onText(newText, fullText) +// }) + +// // when we get the final message on this stream (or when error/fail) +// stream.on('finalMessage', (claude_response) => { +// if (did_abort) return +// // stringify the response's content +// let content = claude_response.content.map(c => { if (c.type === 'text') { return c.text } }).join('\n'); +// onFinalMessage(content) +// }) + +// stream.on('error', (error) => { +// // the most common error will be invalid API key (401), so we handle this with a nice message +// if (error instanceof Anthropic.APIError && error.status === 401) { +// onError('Invalid API key.') +// } +// else { +// onError(error.message) +// } +// }) + +// // if abort is called, onFinalMessage is NOT called, and no later onTexts are called either +// const abort = () => { +// did_abort = true +// stream.controller.abort() // TODO need to test this to make sure it works, it might throw an error +// } + +// return { abort } +// }; + +// // Gemini +// const sendGeminiMsg: SendLLMMessageFnTypeInternal = async ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => { + +// let didAbort = false +// let fullText = '' + +// abortRef.current = () => { +// didAbort = true +// } + +// const genAI = new GoogleGenerativeAI(voidConfig.gemini.apikey); +// const model = genAI.getGenerativeModel({ model: voidConfig.gemini.model }); + +// // remove system messages that get sent to Gemini +// // str of all system messages +// let systemMessage = messages +// .filter(msg => msg.role === 'system') +// .map(msg => msg.content) +// .join('\n'); + +// // Convert messages to Gemini format +// const geminiMessages: Content[] = messages +// .filter(msg => msg.role !== 'system') +// .map((msg, i) => ({ +// parts: [{ text: msg.content }], +// role: msg.role === 'assistant' ? 'model' : 'user' +// })) + +// model.generateContentStream({ contents: geminiMessages, systemInstruction: systemMessage, }) +// .then(async response => { +// abortRef.current = () => { +// // response.stream.return(fullText) +// didAbort = true; +// } +// for await (const chunk of response.stream) { +// if (didAbort) return; +// const newText = chunk.text(); +// fullText += newText; +// onText(newText, fullText); +// } +// onFinalMessage(fullText); +// }) +// .catch((error) => { +// if (error instanceof GoogleGenerativeAIFetchError) { +// if (error.status === 400) { +// onError('Invalid API key.'); +// } +// else { +// onError(`${error.name}:\n${error.message}`); +// } +// } +// else { +// onError(error); +// } +// }) +// } + +// // OpenAI, OpenRouter, OpenAICompatible +// const sendOpenAIMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => { + +// let didAbort = false +// let fullText = '' + +// // if abort is called, onFinalMessage is NOT called, and no later onTexts are called either +// abortRef.current = () => { +// didAbort = true; +// }; + +// let openai: OpenAI +// let options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming + +// let maxTokens = parseMaxTokensStr(voidConfig.default.maxTokens) + +// if (voidConfig.default.whichApi === 'openAI') { +// openai = new OpenAI({ apiKey: voidConfig.openAI.apikey, dangerouslyAllowBrowser: true }); +// options = { model: voidConfig.openAI.model, messages: messages, stream: true, max_completion_tokens: maxTokens } +// } +// else if (voidConfig.default.whichApi === 'openRouter') { +// openai = new OpenAI({ +// baseURL: "https://openrouter.ai/api/v1", apiKey: voidConfig.openRouter.apikey, dangerouslyAllowBrowser: true, +// defaultHeaders: { +// "HTTP-Referer": 'https://voideditor.com', // Optional, for including your app on openrouter.ai rankings. +// "X-Title": 'Void Editor', // Optional. Shows in rankings on openrouter.ai. +// }, +// }); +// options = { model: voidConfig.openRouter.model, messages: messages, stream: true, max_completion_tokens: maxTokens } +// } +// else if (voidConfig.default.whichApi === 'openAICompatible') { +// openai = new OpenAI({ baseURL: voidConfig.openAICompatible.endpoint, apiKey: voidConfig.openAICompatible.apikey, dangerouslyAllowBrowser: true }) +// options = { model: voidConfig.openAICompatible.model, messages: messages, stream: true, max_completion_tokens: maxTokens } +// } +// else { +// console.error(`sendOpenAIMsg: invalid whichApi: ${voidConfig.default.whichApi}`) +// throw new Error(`voidConfig.whichAPI was invalid: ${voidConfig.default.whichApi}`) +// } + +// openai.chat.completions +// .create(options) +// .then(async response => { +// abortRef.current = () => { +// // response.controller.abort() +// didAbort = true; +// } +// // when receive text +// for await (const chunk of response) { +// if (didAbort) return; +// const newText = chunk.choices[0]?.delta?.content || ''; +// fullText += newText; +// onText(newText, fullText); +// } +// onFinalMessage(fullText); +// }) +// // when error/fail - this catches errors of both .create() and .then(for await) +// .catch(error => { +// if (error instanceof OpenAI.APIError) { +// if (error.status === 401) { +// onError('Invalid API key.'); +// } +// else { +// onError(`${error.name}:\n${error.message}`); +// } +// } +// else { +// onError(error); +// } +// }) + +// }; + +// // Ollama +// export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => { + +// let didAbort = false +// let fullText = "" + +// const ollama = new Ollama({ host: voidConfig.ollama.endpoint }) + +// abortRef.current = () => { +// didAbort = true; +// }; + +// type GenerateResponse = Awaited> +// type ChatResponse = Awaited> + + +// // First check if model exists +// ollama.list() +// .then(async models => { +// const installedModels = models.models.map(m => m.name.replace(/:latest$/, '')) +// const modelExists = installedModels.some(m => m.startsWith(voidConfig.ollama.model)); +// if (!modelExists) { +// const errorMessage = `The model "${voidConfig.ollama.model}" is not available locally. Please run 'ollama pull ${voidConfig.ollama.model}' to download it first or +// try selecting one from the Installed models: ${installedModels.join(', ')}`; +// onText(errorMessage, errorMessage); +// onFinalMessage(errorMessage); +// return Promise.reject(); +// } + +// if (mode === 'fim') { +// // the fim prompt is the last message +// let prompt = messages[messages.length - 1].content +// return ollama.generate({ +// model: voidConfig.ollama.model, +// prompt: prompt, +// stream: true, +// raw: true, +// options: { stop: options?.stopTokens } +// }) +// } + +// return ollama.chat({ +// model: voidConfig.ollama.model, +// messages: messages, +// stream: true, +// options: { num_predict: parseMaxTokensStr(voidConfig.default.maxTokens) } +// }); +// }) +// .then(async stream => { +// if (!stream) return; + +// abortRef.current = () => { +// didAbort = true +// stream.abort() +// } +// for await (const chunk of stream) { +// if (didAbort) return; + +// const newText = (mode === 'fim' +// ? (chunk as GenerateResponse).response +// : (chunk as ChatResponse).message.content +// ) +// fullText += newText; +// onText(newText, fullText); +// } +// onFinalMessage(fullText); +// }) +// .catch(error => { +// // Check if the error is a connection error +// if (error instanceof Error && error.message.includes('Failed to fetch')) { +// const errorMessage = 'Ollama service is not running. Please start the Ollama service and try again.'; +// onText(errorMessage, errorMessage); +// onFinalMessage(errorMessage); +// } else if (error) { +// onError(error); +// } +// }); +// }; + +// // Greptile +// // https://docs.greptile.com/api-reference/query +// // https://docs.greptile.com/quickstart#sample-response-streamed + +// const sendGreptileMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => { + +// let didAbort = false +// let fullText = '' + +// // if abort is called, onFinalMessage is NOT called, and no later onTexts are called either +// abortRef.current = () => { +// didAbort = true +// } + +// fetch('https://api.greptile.com/v2/query', { +// method: 'POST', +// headers: { +// "Authorization": `Bearer ${voidConfig.greptile.apikey}`, +// "X-Github-Token": `${voidConfig.greptile.githubPAT}`, +// "Content-Type": `application/json`, +// }, +// body: JSON.stringify({ +// messages, +// stream: true, +// repositories: [voidConfig.greptile.repoinfo], +// }), +// }) +// // this is {message}\n{message}\n{message}...\n +// .then(async response => { +// const text = await response.text() +// console.log('got greptile', text) +// return JSON.parse(`[${text.trim().split('\n').join(',')}]`) +// }) +// // TODO make this actually stream, right now it just sends one message at the end +// .then(async responseArr => { +// if (didAbort) +// return + +// for (let response of responseArr) { + +// const type: string = response['type'] +// const message = response['message'] + +// // when receive text +// if (type === 'message') { +// fullText += message +// onText(message, fullText) +// } +// else if (type === 'sources') { +// const { filepath, linestart, lineend } = message as { filepath: string, linestart: number | null, lineend: number | null } +// fullText += filepath +// onText(filepath, fullText) +// } +// // type: 'status' with an empty 'message' means last message +// else if (type === 'status') { +// if (!message) { +// onFinalMessage(fullText) +// } +// } +// } + +// }) +// .catch(e => { +// onError(e) +// }); + +// } + +// export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ options, mode, messages, fimInfo, onText, onFinalMessage, onError, voidConfig, abortRef }) => { +// if (!voidConfig) +// return onError('No config file found for LLM.'); + +// // handle defaults +// if (!mode) mode = 'chat' +// if (!messages) messages = [] + +// // build messages +// if (mode === 'chat') { +// // nothing needed +// } else if (mode === 'fim') { +// fimInfo = fimInfo! + +// const system = getFIMSystem({ voidConfig, fimInfo }) +// const prompt = getFIMPrompt({ voidConfig, fimInfo }) +// messages = ([ +// { role: 'system', content: system }, +// { role: 'user', content: prompt } +// ] as const) + +// } + +// // trim message content (Anthropic and other providers give an error if there is trailing whitespace) +// messages = messages.map(m => ({ ...m, content: m.content.trim() })) +// .filter(m => m.content !== '') + +// if (messages.length === 0) +// return onError('No messages provided to LLM.'); + +// switch (voidConfig.default.whichApi) { +// case 'anthropic': +// return sendAnthropicMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); +// case 'openAI': +// case 'openRouter': +// case 'openAICompatible': +// return sendOpenAIMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); +// case 'gemini': +// return sendGeminiMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); +// case 'ollama': +// return sendOllamaMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); +// case 'greptile': +// return sendGreptileMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }); +// default: +// onError(`Error: whichApi was ${voidConfig.default.whichApi}, which is not recognized!`) +// } + +// } diff --git a/src/vs/workbench/contrib/void/browser/registerConfig.ts b/src/vs/workbench/contrib/void/browser/registerConfig.ts index bf7dc86c..bb54fbee 100644 --- a/src/vs/workbench/contrib/void/browser/registerConfig.ts +++ b/src/vs/workbench/contrib/void/browser/registerConfig.ts @@ -130,7 +130,7 @@ const voidConfigInfo: Record< model: configEnum( 'Ollama model to use.', 'codestral', - ['codestral', 'codegemma', 'codegemma:2b', 'codegemma:7b', 'codellama', 'codellama:7b', 'codellama:13b', 'codellama:34b', 'codellama:70b', 'codellama:code', 'codellama:python', 'command-r', 'command-r:35b', 'command-r-plus', 'command-r-plus:104b', 'deepseek-coder-v2', 'deepseek-coder-v2:16b', 'deepseek-coder-v2:236b', 'falcon2', 'falcon2:11b', 'firefunction-v2', 'firefunction-v2:70b', 'gemma', 'gemma:2b', 'gemma:7b', 'gemma2', 'gemma2:2b', 'gemma2:9b', 'gemma2:27b', 'llama2', 'llama2:7b', 'llama2:13b', 'llama2:70b', 'llama3', 'llama3:8b', 'llama3:70b', 'llama3-chatqa', 'llama3-chatqa:8b', 'llama3-chatqa:70b', 'llama3-gradient', 'llama3-gradient:8b', 'llama3-gradient:70b', 'llama3.1', 'llama3.1:8b', 'llama3.1:70b', 'llama3.1:405b', 'llava', 'llava:7b', 'llava:13b', 'llava:34b', 'llava-llama3', 'llava-llama3:8b', 'llava-phi3', 'llava-phi3:3.8b', 'mistral', 'mistral:7b', 'mistral-large', 'mistral-large:123b', 'mistral-nemo', 'mistral-nemo:12b', 'mixtral', 'mixtral:8x7b', 'mixtral:8x22b', 'moondream', 'moondream:1.8b', 'openhermes', 'openhermes:v2.5', 'phi3', 'phi3:3.8b', 'phi3:14b', 'phi3.5', 'phi3.5:3.8b', 'qwen', 'qwen:7b', 'qwen:14b', 'qwen:32b', 'qwen:72b', 'qwen:110b', 'qwen2', 'qwen2:0.5b', 'qwen2:1.5b', 'qwen2:7b', 'qwen2:72b', 'smollm', 'smollm:135m', 'smollm:360m', 'smollm:1.7b'] as const + ['codestral', 'qwen2.5-coder', 'qwen2.5-coder:0.5b', 'qwen2.5-coder:1.5b', 'qwen2.5-coder:3b', 'qwen2.5-coder:7b', 'qwen2.5-coder:14b', 'qwen2.5-coder:32b', 'codegemma', 'codegemma:2b', 'codegemma:7b', 'codellama', 'codellama:7b', 'codellama:13b', 'codellama:34b', 'codellama:70b', 'codellama:code', 'codellama:python', 'command-r', 'command-r:35b', 'command-r-plus', 'command-r-plus:104b', 'deepseek-coder-v2', 'deepseek-coder-v2:16b', 'deepseek-coder-v2:236b', 'falcon2', 'falcon2:11b', 'firefunction-v2', 'firefunction-v2:70b', 'gemma', 'gemma:2b', 'gemma:7b', 'gemma2', 'gemma2:2b', 'gemma2:9b', 'gemma2:27b', 'llama2', 'llama2:7b', 'llama2:13b', 'llama2:70b', 'llama3', 'llama3:8b', 'llama3:70b', 'llama3-chatqa', 'llama3-chatqa:8b', 'llama3-chatqa:70b', 'llama3-gradient', 'llama3-gradient:8b', 'llama3-gradient:70b', 'llama3.1', 'llama3.1:8b', 'llama3.1:70b', 'llama3.1:405b', 'llava', 'llava:7b', 'llava:13b', 'llava:34b', 'llava-llama3', 'llava-llama3:8b', 'llava-phi3', 'llava-phi3:3.8b', 'mistral', 'mistral:7b', 'mistral-large', 'mistral-large:123b', 'mistral-nemo', 'mistral-nemo:12b', 'mixtral', 'mixtral:8x7b', 'mixtral:8x22b', 'moondream', 'moondream:1.8b', 'openhermes', 'openhermes:v2.5', 'phi3', 'phi3:3.8b', 'phi3:14b', 'phi3.5', 'phi3.5:3.8b', 'qwen', 'qwen:7b', 'qwen:14b', 'qwen:32b', 'qwen:72b', 'qwen:110b', 'qwen2', 'qwen2:0.5b', 'qwen2:1.5b', 'qwen2:7b', 'qwen2:72b', 'smollm', 'smollm:135m', 'smollm:360m', 'smollm:1.7b'] as const ), }, openRouter: { diff --git a/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts b/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts index 29c6e15f..85d1584d 100644 --- a/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts +++ b/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts @@ -1027,22 +1027,3 @@ class AcceptRejectWidget extends Widget implements IOverlayWidget { - - - -// // 6. Autocomplete -// const autocompleteProvider = new AutocompleteProvider(context); -// context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider('*', autocompleteProvider)); - -// const voidConfig = getVoidConfigFromPartial(context.globalState.get('partialVoidConfig') ?? {}) -// const abortRef: AbortRef = { current: null } - -// // setupAutocomplete({ voidConfig, abortRef }) - - -// // 7. Language Server -// console.log('run lsp') -// let disposable = vscode.commands.registerCommand('typeInspector.inspect', runTreeSitter); - -// context.subscriptions.push(disposable); -