diff --git a/extensions/void/src/common/SimpleLruCache.ts b/extensions/void/src/common/SimpleLruCache.ts
deleted file mode 100644
index 7118bc8f..00000000
--- a/extensions/void/src/common/SimpleLruCache.ts
+++ /dev/null
@@ -1,32 +0,0 @@
-import { LRUCache } from 'lru-cache';
-
-const DEFAULT_MAX_SIZE = 20
-
-
-export class SimpleLRUCache<T extends {}> {
-	private cache: LRUCache<number, T>;
-	private maxSize: number
-	public length: number
-
-	constructor(maxSize?: number) {
-
-		maxSize = maxSize ?? DEFAULT_MAX_SIZE
-
-		this.cache = new LRUCache<number, T>({ max: maxSize });
-		this.length = 0
-		this.maxSize = maxSize
-	}
-
-	push(value: T): void {
-		const key = this.cache.size;
-		this.cache.set(key, value);
-		this.length++
-		this.length = Math.min(this.length, this.maxSize)
-	}
-
-	values() {
-		return this.cache.values()
-	}
-
-
-}
\ No newline at end of file
diff --git a/extensions/void/src/extension/AutcompleteProvider.ts b/extensions/void/src/extension/AutcompleteProvider.ts
index 2aad378a..dd4b044e 100644
--- a/extensions/void/src/extension/AutcompleteProvider.ts
+++ b/extensions/void/src/extension/AutcompleteProvider.ts
@@ -2,35 +2,84 @@ import * as vscode from 'vscode';
 import { AbortRef, LLMMessage, sendLLMMessage } from '../common/sendLLMMessage';
 import { getVoidConfigFromPartial, VoidConfig } from '../webviews/common/contextForConfig';
 import { LRUCache } from 'lru-cache';
-import { SimpleLRUCache } from '../common/SimpleLruCache';
+
+
+/*
+A summary of autotab:
+
+Postprocessing
+-one common problem for all models is outputting unbalanced parentheses
+we solve this by trimming all extra closing parentheses from the generated string
+in future, should make sure parentheses are always balanced
+
+-another problem is completing the middle of a string, eg. "const [x, CURSOR] = useState()"
+we complete up to first matchup character
+but should instead complete the whole line / block (difficult because of parenthesis accuracy)
+
+-too much info is bad. usually we want to show the user 1 line, and have a preloaded response afterwards
+this should happen automatically with caching system
+should break preloaded responses into \n\n chunks
+
+Preprocessing
+- we don't generate if cursor is at end / beginning of a line (no spaces)
+- we generate 1 line if there is text to the right of cursor
+- we generate 1 line if variable declaration
+- (in many cases want to show 1 line but generate multiple)
+
+State
+- cache based on prefix (and do some trimming first)
+- when press tab on one line, should have an immediate followup response
+to do this, show autocompletes before they're fully finished
+- [todo] remove each autotab when accepted
+- [todo] treat windows \r\n separately from \n
+!- [todo] provide type information
+
+Details
+-generated results are trimmed up to 1 leading/trailing space
+-prefixes are cached up to 1 trailing newline
+-
+*/
+
+
+
+
 
 type AutocompletionStatus = 'pending' | 'finished' | 'error';
 type Autocompletion = {
+	id: number,
 	prefix: string,
 	suffix: string,
 	startTime: number,
 	endTime: number | undefined,
 	abortRef: AbortRef,
 	status: AutocompletionStatus,
-	promise: Promise<string> | undefined,
+	llmPromise: Promise<string> | undefined,
 	result: string,
 }
 
-const DEBOUNCE_TIME = 300
+const DEBOUNCE_TIME = 500
 const TIMEOUT_TIME = 60000
+const MAX_CACHE_SIZE = 20
+const MAX_PENDING_REQUESTS = 2
 
 // postprocesses the result
 const postprocessResult = (result: string) => {
 
-	// remove leading whitespace from result
-	return result.trimStart()
+	console.log('result: ', JSON.stringify(result))
+
+	// trim all whitespace except for a single leading/trailing space
+	const hasLeadingSpace = result.startsWith(' ');
+	const hasTrailingSpace = result.endsWith(' ');
+	return (hasLeadingSpace ? ' ' : '')
+		+ result.trim()
+		+ (hasTrailingSpace ? ' ' : '');
 
 }
 
 const extractCodeFromResult = (result: string) => {
 
 	// extract the code between triple backticks
-	const parts = result.split(/```/);
+	const parts = result.split(/```(?:\s*\w+)?\n?/);
 
 	// if there is no ``` then return the raw result
 	if (parts.length === 1) {
@@ -56,6 +105,28 @@ const trimPrefix = (prefix: string) => {
 	return trimmedPrefix
 }
 
+function getStringUpToUnbalancedParenthesis(s: string, prefixToTheLeft: string): string {
+
+	const pairs: Record<string, string> = { ')': '(', '}': '{', ']': '[' };
+
+	// todo find first open bracket in prefix and get all brackets beyond it in prefix
+	// get all brackets in prefix
+	let stack: string[] = []
+	const firstOpenIdx = prefixToTheLeft.search(/[[({]/);
+	if (firstOpenIdx !== -1) stack = prefixToTheLeft.slice(firstOpenIdx).split('').filter(c => '()[]{}'.includes(c))
+
+	// Iterate through each character
+	for (let i = 0; i < s.length; i++) {
+		const char = s[i];
+
+		if (char === '(' || char === '{' || char === '[') { stack.push(char); }
+		else if (char === ')' || char === '}' || char === ']') {
+			if (stack.length === 0 || stack.pop() !== pairs[char]) { return s.substring(0, i); }
+		}
+	}
+	return s;
+}
+
 // finds the text in the autocompletion to display, assuming the prefix is already matched
 // example:
 // originalPrefix = abcd
@@ -63,66 +134,136 @@ const trimPrefix = (prefix: string) => {
 // originalSuffix = ijkl
 // the user has typed "ef" so prefix = abcdef
 // we want to return the rest of the generatedMiddle, which is "gh"
-const toInlineCompletion = ({ prefix, autocompletion, position }: { prefix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => {
+const toInlineCompletion = ({ prefix, suffix, autocompletion, position }: { prefix: string, suffix: string, autocompletion: Autocompletion, position: vscode.Position }): vscode.InlineCompletionItem => {
 	const originalPrefix = autocompletion.prefix
 	const generatedMiddle = autocompletion.result
 
 	const trimmedOriginalPrefix = trimPrefix(originalPrefix)
 	const trimmedCurrentPrefix = trimPrefix(prefix)
 
-	const lastMatchupIndex = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length
+	const suffixLines = suffix.split('\n')
+	const prefixLines = trimmedCurrentPrefix.split('\n')
+	const suffixToTheRightOfCursor = suffixLines[0].trim()
+	const prefixToTheLeftOfCursor = prefixLines[prefixLines.length - 1].trim()
 
-	console.log('generatedMiddle ', generatedMiddle)
-	console.log('trimmedOriginalPrefix ', trimmedOriginalPrefix)
-	console.log('trimmedCurrentPrefix ', trimmedCurrentPrefix)
-	console.log('index: ', lastMatchupIndex)
-	if (lastMatchupIndex < 0) {
+	const generatedLines = generatedMiddle.split('\n')
+
+	// compute startIdx
+	let startIdx = trimmedCurrentPrefix.length - trimmedOriginalPrefix.length
+	if (startIdx < 0) {
 		return new vscode.InlineCompletionItem('')
 	}
 
-	const completionStr = generatedMiddle.substring(lastMatchupIndex)
-	console.log('completionStr: ', completionStr)
+	// compute endIdx
+	// hacks to get the suffix to render properly with lower quality models
+	// if the generated text matches with the suffix on the current line, stop
+	let endIdx: number | undefined = generatedMiddle.length // exclusive bounds
 
-	return new vscode.InlineCompletionItem(
-		completionStr,
-		new vscode.Range(position, position)
-	)
+	if (suffixToTheRightOfCursor !== '') { // completing in the middle of a line
+		console.log('1')
+		// complete until there is a match
+		const matchIndex = generatedMiddle.lastIndexOf(suffixToTheRightOfCursor[0])
+		if (matchIndex > 0) { endIdx = matchIndex }
+	}
+
+	if (prefixToTheLeftOfCursor !== '') { // completing the end of a line
+		console.log('2')
+		// show a single line
+		const newlineIdx = generatedMiddle.indexOf('\n')
+		if (newlineIdx > -1) { endIdx = newlineIdx }
+	}
+
+	// // if a generated line matches with a suffix line, stop
+	// if (suffixLines.length > 1) {
+	// 	console.log('3')
+	// 	const lines = []
+	// 	for (const generatedLine of generatedLines) {
+	// 		if (suffixLines.slice(0, 10).some(suffixLine =>
+	// 			generatedLine.trim() !== '' && suffixLine.trim() !== ''
+	// 			&& generatedLine.trim().startsWith(suffixLine.trim())
+	// 		)) break;
+	// 		lines.push(generatedLine)
+	// 	}
+	// 	endIdx = lines.join('\n').length // this is hacky, remove or refactor in future
+	// }
+
+	let completionStr = generatedMiddle.slice(startIdx, endIdx)
+
+	// filter out unbalanced parentheses
+	console.log('completionStrBeforeParens: ', JSON.stringify(completionStr))
+	completionStr = getStringUpToUnbalancedParenthesis(completionStr, prefixLines.slice(-2).join('\n'))
+
+	console.log('originalCompletionStr: ', JSON.stringify(generatedMiddle.slice(startIdx)))
+	console.log('finalCompletionStr: ', JSON.stringify(completionStr))
+
+	return new vscode.InlineCompletionItem(completionStr, new vscode.Range(position, position))
 
 }
 
-// returns whether we can use this autocompletion to complete the prefix
+// returns whether this autocompletion's original prefix + generated text matches the current prefix (i.e. whether it can be reused)
 const doesPrefixMatchAutocompletion = ({ prefix, autocompletion }: { prefix: string, autocompletion: Autocompletion }): boolean => {
 
 	const originalPrefix = autocompletion.prefix
 	const generatedMiddle = autocompletion.result
-	const trimmedOriginalPrefix = trimPrefix(originalPrefix)
-	const trimmedCurrentPrefix = trimPrefix(prefix)
+	const originalPrefixTrimmed = trimPrefix(originalPrefix)
+	const currentPrefixTrimmed = trimPrefix(prefix)
 
-	if (trimmedCurrentPrefix.length < trimmedOriginalPrefix.length) {
+	if (currentPrefixTrimmed.length < originalPrefixTrimmed.length) {
 		return false
 	}
 
-	const isMatch = (trimmedOriginalPrefix + generatedMiddle).startsWith(trimmedCurrentPrefix)
+	const isMatch = (originalPrefixTrimmed + generatedMiddle).startsWith(currentPrefixTrimmed)
 	return isMatch
 
 }
 
+const getCompletionOptions = ({ prefix, suffix }: { prefix: string, suffix: string }) => {
 
+	const prefixLines = prefix.split('\n')
+	const suffixLines = suffix.split('\n')
+
+	const prefixToLeftOfCursor = prefixLines.slice(-1)[0] ?? ''
+	const suffixToRightOfCursor = suffixLines[0]
+
+	// default parameters
+	let shouldGenerate = true
+	let stopTokens: string[] = ['\n\n', '\r\n\r\n']
+
+	// specific cases
+	if (suffixToRightOfCursor.trim() !== '') { // typing between something
+		stopTokens = ['\n', '\r\n']
+	}
+
+	// if (prefixToLeftOfCursor.trim() === '' && suffixToRightOfCursor.trim() === '') { // at an empty line
+	// 	stopTokens = ['\n\n', '\r\n\r\n']
+	// }
+
+	if (prefixToLeftOfCursor === '' || suffixToRightOfCursor === '') { // at beginning or end of line
+		shouldGenerate = false
+	}
+
+	console.log('shouldGenerate:', shouldGenerate, stopTokens)
+
+	return { shouldGenerate, stopTokens }
+
+}
 
 export class AutocompleteProvider implements vscode.InlineCompletionItemProvider {
 
 	private _extensionContext: vscode.ExtensionContext;
 
-	private _autocompletionsOfDocument: { [docUriStr: string]: SimpleLRUCache<Autocompletion> } = {}
+	private _autocompletionId: number = 0;
+	private _autocompletionsOfDocument: { [docUriStr: string]: LRUCache<number, Autocompletion> } = {}
 
-	private _lastTime = 0
+	private _lastCompletionTime = 0
+	private _lastPrefix: string = ''
 
 	constructor(context: vscode.ExtensionContext) {
 		this._extensionContext = context
 	}
 
 	// used internally by vscode
-	// fires after every keystroke
+	// fires after every keystroke and returns the completion to show
 	async provideInlineCompletionItems(
 		document: vscode.TextDocument,
 		position: vscode.Position,
@@ -130,29 +271,42 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
 		token: vscode.CancellationToken,
 	): Promise<vscode.InlineCompletionItem[]> {
 
-		const disabled = true
+		const disabled = false
 		if (disabled) { return []; }
 
 		const docUriStr = document.uri.toString()
 
+
 		const fullText = document.getText();
 		const cursorOffset = document.offsetAt(position);
 		const prefix = fullText.substring(0, cursorOffset)
 		const suffix = fullText.substring(cursorOffset)
-
-		if (!this._autocompletionsOfDocument[docUriStr]) {
-			this._autocompletionsOfDocument[docUriStr] = new SimpleLRUCache()
-		}
-
 		const voidConfig = getVoidConfigFromPartial(this._extensionContext.globalState.get('partialVoidConfig') ?? {})
 
+		// initialize cache and other variables
+		// note that whenever an autocompletion is rejected, it is removed from cache
+		if (!this._autocompletionsOfDocument[docUriStr]) {
+			this._autocompletionsOfDocument[docUriStr] = new LRUCache<number, Autocompletion>({
+				max: MAX_CACHE_SIZE,
+				dispose: (autocompletion) => {
+					autocompletion.abortRef.current()
+				}
+			})
+		}
+		this._lastPrefix = prefix
+
+		// get all pending autocompletions
+		let __c = 0
+		this._autocompletionsOfDocument[docUriStr].forEach(a => { if (a.status === 'pending') __c += 1 })
+		console.log('pending: ' + __c)
+
 		// get autocompletion from cache
 		let cachedAutocompletion: Autocompletion | undefined = undefined
-		loop: for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
+		for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
 			// if the user's change matches up with the generated text
 			if (doesPrefixMatchAutocompletion({ prefix, autocompletion })) {
 				cachedAutocompletion = autocompletion
-				break loop;
+				break
 			}
 		}
 
@@ -160,39 +314,39 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
 		if (cachedAutocompletion) {
 
 			if (cachedAutocompletion.status === 'finished') {
-				console.log('AAA1')
+				console.log('A1')
 
-				const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position })
+				const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position })
 				return [inlineCompletion]
 
 			} else if (cachedAutocompletion.status === 'pending') {
-				console.log('AAA2')
+				console.log('A2')
 
 				try {
-					await cachedAutocompletion.promise;
-					const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, position })
+					await cachedAutocompletion.llmPromise;
+					console.log('id: ' + cachedAutocompletion.id)
+					const inlineCompletion = toInlineCompletion({ autocompletion: cachedAutocompletion, prefix, suffix, position })
 					return [inlineCompletion]
 
 				} catch (e) {
+					this._autocompletionsOfDocument[docUriStr].delete(cachedAutocompletion.id)
 					console.error('Error creating autocompletion (1): ' + e)
 				}
 
 			} else if (cachedAutocompletion.status === 'error') {
-				console.log('AAA3')
+				console.log('A3')
 			}
 
 			return []
 		}
 
-
-		// if there is no cached autocompletion, create it and add it to cache
-
+		// else if no more typing happens, then go forwards with the request
 		// wait DEBOUNCE_TIME for the user to stop typing
 		const thisTime = Date.now()
-		this._lastTime = thisTime
+		this._lastCompletionTime = thisTime
 		const didTypingHappenDuringDebounce = await new Promise((resolve, reject) =>
 			setTimeout(() => {
-				if (this._lastTime === thisTime) {
+				if (this._lastCompletionTime === thisTime) {
 					resolve(false)
 				} else {
 					resolve(true)
@@ -205,29 +359,57 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
 			return []
 		}
 
-		console.log('BBB')
+		console.log('B')
 
-		// else if no more typing happens, then go forwards with the request
+		// if there are too many pending requests, cancel the oldest one
+		let numPending = 0
+		let oldestPending: Autocompletion | undefined = undefined
+		for (const autocompletion of this._autocompletionsOfDocument[docUriStr].values()) {
+			if (autocompletion.status === 'pending') {
+				numPending += 1
+				if (oldestPending === undefined) {
+					oldestPending = autocompletion
+				}
+				if (numPending >= MAX_PENDING_REQUESTS) {
+					// cancel the oldest pending request and remove it from cache
+					this._autocompletionsOfDocument[docUriStr].delete(oldestPending.id)
+					break
+				}
+			}
+		}
+
+		const { shouldGenerate, stopTokens } = getCompletionOptions({ prefix, suffix })
+
+		if (!shouldGenerate) return []
+
+		// create a new autocompletion and add it to cache
 		const newAutocompletion: Autocompletion = {
+			id: this._autocompletionId++,
 			prefix: prefix,
 			suffix: suffix,
 			startTime: Date.now(),
 			endTime: undefined,
 			abortRef: { current: () => { } },
 			status: 'pending',
-			promise: undefined,
+			llmPromise: undefined,
 			result: '',
 		}
 
 		// set parameters of `newAutocompletion` appropriately
-		newAutocompletion.promise = new Promise((resolve, reject) => {
+		newAutocompletion.llmPromise = new Promise((resolve, reject) => {
 
 			sendLLMMessage({
 				mode: 'fim',
 				fimInfo: { prefix, suffix },
+				options: { stopTokens },
 				onText: async (tokenStr, completionStr) => {
-					// TODO filter out bad responses here
+
 					newAutocompletion.result = completionStr
+
+					// if generation doesn't match the prefix for the first few tokens generated, reject it
+					if (!doesPrefixMatchAutocompletion({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) {
+						reject('LLM response did not match user\'s text.')
+					}
 				},
 				onFinalMessage: (finalMessage) => {
 
@@ -252,24 +434,29 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
 				abortRef: newAutocompletion.abortRef,
 			})
 
-			setTimeout(() => { // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it
+			// if the request hasn't resolved within TIMEOUT_TIME milliseconds, reject it
+			setTimeout(() => {
 				if (newAutocompletion.status === 'pending') {
-					reject('Timeout')
+					reject('Timeout receiving message to LLM.')
 				}
 			}, TIMEOUT_TIME)
+
+
 		})
 
 		// add autocompletion to cache
-		this._autocompletionsOfDocument[docUriStr].push(newAutocompletion)
+		this._autocompletionsOfDocument[docUriStr].set(newAutocompletion.id, newAutocompletion)
 
 		// show autocompletion
 		try {
-			await newAutocompletion.promise;
+			await newAutocompletion.llmPromise
+			console.log('id: ' + newAutocompletion.id)
 
-			const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, position })
+			const inlineCompletion = toInlineCompletion({ autocompletion: newAutocompletion, prefix, suffix, position })
 			return [inlineCompletion]
 
 		} catch (e) {
+			this._autocompletionsOfDocument[docUriStr].delete(newAutocompletion.id)
 			console.error('Error creating autocompletion (2): ' + e)
 			return []
 		}
@@ -277,6 +464,4 @@ export class AutocompleteProvider implements vscode.InlineCompletionItemProvider
 	}
 
 
-
-
 }
diff --git a/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx b/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx
index cda6bb5c..039c16c9 100644
--- a/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/util/sendLLMMessage.tsx
@@ -417,7 +417,30 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
 
 
 
+// // 6. Autocomplete
+// const autocompleteProvider = new AutocompleteProvider(context);
+// context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider('*', autocompleteProvider));
 
+// const voidConfig = getVoidConfigFromPartial(context.globalState.get('partialVoidConfig') ?? {})
+
+// // setupAutocomplete({ voidConfig, abortRef })
+
+// // 7. Language Server
+// console.log('run lsp')
+// let disposable = vscode.commands.registerCommand('typeInspector.inspect', runTreeSitter);
+// context.subscriptions.push(disposable);
+
+
+
+
+
+
+
+
+
+
+// import { configFields, VoidConfig } from "../webviews/common/contextForConfig"
+// import { FimInfo } from "./sendLLMMessage"
 
 
 // type GetFIMPrompt = ({ voidConfig, fimInfo }: { voidConfig: VoidConfig, fimInfo: FimInfo, }) => string
@@ -440,9 +463,10 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
 // Instruction summary:
 // 1. Return the MIDDLE of the code between the START and END.
 // 2. Do not give an explanation, description, or any other code besides the middle.
-// 2. Do not return duplicate code from either START or END.
-// 3. Make sure the MIDDLE piece of code has balanced brackets that match the START and END.
-// 4. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line.
+// 3. Do not return duplicate code from either START or END.
+// 4. Make sure the MIDDLE piece of code has balanced brackets that match the START and END.
+// 5. The MIDDLE begins on the same line as START. Please include a newline character if you want to begin on the next line.
+// 6. Around 90% of the time, you should return just one or a few lines of code. You should keep your outputs short unless you are confident the user is trying to write boilerplate code.
 
 // # EXAMPLE
 
@@ -490,14 +514,23 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
 
 // export const getFIMPrompt: GetFIMPrompt = ({ voidConfig, fimInfo }) => {
 
-// 	// if no prefix or suffix, return empty string
-// 	if (!fimInfo.prefix.trim() && !fimInfo.suffix.trim()) return ''
+// 	const { prefix: fullPrefix, suffix: fullSuffix } = fimInfo
+// 	const prefix = fullPrefix.split('\n').slice(-20).join('\n')
+// 	const suffix = fullSuffix.split('\n').slice(0, 20).join('\n')
+
+
+// 	console.log('prefix', JSON.stringify(prefix))
+// 	console.log('suffix', JSON.stringify(suffix))
+
+// 	if (!prefix.trim() && !suffix.trim()) return ''
 
 // 	// TODO may want to trim the prefix and suffix
 // 	switch (voidConfig.default.whichApi) {
 // 		case 'ollama':
 // 			if (voidConfig.ollama.model === 'codestral') {
-// 				return `[SUFFIX]${fimInfo.suffix}[PREFIX] ${fimInfo.prefix}`
+// 				return `[SUFFIX]${suffix}[PREFIX] ${prefix}`
+// 			} else if (voidConfig.ollama.model.includes('qwen')) {
+// 				return `<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>`
 // 			}
 // 			return ''
 // 		case 'anthropic':
@@ -510,14 +543,473 @@ export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({
 // 		default:
 // 			return `## START:
 // \`\`\`
-// ${fimInfo.prefix}
+// ${prefix}
 // \`\`\`
 // ## END:
 // \`\`\`
-// ${fimInfo.suffix}
+// ${suffix}
 // \`\`\`
 // `
-
 // 	}
 // }
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Mathew - sendLLMMessage
+
+// import Anthropic from '@anthropic-ai/sdk';
+// import OpenAI from 'openai';
+// import { Ollama } from 'ollama/browser'
+// import { Content, GoogleGenerativeAI, GoogleGenerativeAIError, GoogleGenerativeAIFetchError } from '@google/generative-ai';
+// import { VoidConfig } from '../webviews/common/contextForConfig'
+// import { getFIMPrompt, getFIMSystem } from './getPrompt';
+
+// export type AbortRef = { current: (() => void) }
+
+// export type OnText = (newText: string, fullText: string) => void
+
+// export type OnFinalMessage = (input: string) => void
+
+// export type LLMMessageAnthropic = {
+// 	role: 'user' | 'assistant',
+// 	content: string,
+// }
+
+// export type LLMMessage = {
+// 	role: 'system' | 'user' | 'assistant',
+// 	content: string,
+// }
+
+// type LLMMessageOptions = { stopTokens?: string[] }
+
+// type SendLLMMessageFnTypeInternal = (params: {
+// 	mode: 'chat' | 'fim',
+// 	messages: LLMMessage[],
+// 	options?: LLMMessageOptions,
+// 	onText: OnText,
+// 	onFinalMessage: OnFinalMessage,
+// 	onError: (error: string) => void,
+// 	abortRef: AbortRef,
+// 	voidConfig: VoidConfig,
+// }) => void
+
+
+// type SendLLMMessageFnTypeExternal = (params: (
+// 	| { mode?: 'chat', messages: LLMMessage[], fimInfo?: undefined, }
+// 	| { mode: 'fim', messages?: undefined, fimInfo: FimInfo, }
+// ) & {
+// 	options?: LLMMessageOptions,
+// 	onText: OnText,
+// 	onFinalMessage: OnFinalMessage,
+// 	onError: (error: string) => void,
+// 	abortRef: AbortRef,
+// 	voidConfig: VoidConfig | null, // these may be absent
+// }) => void
+
+// export type FimInfo = {
+// 	prefix: string,
+// 	suffix: string,
+// }
+
+// const parseMaxTokensStr = (maxTokensStr: string) => {
+// 	// parse the string but only if the full string is a valid number, eg parseInt('100abc') should return NaN
+// 	let int = isNaN(Number(maxTokensStr)) ? undefined : parseInt(maxTokensStr)
+// 	if (Number.isNaN(int))
+// 		return undefined
+// 	return int
+// }
+
+// // Anthropic
+// const sendAnthropicMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig }) => {
+
+// 	const anthropic = new Anthropic({ apiKey: voidConfig.anthropic.apikey, dangerouslyAllowBrowser: true }); // defaults to process.env["ANTHROPIC_API_KEY"]
+
+// 	// find system messages and concatenate them
+// 	const systemMessage = messages
+// 		.filter(msg => msg.role === 'system')
+// 		.map(msg => msg.content)
+// 		.join('\n');
+
+// 	// remove system messages for Anthropic
+// 	const anthropicMessages = messages.filter(msg => msg.role !== 'system') as LLMMessageAnthropic[]
+
+// 	const stream = anthropic.messages.stream({
+// 		system: systemMessage,
+// 		messages: anthropicMessages,
+// 		model: voidConfig.anthropic.model,
+// 		max_tokens: parseMaxTokensStr(voidConfig.default.maxTokens)!, // this might be undefined, but it will just throw an error for the user
+// 	});
+
+// 	let did_abort = false
+
+// 	// when receive text
+// 	stream.on('text', (newText, fullText) => {
+// 		if (did_abort) return
+// 		onText(newText, fullText)
+// 	})
+
+// 	// when we get the final message on this stream (or when error/fail)
+// 	stream.on('finalMessage', (claude_response) => {
+// 		if (did_abort) return
+// 		// stringify the response's content
+// 		let content = claude_response.content.map(c => { if (c.type === 'text') { return c.text } }).join('\n');
+// 		onFinalMessage(content)
+// 	})
+
+// 	stream.on('error', (error) => {
+// 		// the most common error will be invalid API key (401), so we handle this with a nice message
+// 		if (error instanceof Anthropic.APIError && error.status === 401) {
+// 			onError('Invalid API key.')
+// 		}
+// 		else {
+// 			onError(error.message)
+// 		}
+// 	})
+
+// 	// if abort is called, onFinalMessage is NOT called, and no later onTexts are called either
+// 	const abort = () => {
+// 		did_abort = true
+// 		stream.controller.abort() // TODO need to test this to make sure it works, it might throw an error
+// 	}
+
+// 	return { abort }
+// };
+
+// // Gemini
+// const sendGeminiMsg: SendLLMMessageFnTypeInternal = async ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
+
+// 	let didAbort = false
+// 	let fullText = ''
+
+// 	abortRef.current = () => {
+// 		didAbort = true
+// 	}
+
+// 	const genAI = new GoogleGenerativeAI(voidConfig.gemini.apikey);
+// 	const model = genAI.getGenerativeModel({ model: voidConfig.gemini.model });
+
+// 	// remove system messages that get sent to Gemini
+// 	// str of all system messages
+// 	let systemMessage = messages
+// 		.filter(msg => msg.role === 'system')
+// 		.map(msg => msg.content)
+// 		.join('\n');
+
+// 	// Convert messages to Gemini format
+// 	const geminiMessages: Content[] = messages
+// 		.filter(msg => msg.role !== 'system')
+// 		.map((msg, i) => ({
+// 			parts: [{ text: msg.content }],
+// 			role: msg.role === 'assistant' ? 'model' : 'user'
+// 		}))
+
+// 	model.generateContentStream({ contents: geminiMessages, systemInstruction: systemMessage, })
+// 		.then(async response => {
+// 			abortRef.current = () => {
+// 				// response.stream.return(fullText)
+// 				didAbort = true;
+// 			}
+// 			for await (const chunk of response.stream) {
+// 				if (didAbort) return;
+// 				const newText = chunk.text();
+// 				fullText += newText;
+// 				onText(newText, fullText);
+// 			}
+// 			onFinalMessage(fullText);
+// 		})
+// 		.catch((error) => {
+// 			if (error instanceof GoogleGenerativeAIFetchError) {
+// 				if (error.status === 400) {
+// 					onError('Invalid API key.');
+// 				}
+// 				else {
+// 					onError(`${error.name}:\n${error.message}`);
+// 				}
+// 			}
+// 			else {
+// 				onError(error);
+// 			}
+// 		})
+// }
+
+// // OpenAI, OpenRouter, OpenAICompatible
+// const sendOpenAIMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
+
+// 	let didAbort = false
+// 	let fullText = ''
+
+// 	// if abort is called, onFinalMessage is NOT called, and no later onTexts are called either
+// 	abortRef.current = () => {
+// 		didAbort = true;
+// 	};
+
+// 	let openai: OpenAI
+// 	let options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming
+
+// 	let maxTokens = parseMaxTokensStr(voidConfig.default.maxTokens)
+
+// 	if (voidConfig.default.whichApi === 'openAI') {
+// 		openai = new OpenAI({ apiKey: voidConfig.openAI.apikey, dangerouslyAllowBrowser: true });
+// 		options = { model: voidConfig.openAI.model, messages: messages, stream: true, max_completion_tokens: maxTokens }
+// 	}
+// 	else if (voidConfig.default.whichApi === 'openRouter') {
+// 		openai = new OpenAI({
+// 			baseURL: "https://openrouter.ai/api/v1", apiKey: voidConfig.openRouter.apikey, dangerouslyAllowBrowser: true,
+// 			defaultHeaders: {
+// 				"HTTP-Referer": 'https://voideditor.com', // Optional, for including your app on openrouter.ai rankings.
+// 				"X-Title": 'Void Editor', // Optional. Shows in rankings on openrouter.ai.
+// 			},
+// 		});
+// 		options = { model: voidConfig.openRouter.model, messages: messages, stream: true, max_completion_tokens: maxTokens }
+// 	}
+// 	else if (voidConfig.default.whichApi === 'openAICompatible') {
+// 		openai = new OpenAI({ baseURL: voidConfig.openAICompatible.endpoint, apiKey: voidConfig.openAICompatible.apikey, dangerouslyAllowBrowser: true })
+// 		options = { model: voidConfig.openAICompatible.model, messages: messages, stream: true, max_completion_tokens: maxTokens }
+// 	}
+// 	else {
+// 		console.error(`sendOpenAIMsg: invalid whichApi: ${voidConfig.default.whichApi}`)
+// 		throw new Error(`voidConfig.whichAPI was invalid: ${voidConfig.default.whichApi}`)
+// 	}
+
+// 	openai.chat.completions
+// 		.create(options)
+// 		.then(async response => {
+// 			abortRef.current = () => {
+// 				// response.controller.abort()
+// 				didAbort = true;
+// 			}
+// 			// when receive text
+// 			for await (const chunk of response) {
+// 				if (didAbort) return;
+// 				const newText = chunk.choices[0]?.delta?.content || '';
+// 				fullText += newText;
+// 				onText(newText, fullText);
+// 			}
+// 			onFinalMessage(fullText);
+// 		})
+// 		// when error/fail - this catches errors of both .create() and .then(for await)
+// 		.catch(error => {
+// 			if (error instanceof OpenAI.APIError) {
+// 				if (error.status === 401) {
+// 					onError('Invalid API key.');
+// 				}
+// 				else {
+// 					onError(`${error.name}:\n${error.message}`);
+// 				}
+// 			}
+// 			else {
+// 				onError(error);
+// 			}
+// 		})
+
+// };
+
+// // Ollama
+// export const sendOllamaMsg: SendLLMMessageFnTypeInternal = ({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
+
+// 	let didAbort = false
+// 	let fullText = ""
+
+// 	const ollama = new Ollama({ host: voidConfig.ollama.endpoint })
+
+// 	abortRef.current = () => {
+// 		didAbort = true;
+// 	};
+
+// 	type GenerateResponse = Awaited<ReturnType<(typeof ollama.generate)>>
+// 	type ChatResponse = Awaited<ReturnType<(typeof ollama.chat)>>
+
+
+// 	// First check if model exists
+// 	ollama.list()
+// 		.then(async models => {
+// 			const installedModels = models.models.map(m => m.name.replace(/:latest$/, ''))
+// 			const modelExists = installedModels.some(m => m.startsWith(voidConfig.ollama.model));
+// 			if (!modelExists) {
+// 				const errorMessage = `The model "${voidConfig.ollama.model}" is not available locally. Please run 'ollama pull ${voidConfig.ollama.model}' to download it first or
+// 				try selecting one from the Installed models: ${installedModels.join(', ')}`;
+// 				onText(errorMessage, errorMessage);
+// 				onFinalMessage(errorMessage);
+// 				return Promise.reject();
+// 			}
+
+// 			if (mode === 'fim') {
+// 				// the fim prompt is the last message
+// 				let prompt = messages[messages.length - 1].content
+// 				return ollama.generate({
+// 					model: voidConfig.ollama.model,
+// 					prompt: prompt,
+// 					stream: true,
+// 					raw: true,
+// 					options: { stop: options?.stopTokens }
+// 				})
+// 			}
+
+// 			return ollama.chat({
+// 				model: voidConfig.ollama.model,
+// 				messages: messages,
+// 				stream: true,
+// 				options: { num_predict: parseMaxTokensStr(voidConfig.default.maxTokens) }
+// 			});
+// 		})
+// 		.then(async stream => {
+// 			if (!stream) return;
+
+// 			abortRef.current = () => {
+// 				didAbort = true
+// 				stream.abort()
+// 			}
+// 			for await (const chunk of stream) {
+// 				if (didAbort) return;
+
+// 				const newText = (mode === 'fim'
+// 					? (chunk as GenerateResponse).response
+// 					: (chunk as ChatResponse).message.content
+// 				)
+// 				fullText += newText;
+// 				onText(newText, fullText);
+// 			}
+// 			onFinalMessage(fullText);
+// 		})
+// 		.catch(error => {
+// 			// Check if the error is a connection error
+// 			if (error instanceof Error && error.message.includes('Failed to fetch')) {
+// 				const errorMessage = 'Ollama service is not running. Please start the Ollama service and try again.';
+// 				onText(errorMessage, errorMessage);
+// 				onFinalMessage(errorMessage);
+// 			} else if (error) {
+// 				onError(error);
+// 			}
+// 		});
+// };
+
+// // Greptile
+// // https://docs.greptile.com/api-reference/query
+// // https://docs.greptile.com/quickstart#sample-response-streamed
+
+// const sendGreptileMsg: SendLLMMessageFnTypeInternal = ({ messages, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
+
+// 	let didAbort = false
+// 	let fullText = ''
+
+// 	// if abort is called, onFinalMessage is NOT called, and no later onTexts are called either
+// 	abortRef.current = () => {
+// 		didAbort = true
+// 	}
+
+// 	fetch('https://api.greptile.com/v2/query', {
+// 		method: 'POST',
+// 		headers: {
+// 			"Authorization": `Bearer ${voidConfig.greptile.apikey}`,
+// 			"X-Github-Token": `${voidConfig.greptile.githubPAT}`,
+// 			"Content-Type": `application/json`,
+// 		},
+// 		body: JSON.stringify({
+// 			messages,
+// 			stream: true,
+// 			repositories: [voidConfig.greptile.repoinfo],
+// 		}),
+// 	})
+// 		// the response body is a sequence of newline-separated JSON objects: {message}\n{message}\n{message}...\n
+// 		.then(async response => {
+// 			const text = await response.text()
+// 			console.log('got greptile', text)
+// 			return JSON.parse(`[${text.trim().split('\n').join(',')}]`)
+// 		})
+// 		// TODO: make this actually stream; right now the whole response body is read first, and only then is each entry passed to onText
+// 		.then(async responseArr => {
+// 			if (didAbort)
+// 				return
+
+// 			for (let response of responseArr) {
+
+// 				const type: string = response['type']
+// 				const message = response['message']
+
+// 				// when receive text
+// 				if (type === 'message') {
+// 					fullText += message
+// 					onText(message, fullText)
+// 				}
+// 				else if (type === 'sources') {
+// 					const { filepath, linestart, lineend } = message as { filepath: string, linestart: number | null, lineend: number | null }
+// 					fullText += filepath
+// 					onText(filepath, fullText)
+// 				}
+// 				// an entry with type 'status' and an empty 'message' marks the end of the response
+// 				else if (type === 'status') {
+// 					if (!message) {
+// 						onFinalMessage(fullText)
+// 					}
+// 				}
+// 			}
+
+// 		})
+// 		.catch(e => {
+// 			onError(e)
+// 		});
+
+// }
+
+// export const sendLLMMessage: SendLLMMessageFnTypeExternal = ({ options, mode, messages, fimInfo, onText, onFinalMessage, onError, voidConfig, abortRef }) => {
+// 	if (!voidConfig)
+// 		return onError('No config file found for LLM.');
+
+// 	// handle defaults
+// 	if (!mode) mode = 'chat'
+// 	if (!messages) messages = []
+
+// 	// build messages
+// 	if (mode === 'chat') {
+// 		// nothing needed
+// 	} else if (mode === 'fim') {
+// 		fimInfo = fimInfo!
+
+// 		const system = getFIMSystem({ voidConfig, fimInfo })
+// 		const prompt = getFIMPrompt({ voidConfig, fimInfo })
+// 		messages = ([
+// 			{ role: 'system', content: system },
+// 			{ role: 'user', content: prompt }
+// 		] as const)
+
+// 	}
+
+// 	// trim message content (Anthropic and other providers give an error if there is trailing whitespace)
+// 	messages = messages.map(m => ({ ...m, content: m.content.trim() }))
+// 		.filter(m => m.content !== '')
+
+// 	if (messages.length === 0)
+// 		return onError('No messages provided to LLM.');
+
+// 	switch (voidConfig.default.whichApi) {
+// 		case 'anthropic':
+// 			return sendAnthropicMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
+// 		case 'openAI':
+// 		case 'openRouter':
+// 		case 'openAICompatible':
+// 			return sendOpenAIMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
+// 		case 'gemini':
+// 			return sendGeminiMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
+// 		case 'ollama':
+// 			return sendOllamaMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
+// 		case 'greptile':
+// 			return sendGreptileMsg({ options, mode, messages, onText, onFinalMessage, onError, voidConfig, abortRef });
+// 		default:
+// 			onError(`Error: whichApi was ${voidConfig.default.whichApi}, which is not recognized!`)
+// 	}
+
+// }
diff --git a/src/vs/workbench/contrib/void/browser/registerConfig.ts b/src/vs/workbench/contrib/void/browser/registerConfig.ts
index bf7dc86c..bb54fbee 100644
--- a/src/vs/workbench/contrib/void/browser/registerConfig.ts
+++ b/src/vs/workbench/contrib/void/browser/registerConfig.ts
@@ -130,7 +130,7 @@ const voidConfigInfo: Record<
 		model: configEnum(
 			'Ollama model to use.',
 			'codestral',
-			['codestral', 'codegemma', 'codegemma:2b', 'codegemma:7b', 'codellama', 'codellama:7b', 'codellama:13b', 'codellama:34b', 'codellama:70b', 'codellama:code', 'codellama:python', 'command-r', 'command-r:35b', 'command-r-plus', 'command-r-plus:104b', 'deepseek-coder-v2', 'deepseek-coder-v2:16b', 'deepseek-coder-v2:236b', 'falcon2', 'falcon2:11b', 'firefunction-v2', 'firefunction-v2:70b', 'gemma', 'gemma:2b', 'gemma:7b', 'gemma2', 'gemma2:2b', 'gemma2:9b', 'gemma2:27b', 'llama2', 'llama2:7b', 'llama2:13b', 'llama2:70b', 'llama3', 'llama3:8b', 'llama3:70b', 'llama3-chatqa', 'llama3-chatqa:8b', 'llama3-chatqa:70b', 'llama3-gradient', 'llama3-gradient:8b', 'llama3-gradient:70b', 'llama3.1', 'llama3.1:8b', 'llama3.1:70b', 'llama3.1:405b', 'llava', 'llava:7b', 'llava:13b', 'llava:34b', 'llava-llama3', 'llava-llama3:8b', 'llava-phi3', 'llava-phi3:3.8b', 'mistral', 'mistral:7b', 'mistral-large', 'mistral-large:123b', 'mistral-nemo', 'mistral-nemo:12b', 'mixtral', 'mixtral:8x7b', 'mixtral:8x22b', 'moondream', 'moondream:1.8b', 'openhermes', 'openhermes:v2.5', 'phi3', 'phi3:3.8b', 'phi3:14b', 'phi3.5', 'phi3.5:3.8b', 'qwen', 'qwen:7b', 'qwen:14b', 'qwen:32b', 'qwen:72b', 'qwen:110b', 'qwen2', 'qwen2:0.5b', 'qwen2:1.5b', 'qwen2:7b', 'qwen2:72b', 'smollm', 'smollm:135m', 'smollm:360m', 'smollm:1.7b'] as const
+			['codestral', 'qwen2.5-coder', 'qwen2.5-coder:0.5b', 'qwen2.5-coder:1.5b', 'qwen2.5-coder:3b', 'qwen2.5-coder:7b', 'qwen2.5-coder:14b', 'qwen2.5-coder:32b', 'codegemma', 'codegemma:2b', 'codegemma:7b', 'codellama', 'codellama:7b', 'codellama:13b', 'codellama:34b', 'codellama:70b', 'codellama:code', 'codellama:python', 'command-r', 'command-r:35b', 'command-r-plus', 'command-r-plus:104b', 'deepseek-coder-v2', 'deepseek-coder-v2:16b', 'deepseek-coder-v2:236b', 'falcon2', 'falcon2:11b', 'firefunction-v2', 'firefunction-v2:70b', 'gemma', 'gemma:2b', 'gemma:7b', 'gemma2', 'gemma2:2b', 'gemma2:9b', 'gemma2:27b', 'llama2', 'llama2:7b', 'llama2:13b', 'llama2:70b', 'llama3', 'llama3:8b', 'llama3:70b', 'llama3-chatqa', 'llama3-chatqa:8b', 'llama3-chatqa:70b', 'llama3-gradient', 'llama3-gradient:8b', 'llama3-gradient:70b', 'llama3.1', 'llama3.1:8b', 'llama3.1:70b', 'llama3.1:405b', 'llava', 'llava:7b', 'llava:13b', 'llava:34b', 'llava-llama3', 'llava-llama3:8b', 'llava-phi3', 'llava-phi3:3.8b', 'mistral', 'mistral:7b', 'mistral-large', 'mistral-large:123b', 'mistral-nemo', 'mistral-nemo:12b', 'mixtral', 'mixtral:8x7b', 'mixtral:8x22b', 'moondream', 'moondream:1.8b', 'openhermes', 'openhermes:v2.5', 'phi3', 'phi3:3.8b', 'phi3:14b', 'phi3.5', 'phi3.5:3.8b', 'qwen', 'qwen:7b', 'qwen:14b', 'qwen:32b', 'qwen:72b', 'qwen:110b', 'qwen2', 'qwen2:0.5b', 'qwen2:1.5b', 'qwen2:7b', 'qwen2:72b', 'smollm', 'smollm:135m', 'smollm:360m', 'smollm:1.7b'] as const
 		),
 	},
 	openRouter: {
diff --git a/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts b/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts
index 29c6e15f..85d1584d 100644
--- a/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts
+++ b/src/vs/workbench/contrib/void/browser/registerInlineDiffs.ts
@@ -1027,22 +1027,3 @@ class AcceptRejectWidget extends Widget implements IOverlayWidget {
 
 
 
-
-
-
-// // 6. Autocomplete
-// const autocompleteProvider = new AutocompleteProvider(context);
-// context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider('*', autocompleteProvider));
-
-// const voidConfig = getVoidConfigFromPartial(context.globalState.get('partialVoidConfig') ?? {})
-// const abortRef: AbortRef = { current: null }
-
-// // setupAutocomplete({ voidConfig, abortRef })
-
-
-// // 7. Language Server
-// console.log('run lsp')
-// let disposable = vscode.commands.registerCommand('typeInspector.inspect', runTreeSitter);
-
-// context.subscriptions.push(disposable);
-