remove mistral, finish(?) models!

2026-05-23 17:38:23 +00:00 · 2025-02-24 04:01:00 -08:00 · 2025-02-24 04:01:00 -08:00 · 3ae8f75641
commit 3ae8f75641
parent 9f20476eea
8 changed files with 686 additions and 333 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -56,7 +56,7 @@
        "node-pty": "1.1.0-beta21",
        "ollama": "^0.5.11",
        "open": "^8.4.2",
-        "openai": "^4.76.1",
+        "openai": "^4.85.4",
        "posthog-node": "^4.3.1",
        "react": "^18.3.1",
        "react-dom": "^18.3.1",
@ -17079,9 +17079,10 @@
      }
    },
    "node_modules/openai": {
-      "version": "4.77.0",
-      "resolved": "https://registry.npmjs.org/openai/-/openai-4.77.0.tgz",
-      "integrity": "sha512-WWacavtns/7pCUkOWvQIjyOfcdr9X+9n9Vvb0zFeKVDAqwCMDHB+iSr24SVaBAhplvSG6JrRXFpcNM9gWhOGIw==",
+      "version": "4.85.4",
+      "resolved": "https://registry.npmjs.org/openai/-/openai-4.85.4.tgz",
+      "integrity": "sha512-Nki51PBSu+Aryo7WKbdXvfm0X/iKkQS2fq3O0Uqb/O3b4exOZFid2te1BZ52bbO5UwxQZ5eeHJDCTqtrJLPw0w==",
+      "license": "Apache-2.0",
      "dependencies": {
        "@types/node": "^18.11.18",
        "@types/node-fetch": "^2.6.4",
@ -17095,9 +17096,13 @@
        "openai": "bin/cli"
      },
      "peerDependencies": {
+        "ws": "^8.18.0",
        "zod": "^3.23.8"
      },
      "peerDependenciesMeta": {
+        "ws": {
+          "optional": true
+        },
        "zod": {
          "optional": true
        }
--- a/package.json
+++ b/package.json
@ -124,7 +124,7 @@
    "node-pty": "1.1.0-beta21",
    "ollama": "^0.5.11",
    "open": "^8.4.2",
-    "openai": "^4.76.1",
+    "openai": "^4.85.4",
    "posthog-node": "^4.3.1",
    "react": "^18.3.1",
    "react-dom": "^18.3.1",
--- a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts
+++ b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts
@ -3,6 +3,7 @@
 *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
 *--------------------------------------------------------------------------------------*/

+import { OnText } from '../../common/llmMessageTypes.js'
 import { DIVIDER, FINAL, ORIGINAL } from '../prompt/prompts.js'

 class SurroundingsRemover {
@ -240,3 +241,96 @@ export const extractSearchReplaceBlocks = (str: string) => {
 		})
 	}
 }
+
+
+
+
+
+
+
+
+
+
+export const extractReasoningFromText = (
+	onText_: OnText,
+	thinkTags: [string, string],
+): OnText => {
+
+	let latestAddIdx = 0 // exclusive
+	let foundTag1 = false
+	let foundTag2 = false
+
+	let fullText = ''
+	let fullReasoning = ''
+
+	const onText: OnText = ({ newText: newText_, fullText: fullText_ }) => {
+		//     abcdef<t|hin|k>ghi
+		//           |
+		// until found the first think tag, keep adding to fullText
+		if (!foundTag1) {
+			const endsWithTag1 = endsWithAnyPrefixOf(fullText_, thinkTags[0])
+			if (endsWithTag1) {
+				// wait until we get the full tag or know more
+				return
+			}
+			// if found the first tag
+			const tag1Index = fullText_.lastIndexOf(thinkTags[0])
+			if (tag1Index !== -1) {
+				foundTag1 = true
+				const newText = fullText.substring(latestAddIdx, tag1Index)
+				const newReasoning = fullText.substring(tag1Index + thinkTags[0].length, Infinity)
+
+				fullText += newText
+				fullReasoning += newReasoning
+				latestAddIdx += newText.length + newReasoning.length
+				onText_({ newText, fullText, newReasoning: newReasoning, fullReasoning })
+				return
+			}
+
+			// add the text to fullText
+			const newText = fullText.substring(latestAddIdx, Infinity)
+			fullText += newText
+			latestAddIdx += newText.length
+			onText_({ newText, fullText, newReasoning: '', fullReasoning })
+			return
+		}
+		// at this point, we found <tag1>
+
+		// until found the second think tag, keep adding to fullReasoning
+		if (!foundTag2) {
+			const endsWithTag2 = endsWithAnyPrefixOf(fullText_, thinkTags[1])
+			if (endsWithTag2) {
+				// wait until we get the full tag or know more
+				return
+			}
+			// if found the second tag
+			const tag2Index = fullText_.lastIndexOf(thinkTags[1])
+			if (tag2Index !== -1) {
+				foundTag2 = true
+				const newReasoning = fullText.substring(latestAddIdx, tag2Index)
+				const newText = fullText.substring(tag2Index + thinkTags[1].length, Infinity)
+
+				fullText += newText
+				fullReasoning += newReasoning
+				latestAddIdx += newText.length + newReasoning.length
+				onText_({ newText, fullText, newReasoning: newReasoning, fullReasoning })
+				return
+			}
+
+			// add the text to fullReasoning
+			const newReasoning = fullText.substring(latestAddIdx, Infinity)
+			fullReasoning += newReasoning
+			latestAddIdx += newReasoning.length
+			onText_({ newText: '', fullText, newReasoning, fullReasoning })
+			return
+		}
+		// at this point, we found <tag2>
+
+		fullText += newText_
+		const newText = fullText.substring(latestAddIdx, Infinity)
+		latestAddIdx += newText.length
+		onText_({ newText, fullText, newReasoning: '', fullReasoning })
+	}
+
+	return onText
+}
--- a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx
@ -415,7 +415,7 @@ export const FeaturesTab = () => {
 		<div className='pl-4 opacity-50'>
 			<span className={`text-sm mb-2`}><ChatMarkdownRender noSpace string={`1. Download [Ollama](https://ollama.com/download).`} /></span>
 			<span className={`text-sm mb-2`}><ChatMarkdownRender noSpace string={`2. Open your terminal.`} /></span>
-			<span className={`text-sm mb-2 select-text`}><ChatMarkdownRender noSpace string={`3. Run \`ollama run llama3.1\`. This installs Meta's llama3.1 model which is best for chat and inline edits. Requires 5GB of memory.`} /></span>
+			<span className={`text-sm mb-2 select-text`}><ChatMarkdownRender noSpace string={`3. Run \`ollama run llama3.1:8b\`. This installs Meta's llama3.1 model which is best for chat and inline edits. Requires 5GB of memory.`} /></span>
 			<span className={`text-sm mb-2 select-text`}><ChatMarkdownRender noSpace string={`4. Run \`ollama run qwen2.5-coder:1.5b\`. This installs a faster autocomplete model. Requires 1GB of memory.`} /></span>
 			<span className={`text-sm mb-2`}><ChatMarkdownRender noSpace string={`Void automatically detects locally running models and enables them.`} /></span>
 			{/* TODO we should create UI for downloading models without user going into terminal */}
--- a/src/vs/workbench/contrib/void/common/voidSettingsService.ts
+++ b/src/vs/workbench/contrib/void/common/voidSettingsService.ts
@ -11,7 +11,7 @@ import { registerSingleton, InstantiationType } from '../../../../platform/insta
 import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
 import { IStorageService, StorageScope, StorageTarget } from '../../../../platform/storage/common/storage.js';
 import { IMetricsService } from './metricsService.js';
-import { defaultSettingsOfProvider, FeatureName, ProviderName, ModelSelectionOfFeature, SettingsOfProvider, SettingName, providerNames, ModelSelection, modelSelectionsEqual, featureNames, VoidModelInfo, GlobalSettings, GlobalSettingName, defaultGlobalSettings, defaultProviderSettings, developerInfoOfModelName, modelInfoOfAutodetectedModelNames } from './voidSettingsTypes.js';
+import { defaultSettingsOfProvider, FeatureName, ProviderName, ModelSelectionOfFeature, SettingsOfProvider, SettingName, providerNames, ModelSelection, modelSelectionsEqual, featureNames, VoidModelInfo, GlobalSettings, GlobalSettingName, defaultGlobalSettings, defaultProviderSettings } from './voidSettingsTypes.js';


 const STORAGE_KEY = 'void.settingsServiceStorage'
@ -32,8 +32,6 @@ type SetGlobalSettingFn = <T extends GlobalSettingName, >(settingName: T, newVal

 export type ModelOption = { name: string, selection: ModelSelection }

-
-
 export type VoidSettingsState = {
 	readonly settingsOfProvider: SettingsOfProvider; // optionsOfProvider
 	readonly modelSelectionOfFeature: ModelSelectionOfFeature; // stateOfFeature
@ -172,9 +170,6 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService {
 				// A HACK BECAUSE WE ADDED DEEPSEEK (did not exist before, comes before readS)
 				...{ deepseek: defaultSettingsOfProvider.deepseek },

-				// A HACK BECAUSE WE ADDED MISTRAL (did not exist before, comes before readS)
-				...{ mistral: defaultSettingsOfProvider.mistral },
-
 				// A HACK BECAUSE WE ADDED XAI (did not exist before, comes before readS)
 				...{ xAI: defaultSettingsOfProvider.xAI },

@ -295,19 +290,35 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService {
 	}


+	private _updatedModelsAfterAutodetection = (defaultModelNames: string[], options: { existingModels: VoidModelInfo[] }) => {
+		const { existingModels } = options
+
+		const existingModelsMap: Record<string, VoidModelInfo> = {}
+		for (const existingModel of existingModels) {
+			existingModelsMap[existingModel.modelName] = existingModel
+		}
+
+		const newDefaultModels = defaultModelNames.map((modelName, i) => ({
+			modelName,
+			isDefault: true,
+			isAutodetected: true,
+			isHidden: !!existingModelsMap[modelName]?.isHidden,
+		}))
+
+		return [
+			...newDefaultModels, // swap out all the default models for the new default models
+			...existingModels.filter(m => !m.isDefault), // keep any non-default (custom) models
+		]
+	}
+
+

 	setAutodetectedModels(providerName: ProviderName, autodetectedModelNames: string[], logging: object) {

 		const { models } = this.state.settingsOfProvider[providerName]
 		const oldModelNames = models.map(m => m.modelName)

-
-		const newDefaultModels = modelInfoOfAutodetectedModelNames(autodetectedModelNames, { existingModels: models })
-		const newModels = [
-			...newDefaultModels, // swap out all the default models for the new default models
-			...models.filter(m => !m.isDefault), // keep any non-default (custom) models
-		]
-
+		const newModels = this._updatedModelsAfterAutodetection(autodetectedModelNames, { existingModels: models })
 		this.setSettingOfProvider(providerName, 'models', newModels)

 		// if the models changed, log it
@ -341,7 +352,7 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService {
 		if (existingIdx !== -1) return // if exists, do nothing
 		const newModels = [
 			...models,
-			{ ...developerInfoOfModelName(modelName), modelName, isDefault: false, isHidden: false }
+			{ modelName, isDefault: false, isHidden: false }
 		]
 		this.setSettingOfProvider(providerName, 'models', newModels)

--- a/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
+++ b/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
@ -4,7 +4,6 @@
 *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
 *--------------------------------------------------------------------------------------*/

-import { defaultModelsOfProvider } from '../electron-main/llmMessage/MODELS.js';
 import { VoidSettingsState } from './voidSettingsService.js'


@ -40,14 +39,70 @@ export const defaultProviderSettings = {
 	groq: {
 		apiKey: '',
 	},
-	mistral: {
-		apiKey: ''
-	},
 	xAI: {
 		apiKey: ''
 	},
 } as const

+
+
+
+export const defaultModelsOfProvider = {
+	openAI: [ // https://platform.openai.com/docs/models/gp
+		'o1',
+		'o3-mini',
+		'o1-mini',
+		'gpt-4o',
+		'gpt-4o-mini',
+	],
+	anthropic: [ // https://docs.anthropic.com/en/docs/about-claude/models
+		'claude-3-5-sonnet-latest',
+		'claude-3-5-haiku-latest',
+		'claude-3-opus-latest',
+	],
+	xAI: [ // https://docs.x.ai/docs/models?cluster=us-east-1
+		'grok-2-latest',
+		'grok-3-latest',
+	],
+	gemini: [ // https://ai.google.dev/gemini-api/docs/models/gemini
+		'gemini-2.0-flash',
+		'gemini-1.5-flash',
+		'gemini-1.5-pro',
+		'gemini-1.5-flash-8b',
+		'gemini-2.0-flash-thinking-exp',
+	],
+	deepseek: [ // https://api-docs.deepseek.com/quick_start/pricing
+		'deepseek-chat',
+		'deepseek-reasoner',
+	],
+	ollama: [ // autodetected
+	],
+	vLLM: [ // autodetected
+	],
+	openRouter: [ // https://openrouter.ai/models
+		'anthropic/claude-3.5-sonnet',
+		'deepseek/deepseek-r1',
+		'mistralai/codestral-2501',
+		'qwen/qwen2.5-vl-72b-instruct:free',
+	],
+	groq: [ // https://console.groq.com/docs/models
+		'llama-3.3-70b-versatile',
+		'llama-3.1-8b-instant',
+		'qwen-2.5-coder-32b', // preview mode (experimental)
+	],
+	// not supporting mistral right now- it's last on Void usage, and a huge pain to set up since it's nonstandard (it supports codestral FIM but it's on v1/fim/completions, etc)
+	// mistral: [ // https://docs.mistral.ai/getting-started/models/models_overview/
+	// 	'codestral-latest',
+	// 	'mistral-large-latest',
+	// 	'ministral-3b-latest',
+	// 	'ministral-8b-latest',
+	// ],
+	openAICompatible: [], // fallback
+} as const satisfies Record<ProviderName, string[]>
+
+
+
+
 export type ProviderName = keyof typeof defaultProviderSettings
 export const providerNames = Object.keys(defaultProviderSettings) as ProviderName[]

@ -139,11 +194,6 @@ export const displayInfoOfProviderName = (providerName: ProviderName): DisplayIn
 			title: 'Groq.com API',
 		}
 	}
-	else if (providerName === 'mistral') {
-		return {
-			title: 'Mistral API',
-		}
-	}
 	else if (providerName === 'xAI') {
 		return {
 			title: 'xAI API',
@ -173,10 +223,9 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName
 						providerName === 'openRouter' ? 'sk-or-key...' : // sk-or-v1-key
 							providerName === 'gemini' ? 'key...' :
 								providerName === 'groq' ? 'gsk_key...' :
-									providerName === 'mistral' ? 'key...' :
-										providerName === 'openAICompatible' ? 'sk-key...' :
-											providerName === 'xAI' ? 'xai-key...' :
-												'',
+									providerName === 'openAICompatible' ? 'sk-key...' :
+										providerName === 'xAI' ? 'xai-key...' :
+											'',

 			subTextMd: providerName === 'anthropic' ? 'Get your [API Key here](https://console.anthropic.com/settings/keys).' :
 				providerName === 'openAI' ? 'Get your [API Key here](https://platform.openai.com/api-keys).' :
@ -184,10 +233,9 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName
 						providerName === 'openRouter' ? 'Get your [API Key here](https://openrouter.ai/settings/keys).' :
 							providerName === 'gemini' ? 'Get your [API Key here](https://aistudio.google.com/apikey).' :
 								providerName === 'groq' ? 'Get your [API Key here](https://console.groq.com/keys).' :
-									providerName === 'mistral' ? 'Get your [API Key here](https://console.mistral.ai/api-keys/).' :
-										providerName === 'xAI' ? 'Get your [API Key here](https://console.x.ai).' :
-											providerName === 'openAICompatible' ? undefined :
-												'',
+									providerName === 'xAI' ? 'Get your [API Key here](https://console.x.ai).' :
+										providerName === 'openAICompatible' ? undefined :
+											'',
 			isPasswordField: true,
 		}
 	}
@ -271,12 +319,6 @@ export const defaultSettingsOfProvider: SettingsOfProvider = {
 		...modelInfoOfDefaultModelNames(defaultModelsOfProvider.gemini),
 		_didFillInProviderSettings: undefined,
 	},
-	mistral: {
-		...defaultCustomSettings,
-		...defaultProviderSettings.mistral,
-		...modelInfoOfDefaultModelNames(defaultModelsOfProvider.mistral),
-		_didFillInProviderSettings: undefined,
-	},
 	xAI: {
 		...defaultCustomSettings,
 		...defaultProviderSettings.xAI,
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts
@ -4,67 +4,15 @@
 *--------------------------------------------------------------------------------------*/

 import OpenAI, { ClientOptions } from 'openai';
-import { Model as OpenAIModel } from 'openai/resources/models.js';
-import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js';
-import { InternalToolInfo, isAToolName } from '../../common/toolsService.js';
-import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
-import { prepareMessages } from './preprocessLLMMessages.js';
 import Anthropic from '@anthropic-ai/sdk';
 import { Ollama } from 'ollama';

-
-
-export const defaultModelsOfProvider = {
-	anthropic: [ // https://docs.anthropic.com/en/docs/about-claude/models
-		'claude-3-5-sonnet-latest',
-		'claude-3-5-haiku-latest',
-		'claude-3-opus-latest',
-	],
-	openAI: [ // https://platform.openai.com/docs/models/gp
-		'o1',
-		'o1-mini',
-		'o3-mini',
-		'gpt-4o',
-		'gpt-4o-mini',
-	],
-	deepseek: [ // https://platform.openai.com/docs/models/gp
-		'deepseek-chat',
-		'deepseek-reasoner',
-	],
-	ollama: [],
-	vLLM: [],
-	openRouter: [],
-	openAICompatible: [],
-	gemini: [
-		'gemini-1.5-flash',
-		'gemini-1.5-pro',
-		'gemini-1.5-flash-8b',
-		'gemini-2.0-flash-exp',
-		'gemini-2.0-flash-thinking-exp-1219',
-		'learnlm-1.5-pro-experimental'
-	],
-	groq: [ // https://console.groq.com/docs/models
-		"llama3-70b-8192",
-		"llama-3.3-70b-versatile",
-		"llama-3.1-8b-instant",
-		"gemma2-9b-it",
-		"mixtral-8x7b-32768"
-	],
-	mistral: [ // https://docs.mistral.ai/getting-started/models/models_overview/
-		"codestral-latest",
-		"open-codestral-mamba",
-		"open-mistral-nemo",
-		"mistral-large-latest",
-		"pixtral-large-latest",
-		"ministral-3b-latest",
-		"ministral-8b-latest",
-		"mistral-small-latest",
-	],
-	xAI: [ // https://docs.x.ai/docs/models?cluster=us-east-1
-		'grok-3-latest',
-		'grok-2-latest',
-	],
-} satisfies Record<ProviderName, string[]>
+import { Model as OpenAIModel } from 'openai/resources/models.js';
+import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js';
+import { InternalToolInfo, isAToolName } from '../../common/toolsService.js';
+import { defaultProviderSettings, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
+import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js';
+import { extractReasoningFromText } from '../../browser/helpers/extractCodeFromResult.js';



@ -78,10 +26,13 @@ type ModelOptions = {
 	}
 	supportsSystemMessage: false | 'system-role' | 'developer-role' | 'separated';
 	supportsTools: false | 'anthropic-style' | 'openai-style';
-	supportsFIM: false | 'TODO_FIM_FORMAT';
+	supportsFIM: boolean;

-	supportsReasoning: boolean; // not whether it reasons, but whether it outputs reasoning tokens
-	manualMatchReasoningTokens?: [string, string]; // reasoning tokens if it's an OSS model
+	supportsReasoningOutput: false | {
+		// you are allowed to not include openSourceThinkTags if it's not open source (no such cases as of writing)
+		// if it's open source, put the think tags here so we parse them out in e.g. ollama
+		openSourceThinkTags?: [string, string]
+	};
 }

 type ProviderReasoningOptions = {
@ -95,9 +46,9 @@ type ProviderReasoningOptions = {
 }

 type ProviderSettings = {
-	providerReasoningOptions?: ProviderReasoningOptions;
+	ifSupportsReasoningOutput?: ProviderReasoningOptions;
 	modelOptions: { [key: string]: ModelOptions };
-	modelOptionsFallback: (modelName: string) => ModelOptions; // allowed to throw error if modeName is totally invalid
+	modelOptionsFallback: (modelName: string) => (ModelOptions & { modelName: string }) | null;
 }


@ -107,172 +58,446 @@ type ModelSettingsOfProvider = {



+// type DefaultModels<T extends ProviderName> = typeof defaultModelsOfProvider[T][number]
+// type AssertModelsIncluded<
+// 	T extends ProviderName,
+// 	Options extends Record<string, unknown>
+// > = Exclude<DefaultModels<T>, keyof Options> extends never
+// 	? true
+// 	: ["Missing models for", T, Exclude<DefaultModels<T>, keyof Options>];
+// const assertOpenAI: AssertModelsIncluded<'openAI', typeof openAIModelOptions> = true;


-const modelNotRecognizedErrorMessage = (modelName: string, providerName: ProviderName) => `Void could not find a model matching ${modelName} for ${displayInfoOfProviderName(providerName).title}.`
-
+const modelOptionDefaults: ModelOptions = {
+	contextWindow: 32_000,
+	cost: { input: 0, output: 0 },
+	supportsSystemMessage: false,
+	supportsTools: false,
+	supportsFIM: false,
+	supportsReasoningOutput: false,
+}


 // ---------------- OPENAI ----------------
-const openAIModelOptions = {
-	"o1": {
+const openAIModelOptions = { // https://platform.openai.com/docs/pricing
+	'o1': {
 		contextWindow: 128_000,
 		cost: { input: 15.00, cache_read: 7.50, output: 60.00, },
 		supportsFIM: false,
 		supportsTools: false,
 		supportsSystemMessage: 'developer-role',
-		supportsReasoning: false,
+		supportsReasoningOutput: false,
 	},
-	"o3-mini": {
+	'o3-mini': {
 		contextWindow: 200_000,
 		cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
 		supportsFIM: false,
 		supportsTools: false,
 		supportsSystemMessage: 'developer-role',
-		supportsReasoning: false,
+		supportsReasoningOutput: false,
 	},
-	"gpt-4o": {
+	'gpt-4o': {
 		contextWindow: 128_000,
 		cost: { input: 2.50, cache_read: 1.25, output: 10.00, },
 		supportsFIM: false,
 		supportsTools: 'openai-style',
 		supportsSystemMessage: 'system-role',
-		supportsReasoning: false,
+		supportsReasoningOutput: false,
 	},
-} as const
+	'o1-mini': {
+		contextWindow: 128_000,
+		cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
+		supportsFIM: false,
+		supportsTools: false,
+		supportsSystemMessage: false, // does not support any system
+		supportsReasoningOutput: false,
+	},
+	'gpt-4o-mini': {
+		contextWindow: 128_000,
+		cost: { input: 0.15, cache_read: 0.075, output: 0.60, },
+		supportsFIM: false,
+		supportsTools: 'openai-style',
+		supportsSystemMessage: 'system-role', // ??
+		supportsReasoningOutput: false,
+	},
+} as const satisfies { [s: string]: ModelOptions }
+

 const openAISettings: ProviderSettings = {
 	modelOptions: openAIModelOptions,
 	modelOptionsFallback: (modelName) => {
-		if (modelName.includes('o1')) return openAIModelOptions['o1']
-		if (modelName.includes('o3-mini')) return openAIModelOptions['o3-mini']
-		if (modelName.includes('gpt-4o')) return openAIModelOptions['gpt-4o']
-		throw new Error(modelNotRecognizedErrorMessage(modelName, 'openAI'))
+		let fallbackName: keyof typeof openAIModelOptions | null = null
+		if (modelName.includes('o1')) { fallbackName = 'o1' }
+		if (modelName.includes('o3-mini')) { fallbackName = 'o3-mini' }
+		if (modelName.includes('gpt-4o')) { fallbackName = 'gpt-4o' }
+		if (fallbackName) return { modelName: fallbackName, ...openAIModelOptions[fallbackName] }
+		return null
 	}
 }

 // ---------------- ANTHROPIC ----------------
 const anthropicModelOptions = {
-	"claude-3-5-sonnet-20241022": {
+	'claude-3-5-sonnet-20241022': {
 		contextWindow: 200_000,
 		cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
 		supportsFIM: false,
 		supportsSystemMessage: 'separated',
 		supportsTools: 'anthropic-style',
-		supportsReasoning: false,
-
+		supportsReasoningOutput: false,
 	},
-	"claude-3-5-haiku-20241022": {
+	'claude-3-5-haiku-20241022': {
 		contextWindow: 200_000,
 		cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 },
 		supportsFIM: false,
 		supportsSystemMessage: 'separated',
 		supportsTools: 'anthropic-style',
-		supportsReasoning: false,
+		supportsReasoningOutput: false,
 	},
-	"claude-3-opus-20240229": {
+	'claude-3-opus-20240229': {
 		contextWindow: 200_000,
 		cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 },
 		supportsFIM: false,
 		supportsSystemMessage: 'separated',
 		supportsTools: 'anthropic-style',
-		supportsReasoning: false,
+		supportsReasoningOutput: false,
 	},
-	"claude-3-sonnet-20240229": {
+	'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in
 		contextWindow: 200_000, cost: { input: 3.00, output: 15.00 },
 		supportsFIM: false,
 		supportsSystemMessage: 'separated',
 		supportsTools: 'anthropic-style',
-		supportsReasoning: false,
+		supportsReasoningOutput: false,
 	}
-} as const
+} as const satisfies { [s: string]: ModelOptions }

 const anthropicSettings: ProviderSettings = {
 	modelOptions: anthropicModelOptions,
 	modelOptionsFallback: (modelName) => {
-		throw new Error(modelNotRecognizedErrorMessage(modelName, 'anthropic'))
+		let fallbackName: keyof typeof anthropicModelOptions | null = null
+		if (modelName.includes('claude-3-5-sonnet')) fallbackName = 'claude-3-5-sonnet-20241022'
+		if (modelName.includes('claude-3-5-haiku')) fallbackName = 'claude-3-5-haiku-20241022'
+		if (modelName.includes('claude-3-opus')) fallbackName = 'claude-3-opus-20240229'
+		if (fallbackName) return { modelName: fallbackName, ...anthropicModelOptions[fallbackName] }
+		return null
 	}
 }


 // ---------------- XAI ----------------
-const XAIModelOptions = {
-	"grok-2-latest": {
+const xAIModelOptions = {
+	'grok-2-latest': {
 		contextWindow: 131_072,
 		cost: { input: 2.00, output: 10.00 },
 		supportsFIM: false,
 		supportsSystemMessage: 'system-role',
 		supportsTools: 'openai-style',
-		supportsReasoning: false,
+		supportsReasoningOutput: false,
 	},
-} as const
+} as const satisfies { [s: string]: ModelOptions }

-const XAISettings: ProviderSettings = {
-	modelOptions: XAIModelOptions,
+const xAISettings: ProviderSettings = {
+	modelOptions: xAIModelOptions,
 	modelOptionsFallback: (modelName) => {
-		throw new Error(modelNotRecognizedErrorMessage(modelName, 'xAI'))
+		let fallbackName: keyof typeof xAIModelOptions | null = null
+		if (modelName.includes('grok-2')) fallbackName = 'grok-2-latest'
+		if (fallbackName) return { modelName: fallbackName, ...xAIModelOptions[fallbackName] }
+		return null
 	}
 }


+// ---------------- GEMINI ----------------
+const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
+	'gemini-2.0-flash': {
+		contextWindow: 1_048_576,
+		cost: { input: 0.10, output: 0.40 },
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style', // we are assuming OpenAI SDK when calling gemini
+		supportsReasoningOutput: false,
+	},
+	'gemini-2.0-flash-lite-preview-02-05': {
+		contextWindow: 1_048_576,
+		cost: { input: 0.075, output: 0.30 },
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'gemini-1.5-flash': {
+		contextWindow: 1_048_576,
+		cost: { input: 0.075, output: 0.30 },  // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'gemini-1.5-pro': {
+		contextWindow: 2_097_152,
+		cost: { input: 1.25, output: 5.00 },  // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'gemini-1.5-flash-8b': {
+		contextWindow: 1_048_576,
+		cost: { input: 0.0375, output: 0.15 },  // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+} as const satisfies { [s: string]: ModelOptions }
+
+const geminiSettings: ProviderSettings = {
+	modelOptions: geminiModelOptions,
+	modelOptionsFallback: (modelName) => {
+		return null
+	}
+}
+
+
+// ---------------- OPEN SOURCE MODELS ----------------
+
+const openSourceModelDefaultOptionsAssumingOAICompat = {
+	'deepseekR1': {
+		supportsFIM: false,
+		supportsSystemMessage: false,
+		supportsTools: false,
+		supportsReasoningOutput: { openSourceThinkTags: ['<think>', '</think>'] },
+	},
+	'deepseekCoderV2': {
+		supportsFIM: false,
+		supportsSystemMessage: false, // unstable
+		supportsTools: false,
+		supportsReasoningOutput: false,
+	},
+	'codestral': {
+		supportsFIM: true,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	// llama
+	'llama3': {
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'llama3.1': {
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'llama3.2': {
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'llama3.3': {
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'qwen2.5coder': {
+		supportsFIM: true,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	// FIM only
+	'starcoder2': {
+		supportsFIM: true,
+		supportsSystemMessage: false,
+		supportsTools: false,
+		supportsReasoningOutput: false,
+	},
+	'codegemma:2b': {
+		supportsFIM: true,
+		supportsSystemMessage: false,
+		supportsTools: false,
+		supportsReasoningOutput: false,
+	},
+} as const satisfies { [s: string]: Partial<ModelOptions> }
+
+
+
+// ---------------- DEEPSEEK API ----------------
+const deepseekModelOptions = {
+	'deepseek-chat': {
+		...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1,
+		contextWindow: 64_000, // https://api-docs.deepseek.com/quick_start/pricing
+		cost: { cache_read: .07, input: .27, output: 1.10, },
+	},
+	'deepseek-reasoner': {
+		...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2,
+		contextWindow: 64_000,
+		cost: { cache_read: .14, input: .55, output: 2.19, },
+	},
+} as const satisfies { [s: string]: ModelOptions }
+
+
+const deepseekSettings: ProviderSettings = {
+	modelOptions: deepseekModelOptions,
+	ifSupportsReasoningOutput: {
+		// reasoning: OAICompat +  response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model
+		output: { nameOfFieldInDelta: 'reasoning_content' },
+	},
+	modelOptionsFallback: (modelName) => {
+		return null
+	}
+}
+
+// ---------------- GROQ ----------------
+const groqModelOptions = {
+	'llama-3.3-70b-versatile': {
+		contextWindow: 128_000,
+		cost: { input: 0.59, output: 0.79 },
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'llama-3.1-8b-instant': {
+		contextWindow: 128_000,
+		cost: { input: 0.05, output: 0.08 },
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'qwen-2.5-coder-32b': {
+		contextWindow: 128_000,
+		cost: { input: 0.79, output: 0.79 },
+		supportsFIM: false, // unfortunately looks like no FIM support on groq
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+} as const satisfies { [s: string]: ModelOptions }
+const groqSettings: ProviderSettings = {
+	modelOptions: groqModelOptions,
+	modelOptionsFallback: (modelName) => { return null }
+}
+
+
+// ---------------- anything self-hosted/local: VLLM, OLLAMA, OPENAICOMPAT ----------------
+
+// fallback to any model (anything openai-compatible)
+const extensiveModelFallback: ProviderSettings['modelOptionsFallback'] = (modelName) => {
+	const toFallback = (opts: Omit<ModelOptions, 'cost'>): ModelOptions & { modelName: string } => {
+		return {
+			modelName,
+			...opts,
+			supportsSystemMessage: opts.supportsSystemMessage ? 'system-role' : false,
+			cost: { input: 0, output: 0 },
+		}
+	}
+	if (modelName.includes('gpt-4o')) return toFallback(openAIModelOptions['gpt-4o'])
+	if (modelName.includes('claude')) return toFallback(anthropicModelOptions['claude-3-5-sonnet-20241022'])
+	if (modelName.includes('grok')) return toFallback(xAIModelOptions['grok-2-latest'])
+	if (modelName.includes('deepseek-r1') || modelName.includes('deepseek-reasoner')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, contextWindow: 32_000, })
+	if (modelName.includes('deepseek')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2, contextWindow: 32_000, })
+	if (modelName.includes('llama3')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.llama3, contextWindow: 32_000, })
+	if (modelName.includes('qwen2.5-coder')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat['qwen2.5coder'], contextWindow: 32_000, })
+	if (modelName.includes('codestral')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.codestral, contextWindow: 32_000, })
+	if (/\bo1\b/.test(modelName) || /\bo3\b/.test(modelName)) return toFallback(openAIModelOptions['o1'])
+	return toFallback(modelOptionDefaults)
+}
+
+
+const vLLMSettings: ProviderSettings = {
+	// reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions
+	ifSupportsReasoningOutput: { output: { nameOfFieldInDelta: 'reasoning_content' }, },
+	modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
+	modelOptions: {},
+}
+
+const ollamaSettings: ProviderSettings = {
+	// reasoning: we need to filter out reasoning <think> tags manually
+	ifSupportsReasoningOutput: { output: { needsManualParse: true }, },
+	modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
+	modelOptions: {},
+}
+
+const openaiCompatible: ProviderSettings = {
+	// reasoning: we have no idea what endpoint they used, so we can't consistently parse out reasoning
+	modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
+	modelOptions: {},
+}
+
+
+// ---------------- OPENROUTER ----------------
+const openRouterModelOptions = {
+	'deepseek/deepseek-r1': {
+		...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1,
+		contextWindow: 128_000,
+		cost: { input: 0.8, output: 2.4 },
+	},
+	'anthropic/claude-3.5-sonnet': {
+		contextWindow: 200_000,
+		cost: { input: 3.00, output: 15.00 },
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+	'mistralai/codestral-2501': {
+		...openSourceModelDefaultOptionsAssumingOAICompat.codestral,
+		contextWindow: 256_000,
+		cost: { input: 0.3, output: 0.9 },
+		supportsTools: 'openai-style',
+		supportsReasoningOutput: false,
+	},
+} as const satisfies { [s: string]: ModelOptions }
+
+const openRouterSettings: ProviderSettings = {
+	// reasoning: OAICompat + response.choices[0].delta.reasoning : payload should have {include_reasoning: true} https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models
+	ifSupportsReasoningOutput: {
+		input: { includeInPayload: { include_reasoning: true } },
+		output: { nameOfFieldInDelta: 'reasoning' },
+	},
+	modelOptions: openRouterModelOptions,
+	// TODO!!! send a query to openrouter to get the price, isFIM, etc.
+	modelOptionsFallback: (modelName) => extensiveModelFallback(modelName),
+}
+
+// ---------------- model settings of everything above ----------------

 const modelSettingsOfProvider: ModelSettingsOfProvider = {
 	openAI: openAISettings,
 	anthropic: anthropicSettings,
-	xAI: XAISettings,
-	gemini: {
-		modelOptions: {
-
-		}
-	},
-	googleVertex: {
-
-	},
-	microsoftAzure: {
-
-	},
-	openRouter: {
-		providerReasoningOptions: {
-			// reasoning: OAICompat + response.choices[0].delta.reasoning : payload should have {include_reasoning: true} https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models
-			input: { includeInPayload: { include_reasoning: true } },
-			output: { nameOfFieldInDelta: 'reasoning' },
-		}
-	},
-	vLLM: {
-		providerReasoningOptions: {
-			// reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions
-			output: { nameOfFieldInDelta: 'reasoning_content' },
-		}
-	},
-	deepseek: {
-		providerReasoningOptions: {
-			// reasoning: OAICompat +  response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model
-			output: { nameOfFieldInDelta: 'reasoning_content' },
-		},
-	},
-	ollama: {
-		providerReasoningOptions: {
-			// reasoning: we need to filter out reasoning <think> tags manually
-			output: { needsManualParse: true },
-		},
-	},
-
-	openAICompatible: {
-	},
-	mistral: {
-	},
-	groq: {
-	},
+	xAI: xAISettings,
+	gemini: geminiSettings,

+	// open source models
+	deepseek: deepseekSettings,
+	groq: groqSettings,

+	// open source models + providers (mixture of everything)
+	openRouter: openRouterSettings,
+	vLLM: vLLMSettings,
+	ollama: ollamaSettings,
+	openAICompatible: openaiCompatible,

+	// googleVertex: {},
+	// microsoftAzure: {},
 } as const satisfies ModelSettingsOfProvider


-const modelOptionsOfProvider = (providerName: ProviderName, modelName: string) => {
+
+
+export const modelOptionsOfProvider = (providerName: ProviderName, modelName: string): ModelOptions & { modelName: string } => {
 	const { modelOptions, modelOptionsFallback } = modelSettingsOfProvider[providerName]
-	if (modelName in modelOptions) return modelOptions[modelName]
-	return modelOptionsFallback(modelName)
+	if (modelName in modelOptions) return { modelName, ...modelOptions[modelName] }
+	const result = modelOptionsFallback(modelName)
+	if (!result) return { modelName, ...modelOptionDefaults }
+	return result
 }


@ -361,10 +586,6 @@ const newOpenAICompatibleSDK = ({ settingsOfProvider, providerName, includeInPay
 		const thisConfig = settingsOfProvider[providerName]
 		return new OpenAI({ baseURL: thisConfig.endpoint, apiKey: thisConfig.apiKey, ...commonPayloadOpts })
 	}
-	else if (providerName === 'mistral') {
-		const thisConfig = settingsOfProvider[providerName]
-		return new OpenAI({ baseURL: 'https://api.mistral.ai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
-	}
 	else if (providerName === 'groq') {
 		const thisConfig = settingsOfProvider[providerName]
 		return new OpenAI({ baseURL: 'https://api.groq.com/openai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
@ -379,33 +600,52 @@ const newOpenAICompatibleSDK = ({ settingsOfProvider, providerName, includeInPay



-const manualParseOnText = (
-	providerName: ProviderName,
-	modelName: string,
-	onText_: OnText
-): OnText => {
-	return onText_
+const _sendOpenAICompatibleFIM = ({ messages: messages_, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, providerName, aiInstructions, }: SendFIMParams_Internal) => {
+	const { modelName, } = modelOptionsOfProvider(providerName, modelName_)
+	const messages = prepareFIMMessage({ messages: messages_, aiInstructions, })
+
+	const openai = newOpenAICompatibleSDK({ providerName, settingsOfProvider })
+	openai.completions
+		.create({
+			model: modelName,
+			prompt: messages.prefix,
+			suffix: messages.suffix,
+			stop: messages.stopTokens,
+			max_tokens: messages.maxTokens,
+		})
+		.then(async response => {
+			const fullText = response.choices[0]?.text
+			onFinalMessage({ fullText, });
+		})
+		.catch(error => {
+			if (error instanceof OpenAI.APIError && error.status === 401) { onError({ message: 'Invalid API key.', fullError: error }); }
+			else { onError({ message: error + '', fullError: error }); }
+		})
 }


-const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
+
+
+const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
 	const {
-		supportsReasoning: modelSupportsReasoning,
+		modelName,
+		supportsReasoningOutput,
 		supportsSystemMessage,
 		supportsTools,
-	} = modelOptionsOfProvider(providerName, modelName)
+	} = modelOptionsOfProvider(providerName, modelName_)

 	const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, })
 	const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined

-	const includeInPayload = modelSupportsReasoning ? {} : modelSettingsOfProvider[providerName].providerReasoningOptions?.input?.includeInPayload || {}
+	const includeInPayload = supportsReasoningOutput ? modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.input?.includeInPayload || {} : {}

 	const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
 	const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
 	const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj }

-	const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].providerReasoningOptions?.output ?? {}
-	if (needsManualReasoningParse) onText = manualParseOnText(providerName, modelName, onText)
+	const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.output ?? {}
+	if (needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags)
+		onText = extractReasoningFromText(onText, supportsReasoningOutput.openSourceThinkTags)

 	let fullReasoning = ''
 	let fullText = ''
@ -432,7 +672,7 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
 				let newReasoning = ''
 				if (nameOfReasoningFieldInDelta) {
 					// @ts-ignore
-					newReasoning = (chunk.choices[0]?.delta?.[nameOfFieldInDelta] || '') + ''
+					newReasoning = (chunk.choices[0]?.delta?.[nameOfReasoningFieldInDelta] || '') + ''
 					fullReasoning += newReasoning
 				}

@ -477,10 +717,6 @@ const _openaiCompatibleList = async ({ onSuccess: onSuccess_, onError: onError_,



-// ------------ OPENAI ------------
-const sendOpenAIChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}

 // ------------ ANTHROPIC ------------
 const toAnthropicTool = (toolInfo: InternalToolInfo) => {
@ -504,13 +740,14 @@ const toolCallsFromAnthropicContent = (content: Anthropic.Messages.ContentBlock[
 	}).filter(t => !!t)
 }

-const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
+const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
 	const {
 		// supportsReasoning: modelSupportsReasoning,
+		modelName,
 		supportsSystemMessage,
 		supportsTools,
 		contextWindow,
-	} = modelOptionsOfProvider(providerName, modelName)
+	} = modelOptionsOfProvider(providerName, modelName_)

 	const { messages, separateSystemMessageStr } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, })

@ -561,16 +798,6 @@ const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalM
 // })


-// ------------ XAI ------------
-const sendXAIChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}
-
-// ------------ GEMINI ------------
-const sendGeminiAPIChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}
-
 // ------------ OLLAMA ------------
 const newOllamaSDK = ({ endpoint }: { endpoint: string }) => {
 	// if endpoint is empty, normally ollama will send to 11434, but we want it to fail - the user should type it in
@ -603,10 +830,12 @@ const ollamaList = async ({ onSuccess: onSuccess_, onError: onError_, settingsOf
 	}
 }

-const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter }: SendFIMParams_Internal) => {
+const sendOllamaFIM = ({ messages: messages_, onFinalMessage, onError, settingsOfProvider, modelName, aiInstructions, _setAborter }: SendFIMParams_Internal) => {
 	const thisConfig = settingsOfProvider.ollama
 	const ollama = newOllamaSDK({ endpoint: thisConfig.endpoint })

+	const messages = prepareFIMMessage({ messages: messages_, aiInstructions, })
+
 	let fullText = ''
 	ollama.generate({
 		model: modelName,
@ -614,7 +843,7 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider,
 		suffix: messages.suffix,
 		options: {
 			stop: messages.stopTokens,
-			num_predict: 300, // max tokens
+			num_predict: messages.maxTokens, // max tokens
 			// repeat_penalty: 1,
 		},
 		raw: true,
@ -635,57 +864,73 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider,
 }


-// ollama's implementation of openai-compatible SDK dumps all reasoning tokens out with message, and supports tools, so we can use it for chat!
-const sendOllamaChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
+
+type CallFnOfProvider = {
+	[providerName in ProviderName]: {
+		sendChat: (params: SendChatParams_Internal) => void;
+		sendFIM: ((params: SendFIMParams_Internal) => void) | null;
+		list: ((params: ListParams_Internal<any>) => void) | null;
+	}
 }

-// ------------ OPENAI-COMPATIBLE ------------
-// TODO!!! FIM
-
-// using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration
-const sendOpenAICompatibleChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}
-
-// ------------ OPENROUTER ------------
-const sendOpenRouterChat = (params: SendChatParams_Internal) => {
-	_sendOpenAICompatibleChat(params)
-}
-
-// ------------ VLLM ------------
-const vLLMList = async (params: ListParams_Internal<OpenAIModel>) => {
-	return _openaiCompatibleList(params)
-}
-const sendVLLMFIM = (params: SendFIMParams_Internal) => {
-	// TODO!!!
-}
-
-// using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration
-const sendVLLMChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}
-
-// ------------ DEEPSEEK API ------------
-const sendDeepSeekAPIChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}
-
-// ------------ MISTRAL ------------
-const sendMistralAPIChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}
-
-// ------------ GROQ ------------
-const sendGroqAPIChat = (params: SendChatParams_Internal) => {
-	return _sendOpenAICompatibleChat(params)
-}
+export const sendLLMMessageToProviderImplementation = {
+	anthropic: {
+		sendChat: sendAnthropicChat,
+		sendFIM: null,
+		list: null,
+	},
+	openAI: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: null,
+		list: null,
+	},
+	xAI: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: null,
+		list: null,
+	},
+	gemini: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: null,
+		list: null,
+	},
+	ollama: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: sendOllamaFIM,
+		list: ollamaList,
+	},
+	openAICompatible: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params), // using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration
+		sendFIM: (params) => _sendOpenAICompatibleFIM(params),
+		list: null,
+	},
+	openRouter: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: (params) => _sendOpenAICompatibleFIM(params),
+		list: null,
+	},
+	vLLM: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: (params) => _sendOpenAICompatibleFIM(params),
+		list: (params) => _openaiCompatibleList(params),
+	},
+	deepseek: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: null,
+		list: null,
+	},
+	groq: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: null,
+		list: null,
+	},
+} satisfies CallFnOfProvider




 /*
-FIM:
+FIM info (this may be useful in the future with vLLM, but in most cases the only way to use FIM is if the provider explicitly supports it):

 qwen2.5-coder https://ollama.com/library/qwen2.5-coder/blobs/e94a8ecb9327
 <|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
@ -706,71 +951,3 @@ codegemma https://ollama.com/library/codegemma:2b/blobs/48d9a8140749
 <|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>

 */
-
-
-
-type CallFnOfProvider = {
-	[providerName in ProviderName]: {
-		sendChat: (params: SendChatParams_Internal) => void;
-		sendFIM: ((params: SendFIMParams_Internal) => void) | null;
-		list: ((params: ListParams_Internal<any>) => void) | null;
-	}
-}
-export const sendLLMMessageToProviderImplementation = {
-	openAI: {
-		sendChat: sendOpenAIChat,
-		sendFIM: null,
-		list: null,
-	},
-	anthropic: {
-		sendChat: sendAnthropicChat,
-		sendFIM: null,
-		list: null,
-	},
-	xAI: {
-		sendChat: sendXAIChat,
-		sendFIM: null,
-		list: null,
-	},
-	gemini: {
-		sendChat: sendGeminiAPIChat,
-		sendFIM: null,
-		list: null,
-	},
-	ollama: {
-		sendChat: sendOllamaChat,
-		sendFIM: sendOllamaFIM,
-		list: ollamaList,
-	},
-	openAICompatible: {
-		sendChat: sendOpenAICompatibleChat,
-		sendFIM: null,
-		list: null,
-	},
-	openRouter: {
-		sendChat: sendOpenRouterChat,
-		sendFIM: null,
-		list: null,
-	},
-	vLLM: {
-		sendChat: sendVLLMChat,
-		sendFIM: sendVLLMFIM,
-		list: vLLMList,
-	},
-	deepseek: {
-		sendChat: sendDeepSeekAPIChat,
-		sendFIM: null,
-		list: null,
-	},
-	groq: {
-		sendChat: sendGroqAPIChat,
-		sendFIM: null,
-		list: null,
-	},
-	mistral: {
-		sendChat: sendMistralAPIChat,
-		sendFIM: null,
-		list: null,
-	},
-
-} satisfies CallFnOfProvider
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts
@ -1,6 +1,6 @@


-import { LLMChatMessage } from '../../common/llmMessageTypes.js';
+import { LLMChatMessage, LLMFIMMessage } from '../../common/llmMessageTypes.js';
 import { deepClone } from '../../../../../base/common/objects.js';


@ -322,3 +322,27 @@ export const prepareMessages = ({
 	} as const
 }

+
+
+
+
+export const prepareFIMMessage = ({
+	messages,
+	aiInstructions,
+}: {
+	messages: LLMFIMMessage,
+	aiInstructions: string,
+}) => {
+
+	let prefix = `\
+## You are a helpful coding assistant that performs autocomplete (fill-in-the middle or "FIM") for the user.
+${!aiInstructions ? '' : `\
+## Special user instructions:
+${aiInstructions.split('\n').map(line => `##${line}`).join('\n')}`}
+
+${messages.prefix}`
+
+	const suffix = messages.suffix
+	const stopTokens = messages.stopTokens
+	return { prefix, suffix, stopTokens, maxTokens: 300 } as const
+}