remove mistral, finish(?) models!

This commit is contained in:
Andrew Pareles 2025-02-24 04:01:00 -08:00
parent 9f20476eea
commit 3ae8f75641
8 changed files with 686 additions and 333 deletions

13
package-lock.json generated
View file

@ -56,7 +56,7 @@
"node-pty": "1.1.0-beta21",
"ollama": "^0.5.11",
"open": "^8.4.2",
"openai": "^4.76.1",
"openai": "^4.85.4",
"posthog-node": "^4.3.1",
"react": "^18.3.1",
"react-dom": "^18.3.1",
@ -17079,9 +17079,10 @@
}
},
"node_modules/openai": {
"version": "4.77.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.77.0.tgz",
"integrity": "sha512-WWacavtns/7pCUkOWvQIjyOfcdr9X+9n9Vvb0zFeKVDAqwCMDHB+iSr24SVaBAhplvSG6JrRXFpcNM9gWhOGIw==",
"version": "4.85.4",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.85.4.tgz",
"integrity": "sha512-Nki51PBSu+Aryo7WKbdXvfm0X/iKkQS2fq3O0Uqb/O3b4exOZFid2te1BZ52bbO5UwxQZ5eeHJDCTqtrJLPw0w==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
@ -17095,9 +17096,13 @@
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.23.8"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}

View file

@ -124,7 +124,7 @@
"node-pty": "1.1.0-beta21",
"ollama": "^0.5.11",
"open": "^8.4.2",
"openai": "^4.76.1",
"openai": "^4.85.4",
"posthog-node": "^4.3.1",
"react": "^18.3.1",
"react-dom": "^18.3.1",

View file

@ -3,6 +3,7 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
import { OnText } from '../../common/llmMessageTypes.js'
import { DIVIDER, FINAL, ORIGINAL } from '../prompt/prompts.js'
class SurroundingsRemover {
@ -240,3 +241,96 @@ export const extractSearchReplaceBlocks = (str: string) => {
})
}
}
/**
 * Wraps an `OnText` callback so that text enclosed in think tags is reported as reasoning instead of answer text.
 *
 * The returned callback only reads the cumulative raw `fullText` it receives on each call. It forwards
 * `{ newText, fullText, newReasoning, fullReasoning }` to `onText_`, with the think tags themselves stripped.
 * Only the first opening tag, and the first closing tag after it, act as delimiters; everything after the
 * closing tag is answer text.
 *
 * The wrapper is stateful: create a new one for every stream.
 *
 * @param onText_ downstream callback that receives the separated text and reasoning
 * @param thinkTags `[openTag, closeTag]`, e.g. `['<think>', '</think>']`
 * @returns a callback to use in place of `onText_`
 */
export const extractReasoningFromText = (
	onText_: OnText,
	thinkTags: [string, string],
): OnText => {
	const [openTag, closeTag] = thinkTags

	let latestAddIdx = 0 // exclusive index into the RAW text: everything before it has already been routed
	let foundTag1 = false
	let foundTag2 = false

	let fullText = '' // answer text forwarded so far (tags and reasoning removed)
	let fullReasoning = '' // reasoning forwarded so far

	const onText: OnText = ({ fullText: fullText_ }) => {
		let newText = ''
		let newReasoning = ''
		// true when the unrouted tail might be the start of a tag, e.g. abcdef<thi
		// in that case the tail is held back (latestAddIdx is not advanced) until more text arrives
		// NOTE(review): if the stream ends in this state the held-back tail is never forwarded here — presumably the final-message handler covers it; confirm
		let isWaiting = false

		// until the opening tag is found, raw text is answer text
		if (!foundTag1) {
			const tag1Index = fullText_.indexOf(openTag, latestAddIdx)
			if (tag1Index !== -1) {
				foundTag1 = true
				newText += fullText_.substring(latestAddIdx, tag1Index)
				latestAddIdx = tag1Index + openTag.length // skip over the tag itself
			}
			else if (endsWithAnyPrefixOf(fullText_.substring(latestAddIdx), openTag)) {
				isWaiting = true
			}
			else {
				newText += fullText_.substring(latestAddIdx)
				latestAddIdx = fullText_.length
			}
		}

		// between the tags, raw text is reasoning (also runs in the same call that found the opening tag)
		if (foundTag1 && !foundTag2) {
			const tag2Index = fullText_.indexOf(closeTag, latestAddIdx)
			if (tag2Index !== -1) {
				foundTag2 = true
				newReasoning += fullText_.substring(latestAddIdx, tag2Index)
				latestAddIdx = tag2Index + closeTag.length // skip over the tag itself
			}
			else if (endsWithAnyPrefixOf(fullText_.substring(latestAddIdx), closeTag)) {
				isWaiting = true
			}
			else {
				newReasoning += fullText_.substring(latestAddIdx)
				latestAddIdx = fullText_.length
			}
		}

		// after the closing tag, raw text is answer text again
		if (foundTag2) {
			newText += fullText_.substring(latestAddIdx)
			latestAddIdx = fullText_.length
		}

		// nothing could be routed yet: stay silent until we know whether the tail is a tag
		if (isWaiting && newText === '' && newReasoning === '') return

		fullText += newText
		fullReasoning += newReasoning
		onText_({ newText, fullText, newReasoning, fullReasoning })
	}

	return onText
}

View file

@ -415,7 +415,7 @@ export const FeaturesTab = () => {
<div className='pl-4 opacity-50'>
<span className={`text-sm mb-2`}><ChatMarkdownRender noSpace string={`1. Download [Ollama](https://ollama.com/download).`} /></span>
<span className={`text-sm mb-2`}><ChatMarkdownRender noSpace string={`2. Open your terminal.`} /></span>
<span className={`text-sm mb-2 select-text`}><ChatMarkdownRender noSpace string={`3. Run \`ollama run llama3.1\`. This installs Meta's llama3.1 model which is best for chat and inline edits. Requires 5GB of memory.`} /></span>
<span className={`text-sm mb-2 select-text`}><ChatMarkdownRender noSpace string={`3. Run \`ollama run llama3.1:8b\`. This installs Meta's llama3.1 model which is best for chat and inline edits. Requires 5GB of memory.`} /></span>
<span className={`text-sm mb-2 select-text`}><ChatMarkdownRender noSpace string={`4. Run \`ollama run qwen2.5-coder:1.5b\`. This installs a faster autocomplete model. Requires 1GB of memory.`} /></span>
<span className={`text-sm mb-2`}><ChatMarkdownRender noSpace string={`Void automatically detects locally running models and enables them.`} /></span>
{/* TODO we should create UI for downloading models without user going into terminal */}

View file

@ -11,7 +11,7 @@ import { registerSingleton, InstantiationType } from '../../../../platform/insta
import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
import { IStorageService, StorageScope, StorageTarget } from '../../../../platform/storage/common/storage.js';
import { IMetricsService } from './metricsService.js';
import { defaultSettingsOfProvider, FeatureName, ProviderName, ModelSelectionOfFeature, SettingsOfProvider, SettingName, providerNames, ModelSelection, modelSelectionsEqual, featureNames, VoidModelInfo, GlobalSettings, GlobalSettingName, defaultGlobalSettings, defaultProviderSettings, developerInfoOfModelName, modelInfoOfAutodetectedModelNames } from './voidSettingsTypes.js';
import { defaultSettingsOfProvider, FeatureName, ProviderName, ModelSelectionOfFeature, SettingsOfProvider, SettingName, providerNames, ModelSelection, modelSelectionsEqual, featureNames, VoidModelInfo, GlobalSettings, GlobalSettingName, defaultGlobalSettings, defaultProviderSettings } from './voidSettingsTypes.js';
const STORAGE_KEY = 'void.settingsServiceStorage'
@ -32,8 +32,6 @@ type SetGlobalSettingFn = <T extends GlobalSettingName, >(settingName: T, newVal
export type ModelOption = { name: string, selection: ModelSelection }
export type VoidSettingsState = {
readonly settingsOfProvider: SettingsOfProvider; // optionsOfProvider
readonly modelSelectionOfFeature: ModelSelectionOfFeature; // stateOfFeature
@ -172,9 +170,6 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService {
// A HACK BECAUSE WE ADDED DEEPSEEK (did not exist before, comes before readS)
...{ deepseek: defaultSettingsOfProvider.deepseek },
// A HACK BECAUSE WE ADDED MISTRAL (did not exist before, comes before readS)
...{ mistral: defaultSettingsOfProvider.mistral },
// A HACK BECAUSE WE ADDED XAI (did not exist before, comes before readS)
...{ xAI: defaultSettingsOfProvider.xAI },
@ -295,19 +290,35 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService {
}
// Builds a provider's model list after autodetection: every autodetected name becomes a default model
// (keeping the `isHidden` flag of an existing model with the same name), followed by the existing
// non-default (custom) models unchanged. Existing default models that were not detected again are dropped.
private _updatedModelsAfterAutodetection = (defaultModelNames: string[], options: { existingModels: VoidModelInfo[] }) => {
	const previousModels = options.existingModels

	// name -> previous entry; on duplicate names the later entry wins
	const previousModelOfName = new Map<string, VoidModelInfo>()
	for (const model of previousModels) {
		previousModelOfName.set(model.modelName, model)
	}

	const autodetectedModels = defaultModelNames.map(modelName => {
		const wasHidden = !!previousModelOfName.get(modelName)?.isHidden
		return {
			modelName,
			isDefault: true,
			isAutodetected: true,
			isHidden: wasHidden,
		}
	})

	const customModels = previousModels.filter(model => !model.isDefault)

	return [...autodetectedModels, ...customModels]
}
setAutodetectedModels(providerName: ProviderName, autodetectedModelNames: string[], logging: object) {
const { models } = this.state.settingsOfProvider[providerName]
const oldModelNames = models.map(m => m.modelName)
const newDefaultModels = modelInfoOfAutodetectedModelNames(autodetectedModelNames, { existingModels: models })
const newModels = [
...newDefaultModels, // swap out all the default models for the new default models
...models.filter(m => !m.isDefault), // keep any non-default (custom) models
]
const newModels = this._updatedModelsAfterAutodetection(autodetectedModelNames, { existingModels: models })
this.setSettingOfProvider(providerName, 'models', newModels)
// if the models changed, log it
@ -341,7 +352,7 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService {
if (existingIdx !== -1) return // if exists, do nothing
const newModels = [
...models,
{ ...developerInfoOfModelName(modelName), modelName, isDefault: false, isHidden: false }
{ modelName, isDefault: false, isHidden: false }
]
this.setSettingOfProvider(providerName, 'models', newModels)

View file

@ -4,7 +4,6 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
import { defaultModelsOfProvider } from '../electron-main/llmMessage/MODELS.js';
import { VoidSettingsState } from './voidSettingsService.js'
@ -40,14 +39,70 @@ export const defaultProviderSettings = {
groq: {
apiKey: '',
},
mistral: {
apiKey: ''
},
xAI: {
apiKey: ''
},
} as const
/**
 * Model names offered by default for each provider, in display order.
 *
 * `ollama` and `vLLM` are empty because their models are autodetected at runtime, and
 * `openAICompatible` is empty because the user supplies the model name.
 *
 * The constraint uses `readonly string[]` because `as const` turns every list into a readonly tuple,
 * which is not assignable to a mutable `string[]`.
 */
export const defaultModelsOfProvider = {
	openAI: [ // https://platform.openai.com/docs/models
		'o1',
		'o3-mini',
		'o1-mini',
		'gpt-4o',
		'gpt-4o-mini',
	],
	anthropic: [ // https://docs.anthropic.com/en/docs/about-claude/models
		'claude-3-5-sonnet-latest',
		'claude-3-5-haiku-latest',
		'claude-3-opus-latest',
	],
	xAI: [ // https://docs.x.ai/docs/models?cluster=us-east-1
		'grok-2-latest',
		'grok-3-latest',
	],
	gemini: [ // https://ai.google.dev/gemini-api/docs/models/gemini
		'gemini-2.0-flash',
		'gemini-1.5-flash',
		'gemini-1.5-pro',
		'gemini-1.5-flash-8b',
		'gemini-2.0-flash-thinking-exp',
	],
	deepseek: [ // https://api-docs.deepseek.com/quick_start/pricing
		'deepseek-chat',
		'deepseek-reasoner',
	],
	ollama: [ // autodetected
	],
	vLLM: [ // autodetected
	],
	openRouter: [ // https://openrouter.ai/models
		'anthropic/claude-3.5-sonnet',
		'deepseek/deepseek-r1',
		'mistralai/codestral-2501',
		'qwen/qwen2.5-vl-72b-instruct:free',
	],
	groq: [ // https://console.groq.com/docs/models
		'llama-3.3-70b-versatile',
		'llama-3.1-8b-instant',
		'qwen-2.5-coder-32b', // preview mode (experimental)
	],
	// Mistral is not supported right now: it is the least-used provider in Void and a huge pain to set up
	// because its API is nonstandard (it supports codestral FIM, but on v1/fim/completions, etc).
	// mistral: [ // https://docs.mistral.ai/getting-started/models/models_overview/
	// 	'codestral-latest',
	// 	'mistral-large-latest',
	// 	'ministral-3b-latest',
	// 	'ministral-8b-latest',
	// ],
	openAICompatible: [], // fallback
} as const satisfies Record<ProviderName, readonly string[]>
export type ProviderName = keyof typeof defaultProviderSettings
export const providerNames = Object.keys(defaultProviderSettings) as ProviderName[]
@ -139,11 +194,6 @@ export const displayInfoOfProviderName = (providerName: ProviderName): DisplayIn
title: 'Groq.com API',
}
}
else if (providerName === 'mistral') {
return {
title: 'Mistral API',
}
}
else if (providerName === 'xAI') {
return {
title: 'xAI API',
@ -173,10 +223,9 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName
providerName === 'openRouter' ? 'sk-or-key...' : // sk-or-v1-key
providerName === 'gemini' ? 'key...' :
providerName === 'groq' ? 'gsk_key...' :
providerName === 'mistral' ? 'key...' :
providerName === 'openAICompatible' ? 'sk-key...' :
providerName === 'xAI' ? 'xai-key...' :
'',
providerName === 'openAICompatible' ? 'sk-key...' :
providerName === 'xAI' ? 'xai-key...' :
'',
subTextMd: providerName === 'anthropic' ? 'Get your [API Key here](https://console.anthropic.com/settings/keys).' :
providerName === 'openAI' ? 'Get your [API Key here](https://platform.openai.com/api-keys).' :
@ -184,10 +233,9 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName
providerName === 'openRouter' ? 'Get your [API Key here](https://openrouter.ai/settings/keys).' :
providerName === 'gemini' ? 'Get your [API Key here](https://aistudio.google.com/apikey).' :
providerName === 'groq' ? 'Get your [API Key here](https://console.groq.com/keys).' :
providerName === 'mistral' ? 'Get your [API Key here](https://console.mistral.ai/api-keys/).' :
providerName === 'xAI' ? 'Get your [API Key here](https://console.x.ai).' :
providerName === 'openAICompatible' ? undefined :
'',
providerName === 'xAI' ? 'Get your [API Key here](https://console.x.ai).' :
providerName === 'openAICompatible' ? undefined :
'',
isPasswordField: true,
}
}
@ -271,12 +319,6 @@ export const defaultSettingsOfProvider: SettingsOfProvider = {
...modelInfoOfDefaultModelNames(defaultModelsOfProvider.gemini),
_didFillInProviderSettings: undefined,
},
mistral: {
...defaultCustomSettings,
...defaultProviderSettings.mistral,
...modelInfoOfDefaultModelNames(defaultModelsOfProvider.mistral),
_didFillInProviderSettings: undefined,
},
xAI: {
...defaultCustomSettings,
...defaultProviderSettings.xAI,

View file

@ -4,67 +4,15 @@
*--------------------------------------------------------------------------------------*/
import OpenAI, { ClientOptions } from 'openai';
import { Model as OpenAIModel } from 'openai/resources/models.js';
import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js';
import { InternalToolInfo, isAToolName } from '../../common/toolsService.js';
import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { prepareMessages } from './preprocessLLMMessages.js';
import Anthropic from '@anthropic-ai/sdk';
import { Ollama } from 'ollama';
export const defaultModelsOfProvider = {
anthropic: [ // https://docs.anthropic.com/en/docs/about-claude/models
'claude-3-5-sonnet-latest',
'claude-3-5-haiku-latest',
'claude-3-opus-latest',
],
openAI: [ // https://platform.openai.com/docs/models/gp
'o1',
'o1-mini',
'o3-mini',
'gpt-4o',
'gpt-4o-mini',
],
deepseek: [ // https://platform.openai.com/docs/models/gp
'deepseek-chat',
'deepseek-reasoner',
],
ollama: [],
vLLM: [],
openRouter: [],
openAICompatible: [],
gemini: [
'gemini-1.5-flash',
'gemini-1.5-pro',
'gemini-1.5-flash-8b',
'gemini-2.0-flash-exp',
'gemini-2.0-flash-thinking-exp-1219',
'learnlm-1.5-pro-experimental'
],
groq: [ // https://console.groq.com/docs/models
"llama3-70b-8192",
"llama-3.3-70b-versatile",
"llama-3.1-8b-instant",
"gemma2-9b-it",
"mixtral-8x7b-32768"
],
mistral: [ // https://docs.mistral.ai/getting-started/models/models_overview/
"codestral-latest",
"open-codestral-mamba",
"open-mistral-nemo",
"mistral-large-latest",
"pixtral-large-latest",
"ministral-3b-latest",
"ministral-8b-latest",
"mistral-small-latest",
],
xAI: [ // https://docs.x.ai/docs/models?cluster=us-east-1
'grok-3-latest',
'grok-2-latest',
],
} satisfies Record<ProviderName, string[]>
import { Model as OpenAIModel } from 'openai/resources/models.js';
import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js';
import { InternalToolInfo, isAToolName } from '../../common/toolsService.js';
import { defaultProviderSettings, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js';
import { extractReasoningFromText } from '../../browser/helpers/extractCodeFromResult.js';
@ -78,10 +26,13 @@ type ModelOptions = {
}
supportsSystemMessage: false | 'system-role' | 'developer-role' | 'separated';
supportsTools: false | 'anthropic-style' | 'openai-style';
supportsFIM: false | 'TODO_FIM_FORMAT';
supportsFIM: boolean;
supportsReasoning: boolean; // not whether it reasons, but whether it outputs reasoning tokens
manualMatchReasoningTokens?: [string, string]; // reasoning tokens if it's an OSS model
supportsReasoningOutput: false | {
// you are allowed to not include openSourceThinkTags if it's not open source (no such cases as of writing)
// if it's open source, put the think tags here so we parse them out in e.g. ollama
openSourceThinkTags?: [string, string]
};
}
type ProviderReasoningOptions = {
@ -95,9 +46,9 @@ type ProviderReasoningOptions = {
}
type ProviderSettings = {
providerReasoningOptions?: ProviderReasoningOptions;
ifSupportsReasoningOutput?: ProviderReasoningOptions;
modelOptions: { [key: string]: ModelOptions };
modelOptionsFallback: (modelName: string) => ModelOptions; // allowed to throw error if modeName is totally invalid
modelOptionsFallback: (modelName: string) => (ModelOptions & { modelName: string }) | null;
}
@ -107,172 +58,446 @@ type ModelSettingsOfProvider = {
// type DefaultModels<T extends ProviderName> = typeof defaultModelsOfProvider[T][number]
// type AssertModelsIncluded<
// T extends ProviderName,
// Options extends Record<string, unknown>
// > = Exclude<DefaultModels<T>, keyof Options> extends never
// ? true
// : ["Missing models for", T, Exclude<DefaultModels<T>, keyof Options>];
// const assertOpenAI: AssertModelsIncluded<'openAI', typeof openAIModelOptions> = true;
const modelNotRecognizedErrorMessage = (modelName: string, providerName: ProviderName) => `Void could not find a model matching ${modelName} for ${displayInfoOfProviderName(providerName).title}.`
// Most conservative capability set: no system message, tools, FIM, or reasoning output, and zero cost.
// extensiveModelFallback returns these options when a model name matches none of the families it knows.
const modelOptionDefaults: ModelOptions = {
contextWindow: 32_000,
cost: { input: 0, output: 0 },
supportsSystemMessage: false,
supportsTools: false,
supportsFIM: false,
supportsReasoningOutput: false,
}
// ---------------- OPENAI ----------------
const openAIModelOptions = {
"o1": {
const openAIModelOptions = { // https://platform.openai.com/docs/pricing
'o1': {
contextWindow: 128_000,
cost: { input: 15.00, cache_read: 7.50, output: 60.00, },
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: 'developer-role',
supportsReasoning: false,
supportsReasoningOutput: false,
},
"o3-mini": {
'o3-mini': {
contextWindow: 200_000,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: 'developer-role',
supportsReasoning: false,
supportsReasoningOutput: false,
},
"gpt-4o": {
'gpt-4o': {
contextWindow: 128_000,
cost: { input: 2.50, cache_read: 1.25, output: 10.00, },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role',
supportsReasoning: false,
supportsReasoningOutput: false,
},
} as const
'o1-mini': {
contextWindow: 128_000,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: false, // does not support any system
supportsReasoningOutput: false,
},
'gpt-4o-mini': {
contextWindow: 128_000,
cost: { input: 0.15, cache_read: 0.075, output: 0.60, },
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role', // ??
supportsReasoningOutput: false,
},
} as const satisfies { [s: string]: ModelOptions }
const openAISettings: ProviderSettings = {
modelOptions: openAIModelOptions,
modelOptionsFallback: (modelName) => {
if (modelName.includes('o1')) return openAIModelOptions['o1']
if (modelName.includes('o3-mini')) return openAIModelOptions['o3-mini']
if (modelName.includes('gpt-4o')) return openAIModelOptions['gpt-4o']
throw new Error(modelNotRecognizedErrorMessage(modelName, 'openAI'))
let fallbackName: keyof typeof openAIModelOptions | null = null
if (modelName.includes('o1')) { fallbackName = 'o1' }
if (modelName.includes('o3-mini')) { fallbackName = 'o3-mini' }
if (modelName.includes('gpt-4o')) { fallbackName = 'gpt-4o' }
if (fallbackName) return { modelName: fallbackName, ...openAIModelOptions[fallbackName] }
return null
}
}
// ---------------- ANTHROPIC ----------------
const anthropicModelOptions = {
"claude-3-5-sonnet-20241022": {
'claude-3-5-sonnet-20241022': {
contextWindow: 200_000,
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoning: false,
supportsReasoningOutput: false,
},
"claude-3-5-haiku-20241022": {
'claude-3-5-haiku-20241022': {
contextWindow: 200_000,
cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoning: false,
supportsReasoningOutput: false,
},
"claude-3-opus-20240229": {
'claude-3-opus-20240229': {
contextWindow: 200_000,
cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoning: false,
supportsReasoningOutput: false,
},
"claude-3-sonnet-20240229": {
'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in
contextWindow: 200_000, cost: { input: 3.00, output: 15.00 },
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoning: false,
supportsReasoningOutput: false,
}
} as const
} as const satisfies { [s: string]: ModelOptions }
const anthropicSettings: ProviderSettings = {
modelOptions: anthropicModelOptions,
modelOptionsFallback: (modelName) => {
throw new Error(modelNotRecognizedErrorMessage(modelName, 'anthropic'))
let fallbackName: keyof typeof anthropicModelOptions | null = null
if (modelName.includes('claude-3-5-sonnet')) fallbackName = 'claude-3-5-sonnet-20241022'
if (modelName.includes('claude-3-5-haiku')) fallbackName = 'claude-3-5-haiku-20241022'
if (modelName.includes('claude-3-opus')) fallbackName = 'claude-3-opus-20240229'
if (fallbackName) return { modelName: fallbackName, ...anthropicModelOptions[fallbackName] }
return null
}
}
// ---------------- XAI ----------------
const XAIModelOptions = {
"grok-2-latest": {
const xAIModelOptions = {
'grok-2-latest': {
contextWindow: 131_072,
cost: { input: 2.00, output: 10.00 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoning: false,
supportsReasoningOutput: false,
},
} as const
} as const satisfies { [s: string]: ModelOptions }
const XAISettings: ProviderSettings = {
modelOptions: XAIModelOptions,
const xAISettings: ProviderSettings = {
modelOptions: xAIModelOptions,
modelOptionsFallback: (modelName) => {
throw new Error(modelNotRecognizedErrorMessage(modelName, 'xAI'))
let fallbackName: keyof typeof xAIModelOptions | null = null
if (modelName.includes('grok-2')) fallbackName = 'grok-2-latest'
if (fallbackName) return { modelName: fallbackName, ...xAIModelOptions[fallbackName] }
return null
}
}
// ---------------- GEMINI ----------------
// Capabilities and pricing of Gemini models, keyed by exact model name.
// Every entry assumes Gemini is called through the OpenAI SDK (see the supportsTools note on the first entry),
// which is why tools are 'openai-style' and the system message uses 'system-role'.
// `cost` comes from the pricing page linked below — presumably USD per 1M tokens; TODO confirm the unit.
// NOTE(review): 'gemini-2.0-flash-thinking-exp' is listed in defaultModelsOfProvider.gemini but has no entry here,
// and geminiSettings.modelOptionsFallback always returns null — confirm how callers handle that default model.
const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
'gemini-2.0-flash': {
contextWindow: 1_048_576,
cost: { input: 0.10, output: 0.40 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style', // we are assuming OpenAI SDK when calling gemini
supportsReasoningOutput: false,
},
'gemini-2.0-flash-lite-preview-02-05': {
contextWindow: 1_048_576,
cost: { input: 0.075, output: 0.30 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'gemini-1.5-flash': {
contextWindow: 1_048_576,
cost: { input: 0.075, output: 0.30 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'gemini-1.5-pro': {
contextWindow: 2_097_152,
cost: { input: 1.25, output: 5.00 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'gemini-1.5-flash-8b': {
contextWindow: 1_048_576,
cost: { input: 0.0375, output: 0.15 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
} as const satisfies { [s: string]: ModelOptions }
// Gemini does no fuzzy matching: a model name that is not a key of geminiModelOptions resolves to null.
const geminiSettings: ProviderSettings = {
	modelOptions: geminiModelOptions,
	modelOptionsFallback: () => null,
}
// ---------------- OPEN SOURCE MODELS ----------------
// Capability presets for open-source model families, assuming the model is served behind an OpenAI-compatible API.
// Presets deliberately leave out contextWindow and cost (hence Partial<ModelOptions>): provider tables spread a preset
// and add those two fields themselves, and extensiveModelFallback adds a 32_000 context window and zero cost.
// `openSourceThinkTags` are the delimiters the model prints around its reasoning, so the reasoning can be parsed out
// of the plain text when the provider does not return it in a separate field.
// NOTE(review): in the visible code only deepseekR1, deepseekCoderV2, codestral, llama3 and 'qwen2.5coder' are referenced;
// the llama3.x, starcoder2 and 'codegemma:2b' presets may be used elsewhere or be reserved for later — confirm.
const openSourceModelDefaultOptionsAssumingOAICompat = {
'deepseekR1': {
supportsFIM: false,
supportsSystemMessage: false,
supportsTools: false,
supportsReasoningOutput: { openSourceThinkTags: ['<think>', '</think>'] },
},
'deepseekCoderV2': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
supportsTools: false,
supportsReasoningOutput: false,
},
'codestral': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
// llama
'llama3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'llama3.1': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'llama3.2': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'llama3.3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'qwen2.5coder': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
// FIM only
'starcoder2': {
supportsFIM: true,
supportsSystemMessage: false,
supportsTools: false,
supportsReasoningOutput: false,
},
'codegemma:2b': {
supportsFIM: true,
supportsSystemMessage: false,
supportsTools: false,
supportsReasoningOutput: false,
},
} as const satisfies { [s: string]: Partial<ModelOptions> }
// ---------------- DEEPSEEK API ----------------
// Capabilities and pricing of the models served by DeepSeek's own API, keyed by exact model name.
// https://api-docs.deepseek.com/quick_start/pricing
const deepseekModelOptions = {
	// general chat model: emits no reasoning tokens, so it uses the non-reasoning DeepSeek preset
	// (the same preset extensiveModelFallback picks for generic 'deepseek' names)
	'deepseek-chat': {
		...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2,
		contextWindow: 64_000,
		cost: { cache_read: .07, input: .27, output: 1.10, },
	},
	// reasoning model (R1): must advertise reasoning output, otherwise deepseekSettings.ifSupportsReasoningOutput
	// is never applied and the `reasoning_content` field of the stream is ignored
	'deepseek-reasoner': {
		...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1,
		contextWindow: 64_000,
		cost: { cache_read: .14, input: .55, output: 2.19, },
	},
} as const satisfies { [s: string]: ModelOptions }
// Settings for DeepSeek's own API. Only the exact names in deepseekModelOptions are recognized.
const deepseekSettings: ProviderSettings = {
	modelOptions: deepseekModelOptions,
	modelOptionsFallback: () => null,
	ifSupportsReasoningOutput: {
		// OAICompat stream; reasoning arrives in response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model
		output: { nameOfFieldInDelta: 'reasoning_content' },
	},
}
// ---------------- GROQ ----------------
// Capabilities and pricing of the models Void lists for Groq, keyed by exact model name.
// The keys match defaultModelsOfProvider.groq; none of them supports FIM or reasoning output.
// `cost` is presumably USD per 1M tokens — TODO confirm the unit against Groq's pricing page.
const groqModelOptions = {
'llama-3.3-70b-versatile': {
contextWindow: 128_000,
cost: { input: 0.59, output: 0.79 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'llama-3.1-8b-instant': {
contextWindow: 128_000,
cost: { input: 0.05, output: 0.08 },
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'qwen-2.5-coder-32b': {
contextWindow: 128_000,
cost: { input: 0.79, output: 0.79 },
supportsFIM: false, // unfortunately looks like no FIM support on groq
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
} as const satisfies { [s: string]: ModelOptions }
// Groq does no fuzzy matching: a model name that is not a key of groqModelOptions resolves to null.
const groqSettings: ProviderSettings = {
	modelOptions: groqModelOptions,
	modelOptionsFallback: () => null,
}
// ---------------- anything self-hosted/local: VLLM, OLLAMA, OPENAICOMPAT ----------------
// fallback to any model (anything openai-compatible)
// Best-effort capability guess for an arbitrary model name served behind an OpenAI-compatible endpoint
// (vLLM, Ollama, OpenRouter, custom endpoints). Never returns null: unknown names get modelOptionDefaults.
// The checks run in order and the first match wins, so more specific names must come first
// (e.g. 'deepseek-r1' before the generic 'deepseek').
// The returned `modelName` is the name that was passed in, unchanged.
const extensiveModelFallback: ProviderSettings['modelOptionsFallback'] = (modelName) => {
	// Normalizes borrowed options for an OpenAI-compatible, self-hosted setting:
	// any kind of system-message support becomes 'system-role', and cost is zeroed.
	const toFallback = (opts: Omit<ModelOptions, 'cost'>): ModelOptions & { modelName: string } => {
		return {
			modelName,
			...opts,
			supportsSystemMessage: opts.supportsSystemMessage ? 'system-role' : false,
			cost: { input: 0, output: 0 },
		}
	}

	// match case-insensitively: self-hosted servers often expose Hugging Face-style names
	// such as 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B' or 'Qwen/Qwen2.5-Coder-7B-Instruct'
	const name = modelName.toLowerCase()

	if (name.includes('gpt-4o')) return toFallback(openAIModelOptions['gpt-4o'])
	if (name.includes('claude')) return toFallback(anthropicModelOptions['claude-3-5-sonnet-20241022'])
	if (name.includes('grok')) return toFallback(xAIModelOptions['grok-2-latest'])
	if (name.includes('deepseek-r1') || name.includes('deepseek-reasoner')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, contextWindow: 32_000, })
	if (name.includes('deepseek')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2, contextWindow: 32_000, })
	if (name.includes('llama3')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.llama3, contextWindow: 32_000, })
	if (name.includes('qwen2.5-coder')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat['qwen2.5coder'], contextWindow: 32_000, })
	if (name.includes('codestral')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.codestral, contextWindow: 32_000, })
	// word boundaries so that 'o1' / 'o3' only match as standalone tokens (e.g. 'o1', 'o3-mini'), not inside other words
	if (/\bo1\b/.test(name) || /\bo3\b/.test(name)) return toFallback(openAIModelOptions['o1'])
	return toFallback(modelOptionDefaults)
}
// vLLM has no per-model table: every model name is resolved through extensiveModelFallback.
const vLLMSettings: ProviderSettings = {
	modelOptions: {},
	modelOptionsFallback: extensiveModelFallback,
	ifSupportsReasoningOutput: {
		// OAICompat stream; reasoning arrives in response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions
		output: { nameOfFieldInDelta: 'reasoning_content' },
	},
}
// Ollama has no per-model table: every model name is resolved through extensiveModelFallback.
const ollamaSettings: ProviderSettings = {
	modelOptions: {},
	modelOptionsFallback: extensiveModelFallback,
	ifSupportsReasoningOutput: {
		// reasoning is inlined in the text, so the <think> tags have to be filtered out manually
		output: { needsManualParse: true },
	},
}
// Generic OpenAI-compatible endpoint: every model name is resolved through extensiveModelFallback.
// No ifSupportsReasoningOutput here: we have no idea which endpoint is behind it, so reasoning cannot be parsed consistently.
const openaiCompatible: ProviderSettings = {
	modelOptions: {},
	modelOptionsFallback: extensiveModelFallback,
}
// ---------------- OPENROUTER ----------------
// Capabilities and pricing of the OpenRouter models Void knows explicitly, keyed by OpenRouter's 'vendor/model' name.
// Any other name (including the default 'qwen/qwen2.5-vl-72b-instruct:free') is resolved by openRouterSettings.modelOptionsFallback.
// `cost` is presumably USD per 1M tokens — TODO confirm the unit.
const openRouterModelOptions = {
'deepseek/deepseek-r1': {
...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1,
contextWindow: 128_000,
cost: { input: 0.8, output: 2.4 },
},
'anthropic/claude-3.5-sonnet': {
contextWindow: 200_000,
cost: { input: 3.00, output: 15.00 },
supportsFIM: false,
// 'system-role' / 'openai-style' rather than Anthropic's native styles: OpenRouter is treated as OpenAI-compatible in this file
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
'mistralai/codestral-2501': {
...openSourceModelDefaultOptionsAssumingOAICompat.codestral,
contextWindow: 256_000,
cost: { input: 0.3, output: 0.9 },
// the two fields below repeat the values the codestral preset already provides
supportsTools: 'openai-style',
supportsReasoningOutput: false,
},
} as const satisfies { [s: string]: ModelOptions }
// Settings for OpenRouter. Names missing from openRouterModelOptions are resolved through extensiveModelFallback.
const openRouterSettings: ProviderSettings = {
	modelOptions: openRouterModelOptions,
	// TODO!!! send a query to openrouter to get the price, isFIM, etc.
	modelOptionsFallback: extensiveModelFallback,
	ifSupportsReasoningOutput: {
		// OAICompat stream; the payload must include {include_reasoning: true} and reasoning arrives in
		// response.choices[0].delta.reasoning // https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models
		input: { includeInPayload: { include_reasoning: true } },
		output: { nameOfFieldInDelta: 'reasoning' },
	},
}
// ---------------- model settings of everything above ----------------
/**
 * Settings for every provider, keyed by provider name.
 *
 * Each provider appears exactly once and points at its dedicated `*Settings`
 * constant. `modelOptionsOfProvider` reads `modelOptions` and
 * `modelOptionsFallback` from these entries, and the chat sender reads
 * `ifSupportsReasoningOutput`, so inline entries that lack those fields must
 * not be added here.
 */
const modelSettingsOfProvider: ModelSettingsOfProvider = {
	openAI: openAISettings,
	anthropic: anthropicSettings,
	xAI: xAISettings,
	gemini: geminiSettings,

	// open source models
	deepseek: deepseekSettings,
	groq: groqSettings,

	// open source models + providers (mixture of everything)
	openRouter: openRouterSettings,
	vLLM: vLLMSettings,
	ollama: ollamaSettings,
	openAICompatible: openaiCompatible,

	// googleVertex: {},
	// microsoftAzure: {},
} as const satisfies ModelSettingsOfProvider
/**
 * Resolves the options to use for `modelName` on `providerName`.
 *
 * Resolution order:
 *  1. the provider's explicit `modelOptions` entry, with `modelName` attached;
 *  2. whatever the provider's `modelOptionsFallback` returns for the name;
 *  3. `modelOptionDefaults`, with `modelName` attached, when the fallback
 *     returns nothing.
 *
 * @param providerName key into `modelSettingsOfProvider`
 * @param modelName model name as given by the caller
 * @returns the resolved options, always carrying a `modelName`
 */
export const modelOptionsOfProvider = (providerName: ProviderName, modelName: string): ModelOptions & { modelName: string } => {
	const { modelOptions, modelOptionsFallback } = modelSettingsOfProvider[providerName]

	// own-property check: a bare `in` would also match inherited keys such as
	// 'constructor' or 'toString' and treat them as configured models
	const isConfigured = Object.prototype.hasOwnProperty.call(modelOptions, modelName)
	if (isConfigured) return { modelName, ...modelOptions[modelName] }

	const result = modelOptionsFallback(modelName)
	if (!result) return { modelName, ...modelOptionDefaults }
	return result
}
@ -361,10 +586,6 @@ const newOpenAICompatibleSDK = ({ settingsOfProvider, providerName, includeInPay
const thisConfig = settingsOfProvider[providerName]
return new OpenAI({ baseURL: thisConfig.endpoint, apiKey: thisConfig.apiKey, ...commonPayloadOpts })
}
else if (providerName === 'mistral') {
const thisConfig = settingsOfProvider[providerName]
return new OpenAI({ baseURL: 'https://api.mistral.ai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
}
else if (providerName === 'groq') {
const thisConfig = settingsOfProvider[providerName]
return new OpenAI({ baseURL: 'https://api.groq.com/openai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
@ -379,33 +600,52 @@ const newOpenAICompatibleSDK = ({ settingsOfProvider, providerName, includeInPay
const manualParseOnText = (
providerName: ProviderName,
modelName: string,
onText_: OnText
): OnText => {
return onText_
/**
 * Sends a fill-in-the-middle (FIM) request through the OpenAI SDK's
 * `completions.create` endpoint for the given provider.
 *
 * The model name is resolved with `modelOptionsOfProvider`, and the prompt,
 * suffix, stop tokens and token limit come from `prepareFIMMessage`. The
 * outcome is reported through the callbacks and nothing is returned:
 *  - `onFinalMessage` receives the text of the first choice, or `''` when the
 *    response has no choices;
 *  - `onError` receives 'Invalid API key.' for an `OpenAI.APIError` with
 *    status 401, and the stringified error otherwise.
 *
 * NOTE(review): `_setAborter` is accepted but never called in this function,
 * so an in-flight request cannot be cancelled from here.
 */
const _sendOpenAICompatibleFIM = ({ messages: messages_, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, providerName, aiInstructions, }: SendFIMParams_Internal) => {
	const { modelName, } = modelOptionsOfProvider(providerName, modelName_)
	const messages = prepareFIMMessage({ messages: messages_, aiInstructions, })

	const openai = newOpenAICompatibleSDK({ providerName, settingsOfProvider })
	openai.completions
		.create({
			model: modelName,
			prompt: messages.prefix,
			suffix: messages.suffix,
			stop: messages.stopTokens,
			max_tokens: messages.maxTokens,
		})
		.then(response => {
			// an empty `choices` array would otherwise pass `undefined` on as the completion text
			const fullText = response.choices[0]?.text ?? ''
			onFinalMessage({ fullText, });
		})
		.catch(error => {
			if (error instanceof OpenAI.APIError && error.status === 401) { onError({ message: 'Invalid API key.', fullError: error }); }
			else { onError({ message: error + '', fullError: error }); }
		})
}
const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const {
supportsReasoning: modelSupportsReasoning,
modelName,
supportsReasoningOutput,
supportsSystemMessage,
supportsTools,
} = modelOptionsOfProvider(providerName, modelName)
} = modelOptionsOfProvider(providerName, modelName_)
const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, })
const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined
const includeInPayload = modelSupportsReasoning ? {} : modelSettingsOfProvider[providerName].providerReasoningOptions?.input?.includeInPayload || {}
const includeInPayload = supportsReasoningOutput ? modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.input?.includeInPayload || {} : {}
const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj }
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].providerReasoningOptions?.output ?? {}
if (needsManualReasoningParse) onText = manualParseOnText(providerName, modelName, onText)
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.output ?? {}
if (needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags)
onText = extractReasoningFromText(onText, supportsReasoningOutput.openSourceThinkTags)
let fullReasoning = ''
let fullText = ''
@ -432,7 +672,7 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
let newReasoning = ''
if (nameOfReasoningFieldInDelta) {
// @ts-ignore
newReasoning = (chunk.choices[0]?.delta?.[nameOfFieldInDelta] || '') + ''
newReasoning = (chunk.choices[0]?.delta?.[nameOfReasoningFieldInDelta] || '') + ''
fullReasoning += newReasoning
}
@ -477,10 +717,6 @@ const _openaiCompatibleList = async ({ onSuccess: onSuccess_, onError: onError_,
// ------------ OPENAI ------------
const sendOpenAIChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
// ------------ ANTHROPIC ------------
const toAnthropicTool = (toolInfo: InternalToolInfo) => {
@ -504,13 +740,14 @@ const toolCallsFromAnthropicContent = (content: Anthropic.Messages.ContentBlock[
}).filter(t => !!t)
}
const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const {
// supportsReasoning: modelSupportsReasoning,
modelName,
supportsSystemMessage,
supportsTools,
contextWindow,
} = modelOptionsOfProvider(providerName, modelName)
} = modelOptionsOfProvider(providerName, modelName_)
const { messages, separateSystemMessageStr } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, })
@ -561,16 +798,6 @@ const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalM
// })
// ------------ XAI ------------
const sendXAIChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
// ------------ GEMINI ------------
const sendGeminiAPIChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
// ------------ OLLAMA ------------
const newOllamaSDK = ({ endpoint }: { endpoint: string }) => {
// if endpoint is empty, normally ollama will send to 11434, but we want it to fail - the user should type it in
@ -603,10 +830,12 @@ const ollamaList = async ({ onSuccess: onSuccess_, onError: onError_, settingsOf
}
}
const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter }: SendFIMParams_Internal) => {
const sendOllamaFIM = ({ messages: messages_, onFinalMessage, onError, settingsOfProvider, modelName, aiInstructions, _setAborter }: SendFIMParams_Internal) => {
const thisConfig = settingsOfProvider.ollama
const ollama = newOllamaSDK({ endpoint: thisConfig.endpoint })
const messages = prepareFIMMessage({ messages: messages_, aiInstructions, })
let fullText = ''
ollama.generate({
model: modelName,
@ -614,7 +843,7 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider,
suffix: messages.suffix,
options: {
stop: messages.stopTokens,
num_predict: 300, // max tokens
num_predict: messages.maxTokens, // max tokens
// repeat_penalty: 1,
},
raw: true,
@ -635,57 +864,73 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider,
}
// ollama's implementation of openai-compatible SDK dumps all reasoning tokens out with message, and supports tools, so we can use it for chat!
const sendOllamaChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
/**
 * Shape of the per-provider dispatch table: one entry per `ProviderName`.
 * `sendChat` is always present; `sendFIM` and `list` are `null` when the
 * provider has no implementation for them.
 */
type CallFnOfProvider = Record<ProviderName, {
	sendChat: (params: SendChatParams_Internal) => void;
	sendFIM: ((params: SendFIMParams_Internal) => void) | null;
	list: ((params: ListParams_Internal<any>) => void) | null;
}>
// ------------ OPENAI-COMPATIBLE ------------
// TODO!!! FIM
// using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration
const sendOpenAICompatibleChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
// ------------ OPENROUTER ------------
const sendOpenRouterChat = (params: SendChatParams_Internal) => {
_sendOpenAICompatibleChat(params)
}
// ------------ VLLM ------------
const vLLMList = async (params: ListParams_Internal<OpenAIModel>) => {
return _openaiCompatibleList(params)
}
const sendVLLMFIM = (params: SendFIMParams_Internal) => {
// TODO!!!
}
// using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration
const sendVLLMChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
// ------------ DEEPSEEK API ------------
const sendDeepSeekAPIChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
// ------------ MISTRAL ------------
const sendMistralAPIChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
// ------------ GROQ ------------
const sendGroqAPIChat = (params: SendChatParams_Internal) => {
return _sendOpenAICompatibleChat(params)
}
/**
 * Dispatch table from provider name to that provider's implementations.
 *
 * Every provider except Anthropic sends chat through
 * `_sendOpenAICompatibleChat`. `sendFIM` and `list` are `null` where the
 * provider has no implementation wired up.
 */
export const sendLLMMessageToProviderImplementation = {
	anthropic: {
		sendChat: sendAnthropicChat,
		sendFIM: null,
		list: null,
	},
	openAI: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: null,
		list: null,
	},
	xAI: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: null,
		list: null,
	},
	gemini: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: null,
		list: null,
	},
	ollama: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: sendOllamaFIM,
		list: ollamaList,
	},
	openAICompatible: {
		// using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: _sendOpenAICompatibleFIM,
		list: null,
	},
	openRouter: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: _sendOpenAICompatibleFIM,
		list: null,
	},
	vLLM: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: _sendOpenAICompatibleFIM,
		list: _openaiCompatibleList,
	},
	deepseek: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: null,
		list: null,
	},
	groq: {
		sendChat: _sendOpenAICompatibleChat,
		sendFIM: null,
		list: null,
	},
} satisfies CallFnOfProvider
/*
FIM:
FIM info (this may be useful in the future with vLLM, but in most cases the only way to use FIM is if the provider explicitly supports it):
qwen2.5-coder https://ollama.com/library/qwen2.5-coder/blobs/e94a8ecb9327
<|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
@ -706,71 +951,3 @@ codegemma https://ollama.com/library/codegemma:2b/blobs/48d9a8140749
<|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
*/
type CallFnOfProvider = {
[providerName in ProviderName]: {
sendChat: (params: SendChatParams_Internal) => void;
sendFIM: ((params: SendFIMParams_Internal) => void) | null;
list: ((params: ListParams_Internal<any>) => void) | null;
}
}
export const sendLLMMessageToProviderImplementation = {
openAI: {
sendChat: sendOpenAIChat,
sendFIM: null,
list: null,
},
anthropic: {
sendChat: sendAnthropicChat,
sendFIM: null,
list: null,
},
xAI: {
sendChat: sendXAIChat,
sendFIM: null,
list: null,
},
gemini: {
sendChat: sendGeminiAPIChat,
sendFIM: null,
list: null,
},
ollama: {
sendChat: sendOllamaChat,
sendFIM: sendOllamaFIM,
list: ollamaList,
},
openAICompatible: {
sendChat: sendOpenAICompatibleChat,
sendFIM: null,
list: null,
},
openRouter: {
sendChat: sendOpenRouterChat,
sendFIM: null,
list: null,
},
vLLM: {
sendChat: sendVLLMChat,
sendFIM: sendVLLMFIM,
list: vLLMList,
},
deepseek: {
sendChat: sendDeepSeekAPIChat,
sendFIM: null,
list: null,
},
groq: {
sendChat: sendGroqAPIChat,
sendFIM: null,
list: null,
},
mistral: {
sendChat: sendMistralAPIChat,
sendFIM: null,
list: null,
},
} satisfies CallFnOfProvider

View file

@ -1,6 +1,6 @@
import { LLMChatMessage } from '../../common/llmMessageTypes.js';
import { LLMChatMessage, LLMFIMMessage } from '../../common/llmMessageTypes.js';
import { deepClone } from '../../../../../base/common/objects.js';
@ -322,3 +322,27 @@ export const prepareMessages = ({
} as const
}
/**
 * Builds the pieces of a fill-in-the-middle (FIM) request.
 *
 * The returned `prefix` is a `##` header line, then an optional block with the
 * user's instructions (each line prefixed with `##`), then the original
 * `messages.prefix`. When `aiInstructions` is empty the instruction block is an
 * empty line. `suffix` and `stopTokens` are passed through unchanged, and
 * `maxTokens` is always 300.
 *
 * @param messages the FIM message supplying `prefix`, `suffix` and `stopTokens`
 * @param aiInstructions free-form user instructions; may be empty
 * @returns `{ prefix, suffix, stopTokens, maxTokens }`
 */
export const prepareFIMMessage = ({
	messages,
	aiInstructions,
}: {
	messages: LLMFIMMessage,
	aiInstructions: string,
}) => {
	const header = '## You are a helpful coding assistant that performs autocomplete (fill-in-the middle or "FIM") for the user.'

	// optional block holding the user's instructions, one `##` line per instruction line
	let instructionsBlock = ''
	if (aiInstructions) {
		const prefixedLines: string[] = []
		for (const line of aiInstructions.split('\n')) {
			prefixedLines.push('##' + line)
		}
		instructionsBlock = '## Special user instructions:\n' + prefixedLines.join('\n')
	}

	return {
		prefix: header + '\n' + instructionsBlock + '\n' + messages.prefix,
		suffix: messages.suffix,
		stopTokens: messages.stopTokens,
		maxTokens: 300,
	} as const
}