From 3ae8f756410f5cc56054f196b9e2193f1e616425 Mon Sep 17 00:00:00 2001 From: Andrew Pareles Date: Mon, 24 Feb 2025 04:01:00 -0800 Subject: [PATCH] remove mistral, finish(?) models! --- package-lock.json | 13 +- package.json | 2 +- .../browser/helpers/extractCodeFromResult.ts | 94 +++ .../react/src/void-settings-tsx/Settings.tsx | 2 +- .../void/common/voidSettingsService.ts | 39 +- .../contrib/void/common/voidSettingsTypes.ts | 88 +- .../void/electron-main/llmMessage/MODELS.ts | 755 +++++++++++------- .../llmMessage/preprocessLLMMessages.ts | 26 +- 8 files changed, 686 insertions(+), 333 deletions(-) diff --git a/package-lock.json b/package-lock.json index bc045891..d29248c2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -56,7 +56,7 @@ "node-pty": "1.1.0-beta21", "ollama": "^0.5.11", "open": "^8.4.2", - "openai": "^4.76.1", + "openai": "^4.85.4", "posthog-node": "^4.3.1", "react": "^18.3.1", "react-dom": "^18.3.1", @@ -17079,9 +17079,10 @@ } }, "node_modules/openai": { - "version": "4.77.0", - "resolved": "https://registry.npmjs.org/openai/-/openai-4.77.0.tgz", - "integrity": "sha512-WWacavtns/7pCUkOWvQIjyOfcdr9X+9n9Vvb0zFeKVDAqwCMDHB+iSr24SVaBAhplvSG6JrRXFpcNM9gWhOGIw==", + "version": "4.85.4", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.85.4.tgz", + "integrity": "sha512-Nki51PBSu+Aryo7WKbdXvfm0X/iKkQS2fq3O0Uqb/O3b4exOZFid2te1BZ52bbO5UwxQZ5eeHJDCTqtrJLPw0w==", + "license": "Apache-2.0", "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", @@ -17095,9 +17096,13 @@ "openai": "bin/cli" }, "peerDependencies": { + "ws": "^8.18.0", "zod": "^3.23.8" }, "peerDependenciesMeta": { + "ws": { + "optional": true + }, "zod": { "optional": true } diff --git a/package.json b/package.json index a4ee38bb..b99ca9dc 100644 --- a/package.json +++ b/package.json @@ -124,7 +124,7 @@ "node-pty": "1.1.0-beta21", "ollama": "^0.5.11", "open": "^8.4.2", - "openai": "^4.76.1", + "openai": "^4.85.4", "posthog-node": "^4.3.1", "react": "^18.3.1", "react-dom": "^18.3.1", diff --git a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts index 806676da..297d82b6 100644 --- a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts +++ b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts @@ -3,6 +3,7 @@ * Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information. *--------------------------------------------------------------------------------------*/ +import { OnText } from '../../common/llmMessageTypes.js' import { DIVIDER, FINAL, ORIGINAL } from '../prompt/prompts.js' class SurroundingsRemover { @@ -240,3 +241,96 @@ export const extractSearchReplaceBlocks = (str: string) => { }) } } + + + + + + + + + + +export const extractReasoningFromText = ( + onText_: OnText, + thinkTags: [string, string], +): OnText => { + + let latestAddIdx = 0 // exclusive + let foundTag1 = false + let foundTag2 = false + + let fullText = '' + let fullReasoning = '' + + const onText: OnText = ({ newText: newText_, fullText: fullText_ }) => { + // abcdefghi + // | + // until found the first think tag, keep adding to fullText + if (!foundTag1) { + const endsWithTag1 = endsWithAnyPrefixOf(fullText_, thinkTags[0]) + if (endsWithTag1) { + // wait until we get the full tag or know more + return + } + // if found the first tag + const tag1Index = fullText_.lastIndexOf(thinkTags[0]) + if (tag1Index !== -1) { + foundTag1 = true + const newText = fullText.substring(latestAddIdx, tag1Index) + const newReasoning = fullText.substring(tag1Index + thinkTags[0].length, Infinity) + + fullText += newText + fullReasoning += newReasoning + latestAddIdx += newText.length + newReasoning.length + onText_({ newText, fullText, newReasoning: newReasoning, fullReasoning }) + return + } + + // add the text to fullText + const newText = fullText.substring(latestAddIdx, Infinity) + fullText += newText + latestAddIdx += newText.length + onText_({ newText, fullText, newReasoning: '', fullReasoning }) + return + } + // at this point, we found + + // until found the second think tag, keep adding to fullReasoning + if (!foundTag2) { + const endsWithTag2 = endsWithAnyPrefixOf(fullText_, thinkTags[1]) + if (endsWithTag2) { + // wait until we get the full tag or know more + return + } + // if found the second tag + const tag2Index = fullText_.lastIndexOf(thinkTags[1]) + if (tag2Index !== -1) { + foundTag2 = true + const newReasoning = fullText.substring(latestAddIdx, tag2Index) + const newText = fullText.substring(tag2Index + thinkTags[1].length, Infinity) + + fullText += newText + fullReasoning += newReasoning + latestAddIdx += newText.length + newReasoning.length + onText_({ newText, fullText, newReasoning: newReasoning, fullReasoning }) + return + } + + // add the text to fullReasoning + const newReasoning = fullText.substring(latestAddIdx, Infinity) + fullReasoning += newReasoning + latestAddIdx += newReasoning.length + onText_({ newText: '', fullText, newReasoning, fullReasoning }) + return + } + // at this point, we found + + fullText += newText_ + const newText = fullText.substring(latestAddIdx, Infinity) + latestAddIdx += newText.length + onText_({ newText, fullText, newReasoning: '', fullReasoning }) + } + + return onText +} diff --git a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx index a6aec380..e2056fbf 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx @@ -415,7 +415,7 @@ export const FeaturesTab = () => {
- + {/* TODO we should create UI for downloading models without user going into terminal */} diff --git a/src/vs/workbench/contrib/void/common/voidSettingsService.ts b/src/vs/workbench/contrib/void/common/voidSettingsService.ts index 7a35c678..be3f6689 100644 --- a/src/vs/workbench/contrib/void/common/voidSettingsService.ts +++ b/src/vs/workbench/contrib/void/common/voidSettingsService.ts @@ -11,7 +11,7 @@ import { registerSingleton, InstantiationType } from '../../../../platform/insta import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js'; import { IStorageService, StorageScope, StorageTarget } from '../../../../platform/storage/common/storage.js'; import { IMetricsService } from './metricsService.js'; -import { defaultSettingsOfProvider, FeatureName, ProviderName, ModelSelectionOfFeature, SettingsOfProvider, SettingName, providerNames, ModelSelection, modelSelectionsEqual, featureNames, VoidModelInfo, GlobalSettings, GlobalSettingName, defaultGlobalSettings, defaultProviderSettings, developerInfoOfModelName, modelInfoOfAutodetectedModelNames } from './voidSettingsTypes.js'; +import { defaultSettingsOfProvider, FeatureName, ProviderName, ModelSelectionOfFeature, SettingsOfProvider, SettingName, providerNames, ModelSelection, modelSelectionsEqual, featureNames, VoidModelInfo, GlobalSettings, GlobalSettingName, defaultGlobalSettings, defaultProviderSettings } from './voidSettingsTypes.js'; const STORAGE_KEY = 'void.settingsServiceStorage' @@ -32,8 +32,6 @@ type SetGlobalSettingFn = (settingName: T, newVal export type ModelOption = { name: string, selection: ModelSelection } - - export type VoidSettingsState = { readonly settingsOfProvider: SettingsOfProvider; // optionsOfProvider readonly modelSelectionOfFeature: ModelSelectionOfFeature; // stateOfFeature @@ -172,9 +170,6 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService { // A HACK BECAUSE WE ADDED DEEPSEEK (did not exist before, comes before readS) ...{ deepseek: defaultSettingsOfProvider.deepseek }, - // A HACK BECAUSE WE ADDED MISTRAL (did not exist before, comes before readS) - ...{ mistral: defaultSettingsOfProvider.mistral }, - // A HACK BECAUSE WE ADDED XAI (did not exist before, comes before readS) ...{ xAI: defaultSettingsOfProvider.xAI }, @@ -295,19 +290,35 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService { } + private _updatedModelsAfterAutodetection = (defaultModelNames: string[], options: { existingModels: VoidModelInfo[] }) => { + const { existingModels } = options + + const existingModelsMap: Record = {} + for (const existingModel of existingModels) { + existingModelsMap[existingModel.modelName] = existingModel + } + + const newDefaultModels = defaultModelNames.map((modelName, i) => ({ + modelName, + isDefault: true, + isAutodetected: true, + isHidden: !!existingModelsMap[modelName]?.isHidden, + })) + + return [ + ...newDefaultModels, // swap out all the default models for the new default models + ...existingModels.filter(m => !m.isDefault), // keep any non-default (custom) models + ] + } + + setAutodetectedModels(providerName: ProviderName, autodetectedModelNames: string[], logging: object) { const { models } = this.state.settingsOfProvider[providerName] const oldModelNames = models.map(m => m.modelName) - - const newDefaultModels = modelInfoOfAutodetectedModelNames(autodetectedModelNames, { existingModels: models }) - const newModels = [ - ...newDefaultModels, // swap out all the default models for the new default models - ...models.filter(m => !m.isDefault), // keep any non-default (custom) models - ] - + const newModels = this._updatedModelsAfterAutodetection(autodetectedModelNames, { existingModels: models }) this.setSettingOfProvider(providerName, 'models', newModels) // if the models changed, log it @@ -341,7 +352,7 @@ class VoidSettingsService extends Disposable implements IVoidSettingsService { if (existingIdx !== -1) return // if exists, do nothing const newModels = [ ...models, - { ...developerInfoOfModelName(modelName), modelName, isDefault: false, isHidden: false } + { modelName, isDefault: false, isHidden: false } ] this.setSettingOfProvider(providerName, 'models', newModels) diff --git a/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts b/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts index fb387bc1..4111b53b 100644 --- a/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts +++ b/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts @@ -4,7 +4,6 @@ * Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information. *--------------------------------------------------------------------------------------*/ -import { defaultModelsOfProvider } from '../electron-main/llmMessage/MODELS.js'; import { VoidSettingsState } from './voidSettingsService.js' @@ -40,14 +39,70 @@ export const defaultProviderSettings = { groq: { apiKey: '', }, - mistral: { - apiKey: '' - }, xAI: { apiKey: '' }, } as const + + + +export const defaultModelsOfProvider = { + openAI: [ // https://platform.openai.com/docs/models/gp + 'o1', + 'o3-mini', + 'o1-mini', + 'gpt-4o', + 'gpt-4o-mini', + ], + anthropic: [ // https://docs.anthropic.com/en/docs/about-claude/models + 'claude-3-5-sonnet-latest', + 'claude-3-5-haiku-latest', + 'claude-3-opus-latest', + ], + xAI: [ // https://docs.x.ai/docs/models?cluster=us-east-1 + 'grok-2-latest', + 'grok-3-latest', + ], + gemini: [ // https://ai.google.dev/gemini-api/docs/models/gemini + 'gemini-2.0-flash', + 'gemini-1.5-flash', + 'gemini-1.5-pro', + 'gemini-1.5-flash-8b', + 'gemini-2.0-flash-thinking-exp', + ], + deepseek: [ // https://api-docs.deepseek.com/quick_start/pricing + 'deepseek-chat', + 'deepseek-reasoner', + ], + ollama: [ // autodetected + ], + vLLM: [ // autodetected + ], + openRouter: [ // https://openrouter.ai/models + 'anthropic/claude-3.5-sonnet', + 'deepseek/deepseek-r1', + 'mistralai/codestral-2501', + 'qwen/qwen2.5-vl-72b-instruct:free', + ], + groq: [ // https://console.groq.com/docs/models + 'llama-3.3-70b-versatile', + 'llama-3.1-8b-instant', + 'qwen-2.5-coder-32b', // preview mode (experimental) + ], + // not supporting mistral right now- it's last on Void usage, and a huge pain to set up since it's nonstandard (it supports codestral FIM but it's on v1/fim/completions, etc) + // mistral: [ // https://docs.mistral.ai/getting-started/models/models_overview/ + // 'codestral-latest', + // 'mistral-large-latest', + // 'ministral-3b-latest', + // 'ministral-8b-latest', + // ], + openAICompatible: [], // fallback +} as const satisfies Record + + + + export type ProviderName = keyof typeof defaultProviderSettings export const providerNames = Object.keys(defaultProviderSettings) as ProviderName[] @@ -139,11 +194,6 @@ export const displayInfoOfProviderName = (providerName: ProviderName): DisplayIn title: 'Groq.com API', } } - else if (providerName === 'mistral') { - return { - title: 'Mistral API', - } - } else if (providerName === 'xAI') { return { title: 'xAI API', @@ -173,10 +223,9 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName providerName === 'openRouter' ? 'sk-or-key...' : // sk-or-v1-key providerName === 'gemini' ? 'key...' : providerName === 'groq' ? 'gsk_key...' : - providerName === 'mistral' ? 'key...' : - providerName === 'openAICompatible' ? 'sk-key...' : - providerName === 'xAI' ? 'xai-key...' : - '', + providerName === 'openAICompatible' ? 'sk-key...' : + providerName === 'xAI' ? 'xai-key...' : + '', subTextMd: providerName === 'anthropic' ? 'Get your [API Key here](https://console.anthropic.com/settings/keys).' : providerName === 'openAI' ? 'Get your [API Key here](https://platform.openai.com/api-keys).' : @@ -184,10 +233,9 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName providerName === 'openRouter' ? 'Get your [API Key here](https://openrouter.ai/settings/keys).' : providerName === 'gemini' ? 'Get your [API Key here](https://aistudio.google.com/apikey).' : providerName === 'groq' ? 'Get your [API Key here](https://console.groq.com/keys).' : - providerName === 'mistral' ? 'Get your [API Key here](https://console.mistral.ai/api-keys/).' : - providerName === 'xAI' ? 'Get your [API Key here](https://console.x.ai).' : - providerName === 'openAICompatible' ? undefined : - '', + providerName === 'xAI' ? 'Get your [API Key here](https://console.x.ai).' : + providerName === 'openAICompatible' ? undefined : + '', isPasswordField: true, } } @@ -271,12 +319,6 @@ export const defaultSettingsOfProvider: SettingsOfProvider = { ...modelInfoOfDefaultModelNames(defaultModelsOfProvider.gemini), _didFillInProviderSettings: undefined, }, - mistral: { - ...defaultCustomSettings, - ...defaultProviderSettings.mistral, - ...modelInfoOfDefaultModelNames(defaultModelsOfProvider.mistral), - _didFillInProviderSettings: undefined, - }, xAI: { ...defaultCustomSettings, ...defaultProviderSettings.xAI, diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts index ce0d0537..d68408cd 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts @@ -4,67 +4,15 @@ *--------------------------------------------------------------------------------------*/ import OpenAI, { ClientOptions } from 'openai'; -import { Model as OpenAIModel } from 'openai/resources/models.js'; -import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js'; -import { InternalToolInfo, isAToolName } from '../../common/toolsService.js'; -import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js'; -import { prepareMessages } from './preprocessLLMMessages.js'; import Anthropic from '@anthropic-ai/sdk'; import { Ollama } from 'ollama'; - - -export const defaultModelsOfProvider = { - anthropic: [ // https://docs.anthropic.com/en/docs/about-claude/models - 'claude-3-5-sonnet-latest', - 'claude-3-5-haiku-latest', - 'claude-3-opus-latest', - ], - openAI: [ // https://platform.openai.com/docs/models/gp - 'o1', - 'o1-mini', - 'o3-mini', - 'gpt-4o', - 'gpt-4o-mini', - ], - deepseek: [ // https://platform.openai.com/docs/models/gp - 'deepseek-chat', - 'deepseek-reasoner', - ], - ollama: [], - vLLM: [], - openRouter: [], - openAICompatible: [], - gemini: [ - 'gemini-1.5-flash', - 'gemini-1.5-pro', - 'gemini-1.5-flash-8b', - 'gemini-2.0-flash-exp', - 'gemini-2.0-flash-thinking-exp-1219', - 'learnlm-1.5-pro-experimental' - ], - groq: [ // https://console.groq.com/docs/models - "llama3-70b-8192", - "llama-3.3-70b-versatile", - "llama-3.1-8b-instant", - "gemma2-9b-it", - "mixtral-8x7b-32768" - ], - mistral: [ // https://docs.mistral.ai/getting-started/models/models_overview/ - "codestral-latest", - "open-codestral-mamba", - "open-mistral-nemo", - "mistral-large-latest", - "pixtral-large-latest", - "ministral-3b-latest", - "ministral-8b-latest", - "mistral-small-latest", - ], - xAI: [ // https://docs.x.ai/docs/models?cluster=us-east-1 - 'grok-3-latest', - 'grok-2-latest', - ], -} satisfies Record +import { Model as OpenAIModel } from 'openai/resources/models.js'; +import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js'; +import { InternalToolInfo, isAToolName } from '../../common/toolsService.js'; +import { defaultProviderSettings, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js'; +import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js'; +import { extractReasoningFromText } from '../../browser/helpers/extractCodeFromResult.js'; @@ -78,10 +26,13 @@ type ModelOptions = { } supportsSystemMessage: false | 'system-role' | 'developer-role' | 'separated'; supportsTools: false | 'anthropic-style' | 'openai-style'; - supportsFIM: false | 'TODO_FIM_FORMAT'; + supportsFIM: boolean; - supportsReasoning: boolean; // not whether it reasons, but whether it outputs reasoning tokens - manualMatchReasoningTokens?: [string, string]; // reasoning tokens if it's an OSS model + supportsReasoningOutput: false | { + // you are allowed to not include openSourceThinkTags if it's not open source (no such cases as of writing) + // if it's open source, put the think tags here so we parse them out in e.g. ollama + openSourceThinkTags?: [string, string] + }; } type ProviderReasoningOptions = { @@ -95,9 +46,9 @@ type ProviderReasoningOptions = { } type ProviderSettings = { - providerReasoningOptions?: ProviderReasoningOptions; + ifSupportsReasoningOutput?: ProviderReasoningOptions; modelOptions: { [key: string]: ModelOptions }; - modelOptionsFallback: (modelName: string) => ModelOptions; // allowed to throw error if modeName is totally invalid + modelOptionsFallback: (modelName: string) => (ModelOptions & { modelName: string }) | null; } @@ -107,172 +58,446 @@ type ModelSettingsOfProvider = { +// type DefaultModels = typeof defaultModelsOfProvider[T][number] +// type AssertModelsIncluded< +// T extends ProviderName, +// Options extends Record +// > = Exclude, keyof Options> extends never +// ? true +// : ["Missing models for", T, Exclude, keyof Options>]; +// const assertOpenAI: AssertModelsIncluded<'openAI', typeof openAIModelOptions> = true; -const modelNotRecognizedErrorMessage = (modelName: string, providerName: ProviderName) => `Void could not find a model matching ${modelName} for ${displayInfoOfProviderName(providerName).title}.` - +const modelOptionDefaults: ModelOptions = { + contextWindow: 32_000, + cost: { input: 0, output: 0 }, + supportsSystemMessage: false, + supportsTools: false, + supportsFIM: false, + supportsReasoningOutput: false, +} // ---------------- OPENAI ---------------- -const openAIModelOptions = { - "o1": { +const openAIModelOptions = { // https://platform.openai.com/docs/pricing + 'o1': { contextWindow: 128_000, cost: { input: 15.00, cache_read: 7.50, output: 60.00, }, supportsFIM: false, supportsTools: false, supportsSystemMessage: 'developer-role', - supportsReasoning: false, + supportsReasoningOutput: false, }, - "o3-mini": { + 'o3-mini': { contextWindow: 200_000, cost: { input: 1.10, cache_read: 0.55, output: 4.40, }, supportsFIM: false, supportsTools: false, supportsSystemMessage: 'developer-role', - supportsReasoning: false, + supportsReasoningOutput: false, }, - "gpt-4o": { + 'gpt-4o': { contextWindow: 128_000, cost: { input: 2.50, cache_read: 1.25, output: 10.00, }, supportsFIM: false, supportsTools: 'openai-style', supportsSystemMessage: 'system-role', - supportsReasoning: false, + supportsReasoningOutput: false, }, -} as const + 'o1-mini': { + contextWindow: 128_000, + cost: { input: 1.10, cache_read: 0.55, output: 4.40, }, + supportsFIM: false, + supportsTools: false, + supportsSystemMessage: false, // does not support any system + supportsReasoningOutput: false, + }, + 'gpt-4o-mini': { + contextWindow: 128_000, + cost: { input: 0.15, cache_read: 0.075, output: 0.60, }, + supportsFIM: false, + supportsTools: 'openai-style', + supportsSystemMessage: 'system-role', // ?? + supportsReasoningOutput: false, + }, +} as const satisfies { [s: string]: ModelOptions } + const openAISettings: ProviderSettings = { modelOptions: openAIModelOptions, modelOptionsFallback: (modelName) => { - if (modelName.includes('o1')) return openAIModelOptions['o1'] - if (modelName.includes('o3-mini')) return openAIModelOptions['o3-mini'] - if (modelName.includes('gpt-4o')) return openAIModelOptions['gpt-4o'] - throw new Error(modelNotRecognizedErrorMessage(modelName, 'openAI')) + let fallbackName: keyof typeof openAIModelOptions | null = null + if (modelName.includes('o1')) { fallbackName = 'o1' } + if (modelName.includes('o3-mini')) { fallbackName = 'o3-mini' } + if (modelName.includes('gpt-4o')) { fallbackName = 'gpt-4o' } + if (fallbackName) return { modelName: fallbackName, ...openAIModelOptions[fallbackName] } + return null } } // ---------------- ANTHROPIC ---------------- const anthropicModelOptions = { - "claude-3-5-sonnet-20241022": { + 'claude-3-5-sonnet-20241022': { contextWindow: 200_000, cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 }, supportsFIM: false, supportsSystemMessage: 'separated', supportsTools: 'anthropic-style', - supportsReasoning: false, - + supportsReasoningOutput: false, }, - "claude-3-5-haiku-20241022": { + 'claude-3-5-haiku-20241022': { contextWindow: 200_000, cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 }, supportsFIM: false, supportsSystemMessage: 'separated', supportsTools: 'anthropic-style', - supportsReasoning: false, + supportsReasoningOutput: false, }, - "claude-3-opus-20240229": { + 'claude-3-opus-20240229': { contextWindow: 200_000, cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 }, supportsFIM: false, supportsSystemMessage: 'separated', supportsTools: 'anthropic-style', - supportsReasoning: false, + supportsReasoningOutput: false, }, - "claude-3-sonnet-20240229": { + 'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in contextWindow: 200_000, cost: { input: 3.00, output: 15.00 }, supportsFIM: false, supportsSystemMessage: 'separated', supportsTools: 'anthropic-style', - supportsReasoning: false, + supportsReasoningOutput: false, } -} as const +} as const satisfies { [s: string]: ModelOptions } const anthropicSettings: ProviderSettings = { modelOptions: anthropicModelOptions, modelOptionsFallback: (modelName) => { - throw new Error(modelNotRecognizedErrorMessage(modelName, 'anthropic')) + let fallbackName: keyof typeof anthropicModelOptions | null = null + if (modelName.includes('claude-3-5-sonnet')) fallbackName = 'claude-3-5-sonnet-20241022' + if (modelName.includes('claude-3-5-haiku')) fallbackName = 'claude-3-5-haiku-20241022' + if (modelName.includes('claude-3-opus')) fallbackName = 'claude-3-opus-20240229' + if (fallbackName) return { modelName: fallbackName, ...anthropicModelOptions[fallbackName] } + return null } } // ---------------- XAI ---------------- -const XAIModelOptions = { - "grok-2-latest": { +const xAIModelOptions = { + 'grok-2-latest': { contextWindow: 131_072, cost: { input: 2.00, output: 10.00 }, supportsFIM: false, supportsSystemMessage: 'system-role', supportsTools: 'openai-style', - supportsReasoning: false, + supportsReasoningOutput: false, }, -} as const +} as const satisfies { [s: string]: ModelOptions } -const XAISettings: ProviderSettings = { - modelOptions: XAIModelOptions, +const xAISettings: ProviderSettings = { + modelOptions: xAIModelOptions, modelOptionsFallback: (modelName) => { - throw new Error(modelNotRecognizedErrorMessage(modelName, 'xAI')) + let fallbackName: keyof typeof xAIModelOptions | null = null + if (modelName.includes('grok-2')) fallbackName = 'grok-2-latest' + if (fallbackName) return { modelName: fallbackName, ...xAIModelOptions[fallbackName] } + return null } } +// ---------------- GEMINI ---------------- +const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing + 'gemini-2.0-flash': { + contextWindow: 1_048_576, + cost: { input: 0.10, output: 0.40 }, + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', // we are assuming OpenAI SDK when calling gemini + supportsReasoningOutput: false, + }, + 'gemini-2.0-flash-lite-preview-02-05': { + contextWindow: 1_048_576, + cost: { input: 0.075, output: 0.30 }, + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'gemini-1.5-flash': { + contextWindow: 1_048_576, + cost: { input: 0.075, output: 0.30 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'gemini-1.5-pro': { + contextWindow: 2_097_152, + cost: { input: 1.25, output: 5.00 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'gemini-1.5-flash-8b': { + contextWindow: 1_048_576, + cost: { input: 0.0375, output: 0.15 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, +} as const satisfies { [s: string]: ModelOptions } + +const geminiSettings: ProviderSettings = { + modelOptions: geminiModelOptions, + modelOptionsFallback: (modelName) => { + return null + } +} + + +// ---------------- OPEN SOURCE MODELS ---------------- + +const openSourceModelDefaultOptionsAssumingOAICompat = { + 'deepseekR1': { + supportsFIM: false, + supportsSystemMessage: false, + supportsTools: false, + supportsReasoningOutput: { openSourceThinkTags: ['', ''] }, + }, + 'deepseekCoderV2': { + supportsFIM: false, + supportsSystemMessage: false, // unstable + supportsTools: false, + supportsReasoningOutput: false, + }, + 'codestral': { + supportsFIM: true, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + // llama + 'llama3': { + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'llama3.1': { + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'llama3.2': { + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'llama3.3': { + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'qwen2.5coder': { + supportsFIM: true, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + // FIM only + 'starcoder2': { + supportsFIM: true, + supportsSystemMessage: false, + supportsTools: false, + supportsReasoningOutput: false, + }, + 'codegemma:2b': { + supportsFIM: true, + supportsSystemMessage: false, + supportsTools: false, + supportsReasoningOutput: false, + }, +} as const satisfies { [s: string]: Partial } + + + +// ---------------- DEEPSEEK API ---------------- +const deepseekModelOptions = { + 'deepseek-chat': { + ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, + contextWindow: 64_000, // https://api-docs.deepseek.com/quick_start/pricing + cost: { cache_read: .07, input: .27, output: 1.10, }, + }, + 'deepseek-reasoner': { + ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2, + contextWindow: 64_000, + cost: { cache_read: .14, input: .55, output: 2.19, }, + }, +} as const satisfies { [s: string]: ModelOptions } + + +const deepseekSettings: ProviderSettings = { + modelOptions: deepseekModelOptions, + ifSupportsReasoningOutput: { + // reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model + output: { nameOfFieldInDelta: 'reasoning_content' }, + }, + modelOptionsFallback: (modelName) => { + return null + } +} + +// ---------------- GROQ ---------------- +const groqModelOptions = { + 'llama-3.3-70b-versatile': { + contextWindow: 128_000, + cost: { input: 0.59, output: 0.79 }, + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'llama-3.1-8b-instant': { + contextWindow: 128_000, + cost: { input: 0.05, output: 0.08 }, + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'qwen-2.5-coder-32b': { + contextWindow: 128_000, + cost: { input: 0.79, output: 0.79 }, + supportsFIM: false, // unfortunately looks like no FIM support on groq + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, +} as const satisfies { [s: string]: ModelOptions } +const groqSettings: ProviderSettings = { + modelOptions: groqModelOptions, + modelOptionsFallback: (modelName) => { return null } +} + + +// ---------------- anything self-hosted/local: VLLM, OLLAMA, OPENAICOMPAT ---------------- + +// fallback to any model (anything openai-compatible) +const extensiveModelFallback: ProviderSettings['modelOptionsFallback'] = (modelName) => { + const toFallback = (opts: Omit): ModelOptions & { modelName: string } => { + return { + modelName, + ...opts, + supportsSystemMessage: opts.supportsSystemMessage ? 'system-role' : false, + cost: { input: 0, output: 0 }, + } + } + if (modelName.includes('gpt-4o')) return toFallback(openAIModelOptions['gpt-4o']) + if (modelName.includes('claude')) return toFallback(anthropicModelOptions['claude-3-5-sonnet-20241022']) + if (modelName.includes('grok')) return toFallback(xAIModelOptions['grok-2-latest']) + if (modelName.includes('deepseek-r1') || modelName.includes('deepseek-reasoner')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, contextWindow: 32_000, }) + if (modelName.includes('deepseek')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2, contextWindow: 32_000, }) + if (modelName.includes('llama3')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.llama3, contextWindow: 32_000, }) + if (modelName.includes('qwen2.5-coder')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat['qwen2.5coder'], contextWindow: 32_000, }) + if (modelName.includes('codestral')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.codestral, contextWindow: 32_000, }) + if (/\bo1\b/.test(modelName) || /\bo3\b/.test(modelName)) return toFallback(openAIModelOptions['o1']) + return toFallback(modelOptionDefaults) +} + + +const vLLMSettings: ProviderSettings = { + // reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions + ifSupportsReasoningOutput: { output: { nameOfFieldInDelta: 'reasoning_content' }, }, + modelOptionsFallback: (modelName) => extensiveModelFallback(modelName), + modelOptions: {}, +} + +const ollamaSettings: ProviderSettings = { + // reasoning: we need to filter out reasoning tags manually + ifSupportsReasoningOutput: { output: { needsManualParse: true }, }, + modelOptionsFallback: (modelName) => extensiveModelFallback(modelName), + modelOptions: {}, +} + +const openaiCompatible: ProviderSettings = { + // reasoning: we have no idea what endpoint they used, so we can't consistently parse out reasoning + modelOptionsFallback: (modelName) => extensiveModelFallback(modelName), + modelOptions: {}, +} + + +// ---------------- OPENROUTER ---------------- +const openRouterModelOptions = { + 'deepseek/deepseek-r1': { + ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, + contextWindow: 128_000, + cost: { input: 0.8, output: 2.4 }, + }, + 'anthropic/claude-3.5-sonnet': { + contextWindow: 200_000, + cost: { input: 3.00, output: 15.00 }, + supportsFIM: false, + supportsSystemMessage: 'system-role', + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, + 'mistralai/codestral-2501': { + ...openSourceModelDefaultOptionsAssumingOAICompat.codestral, + contextWindow: 256_000, + cost: { input: 0.3, output: 0.9 }, + supportsTools: 'openai-style', + supportsReasoningOutput: false, + }, +} as const satisfies { [s: string]: ModelOptions } + +const openRouterSettings: ProviderSettings = { + // reasoning: OAICompat + response.choices[0].delta.reasoning : payload should have {include_reasoning: true} https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models + ifSupportsReasoningOutput: { + input: { includeInPayload: { include_reasoning: true } }, + output: { nameOfFieldInDelta: 'reasoning' }, + }, + modelOptions: openRouterModelOptions, + // TODO!!! send a query to openrouter to get the price, isFIM, etc. + modelOptionsFallback: (modelName) => extensiveModelFallback(modelName), +} + +// ---------------- model settings of everything above ---------------- const modelSettingsOfProvider: ModelSettingsOfProvider = { openAI: openAISettings, anthropic: anthropicSettings, - xAI: XAISettings, - gemini: { - modelOptions: { - - } - }, - googleVertex: { - - }, - microsoftAzure: { - - }, - openRouter: { - providerReasoningOptions: { - // reasoning: OAICompat + response.choices[0].delta.reasoning : payload should have {include_reasoning: true} https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models - input: { includeInPayload: { include_reasoning: true } }, - output: { nameOfFieldInDelta: 'reasoning' }, - } - }, - vLLM: { - providerReasoningOptions: { - // reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://docs.vllm.ai/en/stable/features/reasoning_outputs.html#streaming-chat-completions - output: { nameOfFieldInDelta: 'reasoning_content' }, - } - }, - deepseek: { - providerReasoningOptions: { - // reasoning: OAICompat + response.choices[0].delta.reasoning_content // https://api-docs.deepseek.com/guides/reasoning_model - output: { nameOfFieldInDelta: 'reasoning_content' }, - }, - }, - ollama: { - providerReasoningOptions: { - // reasoning: we need to filter out reasoning tags manually - output: { needsManualParse: true }, - }, - }, - - openAICompatible: { - }, - mistral: { - }, - groq: { - }, + xAI: xAISettings, + gemini: geminiSettings, + // open source models + deepseek: deepseekSettings, + groq: groqSettings, + // open source models + providers (mixture of everything) + openRouter: openRouterSettings, + vLLM: vLLMSettings, + ollama: ollamaSettings, + openAICompatible: openaiCompatible, + // googleVertex: {}, + // microsoftAzure: {}, } as const satisfies ModelSettingsOfProvider -const modelOptionsOfProvider = (providerName: ProviderName, modelName: string) => { + + +export const modelOptionsOfProvider = (providerName: ProviderName, modelName: string): ModelOptions & { modelName: string } => { const { modelOptions, modelOptionsFallback } = modelSettingsOfProvider[providerName] - if (modelName in modelOptions) return modelOptions[modelName] - return modelOptionsFallback(modelName) + if (modelName in modelOptions) return { modelName, ...modelOptions[modelName] } + const result = modelOptionsFallback(modelName) + if (!result) return { modelName, ...modelOptionDefaults } + return result } @@ -361,10 +586,6 @@ const newOpenAICompatibleSDK = ({ settingsOfProvider, providerName, includeInPay const thisConfig = settingsOfProvider[providerName] return new OpenAI({ baseURL: thisConfig.endpoint, apiKey: thisConfig.apiKey, ...commonPayloadOpts }) } - else if (providerName === 'mistral') { - const thisConfig = settingsOfProvider[providerName] - return new OpenAI({ baseURL: 'https://api.mistral.ai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts }) - } else if (providerName === 'groq') { const thisConfig = settingsOfProvider[providerName] return new OpenAI({ baseURL: 'https://api.groq.com/openai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts }) @@ -379,33 +600,52 @@ const newOpenAICompatibleSDK = ({ settingsOfProvider, providerName, includeInPay -const manualParseOnText = ( - providerName: ProviderName, - modelName: string, - onText_: OnText -): OnText => { - return onText_ +const _sendOpenAICompatibleFIM = ({ messages: messages_, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, providerName, aiInstructions, }: SendFIMParams_Internal) => { + const { modelName, } = modelOptionsOfProvider(providerName, modelName_) + const messages = prepareFIMMessage({ messages: messages_, aiInstructions, }) + + const openai = newOpenAICompatibleSDK({ providerName, settingsOfProvider }) + openai.completions + .create({ + model: modelName, + prompt: messages.prefix, + suffix: messages.suffix, + stop: messages.stopTokens, + max_tokens: messages.maxTokens, + }) + .then(async response => { + const fullText = response.choices[0]?.text + onFinalMessage({ fullText, }); + }) + .catch(error => { + if (error instanceof OpenAI.APIError && error.status === 401) { onError({ message: 'Invalid API key.', fullError: error }); } + else { onError({ message: error + '', fullError: error }); } + }) } -const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => { + + +const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => { const { - supportsReasoning: modelSupportsReasoning, + modelName, + supportsReasoningOutput, supportsSystemMessage, supportsTools, - } = modelOptionsOfProvider(providerName, modelName) + } = modelOptionsOfProvider(providerName, modelName_) const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, }) const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined - const includeInPayload = modelSupportsReasoning ? {} : modelSettingsOfProvider[providerName].providerReasoningOptions?.input?.includeInPayload || {} + const includeInPayload = supportsReasoningOutput ? modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.input?.includeInPayload || {} : {} const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {} const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload }) const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj } - const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].providerReasoningOptions?.output ?? {} - if (needsManualReasoningParse) onText = manualParseOnText(providerName, modelName, onText) + const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.output ?? {} + if (needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags) + onText = extractReasoningFromText(onText, supportsReasoningOutput.openSourceThinkTags) let fullReasoning = '' let fullText = '' @@ -432,7 +672,7 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage let newReasoning = '' if (nameOfReasoningFieldInDelta) { // @ts-ignore - newReasoning = (chunk.choices[0]?.delta?.[nameOfFieldInDelta] || '') + '' + newReasoning = (chunk.choices[0]?.delta?.[nameOfReasoningFieldInDelta] || '') + '' fullReasoning += newReasoning } @@ -477,10 +717,6 @@ const _openaiCompatibleList = async ({ onSuccess: onSuccess_, onError: onError_, -// ------------ OPENAI ------------ -const sendOpenAIChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} // ------------ ANTHROPIC ------------ const toAnthropicTool = (toolInfo: InternalToolInfo) => { @@ -504,13 +740,14 @@ const toolCallsFromAnthropicContent = (content: Anthropic.Messages.ContentBlock[ }).filter(t => !!t) } -const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => { +const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => { const { // supportsReasoning: modelSupportsReasoning, + modelName, supportsSystemMessage, supportsTools, contextWindow, - } = modelOptionsOfProvider(providerName, modelName) + } = modelOptionsOfProvider(providerName, modelName_) const { messages, separateSystemMessageStr } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, }) @@ -561,16 +798,6 @@ const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalM // }) -// ------------ XAI ------------ -const sendXAIChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} - -// ------------ GEMINI ------------ -const sendGeminiAPIChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} - // ------------ OLLAMA ------------ const newOllamaSDK = ({ endpoint }: { endpoint: string }) => { // if endpoint is empty, normally ollama will send to 11434, but we want it to fail - the user should type it in @@ -603,10 +830,12 @@ const ollamaList = async ({ onSuccess: onSuccess_, onError: onError_, settingsOf } } -const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter }: SendFIMParams_Internal) => { +const sendOllamaFIM = ({ messages: messages_, onFinalMessage, onError, settingsOfProvider, modelName, aiInstructions, _setAborter }: SendFIMParams_Internal) => { const thisConfig = settingsOfProvider.ollama const ollama = newOllamaSDK({ endpoint: thisConfig.endpoint }) + const messages = prepareFIMMessage({ messages: messages_, aiInstructions, }) + let fullText = '' ollama.generate({ model: modelName, @@ -614,7 +843,7 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider, suffix: messages.suffix, options: { stop: messages.stopTokens, - num_predict: 300, // max tokens + num_predict: messages.maxTokens, // max tokens // repeat_penalty: 1, }, raw: true, @@ -635,57 +864,73 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider, } -// ollama's implementation of openai-compatible SDK dumps all reasoning tokens out with message, and supports tools, so we can use it for chat! -const sendOllamaChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) + +type CallFnOfProvider = { + [providerName in ProviderName]: { + sendChat: (params: SendChatParams_Internal) => void; + sendFIM: ((params: SendFIMParams_Internal) => void) | null; + list: ((params: ListParams_Internal) => void) | null; + } } -// ------------ OPENAI-COMPATIBLE ------------ -// TODO!!! FIM - -// using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration -const sendOpenAICompatibleChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} - -// ------------ OPENROUTER ------------ -const sendOpenRouterChat = (params: SendChatParams_Internal) => { - _sendOpenAICompatibleChat(params) -} - -// ------------ VLLM ------------ -const vLLMList = async (params: ListParams_Internal) => { - return _openaiCompatibleList(params) -} -const sendVLLMFIM = (params: SendFIMParams_Internal) => { - // TODO!!! -} - -// using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration -const sendVLLMChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} - -// ------------ DEEPSEEK API ------------ -const sendDeepSeekAPIChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} - -// ------------ MISTRAL ------------ -const sendMistralAPIChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} - -// ------------ GROQ ------------ -const sendGroqAPIChat = (params: SendChatParams_Internal) => { - return _sendOpenAICompatibleChat(params) -} +export const sendLLMMessageToProviderImplementation = { + anthropic: { + sendChat: sendAnthropicChat, + sendFIM: null, + list: null, + }, + openAI: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: null, + list: null, + }, + xAI: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: null, + list: null, + }, + gemini: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: null, + list: null, + }, + ollama: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: sendOllamaFIM, + list: ollamaList, + }, + openAICompatible: { + sendChat: (params) => _sendOpenAICompatibleChat(params), // using openai's SDK is not ideal (your implementation might not do tools, reasoning, FIM etc correctly), talk to us for a custom integration + sendFIM: (params) => _sendOpenAICompatibleFIM(params), + list: null, + }, + openRouter: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: (params) => _sendOpenAICompatibleFIM(params), + list: null, + }, + vLLM: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: (params) => _sendOpenAICompatibleFIM(params), + list: (params) => _openaiCompatibleList(params), + }, + deepseek: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: null, + list: null, + }, + groq: { + sendChat: (params) => _sendOpenAICompatibleChat(params), + sendFIM: null, + list: null, + }, +} satisfies CallFnOfProvider /* -FIM: +FIM info (this may be useful in the future with vLLM, but in most cases the only way to use FIM is if the provider explicitly supports it): qwen2.5-coder https://ollama.com/library/qwen2.5-coder/blobs/e94a8ecb9327 <|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|> @@ -706,71 +951,3 @@ codegemma https://ollama.com/library/codegemma:2b/blobs/48d9a8140749 <|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|> */ - - - -type CallFnOfProvider = { - [providerName in ProviderName]: { - sendChat: (params: SendChatParams_Internal) => void; - sendFIM: ((params: SendFIMParams_Internal) => void) | null; - list: ((params: ListParams_Internal) => void) | null; - } -} -export const sendLLMMessageToProviderImplementation = { - openAI: { - sendChat: sendOpenAIChat, - sendFIM: null, - list: null, - }, - anthropic: { - sendChat: sendAnthropicChat, - sendFIM: null, - list: null, - }, - xAI: { - sendChat: sendXAIChat, - sendFIM: null, - list: null, - }, - gemini: { - sendChat: sendGeminiAPIChat, - sendFIM: null, - list: null, - }, - ollama: { - sendChat: sendOllamaChat, - sendFIM: sendOllamaFIM, - list: ollamaList, - }, - openAICompatible: { - sendChat: sendOpenAICompatibleChat, - sendFIM: null, - list: null, - }, - openRouter: { - sendChat: sendOpenRouterChat, - sendFIM: null, - list: null, - }, - vLLM: { - sendChat: sendVLLMChat, - sendFIM: sendVLLMFIM, - list: vLLMList, - }, - deepseek: { - sendChat: sendDeepSeekAPIChat, - sendFIM: null, - list: null, - }, - groq: { - sendChat: sendGroqAPIChat, - sendFIM: null, - list: null, - }, - mistral: { - sendChat: sendMistralAPIChat, - sendFIM: null, - list: null, - }, - -} satisfies CallFnOfProvider diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts index 1d388338..1aec2649 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/preprocessLLMMessages.ts @@ -1,6 +1,6 @@ -import { LLMChatMessage } from '../../common/llmMessageTypes.js'; +import { LLMChatMessage, LLMFIMMessage } from '../../common/llmMessageTypes.js'; import { deepClone } from '../../../../../base/common/objects.js'; @@ -322,3 +322,27 @@ export const prepareMessages = ({ } as const } + + + + +export const prepareFIMMessage = ({ + messages, + aiInstructions, +}: { + messages: LLMFIMMessage, + aiInstructions: string, +}) => { + + let prefix = `\ +## You are a helpful coding assistant that performs autocomplete (fill-in-the middle or "FIM") for the user. +${!aiInstructions ? '' : `\ +## Special user instructions: +${aiInstructions.split('\n').map(line => `##${line}`).join('\n')}`} + +${messages.prefix}` + + const suffix = messages.suffix + const stopTokens = messages.stopTokens + return { prefix, suffix, stopTokens, maxTokens: 300 } as const +}