maxTokens for anthropic

This commit is contained in:
Andrew Pareles 2025-02-24 04:32:53 -08:00
parent 3ae8f75641
commit d2fb0fb4ff

View file

@ -17,7 +17,8 @@ import { extractReasoningFromText } from '../../browser/helpers/extractCodeFromR
type ModelOptions = { type ModelOptions = {
contextWindow: number; contextWindow: number; // input tokens
maxOutputTokens: number | null; // output tokens
cost: { cost: {
input: number; input: number;
output: number; output: number;
@ -70,6 +71,7 @@ type ModelSettingsOfProvider = {
const modelOptionDefaults: ModelOptions = { const modelOptionDefaults: ModelOptions = {
contextWindow: 32_000, contextWindow: 32_000,
maxOutputTokens: null,
cost: { input: 0, output: 0 }, cost: { input: 0, output: 0 },
supportsSystemMessage: false, supportsSystemMessage: false,
supportsTools: false, supportsTools: false,
@ -82,6 +84,7 @@ const modelOptionDefaults: ModelOptions = {
const openAIModelOptions = { // https://platform.openai.com/docs/pricing const openAIModelOptions = { // https://platform.openai.com/docs/pricing
'o1': { 'o1': {
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: 100_000,
cost: { input: 15.00, cache_read: 7.50, output: 60.00, }, cost: { input: 15.00, cache_read: 7.50, output: 60.00, },
supportsFIM: false, supportsFIM: false,
supportsTools: false, supportsTools: false,
@ -90,6 +93,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
}, },
'o3-mini': { 'o3-mini': {
contextWindow: 200_000, contextWindow: 200_000,
maxOutputTokens: 100_000,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, }, cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
supportsFIM: false, supportsFIM: false,
supportsTools: false, supportsTools: false,
@ -98,6 +102,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
}, },
'gpt-4o': { 'gpt-4o': {
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: 16_384,
cost: { input: 2.50, cache_read: 1.25, output: 10.00, }, cost: { input: 2.50, cache_read: 1.25, output: 10.00, },
supportsFIM: false, supportsFIM: false,
supportsTools: 'openai-style', supportsTools: 'openai-style',
@ -106,6 +111,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
}, },
'o1-mini': { 'o1-mini': {
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: 65_536,
cost: { input: 1.10, cache_read: 0.55, output: 4.40, }, cost: { input: 1.10, cache_read: 0.55, output: 4.40, },
supportsFIM: false, supportsFIM: false,
supportsTools: false, supportsTools: false,
@ -114,6 +120,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
}, },
'gpt-4o-mini': { 'gpt-4o-mini': {
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: 16_384,
cost: { input: 0.15, cache_read: 0.075, output: 0.60, }, cost: { input: 0.15, cache_read: 0.075, output: 0.60, },
supportsFIM: false, supportsFIM: false,
supportsTools: 'openai-style', supportsTools: 'openai-style',
@ -139,6 +146,7 @@ const openAISettings: ProviderSettings = {
const anthropicModelOptions = { const anthropicModelOptions = {
'claude-3-5-sonnet-20241022': { 'claude-3-5-sonnet-20241022': {
contextWindow: 200_000, contextWindow: 200_000,
maxOutputTokens: 8_192,
cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 }, cost: { input: 3.00, cache_read: 0.30, cache_write: 3.75, output: 15.00 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'separated', supportsSystemMessage: 'separated',
@ -147,6 +155,7 @@ const anthropicModelOptions = {
}, },
'claude-3-5-haiku-20241022': { 'claude-3-5-haiku-20241022': {
contextWindow: 200_000, contextWindow: 200_000,
maxOutputTokens: 8_192,
cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 }, cost: { input: 0.80, cache_read: 0.08, cache_write: 1.00, output: 4.00 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'separated', supportsSystemMessage: 'separated',
@ -155,6 +164,7 @@ const anthropicModelOptions = {
}, },
'claude-3-opus-20240229': { 'claude-3-opus-20240229': {
contextWindow: 200_000, contextWindow: 200_000,
maxOutputTokens: 4_096,
cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 }, cost: { input: 15.00, cache_read: 1.50, cache_write: 18.75, output: 75.00 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'separated', supportsSystemMessage: 'separated',
@ -163,6 +173,7 @@ const anthropicModelOptions = {
}, },
'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in 'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in
contextWindow: 200_000, cost: { input: 3.00, output: 15.00 }, contextWindow: 200_000, cost: { input: 3.00, output: 15.00 },
maxOutputTokens: 4_096,
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'separated', supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style', supportsTools: 'anthropic-style',
@ -177,8 +188,9 @@ const anthropicSettings: ProviderSettings = {
if (modelName.includes('claude-3-5-sonnet')) fallbackName = 'claude-3-5-sonnet-20241022' if (modelName.includes('claude-3-5-sonnet')) fallbackName = 'claude-3-5-sonnet-20241022'
if (modelName.includes('claude-3-5-haiku')) fallbackName = 'claude-3-5-haiku-20241022' if (modelName.includes('claude-3-5-haiku')) fallbackName = 'claude-3-5-haiku-20241022'
if (modelName.includes('claude-3-opus')) fallbackName = 'claude-3-opus-20240229' if (modelName.includes('claude-3-opus')) fallbackName = 'claude-3-opus-20240229'
if (modelName.includes('claude-3-sonnet')) fallbackName = 'claude-3-sonnet-20240229'
if (fallbackName) return { modelName: fallbackName, ...anthropicModelOptions[fallbackName] } if (fallbackName) return { modelName: fallbackName, ...anthropicModelOptions[fallbackName] }
return null return { modelName, ...modelOptionDefaults, maxOutputTokens: 4_096 }
} }
} }
@ -187,6 +199,7 @@ const anthropicSettings: ProviderSettings = {
const xAIModelOptions = { const xAIModelOptions = {
'grok-2-latest': { 'grok-2-latest': {
contextWindow: 131_072, contextWindow: 131_072,
maxOutputTokens: null, // 131_072,
cost: { input: 2.00, output: 10.00 }, cost: { input: 2.00, output: 10.00 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -210,6 +223,7 @@ const xAISettings: ProviderSettings = {
const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
'gemini-2.0-flash': { 'gemini-2.0-flash': {
contextWindow: 1_048_576, contextWindow: 1_048_576,
maxOutputTokens: null, // 8_192,
cost: { input: 0.10, output: 0.40 }, cost: { input: 0.10, output: 0.40 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -218,6 +232,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
}, },
'gemini-2.0-flash-lite-preview-02-05': { 'gemini-2.0-flash-lite-preview-02-05': {
contextWindow: 1_048_576, contextWindow: 1_048_576,
maxOutputTokens: null, // 8_192,
cost: { input: 0.075, output: 0.30 }, cost: { input: 0.075, output: 0.30 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -226,6 +241,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
}, },
'gemini-1.5-flash': { 'gemini-1.5-flash': {
contextWindow: 1_048_576, contextWindow: 1_048_576,
maxOutputTokens: null, // 8_192,
cost: { input: 0.075, output: 0.30 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now cost: { input: 0.075, output: 0.30 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -234,6 +250,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
}, },
'gemini-1.5-pro': { 'gemini-1.5-pro': {
contextWindow: 2_097_152, contextWindow: 2_097_152,
maxOutputTokens: null, // 8_192,
cost: { input: 1.25, output: 5.00 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now cost: { input: 1.25, output: 5.00 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -242,6 +259,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
}, },
'gemini-1.5-flash-8b': { 'gemini-1.5-flash-8b': {
contextWindow: 1_048_576, contextWindow: 1_048_576,
maxOutputTokens: null, // 8_192,
cost: { input: 0.0375, output: 0.15 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now cost: { input: 0.0375, output: 0.15 }, // TODO!!! price doubles after 128K tokens, we are NOT encoding that info right now
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -332,11 +350,13 @@ const deepseekModelOptions = {
'deepseek-chat': { 'deepseek-chat': {
...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1,
contextWindow: 64_000, // https://api-docs.deepseek.com/quick_start/pricing contextWindow: 64_000, // https://api-docs.deepseek.com/quick_start/pricing
maxOutputTokens: null, // 8_000,
cost: { cache_read: .07, input: .27, output: 1.10, }, cost: { cache_read: .07, input: .27, output: 1.10, },
}, },
'deepseek-reasoner': { 'deepseek-reasoner': {
...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2, ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2,
contextWindow: 64_000, contextWindow: 64_000,
maxOutputTokens: null, // 8_000,
cost: { cache_read: .14, input: .55, output: 2.19, }, cost: { cache_read: .14, input: .55, output: 2.19, },
}, },
} as const satisfies { [s: string]: ModelOptions } } as const satisfies { [s: string]: ModelOptions }
@ -357,6 +377,7 @@ const deepseekSettings: ProviderSettings = {
const groqModelOptions = { const groqModelOptions = {
'llama-3.3-70b-versatile': { 'llama-3.3-70b-versatile': {
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: null, // 32_768,
cost: { input: 0.59, output: 0.79 }, cost: { input: 0.59, output: 0.79 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -365,6 +386,7 @@ const groqModelOptions = {
}, },
'llama-3.1-8b-instant': { 'llama-3.1-8b-instant': {
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: null, // 8_192,
cost: { input: 0.05, output: 0.08 }, cost: { input: 0.05, output: 0.08 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -373,6 +395,7 @@ const groqModelOptions = {
}, },
'qwen-2.5-coder-32b': { 'qwen-2.5-coder-32b': {
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: null, // not specified?
cost: { input: 0.79, output: 0.79 }, cost: { input: 0.79, output: 0.79 },
supportsFIM: false, // unfortunately looks like no FIM support on groq supportsFIM: false, // unfortunately looks like no FIM support on groq
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -401,11 +424,11 @@ const extensiveModelFallback: ProviderSettings['modelOptionsFallback'] = (modelN
if (modelName.includes('gpt-4o')) return toFallback(openAIModelOptions['gpt-4o']) if (modelName.includes('gpt-4o')) return toFallback(openAIModelOptions['gpt-4o'])
if (modelName.includes('claude')) return toFallback(anthropicModelOptions['claude-3-5-sonnet-20241022']) if (modelName.includes('claude')) return toFallback(anthropicModelOptions['claude-3-5-sonnet-20241022'])
if (modelName.includes('grok')) return toFallback(xAIModelOptions['grok-2-latest']) if (modelName.includes('grok')) return toFallback(xAIModelOptions['grok-2-latest'])
if (modelName.includes('deepseek-r1') || modelName.includes('deepseek-reasoner')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, contextWindow: 32_000, }) if (modelName.includes('deepseek-r1') || modelName.includes('deepseek-reasoner')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, contextWindow: 32_000, maxOutputTokens: 4_096, })
if (modelName.includes('deepseek')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2, contextWindow: 32_000, }) if (modelName.includes('deepseek')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekCoderV2, contextWindow: 32_000, maxOutputTokens: 4_096, })
if (modelName.includes('llama3')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.llama3, contextWindow: 32_000, }) if (modelName.includes('llama3')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.llama3, contextWindow: 32_000, maxOutputTokens: 4_096, })
if (modelName.includes('qwen2.5-coder')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat['qwen2.5coder'], contextWindow: 32_000, }) if (modelName.includes('qwen2.5-coder')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat['qwen2.5coder'], contextWindow: 32_000, maxOutputTokens: 4_096, })
if (modelName.includes('codestral')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.codestral, contextWindow: 32_000, }) if (modelName.includes('codestral')) return toFallback({ ...openSourceModelDefaultOptionsAssumingOAICompat.codestral, contextWindow: 32_000, maxOutputTokens: 4_096, })
if (/\bo1\b/.test(modelName) || /\bo3\b/.test(modelName)) return toFallback(openAIModelOptions['o1']) if (/\bo1\b/.test(modelName) || /\bo3\b/.test(modelName)) return toFallback(openAIModelOptions['o1'])
return toFallback(modelOptionDefaults) return toFallback(modelOptionDefaults)
} }
@ -437,10 +460,12 @@ const openRouterModelOptions = {
'deepseek/deepseek-r1': { 'deepseek/deepseek-r1': {
...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1, ...openSourceModelDefaultOptionsAssumingOAICompat.deepseekR1,
contextWindow: 128_000, contextWindow: 128_000,
maxOutputTokens: null,
cost: { input: 0.8, output: 2.4 }, cost: { input: 0.8, output: 2.4 },
}, },
'anthropic/claude-3.5-sonnet': { 'anthropic/claude-3.5-sonnet': {
contextWindow: 200_000, contextWindow: 200_000,
maxOutputTokens: null,
cost: { input: 3.00, output: 15.00 }, cost: { input: 3.00, output: 15.00 },
supportsFIM: false, supportsFIM: false,
supportsSystemMessage: 'system-role', supportsSystemMessage: 'system-role',
@ -450,6 +475,7 @@ const openRouterModelOptions = {
'mistralai/codestral-2501': { 'mistralai/codestral-2501': {
...openSourceModelDefaultOptionsAssumingOAICompat.codestral, ...openSourceModelDefaultOptionsAssumingOAICompat.codestral,
contextWindow: 256_000, contextWindow: 256_000,
maxOutputTokens: null,
cost: { input: 0.3, output: 0.9 }, cost: { input: 0.3, output: 0.9 },
supportsTools: 'openai-style', supportsTools: 'openai-style',
supportsReasoningOutput: false, supportsReasoningOutput: false,