Add AI model pricing, Bedrock provider, and InferenceProvider/ModelFamily split (#18155)

## Summary

- **Model pricing overhaul**: All model constants updated with accurate
pricing in dollars per 1M tokens, including cached input rates, cache
creation rates, and tiered >200k context pricing
- **New providers**: Added Google (Gemini 3.x), Mistral, and AWS Bedrock
as inference providers. Bedrock serves Claude Opus 4.6 and Sonnet 4.6
via AWS, with proper credential handling following the existing S3/SES
pattern
- **InferenceProvider/ModelFamily split**: Refactored `ModelProvider`
into two orthogonal enums — `InferenceProvider` (who serves the model:
auth, SDK, metadata format) and `ModelFamily` (who created it: token
counting semantics). This eliminates growing `||` chains for token
normalization checks like `excludesCachedTokens`
- **Billing improvements**: Reasoning tokens charged at output rate,
cache token discounts applied accurately, real errors thrown to Sentry
on billing failures

## Test plan

- [x] All existing unit tests updated and passing (23 tests across 3
test files)
- [x] Lint passes for both twenty-server and twenty-front
- [ ] CI checks pass


Made with [Cursor](https://cursor.com)

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Félix Malfait 2026-02-22 14:39:04 +01:00 committed by GitHub
parent a900b4a4b4
commit 41f09c5a4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 1462 additions and 383 deletions

View file

@ -679,12 +679,13 @@ export type CheckUserExistOutput = {
export type ClientAiModelConfig = {
__typename?: 'ClientAIModelConfig';
deprecated?: Maybe<Scalars['Boolean']>;
inputCostPer1kTokensInCredits: Scalars['Float'];
inferenceProvider: InferenceProvider;
inputCostPerMillionTokensInCredits: Scalars['Float'];
label: Scalars['String'];
modelFamily?: Maybe<ModelFamily>;
modelId: Scalars['String'];
nativeCapabilities?: Maybe<NativeModelCapabilities>;
outputCostPer1kTokensInCredits: Scalars['Float'];
provider: ModelProvider;
outputCostPerMillionTokensInCredits: Scalars['Float'];
};
export type ClientConfig = {
@ -1883,6 +1884,18 @@ export enum IndexType {
GIN = 'GIN'
}
export enum InferenceProvider {
ANTHROPIC = 'ANTHROPIC',
BEDROCK = 'BEDROCK',
GOOGLE = 'GOOGLE',
GROQ = 'GROQ',
MISTRAL = 'MISTRAL',
NONE = 'NONE',
OPENAI = 'OPENAI',
OPENAI_COMPATIBLE = 'OPENAI_COMPATIBLE',
XAI = 'XAI'
}
export type InitiateTwoFactorAuthenticationProvisioningOutput = {
__typename?: 'InitiateTwoFactorAuthenticationProvisioningOutput';
uri: Scalars['String'];
@ -2155,12 +2168,11 @@ export type MetadataEventWithQueryIds = {
queryIds: Array<Scalars['String']>;
};
export enum ModelProvider {
export enum ModelFamily {
ANTHROPIC = 'ANTHROPIC',
GROQ = 'GROQ',
NONE = 'NONE',
GOOGLE = 'GOOGLE',
MISTRAL = 'MISTRAL',
OPENAI = 'OPENAI',
OPENAI_COMPATIBLE = 'OPENAI_COMPATIBLE',
XAI = 'XAI'
}

View file

@ -15,7 +15,9 @@ import { Paragraph } from '@tiptap/extension-paragraph';
import { Strike } from '@tiptap/extension-strike';
import { Text } from '@tiptap/extension-text';
import { Underline } from '@tiptap/extension-underline';
import { Dropcursor, Placeholder, UndoRedo } from '@tiptap/extensions';
import { Dropcursor } from '@tiptap/extensions/drop-cursor';
import { Placeholder } from '@tiptap/extensions/placeholder';
import { UndoRedo } from '@tiptap/extensions/undo-redo';
import { type Editor, useEditor } from '@tiptap/react';
import { marked } from 'marked';
import { type DependencyList, useMemo } from 'react';

View file

@ -3,7 +3,7 @@ import { Document } from '@tiptap/extension-document';
import { HardBreak } from '@tiptap/extension-hard-break';
import { Paragraph } from '@tiptap/extension-paragraph';
import { Text } from '@tiptap/extension-text';
import { Placeholder } from '@tiptap/extensions';
import { Placeholder } from '@tiptap/extensions/placeholder';
import { useEditor } from '@tiptap/react';
import { useCallback, useMemo } from 'react';
import { isDefined } from 'twenty-shared/utils';

View file

@ -4,6 +4,7 @@ import { type SelectOption } from 'twenty-ui/input';
import { DEFAULT_FAST_MODEL } from '@/ai/constants/DefaultFastModel';
import { DEFAULT_SMART_MODEL } from '@/ai/constants/DefaultSmartModel';
import { useRecoilValueV2 } from '@/ui/utilities/state/jotai/hooks/useRecoilValueV2';
import { MODEL_FAMILY_CONFIG } from '~/pages/settings/ai/constants/SettingsAiModelProviders';
export const useAiModelOptions = (
includeDeprecated = false,
@ -18,11 +19,21 @@ export const useAiModelOptions = (
model.modelId === DEFAULT_FAST_MODEL ||
model.modelId === DEFAULT_SMART_MODEL
? model.label
: `${model.label} (${model.provider})`,
: `${model.label} (${getModelFamilyLabel(model.modelFamily) ?? model.inferenceProvider})`,
}))
.sort((a, b) => a.label.localeCompare(b.label));
};
// Resolve a human-readable label for a model family key, preferring the
// configured label and falling back to the raw key when unknown.
const getModelFamilyLabel = (
  modelFamily: string | null | undefined,
): string | undefined => {
  if (modelFamily == null || modelFamily === '') {
    return undefined;
  }

  const configuredLabel = MODEL_FAMILY_CONFIG[modelFamily]?.label;

  return configuredLabel || modelFamily;
};
export const useAiModelLabel = (
modelId: string | undefined,
includeProvider = true,
@ -47,5 +58,5 @@ export const useAiModelLabel = (
return model.label;
}
return `${model.label} (${model.provider})`;
return `${model.label} (${getModelFamilyLabel(model.modelFamily) ?? model.inferenceProvider})`;
};

View file

@ -6,7 +6,8 @@ import { Extension } from '@tiptap/core';
import Document from '@tiptap/extension-document';
import Paragraph from '@tiptap/extension-paragraph';
import Text from '@tiptap/extension-text';
import { Placeholder, UndoRedo } from '@tiptap/extensions';
import { Placeholder } from '@tiptap/extensions/placeholder';
import { UndoRedo } from '@tiptap/extensions/undo-redo';
import { Plugin, PluginKey } from '@tiptap/pm/state';
import { type Editor, useEditor } from '@tiptap/react';
import { isDefined } from 'twenty-shared/utils';

View file

@ -4,7 +4,8 @@ import Document from '@tiptap/extension-document';
import HardBreak from '@tiptap/extension-hard-break';
import Paragraph from '@tiptap/extension-paragraph';
import Text from '@tiptap/extension-text';
import { Placeholder, UndoRedo } from '@tiptap/extensions';
import { Placeholder } from '@tiptap/extensions/placeholder';
import { UndoRedo } from '@tiptap/extensions/undo-redo';
import { AllSelection, TextSelection } from '@tiptap/pm/state';
import { type Editor, useEditor } from '@tiptap/react';
import { isDefined, parseJson } from 'twenty-shared/utils';

View file

@ -12,7 +12,7 @@ import { t } from '@lingui/core/macro';
import { H2Title, IconBolt } from 'twenty-ui/display';
import { Card, Section } from 'twenty-ui/layout';
import { useUpdateWorkspaceMutation } from '~/generated-metadata/graphql';
import { PROVIDER_CONFIG } from '~/pages/settings/ai/constants/SettingsAiModelProviders';
import { MODEL_FAMILY_CONFIG } from '~/pages/settings/ai/constants/SettingsAiModelProviders';
const VIRTUAL_MODEL_IDS: Set<string> = new Set([
DEFAULT_SMART_MODEL,
@ -39,7 +39,11 @@ export const SettingsAIModelsTab = () => {
.map((model) => ({
value: model.modelId,
label: model.label,
Icon: PROVIDER_CONFIG[model.provider.toUpperCase()]?.Icon,
Icon: (model.modelFamily
? (MODEL_FAMILY_CONFIG[model.modelFamily] ??
MODEL_FAMILY_CONFIG.FALLBACK)
: MODEL_FAMILY_CONFIG.FALLBACK
).Icon,
}));
const handleDefaultModelChange = async (value: string) => {

View file

@ -1,7 +1,6 @@
import {
IconBrandAnthropic,
IconBrandGoogle,
IconBrandGroq,
IconBrandMistral,
IconBrandOpenai,
IconBrandXai,
@ -9,15 +8,16 @@ import {
type IconComponent,
} from 'twenty-ui/display';
export const PROVIDER_CONFIG: Record<
// Keyed by the server-side ModelFamily enum values (lowercase) since
// the client config is fetched via REST, not GraphQL.
export const MODEL_FAMILY_CONFIG: Record<
string,
{ label: string; Icon: IconComponent }
> = {
OPENAI: { label: 'OpenAI', Icon: IconBrandOpenai },
ANTHROPIC: { label: 'Anthropic', Icon: IconBrandAnthropic },
XAI: { label: 'xAI', Icon: IconBrandXai },
GOOGLE: { label: 'Google', Icon: IconBrandGoogle },
GROQ: { label: 'Groq', Icon: IconBrandGroq },
MISTRAL: { label: 'Mistral', Icon: IconBrandMistral },
NONE: { label: '', Icon: IconRobot },
openai: { label: 'OpenAI', Icon: IconBrandOpenai },
anthropic: { label: 'Anthropic', Icon: IconBrandAnthropic },
xai: { label: 'xAI', Icon: IconBrandXai },
google: { label: 'Google', Icon: IconBrandGoogle },
mistral: { label: 'Mistral', Icon: IconBrandMistral },
FALLBACK: { label: '', Icon: IconRobot },
};

View file

@ -15,8 +15,11 @@
"typeorm": "../../node_modules/typeorm/.bin/typeorm"
},
"dependencies": {
"@ai-sdk/amazon-bedrock": "^3.0.83",
"@ai-sdk/anthropic": "^2.0.17",
"@ai-sdk/google": "^2.0.54",
"@ai-sdk/groq": "^2.0.34",
"@ai-sdk/mistral": "^2.0.28",
"@ai-sdk/openai": "^2.0.30",
"@ai-sdk/provider-utils": "^3.0.9",
"@ai-sdk/xai": "^2.0.19",

View file

@ -4,8 +4,9 @@ import { SupportDriver } from 'src/engine/core-modules/twenty-config/interfaces/
import { ClientConfigService } from 'src/engine/core-modules/client-config/services/client-config.service';
import {
InferenceProvider,
ModelFamily,
type ModelId,
ModelProvider,
} from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
import { ClientConfigController } from './client-config.controller';
@ -52,9 +53,10 @@ describe('ClientConfigController', () => {
{
modelId: 'gpt-4o' as ModelId,
label: 'GPT-4o',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCredits: 2.5,
outputCostPer1kTokensInCredits: 10.0,
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokensInCredits: 2500000,
outputCostPerMillionTokensInCredits: 10000000,
},
],
authProviders: {

View file

@ -7,16 +7,21 @@ import { CaptchaDriverType } from 'src/engine/core-modules/captcha/interfaces';
import { FeatureFlagKey } from 'src/engine/core-modules/feature-flag/enums/feature-flag-key.enum';
import { AuthProvidersDTO } from 'src/engine/core-modules/workspace/dtos/public-workspace-data-output';
import {
InferenceProvider,
ModelFamily,
ModelId,
ModelProvider,
} from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
registerEnumType(FeatureFlagKey, {
name: 'FeatureFlagKey',
});
registerEnumType(ModelProvider, {
name: 'ModelProvider',
registerEnumType(InferenceProvider, {
name: 'InferenceProvider',
});
registerEnumType(ModelFamily, {
name: 'ModelFamily',
});
@ObjectType()
@ -36,14 +41,17 @@ export class ClientAIModelConfig {
@Field(() => String)
label: string;
@Field(() => ModelProvider)
provider: ModelProvider;
@Field(() => ModelFamily, { nullable: true })
modelFamily?: ModelFamily;
@Field(() => InferenceProvider)
inferenceProvider: InferenceProvider;
@Field(() => Number)
inputCostPer1kTokensInCredits: number;
inputCostPerMillionTokensInCredits: number;
@Field(() => Number)
outputCostPer1kTokensInCredits: number;
outputCostPerMillionTokensInCredits: number;
@Field(() => NativeModelCapabilities, { nullable: true })
nativeCapabilities?: NativeModelCapabilities;

View file

@ -12,12 +12,12 @@ import {
import { DomainServerConfigService } from 'src/engine/core-modules/domain/domain-server-config/services/domain-server-config.service';
import { PUBLIC_FEATURE_FLAGS } from 'src/engine/core-modules/feature-flag/constants/public-feature-flag.const';
import { TwentyConfigService } from 'src/engine/core-modules/twenty-config/twenty-config.service';
import { convertCentsToBillingCredits } from 'src/engine/metadata-modules/ai/ai-billing/utils/convert-cents-to-billing-credits.util';
import { convertDollarsToBillingCredits } from 'src/engine/metadata-modules/ai/ai-billing/utils/convert-dollars-to-billing-credits.util';
import {
AI_MODELS,
DEFAULT_FAST_MODEL,
DEFAULT_SMART_MODEL,
ModelProvider,
InferenceProvider,
} from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
import { AiModelRegistryService } from 'src/engine/metadata-modules/ai/ai-models/services/ai-model-registry.service';
@ -54,16 +54,17 @@ export class ClientConfigService {
return {
modelId: registeredModel.modelId,
label: builtInModel?.label || registeredModel.modelId,
provider: registeredModel.provider,
modelFamily: builtInModel?.modelFamily,
inferenceProvider: registeredModel.inferenceProvider,
nativeCapabilities: builtInModel?.nativeCapabilities,
inputCostPer1kTokensInCredits: builtInModel
? convertCentsToBillingCredits(
builtInModel.inputCostPer1kTokensInCents,
inputCostPerMillionTokensInCredits: builtInModel
? convertDollarsToBillingCredits(
builtInModel.inputCostPerMillionTokens,
)
: 0,
outputCostPer1kTokensInCredits: builtInModel
? convertCentsToBillingCredits(
builtInModel.outputCostPer1kTokensInCents,
outputCostPerMillionTokensInCredits: builtInModel
? convertDollarsToBillingCredits(
builtInModel.outputCostPerMillionTokens,
)
: 0,
deprecated: builtInModel?.deprecated,
@ -96,16 +97,16 @@ export class ClientConfigService {
{
modelId: DEFAULT_SMART_MODEL,
label: `Smart (${defaultPerformanceModelLabel})`,
provider: ModelProvider.NONE,
inputCostPer1kTokensInCredits: 0,
outputCostPer1kTokensInCredits: 0,
inferenceProvider: InferenceProvider.NONE,
inputCostPerMillionTokensInCredits: 0,
outputCostPerMillionTokensInCredits: 0,
},
{
modelId: DEFAULT_FAST_MODEL,
label: `Fast (${defaultSpeedModelLabel})`,
provider: ModelProvider.NONE,
inputCostPer1kTokensInCredits: 0,
outputCostPer1kTokensInCredits: 0,
inferenceProvider: InferenceProvider.NONE,
inputCostPerMillionTokensInCredits: 0,
outputCostPerMillionTokensInCredits: 0,
},
);
}

View file

@ -1196,7 +1196,7 @@ export class ConfigVariables {
})
@IsOptional()
DEFAULT_AI_SPEED_MODEL_ID =
'gpt-4.1-mini,claude-haiku-4-5-20251001,grok-3-mini';
'gpt-5-mini,claude-haiku-4-5-20251001,gemini-3-flash-preview,grok-4-1-fast-reasoning,mistral-large-latest';
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
@ -1205,7 +1205,8 @@ export class ConfigVariables {
type: ConfigVariableType.STRING,
})
@IsOptional()
DEFAULT_AI_PERFORMANCE_MODEL_ID = 'gpt-4.1,claude-sonnet-4-5-20250929,grok-4';
DEFAULT_AI_PERFORMANCE_MODEL_ID =
'gpt-5.2,claude-sonnet-4-6,gemini-3.1-pro-preview,grok-4,mistral-large-latest';
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
@ -1271,6 +1272,60 @@ export class ConfigVariables {
@IsOptional()
GROQ_API_KEY: string;
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
isSensitive: true,
description: 'API key for Google AI (Gemini) integration',
type: ConfigVariableType.STRING,
})
@IsOptional()
GOOGLE_API_KEY: string;
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
isSensitive: true,
description: 'API key for Mistral AI integration',
type: ConfigVariableType.STRING,
})
@IsOptional()
MISTRAL_API_KEY: string;
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
description: 'AWS region for Bedrock integration (e.g., us-east-1)',
type: ConfigVariableType.STRING,
})
@IsAWSRegion()
@IsOptional()
AWS_BEDROCK_REGION: AwsRegion;
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
isSensitive: true,
description: 'AWS access key ID for Bedrock authentication',
type: ConfigVariableType.STRING,
})
@IsOptional()
AWS_BEDROCK_ACCESS_KEY_ID: string;
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
isSensitive: true,
description: 'AWS secret access key for Bedrock authentication',
type: ConfigVariableType.STRING,
})
@IsOptional()
AWS_BEDROCK_SECRET_ACCESS_KEY: string;
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.LLM,
isSensitive: true,
description: 'AWS session token for Bedrock (for IAM role-based auth)',
type: ConfigVariableType.STRING,
})
@IsOptional()
AWS_BEDROCK_SESSION_TOKEN: string;
@ConfigVariablesMetadata({
group: ConfigVariablesGroup.SERVER_CONFIG,
description: 'Enable or disable multi-workspace support',

View file

@ -19,6 +19,7 @@ import { type WorkspaceAuthContext } from 'src/engine/core-modules/auth/types/wo
import { ToolCategory } from 'src/engine/core-modules/tool-provider/enums/tool-category.enum';
import { ToolRegistryService } from 'src/engine/core-modules/tool-provider/services/tool-registry.service';
import { type AgentExecutionResult } from 'src/engine/metadata-modules/ai/ai-agent-execution/types/agent-execution-result.type';
import { extractCacheCreationTokensFromSteps } from 'src/engine/metadata-modules/ai/ai-billing/utils/extract-cache-creation-tokens.util';
import {
AgentException,
AgentExceptionCode,
@ -185,6 +186,10 @@ export class AgentAsyncExecutorService {
},
});
const cacheCreationTokens = extractCacheCreationTokensFromSteps(
textResponse.steps,
);
const agentSchema =
agent?.responseFormat?.type === 'json'
? agent.responseFormat.schema
@ -194,6 +199,7 @@ export class AgentAsyncExecutorService {
return {
result: { response: textResponse.text },
usage: textResponse.usage,
cacheCreationTokens,
};
}
@ -222,6 +228,7 @@ export class AgentAsyncExecutorService {
(textResponse.usage?.totalTokens ?? 0) +
(output.usage?.totalTokens ?? 0),
},
cacheCreationTokens,
};
} catch (error) {
if (error instanceof AgentException) {

View file

@ -3,4 +3,5 @@ import { type LanguageModelUsage } from 'ai';
export interface AgentExecutionResult {
result: object;
usage: LanguageModelUsage;
cacheCreationTokens: number;
}

View file

@ -3,12 +3,40 @@ import { Test, type TestingModule } from '@nestjs/testing';
import { BILLING_FEATURE_USED } from 'src/engine/core-modules/billing/constants/billing-feature-used.constant';
import { BillingMeterEventName } from 'src/engine/core-modules/billing/enums/billing-meter-event-names';
import { AIBillingService } from 'src/engine/metadata-modules/ai/ai-billing/services/ai-billing.service';
import {
InferenceProvider,
ModelFamily,
} from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models-types.const';
import { AiModelRegistryService } from 'src/engine/metadata-modules/ai/ai-models/services/ai-model-registry.service';
import { WorkspaceEventEmitter } from 'src/engine/workspace-event-emitter/workspace-event-emitter';
describe('AIBillingService', () => {
let service: AIBillingService;
let mockWorkspaceEventEmitter: jest.Mocked<WorkspaceEventEmitter>;
let mockAiModelRegistryService: jest.Mocked<
Pick<AiModelRegistryService, 'getEffectiveModelConfig'>
>;
const openaiModelConfig = {
modelId: 'gpt-4o',
label: 'GPT-4o',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 2.5,
outputCostPerMillionTokens: 10.0,
cachedInputCostPerMillionTokens: 1.25,
};
const anthropicModelConfig = {
modelId: 'claude-sonnet-4-5-20250929',
label: 'Claude Sonnet 4.5',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 3.0,
outputCostPerMillionTokens: 15.0,
cachedInputCostPerMillionTokens: 0.3,
cacheCreationCostPerMillionTokens: 3.75,
};
const mockTokenUsage = {
inputTokens: 1000,
@ -22,13 +50,7 @@ describe('AIBillingService', () => {
};
const mockAiModelRegistryMethods = {
getEffectiveModelConfig: jest.fn().mockReturnValue({
modelId: 'gpt-4o',
label: 'GPT-4o',
provider: 'openai',
inputCostPer1kTokensInCents: 0.25,
outputCostPer1kTokensInCents: 1.0,
}),
getEffectiveModelConfig: jest.fn().mockReturnValue(openaiModelConfig),
};
const module: TestingModule = await Test.createTestingModule({
@ -47,6 +69,7 @@ describe('AIBillingService', () => {
service = module.get<AIBillingService>(AIBillingService);
mockWorkspaceEventEmitter = module.get(WorkspaceEventEmitter);
mockAiModelRegistryService = module.get(AiModelRegistryService);
});
it('should be defined', () => {
@ -54,40 +77,190 @@ describe('AIBillingService', () => {
});
describe('calculateCost', () => {
it('should calculate cost correctly for valid model and token usage', async () => {
const costInCents = await service.calculateCost('gpt-4o', mockTokenUsage);
it('should calculate cost correctly for basic token usage', () => {
const costInDollars = service.calculateCost('gpt-4o', {
usage: mockTokenUsage,
});
// Expected: (1000/1000 * 0.25) + (500/1000 * 1.0) = 0.25 + 0.5 = 0.75 cents
expect(costInCents).toBe(0.75);
// (1000/1M * 2.5) + (500/1M * 10.0) = 0.0025 + 0.005 = 0.0075
expect(costInDollars).toBeCloseTo(0.0075);
});
it('should calculate cost correctly with different token usage', async () => {
const differentTokenUsage = {
inputTokens: 2000,
outputTokens: 1000,
totalTokens: 3000,
};
it('should apply cached rate for OpenAI cached tokens', () => {
const costInDollars = service.calculateCost('gpt-4o', {
usage: {
inputTokens: 1000,
outputTokens: 500,
totalTokens: 1500,
cachedInputTokens: 600,
},
});
const costInCents = await service.calculateCost(
'gpt-4o',
differentTokenUsage,
// OpenAI: inputTokens includes cached, so adjusted = 1000 - 600 = 400
// inputCost = (400/1M * 2.5) = 0.001
// cachedCost = (600/1M * 1.25) = 0.00075
// outputCost = (500/1M * 10.0) = 0.005
// total = 0.00675
expect(costInDollars).toBeCloseTo(0.00675);
});
it('should not subtract cached tokens from input for Anthropic', () => {
mockAiModelRegistryService.getEffectiveModelConfig.mockReturnValue(
anthropicModelConfig as ReturnType<
AiModelRegistryService['getEffectiveModelConfig']
>,
);
// Expected: (2000/1000 * 0.25) + (1000/1000 * 1.0) = 0.5 + 1.0 = 1.5 cents
expect(costInCents).toBe(1.5);
const costInDollars = service.calculateCost(
'claude-sonnet-4-5-20250929',
{
usage: {
inputTokens: 400,
outputTokens: 500,
totalTokens: 900,
cachedInputTokens: 600,
},
cacheCreationTokens: 200,
},
);
// Anthropic: inputTokens already excludes cached
// inputCost = (400/1M * 3.0) = 0.0012
// cachedCost = (600/1M * 0.3) = 0.00018
// cacheCreationCost = (200/1M * 3.75) = 0.00075
// outputCost = (500/1M * 15.0) = 0.0075
// total = 0.00963
expect(costInDollars).toBeCloseTo(0.00963);
});
it('should charge reasoning tokens at the output rate without double-counting for OpenAI', () => {
const costInDollars = service.calculateCost('gpt-4o', {
usage: {
inputTokens: 1000,
outputTokens: 500,
totalTokens: 2000,
reasoningTokens: 500,
},
});
// OpenAI: outputTokens (500) already includes reasoningTokens (500)
// adjustedOutput = 500 - 500 = 0
// inputCost = (1000/1M * 2.5) = 0.0025
// outputCost = (0/1M * 10.0) = 0.0
// reasoningCost = (500/1M * 10.0) = 0.005
// total = 0.0075
expect(costInDollars).toBeCloseTo(0.0075);
});
it('should fall back to input rate when cachedInputCostPerMillionTokens is undefined', () => {
mockAiModelRegistryService.getEffectiveModelConfig.mockReturnValue({
...openaiModelConfig,
cachedInputCostPerMillionTokens: undefined,
} as ReturnType<AiModelRegistryService['getEffectiveModelConfig']>);
const costInDollars = service.calculateCost('gpt-4o', {
usage: {
inputTokens: 1000,
outputTokens: 500,
totalTokens: 1500,
cachedInputTokens: 600,
},
});
// Falls back to full input rate for cached tokens
// adjusted = 1000 - 600 = 400
// inputCost = (400/1M * 2.5) = 0.001
// cachedCost = (600/1M * 2.5) = 0.0015 (fallback to input rate)
// outputCost = (500/1M * 10.0) = 0.005
// total = 0.0075
expect(costInDollars).toBeCloseTo(0.0075);
});
it('should use long context pricing when input exceeds threshold', () => {
const anthropicWithLongContext = {
...anthropicModelConfig,
longContextCost: {
inputCostPerMillionTokens: 6.0,
outputCostPerMillionTokens: 22.5,
cachedInputCostPerMillionTokens: 0.6,
cacheCreationCostPerMillionTokens: 7.5,
thresholdTokens: 200_000,
},
};
mockAiModelRegistryService.getEffectiveModelConfig.mockReturnValue(
anthropicWithLongContext as ReturnType<
AiModelRegistryService['getEffectiveModelConfig']
>,
);
const costInDollars = service.calculateCost(
'claude-sonnet-4-5-20250929',
{
usage: {
inputTokens: 150_000,
outputTokens: 1000,
totalTokens: 251_000,
cachedInputTokens: 100_000,
},
},
);
// Anthropic: total input = 150k + 100k + 0 = 250k > 200k threshold
// Uses long context rates
// inputCost = (150_000/1M * 6.0) = 0.9
// cachedCost = (100_000/1M * 0.6) = 0.06
// outputCost = (1000/1M * 22.5) = 0.0225
// total = 0.9825
expect(costInDollars).toBeCloseTo(0.9825);
});
it('should use standard pricing when input is below threshold', () => {
const anthropicWithLongContext = {
...anthropicModelConfig,
longContextCost: {
inputCostPerMillionTokens: 6.0,
outputCostPerMillionTokens: 22.5,
cachedInputCostPerMillionTokens: 0.6,
cacheCreationCostPerMillionTokens: 7.5,
thresholdTokens: 200_000,
},
};
mockAiModelRegistryService.getEffectiveModelConfig.mockReturnValue(
anthropicWithLongContext as ReturnType<
AiModelRegistryService['getEffectiveModelConfig']
>,
);
const costInDollars = service.calculateCost(
'claude-sonnet-4-5-20250929',
{
usage: {
inputTokens: 50_000,
outputTokens: 1000,
totalTokens: 51_000,
},
},
);
// Total input = 50k < 200k threshold -> standard rates
// inputCost = (50_000/1M * 3.0) = 0.15
// outputCost = (1000/1M * 15.0) = 0.015
// total = 0.165
expect(costInDollars).toBeCloseTo(0.165);
});
});
describe('calculateAndBillUsage', () => {
it('should calculate cost and emit billing event when model exists', async () => {
await service.calculateAndBillUsage(
it('should calculate cost and emit billing event when model exists', () => {
service.calculateAndBillUsage(
'gpt-4o',
mockTokenUsage,
{ usage: mockTokenUsage },
'workspace-1',
'agent-id-123',
);
// Expected credits: (0.75 cents / 100) * 1000 = 0.0075 * 1000 = 7.5 credits, rounded to 8
expect(
mockWorkspaceEventEmitter.emitCustomBatchEvent,
).toHaveBeenCalledWith(

View file

@ -1,15 +1,21 @@
import { Injectable, Logger } from '@nestjs/common';
import { LanguageModelUsage } from 'ai';
import { type LanguageModelUsage } from 'ai';
import { BILLING_FEATURE_USED } from 'src/engine/core-modules/billing/constants/billing-feature-used.constant';
import { BillingMeterEventName } from 'src/engine/core-modules/billing/enums/billing-meter-event-names';
import { type BillingUsageEvent } from 'src/engine/core-modules/billing/types/billing-usage-event.type';
import { convertCentsToBillingCredits } from 'src/engine/metadata-modules/ai/ai-billing/utils/convert-cents-to-billing-credits.util';
import { type ModelId } from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
import { computeCostBreakdown } from 'src/engine/metadata-modules/ai/ai-billing/utils/compute-cost-breakdown.util';
import { convertDollarsToBillingCredits } from 'src/engine/metadata-modules/ai/ai-billing/utils/convert-dollars-to-billing-credits.util';
import { type ModelId } from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models-types.const';
import { AiModelRegistryService } from 'src/engine/metadata-modules/ai/ai-models/services/ai-model-registry.service';
import { WorkspaceEventEmitter } from 'src/engine/workspace-event-emitter/workspace-event-emitter';
export type BillingUsageInput = {
usage: LanguageModelUsage;
cacheCreationTokens?: number;
};
@Injectable()
export class AIBillingService {
private readonly logger = new Logger(AIBillingService.name);
@ -19,38 +25,45 @@ export class AIBillingService {
private readonly aiModelRegistryService: AiModelRegistryService,
) {}
async calculateCost(
modelId: ModelId,
usage: LanguageModelUsage,
): Promise<number> {
calculateCost(modelId: ModelId, billingInput: BillingUsageInput): number {
const model = this.aiModelRegistryService.getEffectiveModelConfig(modelId);
if (!model) {
throw new Error(`AI model with id ${modelId} not found`);
}
const inputCost =
((usage.inputTokens ?? 0) / 1000) * model.inputCostPer1kTokensInCents;
const outputCost =
((usage.outputTokens ?? 0) / 1000) * model.outputCostPer1kTokensInCents;
const { usage, cacheCreationTokens = 0 } = billingInput;
const totalCost = inputCost + outputCost;
const breakdown = computeCostBreakdown(model, {
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
reasoningTokens: usage.reasoningTokens,
cachedInputTokens: usage.cachedInputTokens,
cacheCreationTokens,
});
this.logger.log(
`Calculated cost for model ${modelId}: ${totalCost} cents (input: ${inputCost}, output: ${outputCost})`,
`Cost for ${model.modelId}: $${breakdown.totalCostInDollars.toFixed(6)} ` +
`(input: ${breakdown.tokenCounts.adjustedInputTokens}, ` +
`cached: ${breakdown.tokenCounts.cachedInputTokens}, ` +
`cacheCreation: ${breakdown.tokenCounts.cacheCreationTokens}, ` +
`output: ${breakdown.tokenCounts.adjustedOutputTokens}, ` +
`reasoning: ${breakdown.tokenCounts.reasoningTokens})`,
);
return totalCost;
return breakdown.totalCostInDollars;
}
async calculateAndBillUsage(
calculateAndBillUsage(
modelId: ModelId,
usage: LanguageModelUsage,
billingInput: BillingUsageInput,
workspaceId: string,
agentId?: string | null,
): Promise<void> {
const costInCents = await this.calculateCost(modelId, usage);
const creditsUsed = Math.round(convertCentsToBillingCredits(costInCents));
): void {
const costInDollars = this.calculateCost(modelId, billingInput);
const creditsUsed = Math.round(
convertDollarsToBillingCredits(costInDollars),
);
this.sendAiTokenUsageEvent(workspaceId, creditsUsed, modelId, agentId);
}

View file

@ -0,0 +1,98 @@
import {
type AIModelConfig,
ModelFamily,
} from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models-types.const';
// Raw token counts as reported by the provider. The meaning of
// inputTokens/outputTokens varies by model family — see computeCostBreakdown.
export type TokenUsageInput = {
  inputTokens?: number;
  outputTokens?: number;
  // Internal reasoning ("thinking") tokens; billed at the output rate.
  reasoningTokens?: number;
  // Tokens served from the provider's prompt cache.
  cachedInputTokens?: number;
  // Tokens written into the prompt cache (reported by Anthropic/Bedrock).
  cacheCreationTokens?: number;
};

// Itemized cost result plus the normalized token counts it was derived from.
export type CostBreakdown = {
  totalCostInDollars: number;
  inputCostInDollars: number;
  outputCostInDollars: number;
  tokenCounts: {
    // Input tokens billed at the full input rate (cached tokens removed for
    // families that include them in inputTokens).
    adjustedInputTokens: number;
    // Output tokens billed at the output rate (reasoning tokens removed for
    // families that include them in outputTokens).
    adjustedOutputTokens: number;
    cachedInputTokens: number;
    cacheCreationTokens: number;
    reasoningTokens: number;
    // Full input-side volume; used for the long-context pricing threshold.
    totalInputTokens: number;
  };
};
// Treat missing or non-finite (NaN/Infinity) token counts as zero.
const safeNumber = (value: number | undefined): number => {
  if (value === undefined || !Number.isFinite(value)) {
    return 0;
  }

  return value;
};
// Input token semantics differ by model family:
//   - Anthropic: inputTokens excludes cached and cache creation tokens
//   - OpenAI/xAI/Groq/Google: inputTokens includes cached tokens
// Output token semantics also differ:
//   - Anthropic: outputTokens excludes reasoning (thinking) tokens
//   - OpenAI/xAI/Groq/Google: outputTokens includes reasoning tokens
export const computeCostBreakdown = (
  model: AIModelConfig,
  usage: TokenUsageInput,
): CostBreakdown => {
  const reasoningTokens = safeNumber(usage.reasoningTokens);
  const cachedInputTokens = safeNumber(usage.cachedInputTokens);
  const cacheCreationTokens = safeNumber(usage.cacheCreationTokens);
  const reportedInputTokens = safeNumber(usage.inputTokens);
  const reportedOutputTokens = safeNumber(usage.outputTokens);

  const usesAnthropicTokenSemantics =
    model.modelFamily === ModelFamily.ANTHROPIC;

  // Normalize so the "adjusted" counts always mean tokens billed at the
  // full input/output rate, regardless of the family's reporting style.
  const adjustedInputTokens = usesAnthropicTokenSemantics
    ? reportedInputTokens
    : Math.max(0, reportedInputTokens - cachedInputTokens);
  const adjustedOutputTokens = usesAnthropicTokenSemantics
    ? reportedOutputTokens
    : Math.max(0, reportedOutputTokens - reasoningTokens);

  // Long-context pricing is triggered by the full input-side volume.
  const totalInputTokens =
    reportedInputTokens +
    cacheCreationTokens +
    (usesAnthropicTokenSemantics ? cachedInputTokens : 0);

  const longContextCost = model.longContextCost;
  const rates =
    longContextCost && totalInputTokens > longContextCost.thresholdTokens
      ? longContextCost
      : model;

  const inputRate = rates.inputCostPerMillionTokens;
  const outputRate = rates.outputCostPerMillionTokens;

  const perMillion = (tokens: number, ratePerMillion: number): number =>
    (tokens / 1_000_000) * ratePerMillion;

  // Cached/cache-creation tokens fall back to the full input rate when the
  // model has no discounted rate configured.
  const inputCostInDollars =
    perMillion(adjustedInputTokens, inputRate) +
    perMillion(
      cachedInputTokens,
      rates.cachedInputCostPerMillionTokens ?? inputRate,
    ) +
    perMillion(
      cacheCreationTokens,
      rates.cacheCreationCostPerMillionTokens ?? inputRate,
    );

  // Reasoning tokens are billed at the output rate on top of visible output.
  const outputCostInDollars =
    perMillion(adjustedOutputTokens, outputRate) +
    perMillion(reasoningTokens, outputRate);

  return {
    totalCostInDollars: inputCostInDollars + outputCostInDollars,
    inputCostInDollars,
    outputCostInDollars,
    tokenCounts: {
      adjustedInputTokens,
      adjustedOutputTokens,
      cachedInputTokens,
      cacheCreationTokens,
      reasoningTokens,
      totalInputTokens,
    },
  };
};

View file

@ -1,8 +0,0 @@
import { DOLLAR_TO_CREDIT_MULTIPLIER } from 'src/engine/metadata-modules/ai/ai-billing/constants/dollar-to-credit-multiplier';
/**
 * Converts a cost expressed in cents into billing credits.
 *
 * One dollar maps to DOLLAR_TO_CREDIT_MULTIPLIER credits (1_000_000,
 * so $0.000_001 = 1 credit). Example: 1 cent = (1 / 100) * 1_000_000
 * = 10_000 credits.
 */
export const convertCentsToBillingCredits = (cents: number): number => {
  const dollars = cents / 100;

  return dollars * DOLLAR_TO_CREDIT_MULTIPLIER;
};

View file

@ -0,0 +1,4 @@
import { DOLLAR_TO_CREDIT_MULTIPLIER } from 'src/engine/metadata-modules/ai/ai-billing/constants/dollar-to-credit-multiplier';
/**
 * Converts a dollar amount into billing credits by applying the
 * dollar-to-credit multiplier.
 */
export const convertDollarsToBillingCredits = (dollars: number): number => {
  return dollars * DOLLAR_TO_CREDIT_MULTIPLIER;
};

View file

@ -0,0 +1,28 @@
type ProviderMetadataLike = Record<string, Record<string, unknown> | undefined>;

// Anthropic and Bedrock report cache creation tokens in provider metadata
// rather than the standard usage object. Anthropic exposes it as a
// top-level camelCase field; Bedrock nests it under `usage`.

// Reads a numeric field from an unknown-shaped record. Returns undefined
// (rather than propagating strings/NaN into billing math) when the field
// is absent or not a finite number.
const readNumberField = (
  record: Record<string, unknown> | undefined,
  key: string,
): number | undefined => {
  const value = record?.[key];

  return typeof value === 'number' && Number.isFinite(value)
    ? value
    : undefined;
};

/**
 * Sums cache creation tokens across all stream steps.
 */
export const extractCacheCreationTokensFromSteps = (
  steps: Array<{ providerMetadata?: ProviderMetadataLike }>,
): number => {
  return steps.reduce((sum, step) => {
    return sum + extractCacheCreationTokens(step.providerMetadata);
  }, 0);
};

/**
 * Extracts cache creation tokens from a single step's provider metadata.
 *
 * Checks the Anthropic field first, then the Bedrock field; returns 0 when
 * neither is present or when the value is not a finite number.
 */
export const extractCacheCreationTokens = (
  providerMetadata: ProviderMetadataLike | undefined,
): number => {
  const anthropicTokens = readNumberField(
    providerMetadata?.anthropic,
    'cacheCreationInputTokens',
  );

  const bedrockUsage = providerMetadata?.bedrock?.usage;
  const bedrockTokens = readNumberField(
    typeof bedrockUsage === 'object' && bedrockUsage !== null
      ? (bedrockUsage as Record<string, unknown>)
      : undefined,
    'cacheWriteInputTokens',
  );

  return anthropicTokens ?? bedrockTokens ?? 0;
};

View file

@ -16,8 +16,11 @@ import {
AgentExceptionCode,
} from 'src/engine/metadata-modules/ai/ai-agent/agent.exception';
import { type BrowsingContextType } from 'src/engine/metadata-modules/ai/ai-agent/types/browsingContext.type';
import { convertCentsToBillingCredits } from 'src/engine/metadata-modules/ai/ai-billing/utils/convert-cents-to-billing-credits.util';
import { computeCostBreakdown } from 'src/engine/metadata-modules/ai/ai-billing/utils/compute-cost-breakdown.util';
import { convertDollarsToBillingCredits } from 'src/engine/metadata-modules/ai/ai-billing/utils/convert-dollars-to-billing-credits.util';
import { extractCacheCreationTokens } from 'src/engine/metadata-modules/ai/ai-billing/utils/extract-cache-creation-tokens.util';
import { toDisplayCredits } from 'src/engine/core-modules/billing/utils/to-display-credits.util';
import { type AIModelConfig } from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models-types.const';
import { AgentChatThreadEntity } from 'src/engine/metadata-modules/ai/ai-chat/entities/agent-chat-thread.entity';
import { AgentChatService } from './agent-chat.service';
@ -129,20 +132,16 @@ export class AgentChatStreamingService {
if (part.type === 'finish-step') {
const stepInput = part.usage?.inputTokens ?? 0;
const stepCached = part.usage?.cachedInputTokens ?? 0;
// Anthropic excludes cached/created tokens from input_tokens,
// reporting them separately as cache_creation_input_tokens
const anthropicUsage = (
part as {
providerMetadata?: {
anthropic?: {
usage?: { cache_creation_input_tokens?: number };
};
};
}
).providerMetadata?.anthropic?.usage;
const stepCacheCreation =
anthropicUsage?.cache_creation_input_tokens ?? 0;
const stepCacheCreation = extractCacheCreationTokens(
(
part as {
providerMetadata?: Record<
string,
Record<string, unknown> | undefined
>;
}
).providerMetadata,
);
totalCacheCreationTokens += stepCacheCreation;
lastStepConversationSize =
@ -150,31 +149,16 @@ export class AgentChatStreamingService {
}
if (part.type === 'finish') {
const inputTokens =
(part.totalUsage?.inputTokens ?? 0) +
(part.totalUsage?.cachedInputTokens ?? 0) +
totalCacheCreationTokens;
const outputTokens = part.totalUsage?.outputTokens ?? 0;
const cachedInputTokens =
part.totalUsage?.cachedInputTokens ?? 0;
const inputCostInCents =
(inputTokens / 1000) *
modelConfig.inputCostPer1kTokensInCents;
const outputCostInCents =
(outputTokens / 1000) *
modelConfig.outputCostPer1kTokensInCents;
const inputCredits = Math.round(
convertCentsToBillingCredits(inputCostInCents),
);
const outputCredits = Math.round(
convertCentsToBillingCredits(outputCostInCents),
);
const { inputCredits, outputCredits, tokenCounts } =
computeStreamCosts(
modelConfig,
part.totalUsage,
totalCacheCreationTokens,
);
streamUsage = {
inputTokens,
outputTokens,
inputTokens: tokenCounts.totalInputTokens,
outputTokens: tokenCounts.outputTokens,
inputCredits,
outputCredits,
};
@ -182,9 +166,9 @@ export class AgentChatStreamingService {
return {
createdAt: new Date().toISOString(),
usage: {
inputTokens,
outputTokens,
cachedInputTokens,
inputTokens: tokenCounts.totalInputTokens,
outputTokens: tokenCounts.outputTokens,
cachedInputTokens: tokenCounts.cachedInputTokens,
inputCredits: toDisplayCredits(inputCredits),
outputCredits: toDisplayCredits(outputCredits),
conversationSize: lastStepConversationSize,
@ -254,3 +238,38 @@ export class AgentChatStreamingService {
}
}
}
/**
 * Converts the stream's aggregate token usage into billing credits.
 *
 * Delegates the dollar-cost math to computeCostBreakdown, then rounds each
 * side (input/output) to whole credits and surfaces the token counts the
 * streaming response reports back to the client.
 */
function computeStreamCosts(
  modelConfig: AIModelConfig,
  totalUsage:
    | {
        inputTokens?: number;
        outputTokens?: number;
        cachedInputTokens?: number;
        reasoningTokens?: number;
      }
    | undefined,
  cacheCreationTokens: number,
) {
  const { inputTokens, outputTokens, cachedInputTokens, reasoningTokens } =
    totalUsage ?? {};

  const breakdown = computeCostBreakdown(modelConfig, {
    inputTokens,
    outputTokens,
    cachedInputTokens,
    reasoningTokens,
    cacheCreationTokens,
  });

  const inputCredits = Math.round(
    convertDollarsToBillingCredits(breakdown.inputCostInDollars),
  );
  const outputCredits = Math.round(
    convertDollarsToBillingCredits(breakdown.outputCostInDollars),
  );

  return {
    inputCredits,
    outputCredits,
    tokenCounts: {
      totalInputTokens: breakdown.tokenCounts.totalInputTokens,
      outputTokens: outputTokens ?? 0,
      cachedInputTokens: breakdown.tokenCounts.cachedInputTokens,
    },
  };
}

View file

@ -18,6 +18,7 @@ import { getAppPath, isDefined } from 'twenty-shared/utils';
import { type CodeExecutionStreamEmitter } from 'src/engine/core-modules/tool-provider/interfaces/tool-provider.interface';
import { ExceptionHandlerService } from 'src/engine/core-modules/exception-handler/exception-handler.service';
import { WorkspaceDomainsService } from 'src/engine/core-modules/domain/workspace-domains/services/workspace-domains.service';
import { COMMON_PRELOAD_TOOLS } from 'src/engine/core-modules/tool-provider/constants/common-preload-tools.const';
import { wrapToolsWithOutputSerialization } from 'src/engine/core-modules/tool-provider/output-serialization/wrap-tools-with-output-serialization.util';
@ -36,6 +37,7 @@ import { AGENT_CONFIG } from 'src/engine/metadata-modules/ai/ai-agent/constants/
import { type BrowsingContextType } from 'src/engine/metadata-modules/ai/ai-agent/types/browsingContext.type';
import { repairToolCall } from 'src/engine/metadata-modules/ai/ai-agent/utils/repair-tool-call.util';
import { AIBillingService } from 'src/engine/metadata-modules/ai/ai-billing/services/ai-billing.service';
import { extractCacheCreationTokensFromSteps } from 'src/engine/metadata-modules/ai/ai-billing/utils/extract-cache-creation-tokens.util';
import { SystemPromptBuilderService } from 'src/engine/metadata-modules/ai/ai-chat/services/system-prompt-builder.service';
import {
extractCodeInterpreterFiles,
@ -43,7 +45,7 @@ import {
} from 'src/engine/metadata-modules/ai/ai-chat/utils/extract-code-interpreter-files.util';
import {
type AIModelConfig,
ModelProvider,
InferenceProvider,
} from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
import { AI_TELEMETRY_CONFIG } from 'src/engine/metadata-modules/ai/ai-models/constants/ai-telemetry.const';
import { AiModelRegistryService } from 'src/engine/metadata-modules/ai/ai-models/services/ai-model-registry.service';
@ -74,6 +76,7 @@ export class ChatExecutionService {
private readonly agentActorContextService: AgentActorContextService,
private readonly workspaceDomainsService: WorkspaceDomainsService,
private readonly systemPromptBuilder: SystemPromptBuilderService,
private readonly exceptionHandlerService: ExceptionHandlerService,
) {}
async streamChat({
@ -137,7 +140,7 @@ export class ChatExecutionService {
// These are callable directly AND as fallback through execute_tool.
const directTools: ToolSet = {
...wrapToolsWithOutputSerialization(preloadedTools),
...this.getNativeWebSearchTool(registeredModel.provider),
...this.getNativeWebSearchTool(registeredModel.inferenceProvider),
};
// ToolSet is constant for the entire conversation — no mutation.
@ -192,9 +195,11 @@ export class ChatExecutionService {
role: 'system',
content: systemPrompt,
providerOptions:
registeredModel.provider === ModelProvider.ANTHROPIC
registeredModel.inferenceProvider === InferenceProvider.ANTHROPIC
? { anthropic: { cacheControl: { type: 'ephemeral' } } }
: undefined,
: registeredModel.inferenceProvider === InferenceProvider.BEDROCK
? { bedrock: { cacheControl: { type: 'ephemeral' } } }
: undefined,
};
const stream = streamText({
@ -219,17 +224,19 @@ export class ChatExecutionService {
},
});
stream.usage
.then((usage) => {
Promise.all([stream.usage, stream.steps])
.then(([usage, steps]) => {
const cacheCreationTokens = extractCacheCreationTokensFromSteps(steps);
this.aiBillingService.calculateAndBillUsage(
registeredModel.modelId,
usage,
{ usage, cacheCreationTokens },
workspace.id,
null,
);
})
.catch((error) => {
this.logger.error('Failed to bill usage:', error);
this.exceptionHandlerService.captureExceptions([error]);
});
return {
@ -308,14 +315,28 @@ export class ChatExecutionService {
return context;
}
private getNativeWebSearchTool(provider: ModelProvider): ToolSet {
switch (provider) {
case ModelProvider.ANTHROPIC:
private getNativeWebSearchTool(
inferenceProvider: InferenceProvider,
): ToolSet {
switch (inferenceProvider) {
case InferenceProvider.ANTHROPIC:
return { web_search: anthropic.tools.webSearch_20250305() };
case ModelProvider.OPENAI:
case InferenceProvider.BEDROCK: {
const bedrockProvider =
this.aiModelRegistryService.getBedrockProvider();
if (bedrockProvider) {
return {
web_search:
bedrockProvider.tools.webSearch_20250305() as ToolSet[string],
};
}
return {};
}
case InferenceProvider.OPENAI:
return { web_search: openai.tools.webSearch() };
case ModelProvider.GROQ:
// Type assertion needed due to @ai-sdk/groq tool type mismatch
case InferenceProvider.GROQ:
return {
web_search: groq.tools.browserSearch({}) as ToolSet[string],
};

View file

@ -1,12 +1,23 @@
export enum ModelProvider {
export enum InferenceProvider {
NONE = 'none',
OPENAI = 'openai',
ANTHROPIC = 'anthropic',
BEDROCK = 'bedrock',
GOOGLE = 'google',
MISTRAL = 'mistral',
OPENAI_COMPATIBLE = 'open_ai_compatible',
XAI = 'xai',
GROQ = 'groq',
}
export enum ModelFamily {
OPENAI = 'openai',
ANTHROPIC = 'anthropic',
GOOGLE = 'google',
MISTRAL = 'mistral',
XAI = 'xai',
}
export const DEFAULT_FAST_MODEL = 'default-fast-model' as const;
export const DEFAULT_SMART_MODEL = 'default-smart-model' as const;
@ -14,27 +25,41 @@ export type ModelId =
| typeof DEFAULT_FAST_MODEL
| typeof DEFAULT_SMART_MODEL
// OpenAI models
| 'gpt-5.2'
| 'gpt-5-mini'
| 'gpt-4.1'
| 'gpt-4.1-mini'
| 'gpt-4o'
| 'gpt-4o-mini'
| 'gpt-4-turbo'
| 'gpt-4.1'
| 'gpt-4.1-mini'
| 'o3'
| 'o4-mini'
// Anthropic models
| 'claude-opus-4-6'
| 'claude-sonnet-4-6'
| 'claude-sonnet-4-5-20250929'
| 'claude-haiku-4-5-20251001'
| 'claude-opus-4-5-20251101'
| 'claude-opus-4-20250514'
| 'claude-sonnet-4-20250514'
| 'claude-3-5-haiku-20241022'
| 'claude-opus-4-5-20251101'
| 'claude-sonnet-4-5-20250929'
| 'claude-haiku-4-5-20251001'
// xAI models
| 'grok-3'
| 'grok-3-mini'
| 'grok-4'
| 'grok-4-1-fast-reasoning'
| 'grok-3'
| 'grok-3-mini'
// Google models
| 'gemini-3.1-pro-preview'
| 'gemini-3-flash-preview'
| 'gemini-2.5-pro'
| 'gemini-2.5-flash'
// Bedrock models (Anthropic via AWS)
| 'anthropic.claude-opus-4-6-v1'
| 'anthropic.claude-sonnet-4-6'
// Groq models
| 'openai/gpt-oss-120b'
// Mistral models
| 'mistral-large-latest'
| string; // Allow custom model names
export type SupportedFileType =
@ -48,13 +73,25 @@ export type SupportedFileType =
| 'text/csv'
| 'application/json';
export type LongContextCost = {
inputCostPerMillionTokens: number;
outputCostPerMillionTokens: number;
cachedInputCostPerMillionTokens?: number;
cacheCreationCostPerMillionTokens?: number;
thresholdTokens: number;
};
export interface AIModelConfig {
modelId: ModelId;
label: string;
description: string;
provider: ModelProvider;
inputCostPer1kTokensInCents: number;
outputCostPer1kTokensInCents: number;
modelFamily: ModelFamily;
inferenceProvider: InferenceProvider;
inputCostPerMillionTokens: number;
outputCostPerMillionTokens: number;
cachedInputCostPerMillionTokens?: number;
cacheCreationCostPerMillionTokens?: number;
longContextCost?: LongContextCost;
contextWindowTokens: number;
maxOutputTokens: number;
supportedFileTypes?: SupportedFileType[];

View file

@ -6,21 +6,24 @@ import { AiModelRegistryService } from 'src/engine/metadata-modules/ai/ai-models
import {
AI_MODELS,
DEFAULT_SMART_MODEL,
ModelProvider,
InferenceProvider,
} from './ai-models.const';
describe('AI_MODELS', () => {
it('should have at least one model per provider', () => {
const providers = [
ModelProvider.OPENAI,
ModelProvider.ANTHROPIC,
ModelProvider.XAI,
ModelProvider.GROQ,
it('should have at least one model per inference provider', () => {
const inferenceProviders = [
InferenceProvider.OPENAI,
InferenceProvider.ANTHROPIC,
InferenceProvider.BEDROCK,
InferenceProvider.GOOGLE,
InferenceProvider.XAI,
InferenceProvider.GROQ,
InferenceProvider.MISTRAL,
];
providers.forEach((provider) => {
inferenceProviders.forEach((inferenceProvider) => {
const modelsForProvider = AI_MODELS.filter(
(model) => model.provider === provider,
(model) => model.inferenceProvider === inferenceProvider,
);
expect(modelsForProvider.length).toBeGreaterThan(0);
@ -32,9 +35,10 @@ describe('AI_MODELS', () => {
expect(model.modelId).toBeDefined();
expect(model.label).toBeDefined();
expect(model.description).toBeDefined();
expect(model.provider).toBeDefined();
expect(model.inputCostPer1kTokensInCents).toBeDefined();
expect(model.outputCostPer1kTokensInCents).toBeDefined();
expect(model.modelFamily).toBeDefined();
expect(model.inferenceProvider).toBeDefined();
expect(model.inputCostPerMillionTokens).toBeDefined();
expect(model.outputCostPerMillionTokens).toBeDefined();
expect(model.contextWindowTokens).toBeGreaterThan(0);
expect(model.maxOutputTokens).toBeGreaterThan(0);
});
@ -47,17 +51,21 @@ describe('AI_MODELS', () => {
expect(uniqueModelIds.size).toBe(modelIds.length);
});
it('should have at least one non-deprecated model per provider', () => {
const providers = [
ModelProvider.OPENAI,
ModelProvider.ANTHROPIC,
ModelProvider.XAI,
ModelProvider.GROQ,
it('should have at least one non-deprecated model per inference provider', () => {
const inferenceProviders = [
InferenceProvider.OPENAI,
InferenceProvider.ANTHROPIC,
InferenceProvider.BEDROCK,
InferenceProvider.GOOGLE,
InferenceProvider.XAI,
InferenceProvider.GROQ,
InferenceProvider.MISTRAL,
];
providers.forEach((provider) => {
inferenceProviders.forEach((inferenceProvider) => {
const activeModelsForProvider = AI_MODELS.filter(
(model) => model.provider === provider && !model.deprecated,
(model) =>
model.inferenceProvider === inferenceProvider && !model.deprecated,
);
expect(activeModelsForProvider.length).toBeGreaterThan(0);
@ -88,35 +96,35 @@ describe('AiModelRegistryService', () => {
});
it('should return effective model config for DEFAULT_SMART_MODEL', () => {
MOCK_CONFIG_SERVICE.get.mockReturnValue('gpt-4o');
MOCK_CONFIG_SERVICE.get.mockReturnValue('gpt-5.2');
expect(() => SERVICE.getEffectiveModelConfig(DEFAULT_SMART_MODEL)).toThrow(
'No AI models are available. Please configure at least one AI provider API key (OPENAI_API_KEY, ANTHROPIC_API_KEY, XAI_API_KEY, or GROQ_API_KEY).',
'No AI models are available. Please configure at least one AI provider (OPENAI_API_KEY, ANTHROPIC_API_KEY, AWS_BEDROCK_REGION, GOOGLE_API_KEY, XAI_API_KEY, GROQ_API_KEY, or MISTRAL_API_KEY).',
);
});
it('should return effective model config for DEFAULT_SMART_MODEL when models are available', () => {
MOCK_CONFIG_SERVICE.get.mockReturnValue('gpt-4o');
MOCK_CONFIG_SERVICE.get.mockReturnValue('gpt-5.2');
jest.spyOn(SERVICE, 'getAvailableModels').mockReturnValue([
{
modelId: 'gpt-4o',
provider: ModelProvider.OPENAI,
modelId: 'gpt-5.2',
inferenceProvider: InferenceProvider.OPENAI,
model: {} as any,
},
]);
jest.spyOn(SERVICE, 'getModel').mockReturnValue({
modelId: 'gpt-4o',
provider: ModelProvider.OPENAI,
modelId: 'gpt-5.2',
inferenceProvider: InferenceProvider.OPENAI,
model: {} as any,
});
const RESULT = SERVICE.getEffectiveModelConfig(DEFAULT_SMART_MODEL);
expect(RESULT).toBeDefined();
expect(RESULT.modelId).toBe('gpt-4o');
expect(RESULT.provider).toBe(ModelProvider.OPENAI);
expect(RESULT.modelId).toBe('gpt-5.2');
expect(RESULT.inferenceProvider).toBe(InferenceProvider.OPENAI);
});
it('should return effective model config for DEFAULT_SMART_MODEL with custom model', () => {
@ -125,14 +133,14 @@ describe('AiModelRegistryService', () => {
jest.spyOn(SERVICE, 'getAvailableModels').mockReturnValue([
{
modelId: 'mistral',
provider: ModelProvider.OPENAI_COMPATIBLE,
inferenceProvider: InferenceProvider.OPENAI_COMPATIBLE,
model: {} as any,
},
]);
jest.spyOn(SERVICE, 'getModel').mockReturnValue({
modelId: 'mistral',
provider: ModelProvider.OPENAI_COMPATIBLE,
inferenceProvider: InferenceProvider.OPENAI_COMPATIBLE,
model: {} as any,
});
@ -140,25 +148,24 @@ describe('AiModelRegistryService', () => {
expect(RESULT).toBeDefined();
expect(RESULT.modelId).toBe('mistral');
expect(RESULT.provider).toBe(ModelProvider.OPENAI_COMPATIBLE);
expect(RESULT.inferenceProvider).toBe(InferenceProvider.OPENAI_COMPATIBLE);
expect(RESULT.label).toBe('mistral');
expect(RESULT.inputCostPer1kTokensInCents).toBe(0);
expect(RESULT.outputCostPer1kTokensInCents).toBe(0);
expect(RESULT.inputCostPerMillionTokens).toBe(0);
expect(RESULT.outputCostPerMillionTokens).toBe(0);
});
it('should return effective model config for specific model', () => {
const RESULT = SERVICE.getEffectiveModelConfig('gpt-4o-mini');
const RESULT = SERVICE.getEffectiveModelConfig('gpt-5.2');
expect(RESULT).toBeDefined();
expect(RESULT.modelId).toBe('gpt-4o-mini');
expect(RESULT.provider).toBe(ModelProvider.OPENAI);
expect(RESULT.modelId).toBe('gpt-5.2');
expect(RESULT.inferenceProvider).toBe(InferenceProvider.OPENAI);
});
it('should return effective model config for custom model', () => {
// Mock that the custom model exists in registry
jest.spyOn(SERVICE, 'getModel').mockReturnValue({
modelId: 'mistral',
provider: ModelProvider.OPENAI_COMPATIBLE,
inferenceProvider: InferenceProvider.OPENAI_COMPATIBLE,
model: {} as any,
});
@ -166,10 +173,10 @@ describe('AiModelRegistryService', () => {
expect(RESULT).toBeDefined();
expect(RESULT.modelId).toBe('mistral');
expect(RESULT.provider).toBe(ModelProvider.OPENAI_COMPATIBLE);
expect(RESULT.inferenceProvider).toBe(InferenceProvider.OPENAI_COMPATIBLE);
expect(RESULT.label).toBe('mistral');
expect(RESULT.inputCostPer1kTokensInCents).toBe(0);
expect(RESULT.outputCostPer1kTokensInCents).toBe(0);
expect(RESULT.inputCostPerMillionTokens).toBe(0);
expect(RESULT.outputCostPerMillionTokens).toBe(0);
});
it('should throw error for non-existent model', () => {
@ -181,9 +188,8 @@ describe('AiModelRegistryService', () => {
});
it('should find first available model from comma-separated list', () => {
// First model not available, second model available
MOCK_CONFIG_SERVICE.get.mockReturnValue(
'gpt-4.1-mini,claude-haiku-4-5-20251001,grok-3-mini',
'gpt-5-mini,claude-haiku-4-5-20251001,gemini-3-flash-preview',
);
const getModelSpy = jest
@ -192,7 +198,7 @@ describe('AiModelRegistryService', () => {
if (modelId === 'claude-haiku-4-5-20251001') {
return {
modelId: 'claude-haiku-4-5-20251001',
provider: ModelProvider.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
model: {} as any,
};
}
@ -204,7 +210,7 @@ describe('AiModelRegistryService', () => {
expect(result).toBeDefined();
expect(result.modelId).toBe('claude-haiku-4-5-20251001');
expect(getModelSpy).toHaveBeenCalledWith('gpt-4.1-mini');
expect(getModelSpy).toHaveBeenCalledWith('gpt-5-mini');
expect(getModelSpy).toHaveBeenCalledWith('claude-haiku-4-5-20251001');
});
@ -215,7 +221,7 @@ describe('AiModelRegistryService', () => {
jest.spyOn(SERVICE, 'getAvailableModels').mockReturnValue([
{
modelId: 'fallback-model',
provider: ModelProvider.OPENAI_COMPATIBLE,
inferenceProvider: InferenceProvider.OPENAI_COMPATIBLE,
model: {} as any,
},
]);

View file

@ -1,7 +1,8 @@
export {
DEFAULT_FAST_MODEL,
DEFAULT_SMART_MODEL,
ModelProvider,
InferenceProvider,
ModelFamily,
type AIModelConfig,
type ModelId,
type SupportedFileType,
@ -9,13 +10,19 @@ export {
import { type AIModelConfig } from './ai-models-types.const';
import { ANTHROPIC_MODELS } from './anthropic-models.const';
import { BEDROCK_MODELS } from './bedrock-models.const';
import { GOOGLE_MODELS } from './google-models.const';
import { GROQ_MODELS } from './groq-models.const';
import { MISTRAL_MODELS } from './mistral-models.const';
import { OPENAI_MODELS } from './openai-models.const';
import { XAI_MODELS } from './xai-models.const';
export const AI_MODELS: AIModelConfig[] = [
...OPENAI_MODELS,
...ANTHROPIC_MODELS,
...BEDROCK_MODELS,
...GOOGLE_MODELS,
...XAI_MODELS,
...GROQ_MODELS,
...MISTRAL_MODELS,
];

View file

@ -1,15 +1,64 @@
import { type AIModelConfig, ModelProvider } from './ai-models-types.const';
import {
type AIModelConfig,
InferenceProvider,
ModelFamily,
} from './ai-models-types.const';
export const ANTHROPIC_MODELS: AIModelConfig[] = [
// Active models
{
modelId: 'claude-opus-4-5-20251101',
label: 'Claude Opus 4.5',
modelId: 'claude-opus-4-6',
label: 'Claude Opus 4.6',
description:
'Most powerful Claude model excelling in complex reasoning, coding, and agentic tasks',
provider: ModelProvider.ANTHROPIC,
inputCostPer1kTokensInCents: 0.5,
outputCostPer1kTokensInCents: 2.5,
'Flagship Claude model for software engineering, scientific reasoning, and agent teams',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 5.0,
outputCostPerMillionTokens: 25.0,
cachedInputCostPerMillionTokens: 0.5,
cacheCreationCostPerMillionTokens: 6.25,
longContextCost: {
inputCostPerMillionTokens: 10.0,
outputCostPerMillionTokens: 37.5,
cachedInputCostPerMillionTokens: 1.0,
cacheCreationCostPerMillionTokens: 12.5,
thresholdTokens: 200_000,
},
contextWindowTokens: 1000000,
maxOutputTokens: 128000,
supportedFileTypes: [
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
'application/pdf',
'text/plain',
'text/html',
'text/csv',
],
doesSupportThinking: true,
nativeCapabilities: {
webSearch: true,
},
},
{
modelId: 'claude-sonnet-4-6',
label: 'Claude Sonnet 4.6',
description:
'Most capable Sonnet model with strong coding, computer use, and agent planning',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 3.0,
outputCostPerMillionTokens: 15.0,
cachedInputCostPerMillionTokens: 0.3,
cacheCreationCostPerMillionTokens: 3.75,
longContextCost: {
inputCostPerMillionTokens: 6.0,
outputCostPerMillionTokens: 22.5,
cachedInputCostPerMillionTokens: 0.6,
cacheCreationCostPerMillionTokens: 7.5,
thresholdTokens: 200_000,
},
contextWindowTokens: 200000,
maxOutputTokens: 64000,
supportedFileTypes: [
@ -31,10 +80,20 @@ export const ANTHROPIC_MODELS: AIModelConfig[] = [
modelId: 'claude-sonnet-4-5-20250929',
label: 'Claude Sonnet 4.5',
description:
'Advanced model for coding tasks and complex agent-based workflows with 1M context',
provider: ModelProvider.ANTHROPIC,
inputCostPer1kTokensInCents: 0.3,
outputCostPer1kTokensInCents: 1.5,
'Previous gen Sonnet with 1M native context for long-context workflows',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 3.0,
outputCostPerMillionTokens: 15.0,
cachedInputCostPerMillionTokens: 0.3,
cacheCreationCostPerMillionTokens: 3.75,
longContextCost: {
inputCostPerMillionTokens: 6.0,
outputCostPerMillionTokens: 22.5,
cachedInputCostPerMillionTokens: 0.6,
cacheCreationCostPerMillionTokens: 7.5,
thresholdTokens: 200_000,
},
contextWindowTokens: 1000000,
maxOutputTokens: 64000,
supportedFileTypes: [
@ -55,11 +114,13 @@ export const ANTHROPIC_MODELS: AIModelConfig[] = [
{
modelId: 'claude-haiku-4-5-20251001',
label: 'Claude Haiku 4.5',
description:
'Fast and cost-effective model optimized for high-speed processing',
provider: ModelProvider.ANTHROPIC,
inputCostPer1kTokensInCents: 0.1,
outputCostPer1kTokensInCents: 0.5,
description: 'Fast and cost-effective model for high-speed processing',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 1.0,
outputCostPerMillionTokens: 5.0,
cachedInputCostPerMillionTokens: 0.1,
cacheCreationCostPerMillionTokens: 1.25,
contextWindowTokens: 200000,
maxOutputTokens: 64000,
supportedFileTypes: [
@ -77,43 +138,27 @@ export const ANTHROPIC_MODELS: AIModelConfig[] = [
webSearch: true,
},
},
{
modelId: 'claude-3-5-haiku-20241022',
label: 'Claude Haiku 3.5',
description:
'Fast and efficient model optimized for speed and cost-effectiveness',
provider: ModelProvider.ANTHROPIC,
inputCostPer1kTokensInCents: 0.08,
outputCostPer1kTokensInCents: 0.4,
contextWindowTokens: 200000,
maxOutputTokens: 8192,
supportedFileTypes: [
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
'application/pdf',
'text/plain',
'text/html',
'text/csv',
],
doesSupportThinking: false,
nativeCapabilities: {
webSearch: true,
},
},
// Deprecated models - kept for backward compatibility with existing agents
{
modelId: 'claude-opus-4-20250514',
label: 'Claude Opus 4',
description:
'Most powerful Claude model with extended thinking for complex reasoning tasks',
provider: ModelProvider.ANTHROPIC,
inputCostPer1kTokensInCents: 1.5,
outputCostPer1kTokensInCents: 7.5,
modelId: 'claude-opus-4-5-20251101',
label: 'Claude Opus 4.5',
description: 'Previous flagship model superseded by Opus 4.6',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 5.0,
outputCostPerMillionTokens: 25.0,
cachedInputCostPerMillionTokens: 0.5,
cacheCreationCostPerMillionTokens: 6.25,
longContextCost: {
inputCostPerMillionTokens: 10.0,
outputCostPerMillionTokens: 37.5,
cachedInputCostPerMillionTokens: 1.0,
cacheCreationCostPerMillionTokens: 12.5,
thresholdTokens: 200_000,
},
contextWindowTokens: 200000,
maxOutputTokens: 8192,
maxOutputTokens: 64000,
supportedFileTypes: [
'image/png',
'image/jpeg',
@ -133,11 +178,20 @@ export const ANTHROPIC_MODELS: AIModelConfig[] = [
{
modelId: 'claude-sonnet-4-20250514',
label: 'Claude Sonnet 4',
description:
'Balanced model with strong performance and extended thinking capabilities',
provider: ModelProvider.ANTHROPIC,
inputCostPer1kTokensInCents: 0.3,
outputCostPer1kTokensInCents: 1.5,
description: 'Previous gen Sonnet superseded by Sonnet 4.6',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 3.0,
outputCostPerMillionTokens: 15.0,
cachedInputCostPerMillionTokens: 0.3,
cacheCreationCostPerMillionTokens: 3.75,
longContextCost: {
inputCostPerMillionTokens: 6.0,
outputCostPerMillionTokens: 22.5,
cachedInputCostPerMillionTokens: 0.6,
cacheCreationCostPerMillionTokens: 7.5,
thresholdTokens: 200_000,
},
contextWindowTokens: 200000,
maxOutputTokens: 8192,
supportedFileTypes: [
@ -156,4 +210,60 @@ export const ANTHROPIC_MODELS: AIModelConfig[] = [
},
deprecated: true,
},
{
modelId: 'claude-opus-4-20250514',
label: 'Claude Opus 4',
description: 'Legacy Opus model with extended thinking',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 15.0,
outputCostPerMillionTokens: 75.0,
cachedInputCostPerMillionTokens: 1.5,
cacheCreationCostPerMillionTokens: 18.75,
contextWindowTokens: 200000,
maxOutputTokens: 8192,
supportedFileTypes: [
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
'application/pdf',
'text/plain',
'text/html',
'text/csv',
],
doesSupportThinking: true,
nativeCapabilities: {
webSearch: true,
},
deprecated: true,
},
{
modelId: 'claude-3-5-haiku-20241022',
label: 'Claude Haiku 3.5',
description: 'Legacy fast model superseded by Haiku 4.5',
modelFamily: ModelFamily.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
inputCostPerMillionTokens: 0.8,
outputCostPerMillionTokens: 4.0,
cachedInputCostPerMillionTokens: 0.08,
cacheCreationCostPerMillionTokens: 1.0,
contextWindowTokens: 200000,
maxOutputTokens: 8192,
supportedFileTypes: [
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
'application/pdf',
'text/plain',
'text/html',
'text/csv',
],
doesSupportThinking: false,
nativeCapabilities: {
webSearch: true,
},
deprecated: true,
},
];

View file

@ -0,0 +1,78 @@
import {
type AIModelConfig,
InferenceProvider,
ModelFamily,
} from './ai-models-types.const';
// Claude models served via AWS Bedrock. All rates are in dollars per 1M
// tokens; `longContextCost` is the tier applied once total input exceeds
// `thresholdTokens`. `modelFamily` stays ANTHROPIC because token-counting
// semantics follow the model's creator, while `inferenceProvider` is
// BEDROCK (auth/SDK/metadata format).
export const BEDROCK_MODELS: AIModelConfig[] = [
  {
    // NOTE(review): verify this matches the exact Bedrock model ID format
    // (Bedrock IDs often carry a version suffix such as `:0`).
    modelId: 'anthropic.claude-opus-4-6-v1',
    label: 'Claude Opus 4.6 (Bedrock)',
    description:
      'Flagship Claude model via AWS Bedrock for enterprise deployments',
    modelFamily: ModelFamily.ANTHROPIC,
    inferenceProvider: InferenceProvider.BEDROCK,
    inputCostPerMillionTokens: 5.0,
    outputCostPerMillionTokens: 25.0,
    cachedInputCostPerMillionTokens: 0.5,
    cacheCreationCostPerMillionTokens: 6.25,
    // Higher tier once total input exceeds 200k tokens.
    longContextCost: {
      inputCostPerMillionTokens: 10.0,
      outputCostPerMillionTokens: 37.5,
      cachedInputCostPerMillionTokens: 1.0,
      cacheCreationCostPerMillionTokens: 12.5,
      thresholdTokens: 200_000,
    },
    contextWindowTokens: 1000000,
    maxOutputTokens: 128000,
    supportedFileTypes: [
      'image/png',
      'image/jpeg',
      'image/gif',
      'image/webp',
      'application/pdf',
      'text/plain',
      'text/html',
      'text/csv',
    ],
    doesSupportThinking: true,
    nativeCapabilities: {
      webSearch: true,
    },
  },
  {
    modelId: 'anthropic.claude-sonnet-4-6',
    label: 'Claude Sonnet 4.6 (Bedrock)',
    description:
      'Balanced Claude model via AWS Bedrock with strong coding and agent planning',
    modelFamily: ModelFamily.ANTHROPIC,
    inferenceProvider: InferenceProvider.BEDROCK,
    inputCostPerMillionTokens: 3.0,
    outputCostPerMillionTokens: 15.0,
    cachedInputCostPerMillionTokens: 0.3,
    cacheCreationCostPerMillionTokens: 3.75,
    // Higher tier once total input exceeds 200k tokens.
    longContextCost: {
      inputCostPerMillionTokens: 6.0,
      outputCostPerMillionTokens: 22.5,
      cachedInputCostPerMillionTokens: 0.6,
      cacheCreationCostPerMillionTokens: 7.5,
      thresholdTokens: 200_000,
    },
    contextWindowTokens: 200000,
    maxOutputTokens: 64000,
    supportedFileTypes: [
      'image/png',
      'image/jpeg',
      'image/gif',
      'image/webp',
      'application/pdf',
      'text/plain',
      'text/html',
      'text/csv',
    ],
    doesSupportThinking: true,
    nativeCapabilities: {
      webSearch: true,
    },
  },
];

View file

@ -0,0 +1,81 @@
import {
type AIModelConfig,
InferenceProvider,
ModelFamily,
} from './ai-models-types.const';
// Gemini models served directly by Google. All rates are in dollars per
// 1M tokens; `longContextCost` is the tier applied once total input
// exceeds `thresholdTokens`.
export const GOOGLE_MODELS: AIModelConfig[] = [
  // Active models
  {
    modelId: 'gemini-3.1-pro-preview',
    label: 'Gemini 3.1 Pro',
    description:
      'Most advanced Gemini model for reasoning, coding, and agentic workflows',
    modelFamily: ModelFamily.GOOGLE,
    inferenceProvider: InferenceProvider.GOOGLE,
    inputCostPerMillionTokens: 2.0,
    outputCostPerMillionTokens: 12.0,
    cachedInputCostPerMillionTokens: 0.2,
    // Higher tier once total input exceeds 200k tokens.
    longContextCost: {
      inputCostPerMillionTokens: 4.0,
      outputCostPerMillionTokens: 18.0,
      cachedInputCostPerMillionTokens: 0.4,
      thresholdTokens: 200_000,
    },
    contextWindowTokens: 1048576,
    maxOutputTokens: 65536,
    supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
    doesSupportThinking: true,
  },
  {
    modelId: 'gemini-3-flash-preview',
    label: 'Gemini 3 Flash',
    description: 'Fast frontier-class Gemini model at low cost with 1M context',
    modelFamily: ModelFamily.GOOGLE,
    inferenceProvider: InferenceProvider.GOOGLE,
    inputCostPerMillionTokens: 0.5,
    outputCostPerMillionTokens: 3.0,
    cachedInputCostPerMillionTokens: 0.05,
    contextWindowTokens: 1048576,
    maxOutputTokens: 65536,
    supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
    doesSupportThinking: true,
  },
  // Deprecated models - kept for backward compatibility with existing agents
  {
    modelId: 'gemini-2.5-pro',
    label: 'Gemini 2.5 Pro',
    description: 'Previous gen Gemini Pro superseded by 3.1 Pro',
    modelFamily: ModelFamily.GOOGLE,
    inferenceProvider: InferenceProvider.GOOGLE,
    inputCostPerMillionTokens: 1.25,
    outputCostPerMillionTokens: 10.0,
    cachedInputCostPerMillionTokens: 0.315,
    // NOTE(review): this tier has no cachedInputCostPerMillionTokens, so
    // cached tokens above the threshold fall back to the tier's input rate
    // in the cost breakdown — confirm this is the intended pricing.
    longContextCost: {
      inputCostPerMillionTokens: 2.5,
      outputCostPerMillionTokens: 15.0,
      thresholdTokens: 200_000,
    },
    contextWindowTokens: 1048576,
    maxOutputTokens: 65536,
    supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
    doesSupportThinking: true,
    deprecated: true,
  },
  {
    modelId: 'gemini-2.5-flash',
    label: 'Gemini 2.5 Flash',
    description: 'Previous gen Flash superseded by Gemini 3 Flash',
    modelFamily: ModelFamily.GOOGLE,
    inferenceProvider: InferenceProvider.GOOGLE,
    inputCostPerMillionTokens: 0.3,
    outputCostPerMillionTokens: 2.5,
    cachedInputCostPerMillionTokens: 0.075,
    contextWindowTokens: 1048576,
    maxOutputTokens: 65536,
    supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
    doesSupportThinking: true,
    deprecated: true,
  },
];

View file

@ -1,14 +1,20 @@
import { type AIModelConfig, ModelProvider } from './ai-models-types.const';
import {
type AIModelConfig,
InferenceProvider,
ModelFamily,
} from './ai-models-types.const';
export const GROQ_MODELS: AIModelConfig[] = [
{
modelId: 'openai/gpt-oss-120b',
label: 'GPT-OSS 120B (Groq)',
description:
'Large-scale open-source model with browser search, served via Groq inference',
provider: ModelProvider.GROQ,
inputCostPer1kTokensInCents: 0.059,
outputCostPer1kTokensInCents: 0.079,
'Large-scale open-source model with ultra-fast inference via Groq',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.GROQ,
inputCostPerMillionTokens: 0.15,
outputCostPerMillionTokens: 0.6,
cachedInputCostPerMillionTokens: 0.075,
contextWindowTokens: 128000,
maxOutputTokens: 16384,
},

View file

@ -0,0 +1,20 @@
import {
type AIModelConfig,
InferenceProvider,
ModelFamily,
} from './ai-models-types.const';
// Catalog of models served via the Mistral inference provider.
// Cost fields are per million tokens; no cached-input or tiered
// long-context pricing is configured for this provider.
export const MISTRAL_MODELS: AIModelConfig[] = [
  {
    modelId: 'mistral-large-latest',
    label: 'Mistral Large',
    description:
      'Flagship Mistral model with strong reasoning and 256K context',
    modelFamily: ModelFamily.MISTRAL,
    inferenceProvider: InferenceProvider.MISTRAL,
    inputCostPerMillionTokens: 0.5,
    outputCostPerMillionTokens: 1.5,
    contextWindowTokens: 256000,
    maxOutputTokens: 8192,
    // No supportedFileTypes / doesSupportThinking: text-only, non-reasoning
    // entry as configured here.
  },
];

View file

@ -1,15 +1,54 @@
import { type AIModelConfig, ModelProvider } from './ai-models-types.const';
import {
type AIModelConfig,
InferenceProvider,
ModelFamily,
} from './ai-models-types.const';
export const OPENAI_MODELS: AIModelConfig[] = [
// Active models
{
modelId: 'gpt-5.2',
label: 'GPT-5.2',
description:
'Most advanced OpenAI model for coding, agentic tasks, and complex reasoning',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 1.75,
outputCostPerMillionTokens: 14.0,
cachedInputCostPerMillionTokens: 0.175,
contextWindowTokens: 400000,
maxOutputTokens: 128000,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
doesSupportThinking: true,
nativeCapabilities: {
webSearch: true,
},
},
{
modelId: 'gpt-5-mini',
label: 'GPT-5 Mini',
description: 'Fast and cost-efficient GPT-5 variant for well-defined tasks',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 0.25,
outputCostPerMillionTokens: 2.0,
cachedInputCostPerMillionTokens: 0.025,
contextWindowTokens: 128000,
maxOutputTokens: 32768,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
nativeCapabilities: {
webSearch: true,
},
},
{
modelId: 'gpt-4.1',
label: 'GPT-4.1',
description:
'Advanced model excelling in coding, instruction following, and long-context comprehension',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCents: 0.2,
outputCostPer1kTokensInCents: 0.8,
description: 'Strong model with 1M context, cost-effective output pricing',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 2.0,
outputCostPerMillionTokens: 8.0,
cachedInputCostPerMillionTokens: 0.5,
contextWindowTokens: 1047576,
maxOutputTokens: 32768,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -20,11 +59,12 @@ export const OPENAI_MODELS: AIModelConfig[] = [
{
modelId: 'gpt-4.1-mini',
label: 'GPT-4.1 Mini',
description:
'Fast and cost-efficient version of GPT-4.1 optimized for low latency',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCents: 0.04,
outputCostPer1kTokensInCents: 0.16,
description: 'Budget-friendly model with 1M context for lightweight tasks',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 0.4,
outputCostPerMillionTokens: 1.6,
cachedInputCostPerMillionTokens: 0.1,
contextWindowTokens: 1047576,
maxOutputTokens: 32768,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -32,14 +72,18 @@ export const OPENAI_MODELS: AIModelConfig[] = [
webSearch: true,
},
},
// Deprecated models - kept for backward compatibility with existing agents
{
modelId: 'o3',
label: 'o3',
description:
'Powerful reasoning model excelling in complex queries, coding, math, and science',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCents: 0.2,
outputCostPer1kTokensInCents: 0.8,
'Reasoning model for complex queries, coding, math, and science',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 2.0,
outputCostPerMillionTokens: 8.0,
cachedInputCostPerMillionTokens: 0.5,
contextWindowTokens: 200000,
maxOutputTokens: 100000,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -47,15 +91,18 @@ export const OPENAI_MODELS: AIModelConfig[] = [
nativeCapabilities: {
webSearch: true,
},
deprecated: true,
},
{
modelId: 'o4-mini',
label: 'o4-mini',
description:
'Cost-effective reasoning model excelling in math, coding, and visual tasks',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCents: 0.11,
outputCostPer1kTokensInCents: 0.44,
'Cost-effective reasoning model for math, coding, and visual tasks',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 1.1,
outputCostPerMillionTokens: 4.4,
cachedInputCostPerMillionTokens: 0.275,
contextWindowTokens: 200000,
maxOutputTokens: 100000,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -63,17 +110,18 @@ export const OPENAI_MODELS: AIModelConfig[] = [
nativeCapabilities: {
webSearch: true,
},
deprecated: true,
},
// Deprecated models - kept for backward compatibility with existing agents
{
modelId: 'gpt-4o',
label: 'GPT-4o',
description:
'Most advanced multimodal model with strong reasoning, vision, and coding capabilities',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCents: 0.25,
outputCostPer1kTokensInCents: 1.0,
'Previous generation multimodal model with strong reasoning and vision',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 2.5,
outputCostPerMillionTokens: 10.0,
cachedInputCostPerMillionTokens: 1.25,
contextWindowTokens: 128000,
maxOutputTokens: 16384,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -85,11 +133,12 @@ export const OPENAI_MODELS: AIModelConfig[] = [
{
modelId: 'gpt-4o-mini',
label: 'GPT-4o Mini',
description:
'Fast and cost-efficient model for lightweight tasks and high-volume operations',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCents: 0.015,
outputCostPer1kTokensInCents: 0.06,
description: 'Previous generation fast model for lightweight tasks',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 0.15,
outputCostPerMillionTokens: 0.6,
cachedInputCostPerMillionTokens: 0.075,
contextWindowTokens: 128000,
maxOutputTokens: 16384,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -101,17 +150,14 @@ export const OPENAI_MODELS: AIModelConfig[] = [
{
modelId: 'gpt-4-turbo',
label: 'GPT-4 Turbo',
description:
'Previous generation high-performance model with vision capabilities',
provider: ModelProvider.OPENAI,
inputCostPer1kTokensInCents: 1.0,
outputCostPer1kTokensInCents: 3.0,
description: 'Legacy high-performance model with vision capabilities',
modelFamily: ModelFamily.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
inputCostPerMillionTokens: 10.0,
outputCostPerMillionTokens: 30.0,
contextWindowTokens: 128000,
maxOutputTokens: 4096,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
nativeCapabilities: {
webSearch: false,
},
deprecated: true,
},
];

View file

@ -1,32 +1,21 @@
import { type AIModelConfig, ModelProvider } from './ai-models-types.const';
import {
type AIModelConfig,
InferenceProvider,
ModelFamily,
} from './ai-models-types.const';
export const XAI_MODELS: AIModelConfig[] = [
// Active models
{
modelId: 'grok-4-1-fast-reasoning',
label: 'Grok 4.1 Fast',
description:
'Next-generation tool-calling agent with 2M context for advanced agentic workflows',
provider: ModelProvider.XAI,
inputCostPer1kTokensInCents: 0.02,
outputCostPer1kTokensInCents: 0.05,
contextWindowTokens: 2000000,
maxOutputTokens: 8192,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
doesSupportThinking: true,
nativeCapabilities: {
webSearch: true,
twitterSearch: true,
},
},
{
modelId: 'grok-4',
label: 'Grok-4',
description:
'Most capable Grok model with enhanced reasoning, web and Twitter search',
provider: ModelProvider.XAI,
inputCostPer1kTokensInCents: 0.3,
outputCostPer1kTokensInCents: 1.5,
modelFamily: ModelFamily.XAI,
inferenceProvider: InferenceProvider.XAI,
inputCostPerMillionTokens: 3.0,
outputCostPerMillionTokens: 15.0,
cachedInputCostPerMillionTokens: 0.75,
contextWindowTokens: 256000,
maxOutputTokens: 8192,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -35,14 +24,37 @@ export const XAI_MODELS: AIModelConfig[] = [
twitterSearch: true,
},
},
{
modelId: 'grok-4-1-fast-reasoning',
label: 'Grok 4.1 Fast',
description:
'Next-generation tool-calling agent with 2M context for advanced agentic workflows',
modelFamily: ModelFamily.XAI,
inferenceProvider: InferenceProvider.XAI,
inputCostPerMillionTokens: 0.2,
outputCostPerMillionTokens: 0.5,
cachedInputCostPerMillionTokens: 0.05,
contextWindowTokens: 2000000,
maxOutputTokens: 8192,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
doesSupportThinking: true,
nativeCapabilities: {
webSearch: true,
twitterSearch: true,
},
},
// Deprecated models - kept for backward compatibility with existing agents
{
modelId: 'grok-3',
label: 'Grok-3',
description:
'Advanced model with web and Twitter search, optimized for real-time information',
provider: ModelProvider.XAI,
inputCostPer1kTokensInCents: 0.3,
outputCostPer1kTokensInCents: 1.5,
modelFamily: ModelFamily.XAI,
inferenceProvider: InferenceProvider.XAI,
inputCostPerMillionTokens: 3.0,
outputCostPerMillionTokens: 15.0,
cachedInputCostPerMillionTokens: 0.75,
contextWindowTokens: 131072,
maxOutputTokens: 8192,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -50,15 +62,18 @@ export const XAI_MODELS: AIModelConfig[] = [
webSearch: true,
twitterSearch: true,
},
deprecated: true,
},
{
modelId: 'grok-3-mini',
label: 'Grok-3 Mini',
description:
'Lightweight model with web and Twitter search for fast, cost-effective operations',
provider: ModelProvider.XAI,
inputCostPer1kTokensInCents: 0.03,
outputCostPer1kTokensInCents: 0.05,
modelFamily: ModelFamily.XAI,
inferenceProvider: InferenceProvider.XAI,
inputCostPerMillionTokens: 0.3,
outputCostPerMillionTokens: 0.5,
cachedInputCostPerMillionTokens: 0.07,
contextWindowTokens: 131072,
maxOutputTokens: 8192,
supportedFileTypes: ['image/png', 'image/jpeg', 'image/gif', 'image/webp'],
@ -66,5 +81,6 @@ export const XAI_MODELS: AIModelConfig[] = [
webSearch: true,
twitterSearch: true,
},
deprecated: true,
},
];

View file

@ -6,23 +6,30 @@ import { ProviderOptions } from '@ai-sdk/provider-utils';
import { ToolSet } from 'ai';
import { AGENT_CONFIG } from 'src/engine/metadata-modules/ai/ai-agent/constants/agent-config.const';
import { ModelProvider } from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
import { RegisteredAIModel } from 'src/engine/metadata-modules/ai/ai-models/services/ai-model-registry.service';
import { InferenceProvider } from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
import {
AiModelRegistryService,
RegisteredAIModel,
} from 'src/engine/metadata-modules/ai/ai-models/services/ai-model-registry.service';
import { FlatAgentWithRoleId } from 'src/engine/metadata-modules/flat-agent/types/flat-agent.type';
@Injectable()
export class AgentModelConfigService {
constructor() {}
constructor(
private readonly aiModelRegistryService: AiModelRegistryService,
) {}
getProviderOptions(
model: RegisteredAIModel,
agent: FlatAgentWithRoleId,
): ProviderOptions {
switch (model.provider) {
case ModelProvider.XAI:
switch (model.inferenceProvider) {
case InferenceProvider.XAI:
return this.getXaiProviderOptions(agent);
case ModelProvider.ANTHROPIC:
case InferenceProvider.ANTHROPIC:
return this.getAnthropicProviderOptions(model);
case InferenceProvider.BEDROCK:
return this.getBedrockProviderOptions(model);
default:
return {};
}
@ -38,13 +45,25 @@ export class AgentModelConfigService {
return tools;
}
switch (model.provider) {
case ModelProvider.ANTHROPIC:
switch (model.inferenceProvider) {
case InferenceProvider.ANTHROPIC:
if (agent.modelConfiguration.webSearch?.enabled) {
tools.web_search = anthropic.tools.webSearch_20250305();
}
break;
case ModelProvider.OPENAI:
case InferenceProvider.BEDROCK: {
if (agent.modelConfiguration.webSearch?.enabled) {
const bedrockProvider =
this.aiModelRegistryService.getBedrockProvider();
if (bedrockProvider) {
tools.web_search =
bedrockProvider.tools.webSearch_20250305() as ToolSet[string];
}
}
break;
}
case InferenceProvider.OPENAI:
if (agent.modelConfiguration.webSearch?.enabled) {
tools.web_search = openai.tools.webSearch();
}
@ -99,4 +118,19 @@ export class AgentModelConfigService {
},
};
}
private getBedrockProviderOptions(model: RegisteredAIModel): ProviderOptions {
if (!model.doesSupportThinking) {
return {};
}
return {
bedrock: {
thinking: {
type: 'enabled',
budgetTokens: AGENT_CONFIG.REASONING_BUDGET_TOKENS,
},
},
};
}
}

View file

@ -1,7 +1,13 @@
import { Injectable } from '@nestjs/common';
import {
createAmazonBedrock,
type AmazonBedrockProvider,
} from '@ai-sdk/amazon-bedrock';
import { anthropic } from '@ai-sdk/anthropic';
import { google } from '@ai-sdk/google';
import { groq } from '@ai-sdk/groq';
import { mistral } from '@ai-sdk/mistral';
import { createOpenAI, openai } from '@ai-sdk/openai';
import { xai } from '@ai-sdk/xai';
import { type LanguageModel } from 'ai';
@ -15,17 +21,21 @@ import {
AI_MODELS,
DEFAULT_FAST_MODEL,
DEFAULT_SMART_MODEL,
ModelProvider,
InferenceProvider,
ModelFamily,
type AIModelConfig,
} from 'src/engine/metadata-modules/ai/ai-models/constants/ai-models.const';
import { ANTHROPIC_MODELS } from 'src/engine/metadata-modules/ai/ai-models/constants/anthropic-models.const';
import { BEDROCK_MODELS } from 'src/engine/metadata-modules/ai/ai-models/constants/bedrock-models.const';
import { GOOGLE_MODELS } from 'src/engine/metadata-modules/ai/ai-models/constants/google-models.const';
import { GROQ_MODELS } from 'src/engine/metadata-modules/ai/ai-models/constants/groq-models.const';
import { MISTRAL_MODELS } from 'src/engine/metadata-modules/ai/ai-models/constants/mistral-models.const';
import { OPENAI_MODELS } from 'src/engine/metadata-modules/ai/ai-models/constants/openai-models.const';
import { XAI_MODELS } from 'src/engine/metadata-modules/ai/ai-models/constants/xai-models.const';
export interface RegisteredAIModel {
modelId: string;
provider: ModelProvider;
inferenceProvider: InferenceProvider;
model: LanguageModel;
doesSupportThinking?: boolean;
}
@ -33,13 +43,19 @@ export interface RegisteredAIModel {
@Injectable()
export class AiModelRegistryService {
private modelRegistry: Map<string, RegisteredAIModel> = new Map();
private bedrockProvider: AmazonBedrockProvider | null = null;
constructor(private twentyConfigService: TwentyConfigService) {
this.buildModelRegistry();
}
getBedrockProvider(): AmazonBedrockProvider | null {
return this.bedrockProvider;
}
private buildModelRegistry(): void {
this.modelRegistry.clear();
this.bedrockProvider = null;
const openaiApiKey = this.twentyConfigService.get('OPENAI_API_KEY');
@ -65,6 +81,24 @@ export class AiModelRegistryService {
this.registerGroqModels();
}
const googleApiKey = this.twentyConfigService.get('GOOGLE_API_KEY');
if (googleApiKey) {
this.registerGoogleModels();
}
const mistralApiKey = this.twentyConfigService.get('MISTRAL_API_KEY');
if (mistralApiKey) {
this.registerMistralModels();
}
const bedrockRegion = this.twentyConfigService.get('AWS_BEDROCK_REGION');
if (bedrockRegion) {
this.registerBedrockModels(bedrockRegion);
}
const openaiCompatibleBaseUrl = this.twentyConfigService.get(
'OPENAI_COMPATIBLE_BASE_URL',
);
@ -84,7 +118,7 @@ export class AiModelRegistryService {
OPENAI_MODELS.forEach((modelConfig) => {
this.modelRegistry.set(modelConfig.modelId, {
modelId: modelConfig.modelId,
provider: ModelProvider.OPENAI,
inferenceProvider: InferenceProvider.OPENAI,
model: openai(modelConfig.modelId),
doesSupportThinking: modelConfig.doesSupportThinking,
});
@ -95,7 +129,7 @@ export class AiModelRegistryService {
ANTHROPIC_MODELS.forEach((modelConfig) => {
this.modelRegistry.set(modelConfig.modelId, {
modelId: modelConfig.modelId,
provider: ModelProvider.ANTHROPIC,
inferenceProvider: InferenceProvider.ANTHROPIC,
model: anthropic(modelConfig.modelId),
doesSupportThinking: modelConfig.doesSupportThinking,
});
@ -106,7 +140,7 @@ export class AiModelRegistryService {
XAI_MODELS.forEach((modelConfig) => {
this.modelRegistry.set(modelConfig.modelId, {
modelId: modelConfig.modelId,
provider: ModelProvider.XAI,
inferenceProvider: InferenceProvider.XAI,
model: xai(modelConfig.modelId),
doesSupportThinking: modelConfig.doesSupportThinking,
});
@ -117,13 +151,63 @@ export class AiModelRegistryService {
GROQ_MODELS.forEach((modelConfig) => {
this.modelRegistry.set(modelConfig.modelId, {
modelId: modelConfig.modelId,
provider: ModelProvider.GROQ,
inferenceProvider: InferenceProvider.GROQ,
model: groq(modelConfig.modelId),
doesSupportThinking: modelConfig.doesSupportThinking,
});
});
}
private registerGoogleModels(): void {
GOOGLE_MODELS.forEach((modelConfig) => {
this.modelRegistry.set(modelConfig.modelId, {
modelId: modelConfig.modelId,
inferenceProvider: InferenceProvider.GOOGLE,
model: google(modelConfig.modelId),
doesSupportThinking: modelConfig.doesSupportThinking,
});
});
}
private registerMistralModels(): void {
MISTRAL_MODELS.forEach((modelConfig) => {
this.modelRegistry.set(modelConfig.modelId, {
modelId: modelConfig.modelId,
inferenceProvider: InferenceProvider.MISTRAL,
model: mistral(modelConfig.modelId),
doesSupportThinking: modelConfig.doesSupportThinking,
});
});
}
private registerBedrockModels(region: string): void {
const accessKeyId = this.twentyConfigService.get(
'AWS_BEDROCK_ACCESS_KEY_ID',
);
const secretAccessKey = this.twentyConfigService.get(
'AWS_BEDROCK_SECRET_ACCESS_KEY',
);
const sessionToken = this.twentyConfigService.get(
'AWS_BEDROCK_SESSION_TOKEN',
);
this.bedrockProvider = createAmazonBedrock({
region,
...(accessKeyId && secretAccessKey
? { accessKeyId, secretAccessKey, sessionToken }
: {}),
});
BEDROCK_MODELS.forEach((modelConfig) => {
this.modelRegistry.set(modelConfig.modelId, {
modelId: modelConfig.modelId,
inferenceProvider: InferenceProvider.BEDROCK,
model: this.bedrockProvider!(modelConfig.modelId),
doesSupportThinking: modelConfig.doesSupportThinking,
});
});
}
private registerOpenAICompatibleModels(
baseUrl: string,
modelNamesString: string,
@ -142,7 +226,7 @@ export class AiModelRegistryService {
modelNames.forEach((modelId) => {
this.modelRegistry.set(modelId, {
modelId,
provider: ModelProvider.OPENAI_COMPATIBLE,
inferenceProvider: InferenceProvider.OPENAI_COMPATIBLE,
model: provider(modelId),
});
});
@ -189,7 +273,7 @@ export class AiModelRegistryService {
if (!model) {
throw new AgentException(
'No AI models are available. Please configure at least one AI provider API key (OPENAI_API_KEY, ANTHROPIC_API_KEY, XAI_API_KEY, or GROQ_API_KEY).',
'No AI models are available. Please configure at least one AI provider (OPENAI_API_KEY, ANTHROPIC_API_KEY, AWS_BEDROCK_REGION, GOOGLE_API_KEY, XAI_API_KEY, GROQ_API_KEY, or MISTRAL_API_KEY).',
AgentExceptionCode.API_KEY_NOT_CONFIGURED,
);
}
@ -211,7 +295,7 @@ export class AiModelRegistryService {
if (!model) {
throw new AgentException(
'No AI models are available. Please configure at least one AI provider API key (OPENAI_API_KEY, ANTHROPIC_API_KEY, XAI_API_KEY, or GROQ_API_KEY).',
'No AI models are available. Please configure at least one AI provider (OPENAI_API_KEY, ANTHROPIC_API_KEY, AWS_BEDROCK_REGION, GOOGLE_API_KEY, XAI_API_KEY, GROQ_API_KEY, or MISTRAL_API_KEY).',
AgentExceptionCode.API_KEY_NOT_CONFIGURED,
);
}
@ -221,7 +305,6 @@ export class AiModelRegistryService {
getEffectiveModelConfig(modelId: string): AIModelConfig {
if (modelId === DEFAULT_FAST_MODEL || modelId === DEFAULT_SMART_MODEL) {
// getDefaultSpeedModel/getDefaultPerformanceModel will throw AgentException if no models available
const defaultModel =
modelId === DEFAULT_FAST_MODEL
? this.getDefaultSpeedModel()
@ -265,15 +348,29 @@ export class AiModelRegistryService {
modelId: registeredModel.modelId,
label: registeredModel.modelId,
description: `Custom model: ${registeredModel.modelId}`,
provider: registeredModel.provider,
inputCostPer1kTokensInCents: 0,
outputCostPer1kTokensInCents: 0,
modelFamily: this.inferModelFamily(registeredModel.inferenceProvider),
inferenceProvider: registeredModel.inferenceProvider,
inputCostPerMillionTokens: 0,
outputCostPerMillionTokens: 0,
contextWindowTokens: 128000,
maxOutputTokens: 4096,
};
}
// Force refresh the registry (useful if config changes)
private inferModelFamily(inferenceProvider: InferenceProvider): ModelFamily {
const providerToFamily: Partial<Record<InferenceProvider, ModelFamily>> = {
[InferenceProvider.OPENAI]: ModelFamily.OPENAI,
[InferenceProvider.ANTHROPIC]: ModelFamily.ANTHROPIC,
[InferenceProvider.BEDROCK]: ModelFamily.ANTHROPIC,
[InferenceProvider.GOOGLE]: ModelFamily.GOOGLE,
[InferenceProvider.MISTRAL]: ModelFamily.MISTRAL,
[InferenceProvider.XAI]: ModelFamily.XAI,
[InferenceProvider.GROQ]: ModelFamily.OPENAI,
};
return providerToFamily[inferenceProvider] ?? ModelFamily.OPENAI;
}
refreshRegistry(): void {
this.buildModelRegistry();
}
@ -283,7 +380,7 @@ export class AiModelRegistryService {
agent?.modelId ?? DEFAULT_SMART_MODEL,
);
await this.validateApiKey(aiModel.provider);
await this.validateApiKey(aiModel.inferenceProvider);
const registeredModel = this.getModel(aiModel.modelId);
if (!registeredModel) {
@ -296,23 +393,32 @@ export class AiModelRegistryService {
return registeredModel;
}
async validateApiKey(provider: ModelProvider): Promise<void> {
async validateApiKey(inferenceProvider: InferenceProvider): Promise<void> {
let apiKey: string | undefined;
switch (provider) {
case ModelProvider.OPENAI:
switch (inferenceProvider) {
case InferenceProvider.OPENAI:
apiKey = this.twentyConfigService.get('OPENAI_API_KEY');
break;
case ModelProvider.ANTHROPIC:
case InferenceProvider.ANTHROPIC:
apiKey = this.twentyConfigService.get('ANTHROPIC_API_KEY');
break;
case ModelProvider.XAI:
case InferenceProvider.XAI:
apiKey = this.twentyConfigService.get('XAI_API_KEY');
break;
case ModelProvider.GROQ:
case InferenceProvider.GROQ:
apiKey = this.twentyConfigService.get('GROQ_API_KEY');
break;
case ModelProvider.OPENAI_COMPATIBLE:
case InferenceProvider.GOOGLE:
apiKey = this.twentyConfigService.get('GOOGLE_API_KEY');
break;
case InferenceProvider.MISTRAL:
apiKey = this.twentyConfigService.get('MISTRAL_API_KEY');
break;
case InferenceProvider.BEDROCK:
apiKey = this.twentyConfigService.get('AWS_BEDROCK_REGION');
break;
case InferenceProvider.OPENAI_COMPATIBLE:
apiKey = this.twentyConfigService.get('OPENAI_COMPATIBLE_API_KEY');
break;
default:
@ -321,7 +427,7 @@ export class AiModelRegistryService {
if (!apiKey) {
throw new AgentException(
`${provider.toUpperCase()} API key not configured. Please set the appropriate environment variable.`,
`${inferenceProvider.toUpperCase()} API key not configured. Please set the appropriate environment variable.`,
AgentExceptionCode.API_KEY_NOT_CONFIGURED,
);
}

View file

@ -78,8 +78,8 @@ export class AiAgentWorkflowAction implements WorkflowAction {
const executionContext =
await this.workflowExecutionContextService.getExecutionContext(runInfo);
const { result, usage } = await this.aiAgentExecutionService.executeAgent(
{
const { result, usage, cacheCreationTokens } =
await this.aiAgentExecutionService.executeAgent({
agent,
userPrompt: resolveInput(prompt, context) as string,
actorContext: executionContext.isActingOnBehalfOfUser
@ -87,12 +87,11 @@ export class AiAgentWorkflowAction implements WorkflowAction {
: undefined,
rolePermissionConfig: executionContext.rolePermissionConfig,
authContext: executionContext.authContext,
},
);
});
await this.aiBillingService.calculateAndBillUsage(
agent?.modelId ?? DEFAULT_SMART_MODEL,
usage,
{ usage, cacheCreationTokens },
workspaceId,
agent?.id || null,
);

View file

@ -24,6 +24,34 @@ __metadata:
languageName: node
linkType: hard
"@ai-sdk/amazon-bedrock@npm:^3.0.83":
version: 3.0.83
resolution: "@ai-sdk/amazon-bedrock@npm:3.0.83"
dependencies:
"@ai-sdk/anthropic": "npm:2.0.66"
"@ai-sdk/provider": "npm:2.0.1"
"@ai-sdk/provider-utils": "npm:3.0.21"
"@smithy/eventstream-codec": "npm:^4.0.1"
"@smithy/util-utf8": "npm:^4.0.0"
aws4fetch: "npm:^1.0.20"
peerDependencies:
zod: ^3.25.76 || ^4.1.8
checksum: 10c0/66072ecf1bc3183f6028fe8bf5f99a8d03986bdffc5836b8508193de58df701058bdc98b3724e9dc47ed0a924edc150712b0c8a6595c0f3a52ba4d30b9029b63
languageName: node
linkType: hard
"@ai-sdk/anthropic@npm:2.0.66":
version: 2.0.66
resolution: "@ai-sdk/anthropic@npm:2.0.66"
dependencies:
"@ai-sdk/provider": "npm:2.0.1"
"@ai-sdk/provider-utils": "npm:3.0.21"
peerDependencies:
zod: ^3.25.76 || ^4.1.8
checksum: 10c0/9137e26280efe245986b3a0ea469bb075e46d3e45050faa608b13f289575dd044e5271c56abaad0f9532f543fbe5681d25141674bd5d47fcd5520c5173cd23fe
languageName: node
linkType: hard
"@ai-sdk/anthropic@npm:^2.0.17":
version: 2.0.17
resolution: "@ai-sdk/anthropic@npm:2.0.17"
@ -48,6 +76,18 @@ __metadata:
languageName: node
linkType: hard
"@ai-sdk/google@npm:^2.0.54":
version: 2.0.54
resolution: "@ai-sdk/google@npm:2.0.54"
dependencies:
"@ai-sdk/provider": "npm:2.0.1"
"@ai-sdk/provider-utils": "npm:3.0.21"
peerDependencies:
zod: ^3.25.76 || ^4.1.8
checksum: 10c0/d1f244924b6f0484c2c683b189f4e64c8667814b16291fad12935432e7e89d5921351d69ab1971457fabb51e7239eb2f56a1da6ff2f46a86e4ce23b0bb12f019
languageName: node
linkType: hard
"@ai-sdk/groq@npm:^2.0.34":
version: 2.0.34
resolution: "@ai-sdk/groq@npm:2.0.34"
@ -60,6 +100,18 @@ __metadata:
languageName: node
linkType: hard
"@ai-sdk/mistral@npm:^2.0.28":
version: 2.0.28
resolution: "@ai-sdk/mistral@npm:2.0.28"
dependencies:
"@ai-sdk/provider": "npm:2.0.1"
"@ai-sdk/provider-utils": "npm:3.0.21"
peerDependencies:
zod: ^3.25.76 || ^4.1.8
checksum: 10c0/dd993fe477da8e053c68e162a52f0fcbf7671b2a0f8dd4b9a9db57fe780f17cd7ef2b08512e85b0b97a5622f57e085000c56aab102cf381bba1e0e5043c67306
languageName: node
linkType: hard
"@ai-sdk/openai-compatible@npm:1.0.30":
version: 1.0.30
resolution: "@ai-sdk/openai-compatible@npm:1.0.30"
@ -97,6 +149,19 @@ __metadata:
languageName: node
linkType: hard
"@ai-sdk/provider-utils@npm:3.0.21":
version: 3.0.21
resolution: "@ai-sdk/provider-utils@npm:3.0.21"
dependencies:
"@ai-sdk/provider": "npm:2.0.1"
"@standard-schema/spec": "npm:^1.0.0"
eventsource-parser: "npm:^3.0.6"
peerDependencies:
zod: ^3.25.76 || ^4.1.8
checksum: 10c0/272645109b4990c1367d46fd8982ee3436f88f4f5ec96d78f98c928052dcee6e9a7df326558d908d73901a5c0a84b3374c31722bb78579c2efd3417afb2100fb
languageName: node
linkType: hard
"@ai-sdk/provider-utils@npm:3.0.9, @ai-sdk/provider-utils@npm:^3.0.9":
version: 3.0.9
resolution: "@ai-sdk/provider-utils@npm:3.0.9"
@ -20797,7 +20862,7 @@ __metadata:
languageName: node
linkType: hard
"@smithy/eventstream-codec@npm:^4.2.8":
"@smithy/eventstream-codec@npm:^4.0.1, @smithy/eventstream-codec@npm:^4.2.8":
version: 4.2.8
resolution: "@smithy/eventstream-codec@npm:4.2.8"
dependencies:
@ -21339,7 +21404,7 @@ __metadata:
languageName: node
linkType: hard
"@smithy/util-utf8@npm:^4.2.0":
"@smithy/util-utf8@npm:^4.0.0, @smithy/util-utf8@npm:^4.2.0":
version: 4.2.0
resolution: "@smithy/util-utf8@npm:4.2.0"
dependencies:
@ -24162,7 +24227,7 @@ __metadata:
languageName: node
linkType: hard
"@types/lodash.kebabcase@npm:^4.1.7":
"@types/lodash.kebabcase@npm:^4.1.7, @types/lodash.kebabcase@npm:^4.1.9":
version: 4.1.9
resolution: "@types/lodash.kebabcase@npm:4.1.9"
dependencies:
@ -29266,6 +29331,13 @@ __metadata:
languageName: node
linkType: hard
"aws4fetch@npm:^1.0.20":
version: 1.0.20
resolution: "aws4fetch@npm:1.0.20"
checksum: 10c0/a4eac7bd0d1c3e611c17ed1ef41ac0b48c0a8e74a985ad968c071e74d94586d3572edc943b43fa5ca756c686ea73baa2f48e264d657bb8c2e95c8e0037d48a87
languageName: node
linkType: hard
"axe-core@npm:^4.10.0":
version: 4.10.3
resolution: "axe-core@npm:4.10.3"
@ -58672,6 +58744,7 @@ __metadata:
"@types/fs-extra": "npm:^11.0.0"
"@types/inquirer": "npm:^9.0.0"
"@types/lodash.camelcase": "npm:^4.3.7"
"@types/lodash.kebabcase": "npm:^4.1.9"
"@types/node": "npm:^24.0.0"
"@types/react": "npm:18.2.66"
"@types/react-dom": "npm:18.2.22"
@ -58691,6 +58764,7 @@ __metadata:
inquirer: "npm:^10.0.0"
jsonc-parser: "npm:^3.2.0"
lodash.camelcase: "npm:^4.3.0"
lodash.kebabcase: "npm:^4.1.1"
playwright: "npm:^1.56.1"
preact: "npm:^10.28.3"
react: "npm:^18.2.0"
@ -58716,8 +58790,11 @@ __metadata:
version: 0.0.0-use.local
resolution: "twenty-server@workspace:packages/twenty-server"
dependencies:
"@ai-sdk/amazon-bedrock": "npm:^3.0.83"
"@ai-sdk/anthropic": "npm:^2.0.17"
"@ai-sdk/google": "npm:^2.0.54"
"@ai-sdk/groq": "npm:^2.0.34"
"@ai-sdk/mistral": "npm:^2.0.28"
"@ai-sdk/openai": "npm:^2.0.30"
"@ai-sdk/provider-utils": "npm:^3.0.9"
"@ai-sdk/xai": "npm:^2.0.19"