Migrate from the deprecated GoogleGenerativeAI SDK (@google/generative-ai) to @google/genai, add Gemini reasoning support, and improve reasoning logic

This commit is contained in:
Andrew Pareles 2025-05-10 15:59:26 -07:00
parent 61fc593903
commit 4c1c0b591a
6 changed files with 110 additions and 63 deletions

37
package-lock.json generated
View file

@ -13,7 +13,7 @@
"@anthropic-ai/sdk": "^0.40.0",
"@c4312/eventsource-umd": "^3.0.5",
"@floating-ui/react": "^0.27.8",
"@google/generative-ai": "^0.24.1",
"@google/genai": "^0.13.0",
"@microsoft/1ds-core-js": "^3.2.13",
"@microsoft/1ds-post-js": "^3.2.13",
"@mistralai/mistralai": "^1.6.0",
@ -1817,11 +1817,17 @@
"integrity": "sha512-MDWhGtE+eHw5JW7lq4qhc5yRLS11ERl1c7Z6Xd0a58DozHES6EnNNwUWbMiG4J9Cgj053Bhk8zvlhFYKVhULwg==",
"license": "MIT"
},
"node_modules/@google/generative-ai": {
"version": "0.24.1",
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
"integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==",
"node_modules/@google/genai": {
"version": "0.13.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-0.13.0.tgz",
"integrity": "sha512-eaEncWt875H7046T04mOpxpHJUM+jLIljEf+5QctRyOeChylE/nhpwm1bZWTRWoOu/t46R9r+PmgsJFhTpE7tQ==",
"license": "Apache-2.0",
"dependencies": {
"google-auth-library": "^9.14.2",
"ws": "^8.18.0",
"zod": "^3.22.4",
"zod-to-json-schema": "^3.22.4"
},
"engines": {
"node": ">=18.0.0"
}
@ -23767,6 +23773,27 @@
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8= sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
},
"node_modules/ws": {
"version": "8.18.2",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz",
"integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/xml": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/xml/-/xml-1.0.1.tgz",

View file

@ -75,7 +75,7 @@
"@anthropic-ai/sdk": "^0.40.0",
"@c4312/eventsource-umd": "^3.0.5",
"@floating-ui/react": "^0.27.8",
"@google/generative-ai": "^0.24.1",
"@google/genai": "^0.13.0",
"@microsoft/1ds-core-js": "^3.2.13",
"@microsoft/1ds-post-js": "^3.2.13",
"@mistralai/mistralai": "^1.6.0",

View file

@ -237,18 +237,6 @@ const prepareMessages_XML_tools = (messages: SimpleLLMMessage[], supportsAnthrop
}
/**
 * Shape of a single chat message in Gemini format: a role plus a list of parts.
 * Each part is one of plain text, a function call, or a function response.
 */
export type GeminiMessage = {
role: 'user' | 'model'; // Gemini uses 'user' and 'model' roles
parts: (
// plain text content
| { text: string; }
// function call; NOTE(review): the `tool_call` payload is untyped (`any`)
| { functionCall: { tool_call: any } }
// function result, keyed by tool name, with the result carried as a string
| { functionResponse: { name: ToolName, response: { result: string } } }
)[];
};
// --- CHAT ---
const prepareOpenAIOrAnthropicMessages = ({
@ -457,7 +445,7 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
}
else if (c.type === 'tool_use') {
latestToolName = c.name as ToolName
return { functionCall: { name: c.name as ToolName, args: c.input } }
return { functionCall: { id: c.id, name: c.name as ToolName, args: c.input } }
}
else return null
}).filter(m => !!m)
@ -475,7 +463,7 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
}
else if (c.type === 'tool_result') {
if (!latestToolName) return null
return { functionResponse: { name: latestToolName, response: { result: c.content } } }
return { functionResponse: { id: c.tool_use_id, name: latestToolName, response: { output: c.content } } }
}
else return null
}).filter(m => !!m)

View file

@ -445,7 +445,7 @@ const anthropicModelOptions = {
supportsReasoning: true,
canTurnOffReasoning: true,
canIOReasoning: true,
reasoningReservedOutputTokenSpace: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19
reasoningReservedOutputTokenSpace: 8192, // can bump it to 128_000 with beta mode output-128k-2025-02-19
reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // they recommend batching if max > 32_000. we cap at 8192 because above is typically not necessary (often even buggy)
},
@ -715,6 +715,7 @@ const xAISettings: VoidStaticProviderInfo = {
// ---------------- GEMINI ----------------
const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
// https://ai.google.dev/gemini-api/docs/thinking#set-budget
'gemini-2.5-pro-preview-05-06': {
contextWindow: 1_048_576,
reservedOutputTokenSpace: 8_192,
@ -723,7 +724,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'separated',
specialToolFormat: 'gemini-style',
reasoningCapabilities: false,
reasoningCapabilities: {
supportsReasoning: true,
canTurnOffReasoning: true,
canIOReasoning: false,
reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576
reasoningReservedOutputTokenSpace: 8192,
},
},
'gemini-2.0-flash-lite': {
contextWindow: 1_048_576,
@ -733,7 +740,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'separated',
specialToolFormat: 'gemini-style',
reasoningCapabilities: false,
reasoningCapabilities: false, // no reasoning
},
'gemini-2.5-flash-preview-04-17': {
contextWindow: 1_048_576,
@ -743,7 +750,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'separated',
specialToolFormat: 'gemini-style',
reasoningCapabilities: false,
reasoningCapabilities: {
supportsReasoning: true,
canTurnOffReasoning: true,
canIOReasoning: false,
reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576
reasoningReservedOutputTokenSpace: 8192,
},
},
'gemini-2.5-pro-exp-03-25': {
contextWindow: 1_048_576,
@ -753,7 +766,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'separated',
specialToolFormat: 'gemini-style',
reasoningCapabilities: false,
reasoningCapabilities: {
supportsReasoning: true,
canTurnOffReasoning: true,
canIOReasoning: false,
reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576
reasoningReservedOutputTokenSpace: 8192,
},
},
'gemini-2.0-flash': {
contextWindow: 1_048_576,
@ -763,7 +782,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'separated',
specialToolFormat: 'gemini-style',
reasoningCapabilities: false,
reasoningCapabilities: { // thinking: experimental as of 5-10-25
supportsReasoning: true,
canTurnOffReasoning: true,
canIOReasoning: false,
reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576
reasoningReservedOutputTokenSpace: 8192,
},
},
'gemini-2.0-flash-lite-preview-02-05': {
contextWindow: 1_048_576,
@ -1144,7 +1169,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
supportsReasoning: true,
canTurnOffReasoning: false,
canIOReasoning: true,
reasoningReservedOutputTokenSpace: 64_000,
reasoningReservedOutputTokenSpace: 8192,
reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // they recommend batching if max > 32_000.
},
},
@ -1347,8 +1372,7 @@ export const getSendableReasoningInfo = (
overridesOfModel: OverridesOfModel | undefined,
): SendableReasoningInfo => {
const { canIOReasoning, reasoningSlider: reasoningBudgetSlider } = getModelCapabilities(providerName, modelName, overridesOfModel).reasoningCapabilities || {}
if (!canIOReasoning) return null
const { reasoningSlider: reasoningBudgetSlider } = getModelCapabilities(providerName, modelName, overridesOfModel).reasoningCapabilities || {}
const isReasoningEnabled = getIsReasoningEnabledState(featureName, providerName, modelName, modelSelectionOptions, overridesOfModel)
if (!isReasoningEnabled) return null

View file

@ -56,13 +56,13 @@ export type GeminiLLMChatMessage = {
role: 'model'
parts: (
| { text: string; }
| { functionCall: { name: ToolName, args: object } }
| { functionCall: { id: string; name: ToolName, args: Record<string, unknown> } }
)[];
} | {
role: 'user';
parts: (
| { text: string; }
| { functionResponse: { name: ToolName, response: { result: string } } }
| { functionResponse: { id: string; name: ToolName, response: { output: string } } }
)[];
}

View file

@ -10,11 +10,11 @@ import { Ollama } from 'ollama';
import OpenAI, { ClientOptions } from 'openai';
import { MistralCore } from '@mistralai/mistralai/core.js';
import { fimComplete } from '@mistralai/mistralai/funcs/fimComplete.js';
import { GoogleGenerativeAI, Tool as GeminiTool, SchemaType, FunctionDeclaration, FunctionDeclarationSchemaProperty } from '@google/generative-ai';
import { Tool as GeminiTool, FunctionDeclaration, GoogleGenAI, ThinkingConfig, Schema, Type } from '@google/genai';
import { GoogleAuth } from 'google-auth-library'
/* eslint-enable */
import { AnthropicLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
@ -642,25 +642,24 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider,
// ---------------- GEMINI NATIVE IMPLEMENTATION ----------------
/**
 * Converts an internal tool description into a Gemini function declaration.
 *
 * The result carries the tool's `name` and `description` plus an OBJECT-typed
 * `parameters` schema with one STRING-typed property per entry in `params`.
 *
 * NOTE(review): this span contains two variants side by side, one built with
 * `SchemaType` / `FunctionDeclarationSchemaProperty` and one built with
 * `Type` / `Schema`. As written, `type` and `properties` each appear twice in
 * the `parameters` literal. Presumably only the `Type` / `Schema` lines are
 * meant to remain after the move to '@google/genai' - confirm before relying
 * on either variant.
 *
 * @param toolInfo Internal tool description providing `name`, `description` and `params`.
 * @returns An object literal checked against `FunctionDeclaration` via `satisfies`.
 */
const toGeminiFunctionDecl = (toolInfo: InternalToolInfo) => {
const { name, description, params } = toolInfo
// Variant 1: build the property map up front. Each entry defaults to STRING,
// then every field of the param entry is spread over that default.
const paramsWithType: { [k: string]: FunctionDeclarationSchemaProperty } = {}
for (const key in params) {
paramsWithType[key] = { type: SchemaType.STRING, ...params[key] }
}
return {
name,
description,
parameters: {
// Variant 1 schema fields (SchemaType-based)
type: SchemaType.OBJECT,
properties: paramsWithType,
// Variant 2 schema fields (Type-based): only `description` is copied from
// each param entry; the type is always STRING.
type: Type.OBJECT,
properties: Object.entries(params).reduce((acc, [key, value]) => {
acc[key] = {
type: Type.STRING,
description: value.description
};
return acc;
}, {} as Record<string, Schema>)
}
} satisfies FunctionDeclaration // compile-time shape check; does not widen the literal's type
}
const geminiTools = (chatMode: ChatMode): GeminiTool[] | null => {
const allowedTools = availableTools(chatMode)
if (!allowedTools || Object.keys(allowedTools).length === 0) return null
@ -700,27 +699,29 @@ const sendGeminiChat = async ({
// reasoningCapabilities,
} = getModelCapabilities(providerName, modelName_, overridesOfModel)
const { providerReasoningIOSettings } = getProviderCapabilities(providerName)
// const { providerReasoningIOSettings } = getProviderCapabilities(providerName)
// reasoning
// const { canIOReasoning, openSourceThinkTags, } = reasoningCapabilities || {}
const reasoningInfo = getSendableReasoningInfo('Chat', providerName, modelName_, modelSelectionOptions, overridesOfModel) // user's modelName_ here
const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {}
// const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {}
console.log('reasoning info', JSON.stringify(reasoningInfo))
const thinkingConfig: ThinkingConfig | undefined = !reasoningInfo?.isReasoningEnabled ? undefined
: reasoningInfo.type === 'budget_slider_value' ?
{ thinkingBudget: reasoningInfo.reasoningBudget }
: undefined
// tools
const potentialTools = chatMode !== null ? geminiTools(chatMode) : null
const nativeToolsObj = potentialTools && specialToolFormat === 'gemini-style' ?
{ tools: potentialTools } as const
: {}
const potentialTools = chatMode !== null ? geminiTools(chatMode) : undefined
const toolConfig = potentialTools && specialToolFormat === 'gemini-style' ?
potentialTools
: undefined
// instance
const genAI = new GoogleGenerativeAI(
thisConfig.apiKey
);
const model = genAI.getGenerativeModel({
systemInstruction: separateSystemMessage,
model: modelName,
});
const genAI = new GoogleGenAI({ apiKey: thisConfig.apiKey });
// manually parse out tool results if XML
if (!specialToolFormat) {
@ -736,23 +737,30 @@ const sendGeminiChat = async ({
let toolName = ''
let toolParamsStr = ''
model.generateContentStream({
systemInstruction: separateSystemMessage ?? undefined,
contents: messages as any,
...includeInPayload,
...nativeToolsObj,
console.log('TOOL!', toolConfig)
console.log('REAS!', thinkingConfig)
genAI.models.generateContentStream({
model: modelName,
config: {
systemInstruction: separateSystemMessage,
thinkingConfig: thinkingConfig,
tools: toolConfig,
},
contents: messages as GeminiLLMChatMessage[],
})
.then(async ({ stream, response }) => {
.then(async (stream) => {
_setAborter(() => { stream.return(fullTextSoFar); });
// Process the stream
for await (const chunk of stream) {
// message
const newText = chunk.text() ?? ''
const newText = chunk.text ?? ''
fullTextSoFar += newText
// tool call
const functionCalls = chunk.functionCalls()
const functionCalls = chunk.functionCalls
if (functionCalls && functionCalls.length > 0) {
const functionCall = functionCalls[0] // Get the first function call
toolName = functionCall.name ?? ''