From 4c1c0b591ab710cd12bd9991a021aae99125e293 Mon Sep 17 00:00:00 2001 From: Andrew Pareles Date: Sat, 10 May 2025 15:59:26 -0700 Subject: [PATCH] GoogleGenerativeAI (deprecated) -> genai, add gemini reasoning, improve reasoning logic --- package-lock.json | 37 ++++++++-- package.json | 2 +- .../browser/convertToLLMMessageService.ts | 16 +---- .../contrib/void/common/modelCapabilities.ts | 42 ++++++++--- .../void/common/sendLLMMessageTypes.ts | 4 +- .../llmMessage/sendLLMMessage.impl.ts | 72 ++++++++++--------- 6 files changed, 110 insertions(+), 63 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7057acfd..5c783599 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,7 @@ "@anthropic-ai/sdk": "^0.40.0", "@c4312/eventsource-umd": "^3.0.5", "@floating-ui/react": "^0.27.8", - "@google/generative-ai": "^0.24.1", + "@google/genai": "^0.13.0", "@microsoft/1ds-core-js": "^3.2.13", "@microsoft/1ds-post-js": "^3.2.13", "@mistralai/mistralai": "^1.6.0", @@ -1817,11 +1817,17 @@ "integrity": "sha512-MDWhGtE+eHw5JW7lq4qhc5yRLS11ERl1c7Z6Xd0a58DozHES6EnNNwUWbMiG4J9Cgj053Bhk8zvlhFYKVhULwg==", "license": "MIT" }, - "node_modules/@google/generative-ai": { - "version": "0.24.1", - "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz", - "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==", + "node_modules/@google/genai": { + "version": "0.13.0", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-0.13.0.tgz", + "integrity": "sha512-eaEncWt875H7046T04mOpxpHJUM+jLIljEf+5QctRyOeChylE/nhpwm1bZWTRWoOu/t46R9r+PmgsJFhTpE7tQ==", "license": "Apache-2.0", + "dependencies": { + "google-auth-library": "^9.14.2", + "ws": "^8.18.0", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.4" + }, "engines": { "node": ">=18.0.0" } @@ -23767,6 +23773,27 @@ "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": 
"sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8= sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" }, + "node_modules/ws": { + "version": "8.18.2", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz", + "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/xml": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/xml/-/xml-1.0.1.tgz", diff --git a/package.json b/package.json index 767d6b49..7a5bc366 100644 --- a/package.json +++ b/package.json @@ -75,7 +75,7 @@ "@anthropic-ai/sdk": "^0.40.0", "@c4312/eventsource-umd": "^3.0.5", "@floating-ui/react": "^0.27.8", - "@google/generative-ai": "^0.24.1", + "@google/genai": "^0.13.0", "@microsoft/1ds-core-js": "^3.2.13", "@microsoft/1ds-post-js": "^3.2.13", "@mistralai/mistralai": "^1.6.0", diff --git a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts index 48b21b77..74d81593 100644 --- a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts +++ b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts @@ -237,18 +237,6 @@ const prepareMessages_XML_tools = (messages: SimpleLLMMessage[], supportsAnthrop } - - -export type GeminiMessage = { - role: 'user' | 'model'; // Gemini uses 'user' and 'model' roles - parts: ( - | { text: string; } - | { functionCall: { tool_call: any } } - | { functionResponse: { name: ToolName, response: { result: string } } } - )[]; -}; - - // --- CHAT --- const prepareOpenAIOrAnthropicMessages = ({ @@ -457,7 +445,7 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => { } else 
if (c.type === 'tool_use') { latestToolName = c.name as ToolName - return { functionCall: { name: c.name as ToolName, args: c.input } } + return { functionCall: { id: c.id, name: c.name as ToolName, args: c.input } } } else return null }).filter(m => !!m) @@ -475,7 +463,7 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => { } else if (c.type === 'tool_result') { if (!latestToolName) return null - return { functionResponse: { name: latestToolName, response: { result: c.content } } } + return { functionResponse: { id: c.tool_use_id, name: latestToolName, response: { output: c.content } } } } else return null }).filter(m => !!m) diff --git a/src/vs/workbench/contrib/void/common/modelCapabilities.ts b/src/vs/workbench/contrib/void/common/modelCapabilities.ts index e9eec31b..6048f143 100644 --- a/src/vs/workbench/contrib/void/common/modelCapabilities.ts +++ b/src/vs/workbench/contrib/void/common/modelCapabilities.ts @@ -445,7 +445,7 @@ const anthropicModelOptions = { supportsReasoning: true, canTurnOffReasoning: true, canIOReasoning: true, - reasoningReservedOutputTokenSpace: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19 + reasoningReservedOutputTokenSpace: 8192, // can bump it to 128_000 with beta mode output-128k-2025-02-19 reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // they recommend batching if max > 32_000. 
we cap at 8192 because above is typically not necessary (often even buggy) }, @@ -715,6 +715,7 @@ const xAISettings: VoidStaticProviderInfo = { // ---------------- GEMINI ---------------- const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing + // https://ai.google.dev/gemini-api/docs/thinking#set-budget 'gemini-2.5-pro-preview-05-06': { contextWindow: 1_048_576, reservedOutputTokenSpace: 8_192, @@ -723,7 +724,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing supportsFIM: false, supportsSystemMessage: 'separated', specialToolFormat: 'gemini-style', - reasoningCapabilities: false, + reasoningCapabilities: { + supportsReasoning: true, + canTurnOffReasoning: true, + canIOReasoning: false, + reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576 + reasoningReservedOutputTokenSpace: 8192, + }, }, 'gemini-2.0-flash-lite': { contextWindow: 1_048_576, @@ -733,7 +740,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing supportsFIM: false, supportsSystemMessage: 'separated', specialToolFormat: 'gemini-style', - reasoningCapabilities: false, + reasoningCapabilities: false, // no reasoning }, 'gemini-2.5-flash-preview-04-17': { contextWindow: 1_048_576, @@ -743,7 +750,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing supportsFIM: false, supportsSystemMessage: 'separated', specialToolFormat: 'gemini-style', - reasoningCapabilities: false, + reasoningCapabilities: { + supportsReasoning: true, + canTurnOffReasoning: true, + canIOReasoning: false, + reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576 + reasoningReservedOutputTokenSpace: 8192, + }, }, 'gemini-2.5-pro-exp-03-25': { contextWindow: 1_048_576, @@ -753,7 +766,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing supportsFIM: false, supportsSystemMessage: 'separated', 
specialToolFormat: 'gemini-style', - reasoningCapabilities: false, + reasoningCapabilities: { + supportsReasoning: true, + canTurnOffReasoning: true, + canIOReasoning: false, + reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576 + reasoningReservedOutputTokenSpace: 8192, + }, }, 'gemini-2.0-flash': { contextWindow: 1_048_576, @@ -763,7 +782,13 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing supportsFIM: false, supportsSystemMessage: 'separated', specialToolFormat: 'gemini-style', - reasoningCapabilities: false, + reasoningCapabilities: { // thinking: experimental as of 5-10-25 + supportsReasoning: true, + canTurnOffReasoning: true, + canIOReasoning: false, + reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // max is really 24576 + reasoningReservedOutputTokenSpace: 8192, + }, }, 'gemini-2.0-flash-lite-preview-02-05': { contextWindow: 1_048_576, @@ -1144,7 +1169,7 @@ const openRouterModelOptions_assumingOpenAICompat = { supportsReasoning: true, canTurnOffReasoning: false, canIOReasoning: true, - reasoningReservedOutputTokenSpace: 64_000, + reasoningReservedOutputTokenSpace: 8192, reasoningSlider: { type: 'budget_slider', min: 1024, max: 8192, default: 1024 }, // they recommend batching if max > 32_000. 
}, }, @@ -1347,8 +1372,7 @@ export const getSendableReasoningInfo = ( overridesOfModel: OverridesOfModel | undefined, ): SendableReasoningInfo => { - const { canIOReasoning, reasoningSlider: reasoningBudgetSlider } = getModelCapabilities(providerName, modelName, overridesOfModel).reasoningCapabilities || {} - if (!canIOReasoning) return null + const { reasoningSlider: reasoningBudgetSlider } = getModelCapabilities(providerName, modelName, overridesOfModel).reasoningCapabilities || {} const isReasoningEnabled = getIsReasoningEnabledState(featureName, providerName, modelName, modelSelectionOptions, overridesOfModel) if (!isReasoningEnabled) return null diff --git a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts index 6b7cad52..f6e634ee 100644 --- a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts +++ b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts @@ -56,13 +56,13 @@ export type GeminiLLMChatMessage = { role: 'model' parts: ( | { text: string; } - | { functionCall: { name: ToolName, args: object } } + | { functionCall: { id: string; name: ToolName, args: Record<string, unknown> } } )[]; } | { role: 'user'; parts: ( | { text: string; } - | { functionResponse: { name: ToolName, response: { result: string } } } + | { functionResponse: { id: string; name: ToolName, response: { output: string } } } )[]; } diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts index 4eec4850..4a7dd7da 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts @@ -10,11 +10,11 @@ import { Ollama } from 'ollama'; import OpenAI, { ClientOptions } from 'openai'; import { MistralCore } from '@mistralai/mistralai/core.js'; import { fimComplete } from 
'@mistralai/mistralai/funcs/fimComplete.js'; -import { GoogleGenerativeAI, Tool as GeminiTool, SchemaType, FunctionDeclaration, FunctionDeclarationSchemaProperty } from '@google/generative-ai'; +import { Tool as GeminiTool, FunctionDeclaration, GoogleGenAI, ThinkingConfig, Schema, Type } from '@google/genai'; import { GoogleAuth } from 'google-auth-library' /* eslint-enable */ -import { AnthropicLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js'; +import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js'; import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js'; import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js'; import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js'; @@ -642,25 +642,24 @@ const sendOllamaFIM = ({ messages, onFinalMessage, onError, settingsOfProvider, // ---------------- GEMINI NATIVE IMPLEMENTATION ---------------- - - const toGeminiFunctionDecl = (toolInfo: InternalToolInfo) => { const { name, description, params } = toolInfo - const paramsWithType: { [k: string]: FunctionDeclarationSchemaProperty } = {} - for (const key in params) { - paramsWithType[key] = { type: SchemaType.STRING, ...params[key] } - } return { name, description, parameters: { - type: SchemaType.OBJECT, - properties: paramsWithType, + type: Type.OBJECT, + properties: Object.entries(params).reduce((acc, [key, value]) => { + acc[key] = { + type: Type.STRING, + description: value.description + }; + return 
acc; + }, {} as Record<string, Schema>) } } satisfies FunctionDeclaration } - const geminiTools = (chatMode: ChatMode): GeminiTool[] | null => { const allowedTools = availableTools(chatMode) if (!allowedTools || Object.keys(allowedTools).length === 0) return null @@ -700,27 +699,29 @@ const sendGeminiChat = async ({ // reasoningCapabilities, } = getModelCapabilities(providerName, modelName_, overridesOfModel) - const { providerReasoningIOSettings } = getProviderCapabilities(providerName) + // const { providerReasoningIOSettings } = getProviderCapabilities(providerName) // reasoning // const { canIOReasoning, openSourceThinkTags, } = reasoningCapabilities || {} const reasoningInfo = getSendableReasoningInfo('Chat', providerName, modelName_, modelSelectionOptions, overridesOfModel) // user's modelName_ here - const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {} + // const includeInPayload = providerReasoningIOSettings?.input?.includeInPayload?.(reasoningInfo) || {} + + console.log('reasoning info', JSON.stringify(reasoningInfo)) + + const thinkingConfig: ThinkingConfig | undefined = !reasoningInfo?.isReasoningEnabled ? undefined + : reasoningInfo.type === 'budget_slider_value' ? + { thinkingBudget: reasoningInfo.reasoningBudget } + : undefined // tools - const potentialTools = chatMode !== null ? geminiTools(chatMode) : null - const nativeToolsObj = potentialTools && specialToolFormat === 'gemini-style' ? - { tools: potentialTools } as const - : {} + const potentialTools = chatMode !== null ? geminiTools(chatMode) : undefined + const toolConfig = potentialTools && specialToolFormat === 'gemini-style' ? 
+ potentialTools + : undefined // instance - const genAI = new GoogleGenerativeAI( - thisConfig.apiKey - ); - const model = genAI.getGenerativeModel({ - systemInstruction: separateSystemMessage, - model: modelName, - }); + const genAI = new GoogleGenAI({ apiKey: thisConfig.apiKey }); + // manually parse out tool results if XML if (!specialToolFormat) { @@ -736,23 +737,30 @@ const sendGeminiChat = async ({ let toolName = '' let toolParamsStr = '' - model.generateContentStream({ - systemInstruction: separateSystemMessage ?? undefined, - contents: messages as any, - ...includeInPayload, - ...nativeToolsObj, + console.log('TOOL!', toolConfig) + console.log('REAS!', thinkingConfig) + + + genAI.models.generateContentStream({ + model: modelName, + config: { + systemInstruction: separateSystemMessage, + thinkingConfig: thinkingConfig, + tools: toolConfig, + }, + contents: messages as GeminiLLMChatMessage[], }) - .then(async ({ stream, response }) => { + .then(async (stream) => { _setAborter(() => { stream.return(fullTextSoFar); }); // Process the stream for await (const chunk of stream) { // message - const newText = chunk.text() ?? '' + const newText = chunk.text ?? '' fullTextSoFar += newText // tool call - const functionCalls = chunk.functionCalls() + const functionCalls = chunk.functionCalls if (functionCalls && functionCalls.length > 0) { const functionCall = functionCalls[0] // Get the first function call toolName = functionCall.name ?? ''