mirror of
https://github.com/google-gemini/gemini-cli
synced 2026-04-21 13:37:17 +00:00
feat(core): implement Hybrid Auth and Tool Routing Engine for Gemma 4
This commit is contained in:
parent
ba51d8c0c6
commit
d33d5cf7e5
7 changed files with 167 additions and 18 deletions
|
|
@ -60,6 +60,9 @@ export function resolvePolicyChain(
|
|||
hasAccessToPreview,
|
||||
config,
|
||||
);
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[applyModelSelection] Resolved Model: "${resolvedModel}"`);
|
||||
}
|
||||
const isAutoPreferred = preferredModel
|
||||
? isAutoModel(preferredModel, config)
|
||||
: false;
|
||||
|
|
@ -268,15 +271,27 @@ export function applyModelSelection(
|
|||
modelConfigKey: ModelConfigKey,
|
||||
options: { consumeAttempt?: boolean } = {},
|
||||
): { model: string; config: GenerateContentConfig; maxAttempts?: number } {
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[applyModelSelection] Start - Key: ${JSON.stringify(modelConfigKey)}`);
|
||||
}
|
||||
const resolved = config.modelConfigService.getResolvedConfig(modelConfigKey);
|
||||
const model = resolved.model;
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[applyModelSelection] Initial Model: ${model}`);
|
||||
}
|
||||
const selection = selectModelForAvailability(config, model);
|
||||
|
||||
if (!selection) {
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[applyModelSelection] No selection policy, returning: ${model}`);
|
||||
}
|
||||
return { model, config: resolved.generateContentConfig };
|
||||
}
|
||||
|
||||
const finalModel = selection.selectedModel ?? model;
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[applyModelSelection] Selection result - Final Model: ${finalModel}`);
|
||||
}
|
||||
let generateContentConfig = resolved.generateContentConfig;
|
||||
|
||||
if (finalModel !== model) {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,12 @@ import {
|
|||
type ContentGenerator,
|
||||
type ContentGeneratorConfig,
|
||||
} from '../core/contentGenerator.js';
|
||||
export {
|
||||
AuthType,
|
||||
createContentGenerator,
|
||||
createContentGeneratorConfig,
|
||||
getAuthTypeFromEnv,
|
||||
};
|
||||
import type { OverageStrategy } from '../billing/billing.js';
|
||||
import { PromptRegistry } from '../prompts/prompt-registry.js';
|
||||
import { ResourceRegistry } from '../resources/resource-registry.js';
|
||||
|
|
@ -754,6 +760,8 @@ export class Config implements McpContext, AgentLoopContext {
|
|||
readonly topicState = new TopicState();
|
||||
private contentGeneratorConfig!: ContentGeneratorConfig;
|
||||
private contentGenerator!: ContentGenerator;
|
||||
private utilityGeneratorConfig?: ContentGeneratorConfig;
|
||||
private utilityGenerator?: ContentGenerator;
|
||||
readonly modelConfigService: ModelConfigService;
|
||||
private readonly embeddingModel: string;
|
||||
private readonly sandbox: SandboxConfig | undefined;
|
||||
|
|
@ -1512,6 +1520,39 @@ export class Config implements McpContext, AgentLoopContext {
|
|||
return this.contentGenerator;
|
||||
}
|
||||
|
||||
/**
 * Returns the content generator that utility requests should be sent to.
 *
 * Resolution: the dedicated `utilityGenerator` when one has been set,
 * otherwise the primary `contentGenerator`.
 *
 * Hybrid protection: when the primary auth type is `AuthType.OPENAI` and a
 * separate utility generator exists, the returned object is a Proxy around
 * that generator which overwrites the `model` field of every
 * `generateContent` / `generateContentStream` request with
 * `gemini-3-flash-preview`, so the primary session's model name is never
 * forwarded to the utility generator. All other properties are passed
 * through untouched.
 *
 * The model is NOT rewritten when the result is just the primary generator
 * (no dedicated utility generator available): forcing a Gemini model name
 * onto the primary OpenAI generator would send it a model it was not
 * configured for.
 *
 * @returns The generator to use for utility requests.
 */
getUtilityGenerator(): ContentGenerator {
  const utility = this.utilityGenerator ?? this.contentGenerator;
  const primaryIsOpenAi =
    this.contentGeneratorConfig?.authType === AuthType.OPENAI;
  const hasDedicatedUtility =
    this.utilityGenerator !== undefined &&
    this.utilityGenerator !== this.contentGenerator;

  // Nothing to mask unless requests are being redirected away from an
  // OpenAI primary to a different generator.
  if (!primaryIsOpenAi || !hasDedicatedUtility) {
    return utility;
  }

  // Single source of truth for the model name forced onto utility requests.
  const maskedModel = 'gemini-3-flash-preview';

  // Bind once so the wrapped calls keep the real generator as `this`.
  const originalGenerate = utility.generateContent.bind(utility);
  const originalGenerateStream = utility.generateContentStream.bind(utility);

  return new Proxy(utility, {
    get(target, prop, receiver) {
      if (prop === 'generateContent') {
        return async (
          ...args: Parameters<ContentGenerator['generateContent']>
        ) => {
          const [request, ...rest] = args;
          return originalGenerate({ ...request, model: maskedModel }, ...rest);
        };
      }
      if (prop === 'generateContentStream') {
        return async (
          ...args: Parameters<ContentGenerator['generateContentStream']>
        ) => {
          const [request, ...rest] = args;
          return originalGenerateStream(
            { ...request, model: maskedModel },
            ...rest,
          );
        };
      }
      // Every other member is served by the underlying generator unchanged.
      return Reflect.get(target, prop, receiver);
    },
  });
}
|
||||
|
||||
async refreshAuth(
|
||||
authMethod: AuthType,
|
||||
apiKey?: string,
|
||||
|
|
@ -1567,9 +1608,36 @@ export class Config implements McpContext, AgentLoopContext {
|
|||
this,
|
||||
this.getSessionId(),
|
||||
);
|
||||
// Only assign to instance properties after successful initialization
|
||||
this.contentGeneratorConfig = newContentGeneratorConfig;
|
||||
|
||||
// --- HYBRID AUTH START ---
|
||||
debugLogger.log(`[Config] Checking Hybrid Auth. Primary AuthType: ${this.contentGeneratorConfig.authType}`);
|
||||
// If the primary generator is OpenAI, try to create a utility generator for Google-auth tasks
|
||||
if (this.contentGeneratorConfig.authType === AuthType.OPENAI) {
|
||||
try {
|
||||
debugLogger.log('[Config] Attempting to find secondary Google Auth for hybrid mode...');
|
||||
// Force model to undefined to probe for non-OpenAI auth
|
||||
const googleAuthType = getAuthTypeFromEnv(undefined);
|
||||
debugLogger.log(`[Config] Resolved potential Google AuthType: ${googleAuthType}`);
|
||||
|
||||
if (googleAuthType && googleAuthType !== AuthType.OPENAI) {
|
||||
// If we found a Google auth type, create a separate config and generator for it
|
||||
this.utilityGeneratorConfig = await createContentGeneratorConfig(this, googleAuthType);
|
||||
this.utilityGenerator = await createContentGenerator(this.utilityGeneratorConfig, this, this.getSessionId());
|
||||
debugLogger.log(`[Config] Hybrid mode ENABLED. Utility generator initialized with ${googleAuthType}`);
|
||||
} else {
|
||||
debugLogger.log('[Config] Hybrid mode SKIPPED: Still resolving to OpenAI or no auth found.');
|
||||
}
|
||||
} catch (e) {
|
||||
debugLogger.warn('[Config] Failed to initialize utility generator for hybrid mode', e);
|
||||
}
|
||||
} else {
|
||||
// Otherwise, the primary generator is already Google-auth capable
|
||||
this.utilityGenerator = this.contentGenerator;
|
||||
this.utilityGeneratorConfig = this.contentGeneratorConfig;
|
||||
}
|
||||
// --- HYBRID AUTH END ---
|
||||
|
||||
const codeAssistServer = getCodeAssistServer(this);
|
||||
const quotaPromise = codeAssistServer?.projectId
|
||||
? this.refreshUserQuota()
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ import {
|
|||
type ServerGeminiStreamEvent,
|
||||
type ChatCompressionInfo,
|
||||
} from './turn.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { AuthType, type Config } from '../config/config.js';
|
||||
import { type AgentLoopContext } from '../config/agent-loop-context.js';
|
||||
import { getCoreSystemPrompt } from './prompts.js';
|
||||
import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
|
||||
|
|
@ -1057,6 +1057,18 @@ export class GeminiClient {
|
|||
abortSignal: AbortSignal,
|
||||
role: LlmRole,
|
||||
): Promise<GenerateContentResponse> {
|
||||
// HYBRID MODEL MASKING:
|
||||
// If this is a utility task and we are in a Gemma (OpenAI) session,
|
||||
// steer the config resolution toward a Google-native model.
|
||||
const isPrimaryOpenAi = this.config.getContentGeneratorConfig()?.authType === AuthType.OPENAI;
|
||||
const isUtilityTask = !modelConfigKey.isChatModel ||
|
||||
['web-search', 'web-fetch', 'classifier'].includes(modelConfigKey.model || '');
|
||||
|
||||
if (isPrimaryOpenAi && isUtilityTask) {
|
||||
modelConfigKey = { ...modelConfigKey, model: 'gemini-3-flash-preview' };
|
||||
debugLogger.log(`[GeminiClient] Steering utility task to Google-native model: ${modelConfigKey.model}`);
|
||||
}
|
||||
|
||||
const desiredModelConfig =
|
||||
this.config.modelConfigService.getResolvedConfig(modelConfigKey);
|
||||
let {
|
||||
|
|
@ -1101,19 +1113,37 @@ export class GeminiClient {
|
|||
currentAttemptGenerateContentConfig = generateContentConfig;
|
||||
}
|
||||
|
||||
// HYBRID PROMPT SANITIZATION:
|
||||
// Use a minimal system prompt for utility tasks to prevent Google models
|
||||
// from getting confused by the primary (Gemma) session's heavy instructions.
|
||||
const effectiveSystemInstruction = (isPrimaryOpenAi && isUtilityTask)
|
||||
? { role: 'system', parts: [{ text: 'You are a helpful assistant specialized in using tools like web search. Provide concise and accurate data based on tool results.' }] }
|
||||
: systemInstruction;
|
||||
|
||||
const requestConfig: GenerateContentConfig = {
|
||||
...currentAttemptGenerateContentConfig,
|
||||
abortSignal,
|
||||
systemInstruction,
|
||||
systemInstruction: effectiveSystemInstruction,
|
||||
};
|
||||
|
||||
return this.getContentGeneratorOrFail().generateContent(
|
||||
const generator = (isPrimaryOpenAi && isUtilityTask)
|
||||
? this.config.getUtilityGenerator()
|
||||
: this.getContentGeneratorOrFail();
|
||||
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[GeminiClient] FINAL GENERATOR CALL:
|
||||
- Generator Type: ${generator.constructor.name}
|
||||
- Request Model: ${currentAttemptModel}
|
||||
`);
|
||||
}
|
||||
|
||||
return generator.generateContent(
|
||||
{
|
||||
model: currentAttemptModel,
|
||||
config: requestConfig,
|
||||
contents,
|
||||
},
|
||||
this.lastPromptId,
|
||||
this.lastPromptId ?? 'default',
|
||||
role,
|
||||
);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -79,21 +79,37 @@ export enum AuthType {
|
|||
* 5. OPENAI_API_KEY -> OPENAI
|
||||
*/
|
||||
export function getAuthTypeFromEnv(model?: string): AuthType | undefined {
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Resolving auth for model: ${model}`);
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Env GEMINI_API_KEY: ${process.env['GEMINI_API_KEY'] ? 'PRESENT' : 'MISSING'}`);
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Env OPENAI_API_KEY: ${process.env['OPENAI_API_KEY'] ? 'PRESENT' : 'MISSING'}`);
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Env GOOGLE_GENAI_USE_GCA: ${process.env['GOOGLE_GENAI_USE_GCA']}`);
|
||||
|
||||
// HIGHEST PRIORITY: If a custom model is requested, FORCE OpenAI auth.
|
||||
if (model?.startsWith('google/gemma') || model === 'gemma') {
|
||||
debugLogger.log(`[getAuthTypeFromEnv] FORCING AuthType.OPENAI due to gemma prefix`);
|
||||
return AuthType.OPENAI;
|
||||
}
|
||||
|
||||
if (process.env['GOOGLE_GENAI_USE_GCA'] === 'true') {
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Found GOOGLE_GENAI_USE_GCA, returning LOGIN_WITH_GOOGLE`);
|
||||
return AuthType.LOGIN_WITH_GOOGLE;
|
||||
}
|
||||
if (process.env['GOOGLE_GENAI_USE_VERTEXAI'] === 'true') {
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Found GOOGLE_GENAI_USE_VERTEXAI, returning USE_VERTEX_AI`);
|
||||
return AuthType.USE_VERTEX_AI;
|
||||
}
|
||||
if (process.env['GEMINI_API_KEY']) {
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Found GEMINI_API_KEY, returning USE_GEMINI`);
|
||||
return AuthType.USE_GEMINI;
|
||||
}
|
||||
|
||||
// FALLBACK: If we are in hybrid discovery (no model) and NO OpenAI keys are forced,
|
||||
// assume we can try Google Login if available.
|
||||
if (!model && !process.env['OPENAI_API_KEY'] && !process.env['OPENAI_API_BASE_URL']) {
|
||||
debugLogger.log(`[getAuthTypeFromEnv] Hybrid fallback: assuming Google Auth might be available via OAuth/ADC`);
|
||||
return AuthType.LOGIN_WITH_GOOGLE;
|
||||
}
|
||||
|
||||
const isOpenAiEnv =
|
||||
!!process.env['OPENAI_API_KEY'] || !!process.env['OPENAI_API_BASE_URL'];
|
||||
|
||||
|
|
@ -362,6 +378,10 @@ export async function createContentGenerator(
|
|||
httpOptions.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[ContentGenerator] Creating GoogleGenAI with AuthType: ${config.authType}, BaseURL: ${httpOptions.baseUrl}, Headers: ${JSON.stringify(httpOptions.headers)}`);
|
||||
}
|
||||
|
||||
const googleGenAI = new GoogleGenAI({
|
||||
apiKey: config.apiKey === '' ? undefined : config.apiKey,
|
||||
vertexai: config.vertexai ?? config.authType === AuthType.USE_VERTEX_AI,
|
||||
|
|
|
|||
|
|
@ -667,7 +667,18 @@ export class GeminiChat {
|
|||
`[GeminiChat] FULL REQUEST CONFIG (JSON): ${JSON.stringify(config, null, 2)}`,
|
||||
);
|
||||
|
||||
return this.context.config.getContentGenerator().generateContentStream(
|
||||
// HYBRID ROUTING: Determine which generator to use based on the model.
|
||||
const isInternalModel =
|
||||
modelToUse.startsWith('gemini-') ||
|
||||
['web-search', 'web-fetch', 'classifier', 'summarizer-default'].includes(modelToUse);
|
||||
|
||||
const generator = isInternalModel
|
||||
? this.context.config.getUtilityGenerator()
|
||||
: this.context.config.getContentGenerator();
|
||||
|
||||
debugLogger.log(`[GeminiChat] Routing request to ${isInternalModel ? 'Utility' : 'Primary'} generator for model: ${modelToUse}`);
|
||||
|
||||
return generator.generateContentStream(
|
||||
{
|
||||
model: modelToUse,
|
||||
contents: contentsToUse,
|
||||
|
|
|
|||
|
|
@ -170,7 +170,6 @@ export class OpenAiContentGenerator implements ContentGenerator {
|
|||
if (!schema || typeof schema !== 'object') return schema;
|
||||
const transformed: any = { ...schema };
|
||||
|
||||
// Convert type to lowercase for OpenAI compatibility
|
||||
if (typeof transformed.type === 'string') {
|
||||
transformed.type = transformed.type.toLowerCase();
|
||||
}
|
||||
|
|
@ -191,9 +190,6 @@ export class OpenAiContentGenerator implements ContentGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
// OpenAI uses "required" array at the same level as "properties"
|
||||
// Gemini sometimes uses "required" inside properties or different structures
|
||||
|
||||
delete transformed.format;
|
||||
delete transformed.nullable;
|
||||
return transformed;
|
||||
|
|
@ -269,8 +265,7 @@ export class OpenAiContentGenerator implements ContentGenerator {
|
|||
if (systemText && messages.length > 0) {
|
||||
const firstUserMsg = messages.find(m => m.role === 'user');
|
||||
if (firstUserMsg && typeof firstUserMsg.content === 'string') {
|
||||
// Add a nudge for Gemma to use correct tool parameter names
|
||||
const nudge = "\n\nIMPORTANT: When calling tools, ensure you use the exact parameter names defined in the tool's schema (e.g., use 'file_path' instead of 'path' for read_file).";
|
||||
const nudge = "\n\nIMPORTANT: \n1. Use 'google_web_search' for any external information. \n2. NEVER use 'run_shell_command' for searching or finding files. \n3. Use tools IMMEDIATELY without asking for permission. \n4. When calling tools, ensure you use the EXACT parameter names (e.g., 'query' for search, 'file_path' for read_file).";
|
||||
firstUserMsg.content = `System Instruction:\n${systemText}${nudge}\n\nUser Question:\n${firstUserMsg.content}`;
|
||||
}
|
||||
}
|
||||
|
|
@ -295,7 +290,6 @@ export class OpenAiContentGenerator implements ContentGenerator {
|
|||
const parts: any[] = [];
|
||||
const functionCalls: any[] = [];
|
||||
|
||||
// Parse Gemma-style tool calls: <|tool_call|>call:name{args}<tool_call|>
|
||||
const toolCallPatterns = [
|
||||
/<\|tool_call>([\s\S]*?)<tool_call\|?>/g,
|
||||
/call:([a-zA-Z0-9_]+)(\{[\s\S]*?\})/g
|
||||
|
|
@ -313,19 +307,26 @@ export class OpenAiContentGenerator implements ContentGenerator {
|
|||
const name = parts_match[1].trim();
|
||||
let rawArgs = parts_match[2];
|
||||
|
||||
// SUPER ROBUST PSEUDO-JSON PARSING
|
||||
// 1. Quote keys: {key: -> {"key":
|
||||
// SUPER ROBUST PSEUDO-JSON PARSING V3
|
||||
// 1. Quote ALL unquoted keys (even if they start the object)
|
||||
// Regex: Finds an identifier followed by a colon that isn't already preceded by a quote
|
||||
rawArgs = rawArgs.replace(/([{,]\s*)([a-zA-Z0-9_]+)(\s*:)/g, '$1"$2"$3');
|
||||
// 2. Quote string values that are not already quoted: :value -> :"value"
|
||||
// We target values that don't start with ", {, [, t (true), f (false), n (null) or a number
|
||||
// If the very first key is missing its opening brace context in some outputs
|
||||
if (rawArgs.startsWith('{') && !rawArgs.startsWith('{"') && !rawArgs.startsWith('{"')) {
|
||||
rawArgs = rawArgs.replace(/^{([a-zA-Z0-9_]+):/, '{"$1":');
|
||||
}
|
||||
|
||||
// 2. Quote string values ONLY if not already quoted
|
||||
rawArgs = rawArgs.replace(/:\s*([^"{\[tf\n\-0-9\s][^,}\n]*)/g, (match, p1) => {
|
||||
const trimmed = p1.trim();
|
||||
if (trimmed === 'true' || trimmed === 'false' || trimmed === 'null' || !isNaN(Number(trimmed))) {
|
||||
return `: ${trimmed}`;
|
||||
}
|
||||
if (trimmed.startsWith('"')) return `: ${trimmed}`;
|
||||
return `: "${trimmed}"`;
|
||||
});
|
||||
// 3. Clean up trailing commas before } or ]
|
||||
|
||||
// 3. Clean up
|
||||
rawArgs = rawArgs.replace(/,\s*([}\]])/g, '$1');
|
||||
|
||||
const args = JSON.parse(rawArgs);
|
||||
|
|
|
|||
|
|
@ -98,6 +98,10 @@ class WebSearchToolInvocation extends BaseToolInvocation<
|
|||
LlmRole.UTILITY_TOOL,
|
||||
);
|
||||
|
||||
if (process.env['DEBUG']) {
|
||||
console.log(`[WebSearchTool] Raw Response: ${JSON.stringify(response, null, 2)}`);
|
||||
}
|
||||
|
||||
const responseText = getResponseText(response);
|
||||
const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
|
||||
const sources = groundingMetadata?.groundingChunks as
|
||||
|
|
|
|||
Loading…
Reference in a new issue