feat(core): implement Hybrid Auth and Tool Routing Engine for Gemma 4

This commit is contained in:
jihoonP 2026-04-21 13:08:56 +09:00
parent ba51d8c0c6
commit d33d5cf7e5
7 changed files with 167 additions and 18 deletions

View file

@ -60,6 +60,9 @@ export function resolvePolicyChain(
hasAccessToPreview,
config,
);
if (process.env['DEBUG']) {
console.log(`[applyModelSelection] Resolved Model: "${resolvedModel}"`);
}
const isAutoPreferred = preferredModel
? isAutoModel(preferredModel, config)
: false;
@ -268,15 +271,27 @@ export function applyModelSelection(
modelConfigKey: ModelConfigKey,
options: { consumeAttempt?: boolean } = {},
): { model: string; config: GenerateContentConfig; maxAttempts?: number } {
if (process.env['DEBUG']) {
console.log(`[applyModelSelection] Start - Key: ${JSON.stringify(modelConfigKey)}`);
}
const resolved = config.modelConfigService.getResolvedConfig(modelConfigKey);
const model = resolved.model;
if (process.env['DEBUG']) {
console.log(`[applyModelSelection] Initial Model: ${model}`);
}
const selection = selectModelForAvailability(config, model);
if (!selection) {
if (process.env['DEBUG']) {
console.log(`[applyModelSelection] No selection policy, returning: ${model}`);
}
return { model, config: resolved.generateContentConfig };
}
const finalModel = selection.selectedModel ?? model;
if (process.env['DEBUG']) {
console.log(`[applyModelSelection] Selection result - Final Model: ${finalModel}`);
}
let generateContentConfig = resolved.generateContentConfig;
if (finalModel !== model) {

View file

@ -25,6 +25,12 @@ import {
type ContentGenerator,
type ContentGeneratorConfig,
} from '../core/contentGenerator.js';
export {
AuthType,
createContentGenerator,
createContentGeneratorConfig,
getAuthTypeFromEnv,
};
import type { OverageStrategy } from '../billing/billing.js';
import { PromptRegistry } from '../prompts/prompt-registry.js';
import { ResourceRegistry } from '../resources/resource-registry.js';
@ -754,6 +760,8 @@ export class Config implements McpContext, AgentLoopContext {
readonly topicState = new TopicState();
private contentGeneratorConfig!: ContentGeneratorConfig;
private contentGenerator!: ContentGenerator;
private utilityGeneratorConfig?: ContentGeneratorConfig;
private utilityGenerator?: ContentGenerator;
readonly modelConfigService: ModelConfigService;
private readonly embeddingModel: string;
private readonly sandbox: SandboxConfig | undefined;
@ -1512,6 +1520,39 @@ export class Config implements McpContext, AgentLoopContext {
return this.contentGenerator;
}
/**
 * Returns the content generator to use for utility requests.
 *
 * - When the primary auth type is not OpenAI, this returns the stored utility
 *   generator, falling back to the primary content generator.
 * - When the primary auth type is OpenAI AND a separate utility generator
 *   exists, this returns a proxy around that utility generator which rewrites
 *   the `model` field of every `generateContent` / `generateContentStream`
 *   request to the utility model, so the primary session's model name is
 *   never forwarded to the utility generator.
 * - When the primary auth type is OpenAI but no separate utility generator
 *   exists, the primary generator is returned unwrapped. Rewriting the model
 *   in that case would send the utility model name to the primary
 *   (OpenAI-auth) generator itself.
 *
 * @returns The generator that utility requests should be sent to.
 */
getUtilityGenerator(): ContentGenerator {
  // Model name forced onto every request routed through the hybrid proxy.
  const utilityModel = 'gemini-3-flash-preview';

  const primaryIsOpenAi =
    this.contentGeneratorConfig?.authType === AuthType.OPENAI;
  const dedicated = this.utilityGenerator;

  // Only mask the model when requests really go to a generator other than
  // the primary one; otherwise hand back the fallback untouched.
  if (!primaryIsOpenAi || !dedicated || dedicated === this.contentGenerator) {
    return dedicated ?? this.contentGenerator;
  }

  type GenerateArgs = Parameters<ContentGenerator['generateContent']>;
  type StreamArgs = Parameters<ContentGenerator['generateContentStream']>;

  // Bind once so the wrappers keep calling the real generator with the
  // correct `this`, regardless of how the proxy is invoked.
  const originalGenerate = dedicated.generateContent.bind(dedicated);
  const originalGenerateStream =
    dedicated.generateContentStream.bind(dedicated);

  const maskedGenerate = async (...args: GenerateArgs) => {
    const [request, ...rest] = args;
    return originalGenerate({ ...request, model: utilityModel }, ...rest);
  };
  const maskedGenerateStream = async (...args: StreamArgs) => {
    const [request, ...rest] = args;
    return originalGenerateStream({ ...request, model: utilityModel }, ...rest);
  };

  return new Proxy(dedicated, {
    get(target, prop, receiver) {
      if (prop === 'generateContent') {
        return maskedGenerate;
      }
      if (prop === 'generateContentStream') {
        return maskedGenerateStream;
      }
      // Every other member is forwarded to the wrapped generator unchanged.
      return Reflect.get(target, prop, receiver);
    },
  });
}
async refreshAuth(
authMethod: AuthType,
apiKey?: string,
@ -1567,9 +1608,36 @@ export class Config implements McpContext, AgentLoopContext {
this,
this.getSessionId(),
);
// Only assign to instance properties after successful initialization
this.contentGeneratorConfig = newContentGeneratorConfig;
// --- HYBRID AUTH START ---
debugLogger.log(`[Config] Checking Hybrid Auth. Primary AuthType: ${this.contentGeneratorConfig.authType}`);
// If the primary generator is OpenAI, try to create a utility generator for Google-auth tasks
if (this.contentGeneratorConfig.authType === AuthType.OPENAI) {
try {
debugLogger.log('[Config] Attempting to find secondary Google Auth for hybrid mode...');
// Force model to undefined to probe for non-OpenAI auth
const googleAuthType = getAuthTypeFromEnv(undefined);
debugLogger.log(`[Config] Resolved potential Google AuthType: ${googleAuthType}`);
if (googleAuthType && googleAuthType !== AuthType.OPENAI) {
// If we found a Google auth type, create a separate config and generator for it
this.utilityGeneratorConfig = await createContentGeneratorConfig(this, googleAuthType);
this.utilityGenerator = await createContentGenerator(this.utilityGeneratorConfig, this, this.getSessionId());
debugLogger.log(`[Config] Hybrid mode ENABLED. Utility generator initialized with ${googleAuthType}`);
} else {
debugLogger.log('[Config] Hybrid mode SKIPPED: Still resolving to OpenAI or no auth found.');
}
} catch (e) {
debugLogger.warn('[Config] Failed to initialize utility generator for hybrid mode', e);
}
} else {
// Otherwise, the primary generator is already Google-auth capable
this.utilityGenerator = this.contentGenerator;
this.utilityGeneratorConfig = this.contentGeneratorConfig;
}
// --- HYBRID AUTH END ---
const codeAssistServer = getCodeAssistServer(this);
const quotaPromise = codeAssistServer?.projectId
? this.refreshUserQuota()

View file

@ -24,7 +24,7 @@ import {
type ServerGeminiStreamEvent,
type ChatCompressionInfo,
} from './turn.js';
import type { Config } from '../config/config.js';
import { AuthType, type Config } from '../config/config.js';
import { type AgentLoopContext } from '../config/agent-loop-context.js';
import { getCoreSystemPrompt } from './prompts.js';
import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
@ -1057,6 +1057,18 @@ export class GeminiClient {
abortSignal: AbortSignal,
role: LlmRole,
): Promise<GenerateContentResponse> {
// HYBRID MODEL MASKING:
// If this is a utility task and we are in a Gemma (OpenAI) session,
// steer the config resolution toward a Google-native model.
const isPrimaryOpenAi = this.config.getContentGeneratorConfig()?.authType === AuthType.OPENAI;
const isUtilityTask = !modelConfigKey.isChatModel ||
['web-search', 'web-fetch', 'classifier'].includes(modelConfigKey.model || '');
if (isPrimaryOpenAi && isUtilityTask) {
modelConfigKey = { ...modelConfigKey, model: 'gemini-3-flash-preview' };
debugLogger.log(`[GeminiClient] Steering utility task to Google-native model: ${modelConfigKey.model}`);
}
const desiredModelConfig =
this.config.modelConfigService.getResolvedConfig(modelConfigKey);
let {
@ -1101,19 +1113,37 @@ export class GeminiClient {
currentAttemptGenerateContentConfig = generateContentConfig;
}
// HYBRID PROMPT SANITIZATION:
// Use a minimal system prompt for utility tasks to prevent Google models
// from getting confused by the primary (Gemma) session's heavy instructions.
const effectiveSystemInstruction = (isPrimaryOpenAi && isUtilityTask)
? { role: 'system', parts: [{ text: 'You are a helpful assistant specialized in using tools like web search. Provide concise and accurate data based on tool results.' }] }
: systemInstruction;
const requestConfig: GenerateContentConfig = {
...currentAttemptGenerateContentConfig,
abortSignal,
systemInstruction,
systemInstruction: effectiveSystemInstruction,
};
return this.getContentGeneratorOrFail().generateContent(
const generator = (isPrimaryOpenAi && isUtilityTask)
? this.config.getUtilityGenerator()
: this.getContentGeneratorOrFail();
if (process.env['DEBUG']) {
console.log(`[GeminiClient] FINAL GENERATOR CALL:
- Generator Type: ${generator.constructor.name}
- Request Model: ${currentAttemptModel}
`);
}
return generator.generateContent(
{
model: currentAttemptModel,
config: requestConfig,
contents,
},
this.lastPromptId,
this.lastPromptId ?? 'default',
role,
);
};

View file

@ -79,21 +79,37 @@ export enum AuthType {
* 5. OPENAI_API_KEY -> OPENAI
*/
export function getAuthTypeFromEnv(model?: string): AuthType | undefined {
debugLogger.log(`[getAuthTypeFromEnv] Resolving auth for model: ${model}`);
debugLogger.log(`[getAuthTypeFromEnv] Env GEMINI_API_KEY: ${process.env['GEMINI_API_KEY'] ? 'PRESENT' : 'MISSING'}`);
debugLogger.log(`[getAuthTypeFromEnv] Env OPENAI_API_KEY: ${process.env['OPENAI_API_KEY'] ? 'PRESENT' : 'MISSING'}`);
debugLogger.log(`[getAuthTypeFromEnv] Env GOOGLE_GENAI_USE_GCA: ${process.env['GOOGLE_GENAI_USE_GCA']}`);
// HIGHEST PRIORITY: If a custom model is requested, FORCE OpenAI auth.
if (model?.startsWith('google/gemma') || model === 'gemma') {
debugLogger.log(`[getAuthTypeFromEnv] FORCING AuthType.OPENAI due to gemma prefix`);
return AuthType.OPENAI;
}
if (process.env['GOOGLE_GENAI_USE_GCA'] === 'true') {
debugLogger.log(`[getAuthTypeFromEnv] Found GOOGLE_GENAI_USE_GCA, returning LOGIN_WITH_GOOGLE`);
return AuthType.LOGIN_WITH_GOOGLE;
}
if (process.env['GOOGLE_GENAI_USE_VERTEXAI'] === 'true') {
debugLogger.log(`[getAuthTypeFromEnv] Found GOOGLE_GENAI_USE_VERTEXAI, returning USE_VERTEX_AI`);
return AuthType.USE_VERTEX_AI;
}
if (process.env['GEMINI_API_KEY']) {
debugLogger.log(`[getAuthTypeFromEnv] Found GEMINI_API_KEY, returning USE_GEMINI`);
return AuthType.USE_GEMINI;
}
// FALLBACK: If we are in hybrid discovery (no model) and NO OpenAI keys are forced,
// assume we can try Google Login if available.
if (!model && !process.env['OPENAI_API_KEY'] && !process.env['OPENAI_API_BASE_URL']) {
debugLogger.log(`[getAuthTypeFromEnv] Hybrid fallback: assuming Google Auth might be available via OAuth/ADC`);
return AuthType.LOGIN_WITH_GOOGLE;
}
const isOpenAiEnv =
!!process.env['OPENAI_API_KEY'] || !!process.env['OPENAI_API_BASE_URL'];
@ -362,6 +378,10 @@ export async function createContentGenerator(
httpOptions.baseUrl = baseUrl;
}
if (process.env['DEBUG']) {
console.log(`[ContentGenerator] Creating GoogleGenAI with AuthType: ${config.authType}, BaseURL: ${httpOptions.baseUrl}, Headers: ${JSON.stringify(httpOptions.headers)}`);
}
const googleGenAI = new GoogleGenAI({
apiKey: config.apiKey === '' ? undefined : config.apiKey,
vertexai: config.vertexai ?? config.authType === AuthType.USE_VERTEX_AI,

View file

@ -667,7 +667,18 @@ export class GeminiChat {
`[GeminiChat] FULL REQUEST CONFIG (JSON): ${JSON.stringify(config, null, 2)}`,
);
return this.context.config.getContentGenerator().generateContentStream(
// HYBRID ROUTING: Determine which generator to use based on the model.
const isInternalModel =
modelToUse.startsWith('gemini-') ||
['web-search', 'web-fetch', 'classifier', 'summarizer-default'].includes(modelToUse);
const generator = isInternalModel
? this.context.config.getUtilityGenerator()
: this.context.config.getContentGenerator();
debugLogger.log(`[GeminiChat] Routing request to ${isInternalModel ? 'Utility' : 'Primary'} generator for model: ${modelToUse}`);
return generator.generateContentStream(
{
model: modelToUse,
contents: contentsToUse,

View file

@ -170,7 +170,6 @@ export class OpenAiContentGenerator implements ContentGenerator {
if (!schema || typeof schema !== 'object') return schema;
const transformed: any = { ...schema };
// Convert type to lowercase for OpenAI compatibility
if (typeof transformed.type === 'string') {
transformed.type = transformed.type.toLowerCase();
}
@ -191,9 +190,6 @@ export class OpenAiContentGenerator implements ContentGenerator {
}
}
// OpenAI uses "required" array at the same level as "properties"
// Gemini sometimes uses "required" inside properties or different structures
delete transformed.format;
delete transformed.nullable;
return transformed;
@ -269,8 +265,7 @@ export class OpenAiContentGenerator implements ContentGenerator {
if (systemText && messages.length > 0) {
const firstUserMsg = messages.find(m => m.role === 'user');
if (firstUserMsg && typeof firstUserMsg.content === 'string') {
// Add a nudge for Gemma to use correct tool parameter names
const nudge = "\n\nIMPORTANT: When calling tools, ensure you use the exact parameter names defined in the tool's schema (e.g., use 'file_path' instead of 'path' for read_file).";
const nudge = "\n\nIMPORTANT: \n1. Use 'google_web_search' for any external information. \n2. NEVER use 'run_shell_command' for searching or finding files. \n3. Use tools IMMEDIATELY without asking for permission. \n4. When calling tools, ensure you use the EXACT parameter names (e.g., 'query' for search, 'file_path' for read_file).";
firstUserMsg.content = `System Instruction:\n${systemText}${nudge}\n\nUser Question:\n${firstUserMsg.content}`;
}
}
@ -295,7 +290,6 @@ export class OpenAiContentGenerator implements ContentGenerator {
const parts: any[] = [];
const functionCalls: any[] = [];
// Parse Gemma-style tool calls: <|tool_call|>call:name{args}<tool_call|>
const toolCallPatterns = [
/<\|tool_call>([\s\S]*?)<tool_call\|?>/g,
/call:([a-zA-Z0-9_]+)(\{[\s\S]*?\})/g
@ -313,19 +307,26 @@ export class OpenAiContentGenerator implements ContentGenerator {
const name = parts_match[1].trim();
let rawArgs = parts_match[2];
// SUPER ROBUST PSEUDO-JSON PARSING
// 1. Quote keys: {key: -> {"key":
// SUPER ROBUST PSEUDO-JSON PARSING V3
// 1. Quote ALL unquoted keys (even if they start the object)
// Regex: Finds an identifier followed by a colon that isn't already preceded by a quote
rawArgs = rawArgs.replace(/([{,]\s*)([a-zA-Z0-9_]+)(\s*:)/g, '$1"$2"$3');
// 2. Quote string values that are not already quoted: :value -> :"value"
// We target values that don't start with ", {, [, t (true), f (false), n (null) or a number
// If the very first key is missing its opening brace context in some outputs
if (rawArgs.startsWith('{') && !rawArgs.startsWith('{"') && !rawArgs.startsWith('{"')) {
rawArgs = rawArgs.replace(/^{([a-zA-Z0-9_]+):/, '{"$1":');
}
// 2. Quote string values ONLY if not already quoted
rawArgs = rawArgs.replace(/:\s*([^"{\[tf\n\-0-9\s][^,}\n]*)/g, (match, p1) => {
const trimmed = p1.trim();
if (trimmed === 'true' || trimmed === 'false' || trimmed === 'null' || !isNaN(Number(trimmed))) {
return `: ${trimmed}`;
}
if (trimmed.startsWith('"')) return `: ${trimmed}`;
return `: "${trimmed}"`;
});
// 3. Clean up trailing commas before } or ]
// 3. Clean up
rawArgs = rawArgs.replace(/,\s*([}\]])/g, '$1');
const args = JSON.parse(rawArgs);

View file

@ -98,6 +98,10 @@ class WebSearchToolInvocation extends BaseToolInvocation<
LlmRole.UTILITY_TOOL,
);
if (process.env['DEBUG']) {
console.log(`[WebSearchTool] Raw Response: ${JSON.stringify(response, null, 2)}`);
}
const responseText = getResponseText(response);
const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
const sources = groundingMetadata?.groundingChunks as