Merge branch 'main' into main

This commit is contained in:
figureConan 2026-04-21 12:02:53 +08:00 committed by GitHub
commit 73e0aa88e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 2654 additions and 22 deletions

View file

@ -1,6 +1,6 @@
# Latest stable release: v0.38.1
# Latest stable release: v0.38.2
Released: April 15, 2026
Released: April 17, 2026
For most users, our latest stable release is the recommended release. Install
the latest stable version with:
@ -29,6 +29,9 @@ npm install -g @google/gemini-cli
## What's Changed
- fix(patch): cherry-pick 14b2f35 to release/v0.38.1-pr-24974 to patch version
v0.38.1 and create version 0.38.2 by @gemini-cli-robot in
[#25585](https://github.com/google-gemini/gemini-cli/pull/25585)
- fix(patch): cherry-pick 050c303 to release/v0.38.0-pr-25317 to patch version
v0.38.0 and create version 0.38.1 by @gemini-cli-robot in
[#25466](https://github.com/google-gemini/gemini-cli/pull/25466)
@ -268,4 +271,4 @@ npm install -g @google/gemini-cli
[#24844](https://github.com/google-gemini/gemini-cli/pull/24844)
**Full Changelog**:
https://github.com/google-gemini/gemini-cli/compare/v0.38.0...v0.38.1
https://github.com/google-gemini/gemini-cli/compare/v0.38.0...v0.38.2

View file

@ -161,17 +161,19 @@ they appear in the UI.
### Experimental
| UI Label | Setting | Description | Default |
| ---------------------------------------------------- | -------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` |
| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` |
| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` |
| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` |
| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` |
| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` |
| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` |
| UI Label | Setting | Description | Default |
| ---------------------------------------------------- | ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` |
| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` |
| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` |
| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` |
| Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. | `false` |
| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` |
| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` |
| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` |
| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` |
### Skills

View file

@ -51,7 +51,7 @@ error with: `missing system prompt file '<path>'`.
- Create `.gemini/system.md`, then add to `.gemini/.env`:
- `GEMINI_SYSTEM_MD=1`
- Use a custom file under your home directory:
- `GEMINI_SYSTEM_MD=~/prompts/SYSTEM.md gemini`
- `GEMINI_SYSTEM_MD=~/prompts/system.md gemini`
## UI indicator
@ -102,17 +102,17 @@ safety and workflow rules.
This creates the file and writes the current builtin system prompt to it.
## Best practices: SYSTEM.md vs GEMINI.md
## Best practices: system.md vs GEMINI.md
- SYSTEM.md (firmware):
- system.md (firmware):
- Non-negotiable operational rules: safety, tool-use protocols, approvals, and
mechanics that keep the CLI reliable.
- Stable across tasks and projects (or per project when needed).
- GEMINI.md (strategy):
- Persona, goals, methodologies, and project/domain context.
- Evolves per task; relies on SYSTEM.md for safe execution.
- Evolves per task; relies on system.md for safe execution.
Keep SYSTEM.md minimal but complete for safety and tool operation. Keep
Keep system.md minimal but complete for safety and tool operation. Keep
GEMINI.md focused on high-level guidance and project specifics.
## Troubleshooting

View file

@ -1711,6 +1711,18 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.autoStartServer`** (boolean):
- **Description:** Automatically start the LiteRT-LM server when Gemini CLI
starts and the Gemma router is enabled.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.binaryPath`** (string):
- **Description:** Custom path to the LiteRT-LM binary. Leave empty to use the
default location (~/.gemini/bin/litert/).
- **Default:** `""`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.classifier.host`** (string):
- **Description:** The host of the classifier.
- **Default:** `"http://localhost:9379"`

View file

@ -92,6 +92,21 @@ each tool.
| [`ask_user`](../tools/ask-user.md) | `Communicate` | Requests clarification or missing information via an interactive dialog. |
| [`write_todos`](../tools/todos.md) | `Other` | Maintains an internal list of subtasks. The model uses this to track its own progress. |
### Task Tracker (Experimental)
<!-- prettier-ignore -->
> [!NOTE]
> This is an experimental feature currently under active development. Enable via `experimental.taskTracker`.
| Tool | Kind | Description |
| :---------------------------------------------- | :------ | :-------------------------------------------------------------------------- |
| [`tracker_create_task`](../tools/tracker.md) | `Other` | Creates a new task in the experimental tracker. |
| [`tracker_update_task`](../tools/tracker.md) | `Other` | Updates an existing task's status, description, or dependencies. |
| [`tracker_get_task`](../tools/tracker.md) | `Other` | Retrieves the full details of a specific task. |
| [`tracker_list_tasks`](../tools/tracker.md) | `Other` | Lists tasks in the tracker, optionally filtered by status, type, or parent. |
| [`tracker_add_dependency`](../tools/tracker.md) | `Other` | Adds a dependency between two tasks, ensuring topological execution. |
| [`tracker_visualize`](../tools/tracker.md) | `Other` | Renders an ASCII tree visualization of the current task graph. |
### MCP
| Tool | Kind | Description |

61
docs/tools/tracker.md Normal file
View file

@ -0,0 +1,61 @@
# Tracker tools (`tracker_*`)
<!-- prettier-ignore -->
> [!NOTE]
> This is an experimental feature currently under active development.
The `tracker_*` tools allow the Gemini agent to maintain an internal, persistent
graph of tasks and dependencies for multi-step requests. This suite of tools
provides a more robust and granular way to manage execution plans than the
legacy `write_todos` tool.
## Technical reference
The agent uses these tools to manage its execution plan, decompose complex goals
into actionable sub-tasks, and provide real-time progress updates to the CLI
interface. The task state is stored in the `.gemini/tmp/tracker/<session-id>`
directory, allowing the agent to manage its plan for the current session.
### Available Tools
- `tracker_create_task`: Creates a new task in the tracker. You can specify a
title, description, and task type (`epic`, `task`, `bug`).
- `tracker_update_task`: Updates an existing task's status (`open`,
`in_progress`, `blocked`, `closed`), description, or dependencies.
- `tracker_get_task`: Retrieves the full details of a specific task by its
6-character hex ID.
- `tracker_list_tasks`: Lists tasks in the tracker, optionally filtered by
status, type, or parent ID.
- `tracker_add_dependency`: Adds a dependency between two tasks, ensuring
topological execution.
- `tracker_visualize`: Renders an ASCII tree visualization of the current task
graph.
## Technical behavior
- **Interface:** Updates the progress indicator and task tree above the CLI
input prompt.
- **Persistence:** Task state is saved automatically to the
`.gemini/tmp/tracker/<session-id>` directory. Task states are session-specific
and do not persist across different sessions.
- **Dependencies:** Tasks can depend on other tasks, forming a directed acyclic
graph (DAG). The agent must resolve dependencies before starting blocked
tasks.
- **Interaction:** Users can view the current state of the tracker by asking the
agent to visualize it, or by running `gemini-cli` commands if implemented.
## Use cases
- Coordinating multi-file refactoring projects.
- Breaking down a mission into a hierarchy of epics and tasks for better
visibility.
- Tracking bugs and feature requests directly within the context of an active
codebase.
- Providing visibility into the agent's current focus and remaining work.
## Next steps
- Follow the [Task planning tutorial](../cli/tutorials/task-planning.md) for
usage details and migration from the legacy todo list.
- Learn about [Session management](../cli/session-management.md) for context on
persistent state.

View file

@ -0,0 +1,33 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule, Argv } from 'yargs';
import { initializeOutputListenersAndFlush } from '../gemini.js';
import { defer } from '../deferred.js';
import { setupCommand } from './gemma/setup.js';
import { startCommand } from './gemma/start.js';
import { stopCommand } from './gemma/stop.js';
import { statusCommand } from './gemma/status.js';
import { logsCommand } from './gemma/logs.js';
/**
 * Builds the yargs chain for the `gemma` parent command: installs the
 * middleware that flushes queued output and flags the invocation as a CLI
 * command, then registers every deferred subcommand in order.
 */
function buildGemmaSubcommands(yargs: Argv): Argv {
  const subcommands = [
    setupCommand,
    startCommand,
    stopCommand,
    statusCommand,
    logsCommand,
  ];
  let chain = yargs.middleware((argv) => {
    initializeOutputListenersAndFlush();
    argv['isCommand'] = true;
  });
  for (const subcommand of subcommands) {
    chain = chain.command(defer(subcommand, 'gemma'));
  }
  return chain
    .demandCommand(1, 'You need at least one command before continuing.')
    .version(false);
}

/** `gemini gemma` — parent command for local Gemma model routing management. */
export const gemmaCommand: CommandModule = {
  command: 'gemma',
  describe: 'Manage local Gemma model routing',
  builder: buildGemmaSubcommands,
  handler: () => {},
};

View file

@ -0,0 +1,45 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import path from 'node:path';
import { Storage } from '@google/gemini-cli-core';
// Pinned LiteRT-LM release tag whose binaries are downloaded during setup.
export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03';
// Base URL for LiteRT-LM GitHub release assets.
export const LITERT_RELEASE_BASE_URL =
  'https://github.com/google-ai-edge/LiteRT-LM/releases/download';
// Model identifier the local server hosts; matched against `lit list` output
// and embedded in the health-check endpoint URL.
export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom';
// Default TCP port for the local LiteRT-LM server.
// NOTE(review): presumably overridable via the classifier host setting —
// confirm against resolveGemmaConfig in platform.ts.
export const DEFAULT_PORT = 9379;
// Abort the server health-check request after this many milliseconds.
export const HEALTH_CHECK_TIMEOUT_MS = 5000;
// Gemini-API-compatible version segment used in server URLs (e.g. /v1beta/...).
export const LITERT_API_VERSION = 'v1beta';
// Wait this long after spawning the server before probing it.
// NOTE(review): usage not visible in this file — confirm in start.ts.
export const SERVER_START_WAIT_MS = 3000;
// Maps `${process.platform}-${process.arch}` to the release binary asset name.
export const PLATFORM_BINARY_MAP: Record<string, string> = {
  'darwin-arm64': 'lit.macos_arm64',
  'linux-x64': 'lit.linux_x86_64',
  'win32-x64': 'lit.windows_x86_64.exe',
};
// SHA-256 hashes for the official LiteRT-LM v0.9.0-alpha03 release binaries.
export const PLATFORM_BINARY_SHA256: Record<string, string> = {
  'lit.macos_arm64':
    '9e826a2634f2e8b220ad0f1e1b5c139e0b47cb172326e3b7d46d31382f49478e',
  'lit.linux_x86_64':
    '66601df8a07f08244b188e9fcab0bf4a16562fe76d8d47e49f40273d57541ee8',
  'lit.windows_x86_64.exe':
    'de82d2829d2fb1cbdb318e2d8a78dc2f9659ff14cb11b2894d1f30e0bfde2bf6',
};
/** Install directory for the LiteRT-LM binary: `<global-gemini-dir>/bin/litert`. */
export function getLiteRtBinDir(): string {
  const geminiDir = Storage.getGlobalGeminiDir();
  return path.join(geminiDir, 'bin', 'litert');
}
/** Path of the PID/process-info file for the running LiteRT-LM server. */
export function getPidFilePath(): string {
  const tempDir = Storage.getGlobalTempDir();
  return path.join(tempDir, 'litert-server.pid');
}
/** Path of the LiteRT-LM server's log file in the global temp directory. */
export function getLogFilePath(): string {
  const tempDir = Storage.getGlobalTempDir();
  return path.join(tempDir, 'litert-server.log');
}

View file

@ -0,0 +1,186 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import type { ChildProcess } from 'node:child_process';
import { EventEmitter } from 'node:events';
import os from 'node:os';
import path from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { spawn } from 'node:child_process';
import { exitCli } from '../utils.js';
import { getLogFilePath } from './constants.js';
import { logsCommand, readLastLines } from './logs.js';
// Replace the core debug logger with the shared test double so log output
// can be inspected by tests; ANSI escapes are kept (stripAnsi: false).
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const { mockCoreDebugLogger } = await import(
    '../../test-utils/mockDebugLogger.js'
  );
  return mockCoreDebugLogger(
    await importOriginal<typeof import('@google/gemini-cli-core')>(),
    {
      stripAnsi: false,
    },
  );
});

// Stub only `spawn` so tests can inject a fake child process; every other
// child_process export keeps its real implementation.
vi.mock('node:child_process', async (importOriginal) => {
  const actual = await importOriginal<typeof import('node:child_process')>();
  return {
    ...actual,
    spawn: vi.fn(),
  };
});

// Prevent the real exit helper from terminating the test runner.
vi.mock('../utils.js', () => ({
  exitCli: vi.fn(),
}));

// Make the log-file location controllable per test.
vi.mock('./constants.js', () => ({
  getLogFilePath: vi.fn(),
}));
/** Builds a minimal fake ChildProcess: an EventEmitter carrying a spied kill(). */
function createMockChild(): ChildProcess {
  const fake = new EventEmitter();
  Object.assign(fake, { kill: vi.fn() });
  return fake as unknown as ChildProcess;
}
/** Yields to the microtask queue twice so chained promise callbacks settle. */
async function flushMicrotasks() {
  for (let i = 0; i < 2; i += 1) {
    await Promise.resolve();
  }
}
// Exercises readLastLines against real temp files on disk.
describe('readLastLines', () => {
  // Files created by individual tests; removed after each one.
  const tempFiles: string[] = [];

  afterEach(async () => {
    await Promise.all(
      tempFiles
        .splice(0)
        .map((filePath) => fs.promises.rm(filePath, { force: true })),
    );
  });

  it('returns only the requested tail lines without reading the whole file eagerly', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
    );
    tempFiles.push(filePath);
    // NOTE(review): 2000 short lines (~20 KiB) fit inside a single 64 KiB
    // read chunk — confirm this actually exercises the multi-chunk path.
    const content = Array.from({ length: 2000 }, (_, i) => `line-${i + 1}`)
      .join('\n')
      .concat('\n');
    await fs.promises.writeFile(filePath, content, 'utf-8');
    await expect(readLastLines(filePath, 3)).resolves.toBe(
      'line-1998\nline-1999\nline-2000\n',
    );
  });

  it('returns an empty string when zero lines are requested', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'line-1\nline-2\n', 'utf-8');
    await expect(readLastLines(filePath, 0)).resolves.toBe('');
  });
});
// Verifies logsCommand's tail invocations and exit behavior using a fake
// spawn and a fake child process.
describe('logsCommand', () => {
  const originalPlatform = process.platform;

  beforeEach(() => {
    vi.clearAllMocks();
    // Force a POSIX platform so the tail-based code path runs.
    Object.defineProperty(process, 'platform', {
      value: 'linux',
      configurable: true,
    });
    vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log');
    // Pretend the log file exists so the handler proceeds past the access check.
    vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined);
  });

  afterEach(() => {
    Object.defineProperty(process, 'platform', {
      value: originalPlatform,
      configurable: true,
    });
    vi.restoreAllMocks();
  });

  it('waits for the tail process to close before exiting in follow mode', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    let resolved = false;
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({}).then(() => {
      resolved = true;
    });
    await flushMicrotasks();
    // No flags: defaults to follow mode (-f) with the last 20 lines.
    expect(spawn).toHaveBeenCalledWith(
      'tail',
      ['-f', '-n', '20', '/tmp/gemma.log'],
      { stdio: 'inherit' },
    );
    // The handler must stay pending until the child closes.
    expect(resolved).toBe(false);
    expect(exitCli).not.toHaveBeenCalled();
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });

  it('uses one-shot tail output when follow is disabled', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({ follow: false });
    await flushMicrotasks();
    // follow=false: no -f flag is passed.
    expect(spawn).toHaveBeenCalledWith('tail', ['-n', '20', '/tmp/gemma.log'], {
      stdio: 'inherit',
    });
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });

  it('follows from the requested line count when both --lines and --follow are set', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({ lines: 5, follow: true });
    await flushMicrotasks();
    expect(spawn).toHaveBeenCalledWith(
      'tail',
      ['-f', '-n', '5', '/tmp/gemma.log'],
      { stdio: 'inherit' },
    );
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });
});

View file

@ -0,0 +1,200 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import { spawn, type ChildProcess } from 'node:child_process';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import { getLogFilePath } from './constants.js';
/**
 * Reads the final `count` lines of a file by scanning backwards in 64 KiB
 * chunks, so large log files are never loaded fully into memory.
 *
 * @param filePath Path of the file to read.
 * @param count Maximum number of trailing lines to return.
 * @returns The tail lines joined with '\n' and ending in a newline, or ''
 *   when `count <= 0` or the file is empty.
 */
export async function readLastLines(
  filePath: string,
  count: number,
): Promise<string> {
  if (count <= 0) {
    return '';
  }
  const CHUNK_SIZE = 64 * 1024;
  const handle = await fs.promises.open(filePath, fs.constants.O_RDONLY);
  try {
    const { size } = await handle.stat();
    if (size === 0) {
      return '';
    }
    const pieces: Buffer[] = [];
    let totalLength = 0;
    let newlinesSeen = 0;
    let offset = size;
    // Walk backwards until we have seen more newlines than requested lines
    // (the extra one guarantees the earliest wanted line is complete) or we
    // reach the start of the file.
    while (offset > 0 && newlinesSeen <= count) {
      const readSize = Math.min(CHUNK_SIZE, offset);
      offset -= readSize;
      const buffer = Buffer.allocUnsafe(readSize);
      const { bytesRead } = await handle.read(buffer, 0, readSize, offset);
      if (bytesRead === 0) {
        break;
      }
      const piece =
        bytesRead === readSize ? buffer : buffer.subarray(0, bytesRead);
      pieces.unshift(piece);
      totalLength += piece.length;
      for (const byte of piece) {
        if (byte === 0x0a) {
          newlinesSeen += 1;
        }
      }
    }
    const lines = Buffer.concat(pieces, totalLength)
      .toString('utf-8')
      .split('\n');
    // If we stopped mid-file, the first entry may be a partial line: drop it
    // unless the byte immediately before our window is a newline.
    if (offset > 0 && lines.length > 0) {
      const boundary = Buffer.allocUnsafe(1);
      const { bytesRead } = await handle.read(boundary, 0, 1, offset - 1);
      if (bytesRead === 1 && boundary[0] !== 0x0a) {
        lines.shift();
      }
    }
    // A trailing newline yields one empty final element; discard it.
    if (lines.length > 0 && lines[lines.length - 1] === '') {
      lines.pop();
    }
    if (lines.length === 0) {
      return '';
    }
    return lines.slice(-count).join('\n') + '\n';
  } finally {
    await handle.close();
  }
}
/** Parsed CLI flags for `gemini gemma logs`. */
interface LogsArgs {
  // Show only the last N lines; omitting it implies follow mode.
  lines?: number;
  // Stream new output as it is appended (tail -f).
  follow?: boolean;
}
/**
 * Resolves with the child's exit code once it closes (a null exit code is
 * reported as 1), or rejects if the process emits 'error'.
 */
function waitForChild(child: ChildProcess): Promise<number> {
  return new Promise((resolve, reject) => {
    child.once('error', reject);
    child.once('close', (code) => {
      resolve(code === null || code === undefined ? 1 : code);
    });
  });
}
/**
 * Spawns `tail` against the log file with inherited stdio so output streams
 * straight to the terminal. In follow mode, Ctrl+C (SIGINT) is forwarded to
 * the child as SIGTERM for a clean shutdown, and the handler is removed once
 * the child exits.
 *
 * @returns The tail process's exit code.
 */
async function runTail(logPath: string, lines: number, follow: boolean) {
  const tailArgs = follow ? ['-f'] : [];
  tailArgs.push('-n', String(lines), logPath);
  const child = spawn('tail', tailArgs, { stdio: 'inherit' });
  if (!follow) {
    return waitForChild(child);
  }
  const forwardInterrupt = () => {
    child.kill('SIGTERM');
  };
  process.once('SIGINT', forwardInterrupt);
  try {
    return await waitForChild(child);
  } finally {
    process.off('SIGINT', forwardInterrupt);
  }
}
/** `gemini gemma logs` — print or follow the LiteRT-LM server log file. */
export const logsCommand: CommandModule<object, LogsArgs> = {
  command: 'logs',
  describe: 'View LiteRT-LM server logs',
  builder: (yargs) =>
    yargs
      .option('lines', {
        alias: 'n',
        type: 'number',
        description: 'Show the last N lines and exit (omit to follow live)',
      })
      .option('follow', {
        alias: 'f',
        type: 'boolean',
        description:
          'Follow log output (defaults to true when --lines is omitted)',
      }),
  handler: async (argv) => {
    const logPath = getLogFilePath();
    // Bail out early with guidance if the server has never written a log.
    try {
      await fs.promises.access(logPath, fs.constants.F_OK);
    } catch {
      debugLogger.log(`No log file found at ${logPath}`);
      debugLogger.log(
        'Is the LiteRT server running? Start it with: gemini gemma start',
      );
      await exitCli(1);
      return;
    }
    const lines = argv.lines;
    // Follow by default unless the user asked for a fixed line count.
    const follow = argv.follow ?? lines === undefined;
    const requestedLines = lines ?? 20;
    if (follow && process.platform === 'win32') {
      // No `tail -f` equivalent is wired up on Windows.
      debugLogger.log(
        'Live log following is not supported on Windows. Use --lines N to view recent logs.',
      );
      await exitCli(1);
      return;
    }
    if (process.platform === 'win32') {
      // One-shot mode on Windows: read the tail in-process instead of
      // spawning an external `tail`.
      process.stdout.write(await readLastLines(logPath, requestedLines));
      await exitCli(0);
      return;
    }
    try {
      if (follow) {
        debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
      }
      const exitCode = await runTail(logPath, requestedLines, follow);
      await exitCli(exitCode);
    } catch (error) {
      if (
        error instanceof Error &&
        'code' in error &&
        error.code === 'ENOENT'
      ) {
        // `tail` is not on PATH: fall back to the in-process reader for
        // one-shot mode; following without tail is unsupported.
        if (!follow) {
          process.stdout.write(await readLastLines(logPath, requestedLines));
          await exitCli(0);
        } else {
          debugLogger.error(
            '"tail" command not found. Use --lines N to view recent logs without tail.',
          );
          await exitCli(1);
        }
      } else {
        debugLogger.error(
          `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`,
        );
        await exitCli(1);
      }
    }
  },
};

View file

@ -0,0 +1,162 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import path from 'node:path';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { SettingScope } from '../../config/settings.js';
import { getLiteRtBinDir } from './constants.js';
// Hoisted so the mock factory below can reference it before module imports run.
const mockLoadSettings = vi.hoisted(() => vi.fn());
// Replace settings loading with a controllable stub; only the User scope
// member is needed by the code under test.
vi.mock('../../config/settings.js', () => ({
  loadSettings: mockLoadSettings,
  SettingScope: {
    User: 'User',
  },
}));
import {
getBinaryPath,
isExpectedLiteRtServerCommand,
isBinaryInstalled,
readServerProcessInfo,
resolveGemmaConfig,
} from './platform.js';
describe('gemma platform helpers', () => {
  // Builds a fake settings object with separate user-scope and merged views,
  // mirroring the shape platform.ts reads from loadSettings().
  function createMockSettings(
    userGemmaSettings?: object,
    mergedGemmaSettings?: object,
  ) {
    return {
      merged: {
        experimental: {
          gemmaModelRouter: mergedGemmaSettings,
        },
      },
      forScope: vi.fn((scope: SettingScope) => {
        if (scope !== SettingScope.User) {
          throw new Error(`Unexpected scope ${scope}`);
        }
        return {
          settings: {
            experimental: {
              gemmaModelRouter: userGemmaSettings,
            },
          },
        };
      }),
    };
  }

  beforeEach(() => {
    vi.clearAllMocks();
    mockLoadSettings.mockReturnValue(createMockSettings());
  });

  it('prefers the configured binary path from settings', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings({ binaryPath: '/custom/lit' }),
    );
    expect(getBinaryPath('lit.test')).toBe('/custom/lit');
  });

  // Security property: a checked-in workspace config must not be able to
  // point the CLI at an arbitrary executable.
  it('ignores workspace overrides for the configured binary path', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings(
        { binaryPath: '/user/lit' },
        { binaryPath: '/workspace/evil' },
      ),
    );
    expect(getBinaryPath('lit.test')).toBe('/user/lit');
  });

  it('falls back to the default install location when no custom path is set', () => {
    expect(getBinaryPath('lit.test')).toBe(
      path.join(getLiteRtBinDir(), 'lit.test'),
    );
  });

  it('resolves the configured port and binary path from settings', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings(
        { binaryPath: '/custom/lit' },
        {
          enabled: true,
          classifier: {
            host: 'http://localhost:8123/v1beta',
          },
        },
      ),
    );
    expect(resolveGemmaConfig(9379)).toEqual({
      settingsEnabled: true,
      configuredPort: 8123,
      configuredBinaryPath: '/custom/lit',
    });
  });

  it('checks binary installation using the resolved binary path', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings({ binaryPath: '/custom/lit' }),
    );
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    expect(isBinaryInstalled()).toBe(true);
    expect(fs.existsSync).toHaveBeenCalledWith('/custom/lit');
  });

  it('parses structured server process info from the pid file', () => {
    vi.spyOn(fs, 'readFileSync').mockReturnValue(
      JSON.stringify({
        pid: 1234,
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    );
    expect(readServerProcessInfo()).toEqual({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
  });

  // Older versions wrote only the bare PID digits to the file.
  it('parses legacy pid-only files for backward compatibility', () => {
    vi.spyOn(fs, 'readFileSync').mockReturnValue('4321');
    expect(readServerProcessInfo()).toEqual({
      pid: 4321,
    });
  });

  it('matches only the expected LiteRT serve command', () => {
    expect(
      isExpectedLiteRtServerCommand('/custom/lit serve --port=8123 --verbose', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(true);
    // Wrong subcommand.
    expect(
      isExpectedLiteRtServerCommand('/custom/lit run --port=8123', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(false);
    // Wrong port.
    expect(
      isExpectedLiteRtServerCommand('/custom/lit serve --port=9000', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(false);
  });
});

View file

@ -0,0 +1,316 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { loadSettings, SettingScope } from '../../config/settings.js';
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync } from 'node:child_process';
import {
PLATFORM_BINARY_MAP,
LITERT_RELEASE_BASE_URL,
LITERT_RELEASE_VERSION,
getLiteRtBinDir,
GEMMA_MODEL_NAME,
HEALTH_CHECK_TIMEOUT_MS,
LITERT_API_VERSION,
getPidFilePath,
} from './constants.js';
/** Identifies the current platform and its matching LiteRT-LM release binary. */
export interface PlatformInfo {
  // `${process.platform}-${process.arch}`, e.g. 'darwin-arm64'.
  key: string;
  // Release asset name for this platform, e.g. 'lit.macos_arm64'.
  binaryName: string;
}

/** Resolved Gemma router configuration derived from user/workspace settings. */
export interface GemmaConfigStatus {
  // True when `experimental.gemmaModelRouter.enabled` is set in merged settings.
  settingsEnabled: boolean;
  // Port parsed from the classifier host setting, or the caller's fallback.
  configuredPort: number;
  // User-scope `binaryPath` override, if configured.
  configuredBinaryPath?: string;
}

/** Structured contents of the LiteRT server PID file. */
export interface LiteRtServerProcessInfo {
  pid: number;
  // Absent in legacy pid-only files.
  binaryPath?: string;
  // Absent in legacy pid-only files.
  port?: number;
}
/**
 * Reads the user-scope `experimental.gemmaModelRouter.binaryPath` setting.
 * Workspace-scope overrides are deliberately ignored so a checked-in
 * workspace config cannot point the CLI at an arbitrary executable.
 * Returns undefined when unset, blank, or when settings fail to load.
 */
function getUserConfiguredBinaryPath(
  workspaceDir = process.cwd(),
): string | undefined {
  try {
    const userScope = loadSettings(workspaceDir).forScope(SettingScope.User);
    const configured =
      userScope.settings.experimental?.gemmaModelRouter?.binaryPath;
    return configured?.trim() || undefined;
  } catch {
    return undefined;
  }
}
/**
 * Extracts a TCP port from a host string, falling back to `fallbackPort`
 * when none can be determined.
 *
 * Fix: the regex fallback was previously unreachable for scheme-less hosts
 * like "localhost:9000" — `new URL('localhost:9000')` succeeds (treating
 * "localhost:" as the protocol) with an empty port, so the function silently
 * returned the fallback. `url.port` is also '' for scheme-default ports
 * (e.g. "https://x:443"). We now fall through to the `:<digits>` scan
 * whenever URL parsing yields no usable port.
 *
 * @param host Host string, e.g. "http://localhost:8123/v1beta" or "localhost:9000".
 * @param fallbackPort Returned when no explicit positive port is found.
 */
function parsePortFromHost(
  host: string | undefined,
  fallbackPort: number,
): number {
  if (!host) {
    return fallbackPort;
  }
  const isValidPort = (candidate: number): boolean =>
    Number.isFinite(candidate) && candidate > 0;
  try {
    const url = new URL(host);
    const urlPort = Number(url.port);
    if (isValidPort(urlPort)) {
      return urlPort;
    }
    // URL parsed but carried no usable port; fall through to the scan.
  } catch {
    // Not an absolute URL; fall through to the scan.
  }
  const match = host.match(/:(\d+)/);
  if (!match) {
    return fallbackPort;
  }
  const scannedPort = parseInt(match[1], 10);
  return isValidPort(scannedPort) ? scannedPort : fallbackPort;
}
export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
let settingsEnabled = false;
let configuredPort = fallbackPort;
const configuredBinaryPath = getUserConfiguredBinaryPath();
try {
const settings = loadSettings(process.cwd());
const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
settingsEnabled = gemmaSettings?.enabled === true;
configuredPort = parsePortFromHost(
gemmaSettings?.classifier?.host,
fallbackPort,
);
} catch {
// ignore — settings may fail to load outside a workspace
}
return { settingsEnabled, configuredPort, configuredBinaryPath };
}
/**
 * Looks up the LiteRT-LM release binary for the current platform/arch.
 * Returns null when this platform has no published binary.
 */
export function detectPlatform(): PlatformInfo | null {
  const key = `${process.platform}-${process.arch}`;
  const binaryName = PLATFORM_BINARY_MAP[key];
  return binaryName ? { key, binaryName } : null;
}
export function getBinaryPath(binaryName?: string): string | null {
const configuredBinaryPath = getUserConfiguredBinaryPath();
if (configuredBinaryPath) {
return configuredBinaryPath;
}
const name = binaryName ?? detectPlatform()?.binaryName;
if (!name) return null;
return path.join(getLiteRtBinDir(), name);
}
/** Builds the GitHub release download URL for a LiteRT binary asset. */
export function getBinaryDownloadUrl(binaryName: string): string {
  const segments = [LITERT_RELEASE_BASE_URL, LITERT_RELEASE_VERSION, binaryName];
  return segments.join('/');
}
/** True when the resolved (or explicitly given) LiteRT binary exists on disk. */
export function isBinaryInstalled(binaryPath = getBinaryPath()): boolean {
  return binaryPath ? fs.existsSync(binaryPath) : false;
}
/**
 * Checks whether the Gemma model is present by running `<binary> list` and
 * scanning its output. Any failure (missing binary, non-zero exit, timeout)
 * reports false.
 */
export function isModelDownloaded(binaryPath: string): boolean {
  try {
    const listing = execFileSync(binaryPath, ['list'], {
      encoding: 'utf-8',
      timeout: 10000,
    });
    return listing.includes(GEMMA_MODEL_NAME);
  } catch {
    return false;
  }
}
/**
 * Probes the local LiteRT server by POSTing to the Gemma model's
 * generateContent route. A 404 means the route (and thus our model/server)
 * is absent; any other status — including 400 for the empty request body —
 * proves the endpoint exists. Network errors and timeouts report false.
 *
 * Fix: the abort timer is now cleared in a `finally` block. Previously a
 * rejected fetch skipped `clearTimeout`, leaving a pending timer that held
 * the event loop open for up to HEALTH_CHECK_TIMEOUT_MS per failed probe.
 *
 * @param port Local port the LiteRT server is expected to listen on.
 */
export async function isServerRunning(port: number): Promise<boolean> {
  const controller = new AbortController();
  const timeout = setTimeout(
    () => controller.abort(),
    HEALTH_CHECK_TIMEOUT_MS,
  );
  try {
    const response = await fetch(
      `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}:generateContent`,
      { method: 'POST', signal: controller.signal },
    );
    // A 400 (bad request) confirms the route exists — the server recognises
    // the model endpoint. Only a 404 means "wrong server / wrong model".
    return response.status !== 404;
  } catch {
    return false;
  } finally {
    clearTimeout(timeout);
  }
}
/**
 * Type guard validating PID-file JSON: requires a positive-integer `pid`;
 * `binaryPath` (non-empty string) and `port` (positive integer) are optional.
 * Reads own-property descriptors so inherited or getter-backed properties
 * never pass validation.
 */
function isLiteRtServerProcessInfo(
  value: unknown,
): value is LiteRtServerProcessInfo {
  if (!value || typeof value !== 'object') {
    return false;
  }
  const ownValue = (key: string): unknown =>
    Object.getOwnPropertyDescriptor(value, key)?.value;
  const isPositiveInteger = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' &&
    Number.isInteger(candidate) &&
    candidate > 0;
  const isNonEmptyString = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.length > 0;
  if (!isPositiveInteger(ownValue('pid'))) {
    return false;
  }
  const binaryPath = ownValue('binaryPath');
  if (binaryPath !== undefined && !isNonEmptyString(binaryPath)) {
    return false;
  }
  const port = ownValue('port');
  return port === undefined || isPositiveInteger(port);
}
/**
 * Reads the server PID file. Supports the structured JSON format and legacy
 * files containing only bare PID digits. Returns null when the file is
 * missing, empty, or malformed.
 */
export function readServerProcessInfo(): LiteRtServerProcessInfo | null {
  try {
    const raw = fs.readFileSync(getPidFilePath(), 'utf-8').trim();
    if (!raw) {
      return null;
    }
    // Legacy format: the file holds just the PID digits.
    if (/^\d+$/.test(raw)) {
      return { pid: parseInt(raw, 10) };
    }
    const parsed: unknown = JSON.parse(raw);
    return isLiteRtServerProcessInfo(parsed) ? parsed : null;
  } catch {
    return null;
  }
}
/** Persists the running server's process info to the PID file as JSON. */
export function writeServerProcessInfo(
  processInfo: LiteRtServerProcessInfo,
): void {
  const serialized = JSON.stringify(processInfo);
  fs.writeFileSync(getPidFilePath(), serialized, 'utf-8');
}
/** Convenience wrapper: just the PID from the PID file, or null when absent. */
export function readServerPid(): number | null {
  const info = readServerProcessInfo();
  return info ? info.pid : null;
}
/**
 * Canonicalizes a process command-line string for comparison: NUL separators
 * (as in /proc/<pid>/cmdline) become spaces and whitespace runs collapse; on
 * Windows, backslashes become forward slashes and the result is lowercased
 * for case-insensitive matching.
 */
function normalizeProcessValue(value: string): string {
  const collapsed = value.replace(/\0/g, ' ').trim().replace(/\s+/g, ' ');
  return process.platform === 'win32'
    ? collapsed.replace(/\\/g, '/').toLowerCase()
    : collapsed;
}
/**
 * Best-effort lookup of a process's full command line, per platform:
 * Linux reads /proc/<pid>/cmdline (NUL-separated), Windows queries WMI via
 * PowerShell, and other POSIX systems shell out to `ps`. Returns null when
 * the process is gone or the lookup fails.
 */
function readProcessCommandLine(pid: number): string | null {
  try {
    if (process.platform === 'linux') {
      const output = fs.readFileSync(`/proc/${pid}/cmdline`, 'utf-8');
      // cmdline can be empty (e.g. kernel threads); treat that as "unknown".
      return output.trim() ? output : null;
    }
    if (process.platform === 'win32') {
      const output = execFileSync(
        'powershell.exe',
        [
          '-NoProfile',
          '-Command',
          `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}").CommandLine`,
        ],
        {
          encoding: 'utf-8',
          timeout: 5000,
        },
      );
      return output.trim() || null;
    }
    // Generic POSIX fallback (e.g. macOS): ask ps for the full command.
    const output = execFileSync('ps', ['-p', String(pid), '-o', 'command='], {
      encoding: 'utf-8',
      timeout: 5000,
    });
    return output.trim() || null;
  } catch {
    return null;
  }
}
/**
 * Heuristically decides whether a raw command line matches the LiteRT
 * server we launched. The line must contain a standalone "serve" token,
 * carry the expected `--port=` flag (when one is given), and reference
 * the expected binary by full path or basename (when one is given).
 */
export function isExpectedLiteRtServerCommand(
  commandLine: string,
  options: {
    binaryPath?: string | null;
    port?: number;
  },
): boolean {
  const normalized = normalizeProcessValue(commandLine);
  if (!normalized) {
    return false;
  }
  // "serve" must be its own token (start/whitespace/quote delimited).
  const hasServeToken = /(^|\s|")serve(\s|$)/.test(normalized);
  if (!hasServeToken) {
    return false;
  }
  const portMatches =
    options.port === undefined ||
    normalized.includes(`--port=${options.port}`);
  if (!portMatches) {
    return false;
  }
  if (!options.binaryPath) {
    return true;
  }
  const fullPath = normalizeProcessValue(options.binaryPath);
  const baseName = normalizeProcessValue(path.basename(options.binaryPath));
  return normalized.includes(fullPath) || normalized.includes(baseName);
}
/**
 * Resolves the live command line for `pid` and verifies it matches the
 * expected LiteRT server invocation. Returns false when the command
 * line cannot be read (process gone, lookup failed).
 */
export function isExpectedLiteRtServerProcess(
  pid: number,
  options: {
    binaryPath?: string | null;
    port?: number;
  },
): boolean {
  const commandLine = readProcessCommandLine(pid);
  return commandLine
    ? isExpectedLiteRtServerCommand(commandLine, options)
    : false;
}
/**
 * Checks process liveness with the zero signal: `kill(pid, 0)` performs
 * existence/permission checks without actually delivering a signal.
 */
export function isProcessRunning(pid: number): boolean {
  try {
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}

View file

@ -0,0 +1,60 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { afterEach, describe, expect, it } from 'vitest';
import { PLATFORM_BINARY_MAP, PLATFORM_BINARY_SHA256 } from './constants.js';
import { computeFileSha256, verifyFileSha256 } from './setup.js';
// Unit tests for the checksum helpers used by `gemini gemma setup` to
// verify downloaded LiteRT binaries against pinned SHA-256 hashes.
describe('gemma setup checksum helpers', () => {
  // Temp files created by individual tests; removed after each test.
  const tempFiles: string[] = [];
  afterEach(async () => {
    await Promise.all(
      tempFiles
        .splice(0)
        .map((filePath) => fs.promises.rm(filePath, { force: true })),
    );
  });
  // Guards against adding a platform binary without pinning its hash.
  it('has a pinned checksum for every supported LiteRT binary', () => {
    expect(Object.keys(PLATFORM_BINARY_SHA256).sort()).toEqual(
      Object.values(PLATFORM_BINARY_MAP).sort(),
    );
  });
  it('computes the sha256 for a downloaded file', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
    // Well-known SHA-256 of the ASCII string "hello world".
    await expect(computeFileSha256(filePath)).resolves.toBe(
      'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
    );
  });
  it('verifies whether a file matches the expected sha256', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
    await expect(
      verifyFileSha256(
        filePath,
        'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
      ),
    ).resolves.toBe(true);
    await expect(verifyFileSha256(filePath, 'deadbeef')).resolves.toBe(false);
  });
});

View file

@ -0,0 +1,504 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import { createHash } from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync, spawn as nodeSpawn } from 'node:child_process';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { loadSettings, SettingScope } from '../../config/settings.js';
import { exitCli } from '../utils.js';
import {
DEFAULT_PORT,
GEMMA_MODEL_NAME,
PLATFORM_BINARY_SHA256,
} from './constants.js';
import {
detectPlatform,
getBinaryDownloadUrl,
getBinaryPath,
isBinaryInstalled,
isModelDownloaded,
} from './platform.js';
import { startServer } from './start.js';
import readline from 'node:readline';
const log = (msg: string) => debugLogger.log(msg);
const logError = (msg: string) => debugLogger.error(msg);
/**
 * Asks a yes/no question on the terminal. Resolves true only for an
 * explicit "y" or "yes" answer (case-insensitive); anything else —
 * including just pressing Enter — counts as "no".
 */
async function promptYesNo(question: string): Promise<boolean> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  return new Promise((resolve) => {
    rl.question(`${question} (y/N): `, (answer) => {
      rl.close();
      const normalized = answer.trim().toLowerCase();
      resolve(normalized === 'y' || normalized === 'yes');
    });
  });
}
/**
 * Formats a byte count for human-readable progress output.
 *
 * Picks the largest binary unit (B, KB, MB, GB) that keeps the number
 * small; sub-KB values print as integers, larger ones with one decimal
 * place. The GB tier is new: previously anything >= 1 GiB rendered in
 * MB (e.g. "1024.0 MB"), which is unreadable for the ~1 GB model
 * download. Output for values below 1 GiB is unchanged.
 */
function formatBytes(bytes: number): string {
  const KIB = 1024;
  const MIB = KIB * 1024;
  const GIB = MIB * 1024;
  if (bytes < KIB) return `${bytes} B`;
  if (bytes < MIB) return `${(bytes / KIB).toFixed(1)} KB`;
  if (bytes < GIB) return `${(bytes / MIB).toFixed(1)} MB`;
  return `${(bytes / GIB).toFixed(1)} GB`;
}
/**
 * Renders an in-place download progress line on stderr. When the total
 * size is known, a 30-cell bar with a percentage is drawn; otherwise
 * only the running byte count is shown. A leading `\r` makes each call
 * overwrite the previous line.
 */
function renderProgress(downloaded: number, total: number | null): void {
  const barWidth = 30;
  if (!total || total <= 0) {
    process.stderr.write(`\r Downloaded ${formatBytes(downloaded)}`);
    return;
  }
  const fraction = Math.min(downloaded / total, 1);
  const filledCells = Math.round(barWidth * fraction);
  const bar = '█'.repeat(filledCells) + '░'.repeat(barWidth - filledCells);
  const percent = (fraction * 100).toFixed(0).padStart(3);
  process.stderr.write(
    `\r [${bar}] ${percent}% ${formatBytes(downloaded)} / ${formatBytes(total)}`,
  );
}
/**
 * Downloads `url` to `destPath`, streaming through a `.downloading`
 * temp file and rendering progress to stderr. The temp file is renamed
 * into place only after the stream fully flushes, so a partial download
 * never masquerades as a complete file.
 *
 * @throws Error on a non-2xx response, a missing body, or any
 *   read/write failure.
 *
 * Fix over the previous version: the write stream's 'error' handler was
 * only attached after the copy loop finished, so a disk-write error
 * during the download surfaced as an unhandled 'error' event (crashing
 * the process), and the bare 'drain' wait could hang forever once the
 * stream was destroyed. Errors are now captured up-front and the drain
 * wait rejects on stream failure.
 */
async function downloadFile(url: string, destPath: string): Promise<void> {
  const tmpPath = destPath + '.downloading';
  if (fs.existsSync(tmpPath)) {
    fs.unlinkSync(tmpPath);
  }
  const response = await fetch(url, { redirect: 'follow' });
  if (!response.ok) {
    throw new Error(
      `Download failed: HTTP ${response.status} ${response.statusText}`,
    );
  }
  if (!response.body) {
    throw new Error('Download failed: No response body');
  }
  const contentLength = response.headers.get('content-length');
  const totalBytes = contentLength ? parseInt(contentLength, 10) : null;
  let downloadedBytes = 0;
  const fileStream = fs.createWriteStream(tmpPath);
  // Capture stream errors immediately; an unhandled 'error' event on a
  // stream terminates the Node process.
  let streamError: Error | null = null;
  fileStream.on('error', (err) => {
    streamError = err;
  });
  const reader = response.body.getReader();
  try {
    for (;;) {
      if (streamError) {
        throw streamError;
      }
      const { done, value } = await reader.read();
      if (done) break;
      if (!fileStream.write(value)) {
        // Back-pressure: wait for the buffer to flush, but bail out if
        // the stream dies (otherwise 'drain' never fires and we hang).
        await new Promise<void>((resolve, reject) => {
          fileStream.once('drain', resolve);
          fileStream.once('error', reject);
        });
      }
      downloadedBytes += value.byteLength;
      renderProgress(downloadedBytes, totalBytes);
    }
    if (streamError) {
      throw streamError;
    }
    // Flush and close; 'finish' guarantees all bytes hit the OS.
    await new Promise<void>((resolve, reject) => {
      fileStream.once('error', reject);
      fileStream.end(() => resolve());
    });
  } catch (error) {
    fileStream.destroy();
    throw error;
  } finally {
    // Clear the progress line regardless of outcome.
    process.stderr.write('\r' + ' '.repeat(80) + '\r');
  }
  fs.renameSync(tmpPath, destPath);
}
/**
 * Streams a file through SHA-256 and resolves with the lowercase hex
 * digest. Rejects if the file cannot be read.
 */
export async function computeFileSha256(filePath: string): Promise<string> {
  return new Promise((resolve, reject) => {
    const digest = createHash('sha256');
    fs.createReadStream(filePath)
      .on('error', reject)
      .on('data', (chunk) => digest.update(chunk))
      .on('end', () => resolve(digest.digest('hex')));
  });
}
/**
 * Checks whether a file's SHA-256 digest equals `expectedHash`
 * (lowercase hex, as produced by computeFileSha256).
 */
export async function verifyFileSha256(
  filePath: string,
  expectedHash: string,
): Promise<boolean> {
  return (await computeFileSha256(filePath)) === expectedHash;
}
/**
 * Runs a command with stdio wired to the current terminal and resolves
 * with its exit code (1 when the process yielded no code, e.g. it was
 * killed by a signal). Rejects only when the process cannot be spawned.
 */
function spawnInherited(command: string, args: string[]): Promise<number> {
  return new Promise((resolve, reject) => {
    nodeSpawn(command, args, { stdio: 'inherit' })
      .on('error', reject)
      .on('close', (code) => {
        resolve(code === null ? 1 : code);
      });
  });
}
/** Parsed CLI flags for the `gemini gemma setup` command. */
interface SetupArgs {
  // Port the LiteRT server should listen on.
  port: number;
  // Install the binary but skip the ~1 GB model download.
  skipModel: boolean;
  // Start the server once setup finishes.
  start: boolean;
  // Re-download binary/model even if already present.
  force: boolean;
  // Non-interactive consent (implies acceptance of the Gemma terms).
  consent: boolean;
}
/**
 * Runs the `gemini gemma setup` flow end to end: consent prompt, LiteRT
 * binary download with pinned-checksum verification, model pull,
 * settings updates (user + workspace scope), and an optional server
 * start. Returns a process exit code: 0 on success (or user-cancelled
 * consent), 1 on any failure.
 */
async function handleSetup(argv: SetupArgs): Promise<number> {
  const { port, force } = argv;
  let settingsUpdated = false;
  let serverStarted = false;
  let autoStartServer = true;
  log('');
  log(chalk.bold('Gemma Local Model Routing Setup'));
  log(chalk.dim('─'.repeat(40)));
  log('');
  // Step 1: platform support check — bail early on unsupported targets.
  const platform = detectPlatform();
  if (!platform) {
    logError(
      chalk.red(`Unsupported platform: ${process.platform}-${process.arch}`),
    );
    logError(
      'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)',
    );
    return 1;
  }
  log(chalk.dim(` Platform: ${platform.key}${platform.binaryName}`));
  // Step 2: interactive consent, unless --consent was passed.
  if (!argv.consent) {
    log('');
    log('This will download and install the LiteRT-LM runtime and the');
    log(
      `Gemma model (${GEMMA_MODEL_NAME}, ~1 GB). By proceeding, you agree to the`,
    );
    log('Gemma Terms of Use: https://ai.google.dev/gemma/terms');
    log('');
    const accepted = await promptYesNo('Do you want to continue?');
    if (!accepted) {
      log('Setup cancelled.');
      return 0;
    }
  }
  // Step 3: download + verify the LiteRT binary (skipped unless --force
  // when already installed).
  const binaryPath = getBinaryPath(platform.binaryName)!;
  const alreadyInstalled = isBinaryInstalled();
  if (alreadyInstalled && !force) {
    log('');
    log(chalk.green(' ✓ LiteRT-LM binary already installed at:'));
    log(chalk.dim(` ${binaryPath}`));
  } else {
    log('');
    log(' Downloading LiteRT-LM binary...');
    const downloadUrl = getBinaryDownloadUrl(platform.binaryName);
    debugLogger.log(`Downloading from: ${downloadUrl}`);
    try {
      const binDir = path.dirname(binaryPath);
      fs.mkdirSync(binDir, { recursive: true });
      await downloadFile(downloadUrl, binaryPath);
      log(chalk.green(' ✓ Binary downloaded successfully'));
    } catch (error) {
      logError(
        chalk.red(
          ` ✗ Failed to download binary: ${error instanceof Error ? error.message : String(error)}`,
        ),
      );
      logError(' Check your internet connection and try again.');
      return 1;
    }
    // Refuse to keep an unverifiable binary: no pinned hash means no
    // install (security boundary — the binary will be executed later).
    const expectedHash = PLATFORM_BINARY_SHA256[platform.binaryName];
    if (!expectedHash) {
      logError(
        chalk.red(
          ` ✗ No checksum is configured for ${platform.binaryName}. Refusing to install the binary.`,
        ),
      );
      try {
        fs.rmSync(binaryPath, { force: true });
      } catch {
        // ignore
      }
      return 1;
    }
    try {
      const checksumVerified = await verifyFileSha256(binaryPath, expectedHash);
      if (!checksumVerified) {
        logError(
          chalk.red(
            ' ✗ Downloaded binary checksum did not match the expected release hash.',
          ),
        );
        // Delete the mismatched binary so it can never be executed.
        try {
          fs.rmSync(binaryPath, { force: true });
        } catch {
          // ignore
        }
        return 1;
      }
      log(chalk.green(' ✓ Binary checksum verified'));
    } catch (error) {
      logError(
        chalk.red(
          ` ✗ Failed to verify binary checksum: ${error instanceof Error ? error.message : String(error)}`,
        ),
      );
      try {
        fs.rmSync(binaryPath, { force: true });
      } catch {
        // ignore
      }
      return 1;
    }
    // Downloaded files are not executable by default on POSIX.
    if (process.platform !== 'win32') {
      try {
        fs.chmodSync(binaryPath, 0o755);
      } catch (error) {
        logError(
          chalk.red(
            ` ✗ Failed to set executable permission: ${error instanceof Error ? error.message : String(error)}`,
          ),
        );
        return 1;
      }
    }
    // macOS Gatekeeper quarantines downloads; strip the attribute so
    // the binary can run without a manual approval dialog.
    if (process.platform === 'darwin') {
      try {
        execFileSync('xattr', ['-d', 'com.apple.quarantine', binaryPath], {
          stdio: 'ignore',
        });
        log(chalk.green(' ✓ macOS quarantine attribute removed'));
      } catch {
        // Expected if the attribute doesn't exist.
      }
    }
  }
  // Step 4: pull the Gemma model via the LiteRT binary itself (stdio is
  // inherited so the user can interact with its terms prompt).
  if (!argv.skipModel) {
    const modelAlreadyDownloaded = isModelDownloaded(binaryPath);
    if (modelAlreadyDownloaded && !force) {
      log('');
      log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} already downloaded`));
    } else {
      log('');
      log(` Downloading model ${GEMMA_MODEL_NAME}...`);
      log(chalk.dim(' You may be prompted to accept the Gemma Terms of Use.'));
      log('');
      const exitCode = await spawnInherited(binaryPath, [
        'pull',
        GEMMA_MODEL_NAME,
      ]);
      if (exitCode !== 0) {
        logError('');
        logError(
          chalk.red(` ✗ Model download failed (exit code ${exitCode})`),
        );
        return 1;
      }
      log('');
      log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} downloaded`));
    }
  }
  // Step 5: write settings. Failure here is reported but does not abort
  // the remaining steps; it is reflected in the final exit code.
  log('');
  log(' Configuring settings...');
  try {
    const settings = loadSettings(process.cwd());
    // User scope: security-sensitive settings that must not be overridable
    // by workspace configs (prevents arbitrary binary execution).
    const existingUserGemma =
      settings.forScope(SettingScope.User).settings.experimental
        ?.gemmaModelRouter ?? {};
    autoStartServer = existingUserGemma.autoStartServer ?? true;
    const existingUserExperimental =
      settings.forScope(SettingScope.User).settings.experimental ?? {};
    settings.setValue(SettingScope.User, 'experimental', {
      ...existingUserExperimental,
      gemmaModelRouter: {
        autoStartServer,
        ...(existingUserGemma.binaryPath !== undefined
          ? { binaryPath: existingUserGemma.binaryPath }
          : {}),
      },
    });
    // Workspace scope: project-isolated settings so the local model only
    // runs for this specific project, saving resources globally.
    const existingWorkspaceGemma =
      settings.forScope(SettingScope.Workspace).settings.experimental
        ?.gemmaModelRouter ?? {};
    const existingWorkspaceExperimental =
      settings.forScope(SettingScope.Workspace).settings.experimental ?? {};
    settings.setValue(SettingScope.Workspace, 'experimental', {
      ...existingWorkspaceExperimental,
      gemmaModelRouter: {
        ...existingWorkspaceGemma,
        enabled: true,
        classifier: {
          ...existingWorkspaceGemma.classifier,
          host: `http://localhost:${port}`,
          model: GEMMA_MODEL_NAME,
        },
      },
    });
    log(chalk.green(' ✓ Settings updated'));
    log(chalk.dim(' User (~/.gemini/settings.json): autoStartServer'));
    log(
      chalk.dim(' Workspace (.gemini/settings.json): enabled, classifier'),
    );
    settingsUpdated = true;
  } catch (error) {
    logError(
      chalk.red(
        ` ✗ Failed to update settings: ${error instanceof Error ? error.message : String(error)}`,
      ),
    );
    logError(
      ' You can manually add the configuration to ~/.gemini/settings.json',
    );
  }
  // Step 6: optionally start the server (a failed start is a warning,
  // surfaced in the summary and the exit code below).
  if (argv.start) {
    log('');
    log(' Starting LiteRT server...');
    serverStarted = await startServer(binaryPath, port);
    if (serverStarted) {
      log(chalk.green(` ✓ Server started on port ${port}`));
    } else {
      log(
        chalk.yellow(
          ` ! Server may not have started correctly. Check: gemini gemma status`,
        ),
      );
    }
  }
  // Step 7: summary + next steps.
  const routingActive = settingsUpdated && serverStarted;
  const setupSucceeded = settingsUpdated && (!argv.start || serverStarted);
  log('');
  log(chalk.dim('─'.repeat(40)));
  if (routingActive) {
    log(chalk.bold.green(' Setup complete! Local model routing is active.'));
  } else if (settingsUpdated) {
    log(
      chalk.bold.green(' Setup complete! Local model routing is configured.'),
    );
  } else {
    log(
      chalk.bold.yellow(
        ' Setup incomplete. Manual settings changes are still required.',
      ),
    );
  }
  log('');
  log(' How it works: Every request is classified by the local Gemma model.');
  log(
    ' Simple tasks (file reads, quick edits) route to ' +
      chalk.cyan('Flash') +
      ' for speed.',
  );
  log(
    ' Complex tasks (debugging, architecture) route to ' +
      chalk.cyan('Pro') +
      ' for quality.',
  );
  log(' This happens automatically — just use the CLI as usual.');
  log('');
  if (!settingsUpdated) {
    log(
      chalk.yellow(
        ' Fix the settings update above, then rerun "gemini gemma status".',
      ),
    );
    log('');
  } else if (!argv.start) {
    log(chalk.yellow(' Note: Run "gemini gemma start" to start the server.'));
    if (autoStartServer) {
      log(
        chalk.yellow(
          ' Or restart the CLI to auto-start it on the next launch.',
        ),
      );
    }
    log('');
  } else if (!serverStarted) {
    log(
      chalk.yellow(
        ' Review the server logs and rerun "gemini gemma start" after fixing the issue.',
      ),
    );
    log('');
  }
  log(' Useful commands:');
  log(chalk.dim(' gemini gemma status Check routing status'));
  log(chalk.dim(' gemini gemma start Start the LiteRT server'));
  log(chalk.dim(' gemini gemma stop Stop the LiteRT server'));
  log(chalk.dim(' /gemma Check status inside a session'));
  log('');
  return setupSucceeded ? 0 : 1;
}
/**
 * Yargs module for `gemini gemma setup`: declares the flags, forwards
 * them to handleSetup, and exits with its returned code.
 */
export const setupCommand: CommandModule = {
  command: 'setup',
  describe: 'Download and configure Gemma local model routing',
  builder: (yargs) =>
    yargs
      .option('port', {
        type: 'number',
        default: DEFAULT_PORT,
        description: 'Port for the LiteRT server',
      })
      .option('skip-model', {
        type: 'boolean',
        default: false,
        description: 'Skip model download (binary only)',
      })
      .option('start', {
        type: 'boolean',
        default: true,
        description: 'Start the server after setup',
      })
      .option('force', {
        type: 'boolean',
        default: false,
        description: 'Re-download binary and model even if already present',
      })
      .option('consent', {
        type: 'boolean',
        default: false,
        description: 'Skip interactive consent prompt (implies acceptance)',
      }),
  handler: async (argv) => {
    // yargs camel-cases '--skip-model' to argv.skipModel.
    const exitCode = await handleSetup({
      port: Number(argv['port']),
      skipModel: Boolean(argv['skipModel']),
      start: Boolean(argv['start']),
      force: Boolean(argv['force']),
      consent: Boolean(argv['consent']),
    });
    await exitCli(exitCode);
  },
};

View file

@ -0,0 +1,123 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import {
DEFAULT_PORT,
getPidFilePath,
getLogFilePath,
getLiteRtBinDir,
SERVER_START_WAIT_MS,
} from './constants.js';
import {
getBinaryPath,
isBinaryInstalled,
isServerRunning,
resolveGemmaConfig,
writeServerProcessInfo,
} from './platform.js';
/**
 * Spawns the LiteRT binary as a detached background server on `port`,
 * appending its output to the shared log file, and records its PID +
 * launch parameters so `gemini gemma stop` can later verify and
 * terminate it.
 *
 * Resolves true when the server answers on the port after a short
 * startup wait (or was already running), false otherwise.
 */
export async function startServer(
  binaryPath: string,
  port: number,
): Promise<boolean> {
  const alreadyRunning = await isServerRunning(port);
  if (alreadyRunning) {
    debugLogger.log(`LiteRT server already running on port ${port}`);
    return true;
  }
  const logPath = getLogFilePath();
  fs.mkdirSync(getLiteRtBinDir(), { recursive: true });
  const tmpDir = path.dirname(getPidFilePath());
  fs.mkdirSync(tmpDir, { recursive: true });
  // Open the log file ourselves so the detached child inherits the fd.
  const logFd = fs.openSync(logPath, 'a');
  try {
    const child = spawn(binaryPath, ['serve', `--port=${port}`, '--verbose'], {
      detached: true,
      stdio: ['ignore', logFd, logFd],
    });
    if (child.pid) {
      writeServerProcessInfo({
        pid: child.pid,
        binaryPath,
        port,
      });
    }
    // Detach so the server outlives this CLI invocation.
    child.unref();
  } finally {
    // Closing our copy is safe: spawn() duplicated the fd for the child.
    fs.closeSync(logFd);
  }
  // Give the server a moment to bind before probing it.
  await new Promise((resolve) => setTimeout(resolve, SERVER_START_WAIT_MS));
  return isServerRunning(port);
}
/**
 * Yargs module for `gemini gemma start`: resolves the port (flag, then
 * configured value), checks the binary is installed, and starts the
 * server unless one is already responding.
 */
export const startCommand: CommandModule = {
  command: 'start',
  describe: 'Start the LiteRT-LM server',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port for the LiteRT server',
    }),
  handler: async (argv) => {
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    // Fall back to the configured port when no (valid, non-zero) --port
    // was given. NOTE(review): `!port` also rejects 0/NaN — presumably
    // intentional since those are not usable ports.
    if (!port) {
      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
      port = configuredPort;
    }
    const binaryPath = getBinaryPath();
    if (!binaryPath || !isBinaryInstalled(binaryPath)) {
      debugLogger.error(
        chalk.red(
          'LiteRT-LM binary not found. Run "gemini gemma setup" first.',
        ),
      );
      await exitCli(1);
      return;
    }
    const alreadyRunning = await isServerRunning(port);
    if (alreadyRunning) {
      debugLogger.log(
        chalk.green(`LiteRT server is already running on port ${port}.`),
      );
      await exitCli(0);
      return;
    }
    debugLogger.log(`Starting LiteRT server on port ${port}...`);
    const started = await startServer(binaryPath, port);
    if (started) {
      debugLogger.log(chalk.green(`LiteRT server started on port ${port}.`));
      debugLogger.log(chalk.dim(`Logs: ${getLogFilePath()}`));
      await exitCli(0);
    } else {
      debugLogger.error(
        chalk.red('Server may not have started correctly. Check logs:'),
      );
      debugLogger.error(chalk.dim(` ${getLogFilePath()}`));
      await exitCli(1);
    }
  },
};

View file

@ -0,0 +1,165 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import chalk from 'chalk';
import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
import {
detectPlatform,
getBinaryPath,
isBinaryInstalled,
isModelDownloaded,
isServerRunning,
readServerPid,
isProcessRunning,
resolveGemmaConfig,
} from './platform.js';
import { exitCli } from '../utils.js';
/** Snapshot of every Gemma local-routing health check. */
export interface GemmaStatusResult {
  // LiteRT binary present on disk.
  binaryInstalled: boolean;
  // Resolved LiteRT binary path, or null when it cannot be resolved.
  binaryPath: string | null;
  // Gemma model already pulled for the installed binary.
  modelDownloaded: boolean;
  // Server responds on `port`.
  serverRunning: boolean;
  // PID from the pid file, only when that process is still alive.
  serverPid: number | null;
  // Routing enabled in settings.json.
  settingsEnabled: boolean;
  // Port that was actually probed.
  port: number;
  // True when every individual check above passed.
  allPassing: boolean;
}
/**
 * Gathers the current state of every Gemma routing prerequisite
 * (binary, model, server, settings). Probes the given port when one is
 * supplied, otherwise the configured port.
 */
export async function checkGemmaStatus(
  port?: number,
): Promise<GemmaStatusResult> {
  const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
  const effectivePort = port ?? configuredPort;
  const binaryPath = getBinaryPath();
  const binaryInstalled = isBinaryInstalled(binaryPath);
  // The model check needs a concrete binary path to query.
  let modelDownloaded = false;
  if (binaryInstalled && binaryPath) {
    modelDownloaded = isModelDownloaded(binaryPath);
  }
  const serverRunning = await isServerRunning(effectivePort);
  // Only report a PID when the recorded process is actually alive.
  const recordedPid = readServerPid();
  let serverPid: number | null = null;
  if (recordedPid && isProcessRunning(recordedPid)) {
    serverPid = recordedPid;
  }
  return {
    binaryInstalled,
    binaryPath,
    modelDownloaded,
    serverRunning,
    serverPid,
    settingsEnabled,
    port: effectivePort,
    allPassing:
      binaryInstalled && modelDownloaded && serverRunning && settingsEnabled,
  };
}
/**
 * Renders a GemmaStatusResult as a human-readable, colorized report:
 * one ✓/✗ line per check (binary, model, server, settings), each
 * failing check followed by the command that fixes it, and a closing
 * summary.
 */
export function formatGemmaStatus(status: GemmaStatusResult): string {
  const check = (ok: boolean) => (ok ? chalk.green('✓') : chalk.red('✗'));
  const lines: string[] = [
    '',
    chalk.bold('Gemma Local Model Routing Status'),
    chalk.dim('─'.repeat(40)),
    '',
  ];
  if (status.binaryInstalled) {
    lines.push(` Binary: ${check(true)} Installed (${status.binaryPath})`);
  } else {
    // Distinguish "not installed yet" from "cannot be installed here".
    const platform = detectPlatform();
    if (platform) {
      lines.push(` Binary: ${check(false)} Not installed`);
      lines.push(chalk.dim(` Run: gemini gemma setup`));
    } else {
      lines.push(
        ` Binary: ${check(false)} Unsupported platform (${process.platform}-${process.arch})`,
      );
    }
  }
  if (status.modelDownloaded) {
    lines.push(` Model: ${check(true)} ${GEMMA_MODEL_NAME} downloaded`);
  } else {
    lines.push(` Model: ${check(false)} ${GEMMA_MODEL_NAME} not found`);
    // With a binary present, the model can be pulled directly; otherwise
    // the full setup flow is needed.
    if (status.binaryInstalled) {
      lines.push(
        chalk.dim(
          ` Run: ${status.binaryPath} pull ${GEMMA_MODEL_NAME}`,
        ),
      );
    } else {
      lines.push(chalk.dim(` Run: gemini gemma setup`));
    }
  }
  if (status.serverRunning) {
    const pidInfo = status.serverPid ? ` (PID ${status.serverPid})` : '';
    lines.push(
      ` Server: ${check(true)} Running on port ${status.port}${pidInfo}`,
    );
  } else {
    lines.push(
      ` Server: ${check(false)} Not running on port ${status.port}`,
    );
    lines.push(chalk.dim(` Run: gemini gemma start`));
  }
  if (status.settingsEnabled) {
    lines.push(` Settings: ${check(true)} Enabled in settings.json`);
  } else {
    lines.push(` Settings: ${check(false)} Not enabled in settings.json`);
    lines.push(
      chalk.dim(
        ` Run: gemini gemma setup (auto-configures settings)`,
      ),
    );
  }
  lines.push('');
  if (status.allPassing) {
    lines.push(chalk.green(' Routing is active — no action needed.'));
    lines.push('');
    lines.push(
      chalk.dim(
        ' Simple requests → Flash (fast) | Complex requests → Pro (powerful)',
      ),
    );
    lines.push(chalk.dim(' This happens automatically on every request.'));
  } else {
    lines.push(
      chalk.yellow(
        ' Some checks failed. Run "gemini gemma setup" for guided installation.',
      ),
    );
  }
  lines.push('');
  return lines.join('\n');
}
/**
 * Yargs module for `gemini gemma status`: prints the health report and
 * exits 0 only when every check passes.
 */
export const statusCommand: CommandModule = {
  command: 'status',
  describe: 'Check Gemma local model routing status',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port to check for the LiteRT server',
    }),
  handler: async (argv) => {
    // An explicit --port overrides the configured port.
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    const status = await checkGemmaStatus(port);
    const output = formatGemmaStatus(status);
    process.stdout.write(output);
    await exitCli(status.allPassing ? 0 : 1);
  },
};

View file

@ -0,0 +1,112 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
const mockGetBinaryPath = vi.hoisted(() => vi.fn());
const mockIsExpectedLiteRtServerProcess = vi.hoisted(() => vi.fn());
const mockIsProcessRunning = vi.hoisted(() => vi.fn());
const mockIsServerRunning = vi.hoisted(() => vi.fn());
const mockReadServerPid = vi.hoisted(() => vi.fn());
const mockReadServerProcessInfo = vi.hoisted(() => vi.fn());
const mockResolveGemmaConfig = vi.hoisted(() => vi.fn());
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
const { mockCoreDebugLogger } = await import(
'../../test-utils/mockDebugLogger.js'
);
return mockCoreDebugLogger(
await importOriginal<typeof import('@google/gemini-cli-core')>(),
{
stripAnsi: false,
},
);
});
vi.mock('./constants.js', () => ({
DEFAULT_PORT: 9379,
getPidFilePath: vi.fn(() => '/tmp/litert-server.pid'),
}));
vi.mock('./platform.js', () => ({
getBinaryPath: mockGetBinaryPath,
isExpectedLiteRtServerProcess: mockIsExpectedLiteRtServerProcess,
isProcessRunning: mockIsProcessRunning,
isServerRunning: mockIsServerRunning,
readServerPid: mockReadServerPid,
readServerProcessInfo: mockReadServerProcessInfo,
resolveGemmaConfig: mockResolveGemmaConfig,
}));
vi.mock('../utils.js', () => ({
exitCli: vi.fn(),
}));
import { stopServer } from './stop.js';
// Tests for stopServer's PID-verification safety: it must only signal a
// process that provably is the LiteRT server it previously launched.
describe('gemma stop command', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Fake timers let the SIGTERM grace-period waits resolve instantly.
    vi.useFakeTimers();
    mockGetBinaryPath.mockReturnValue('/custom/lit');
    mockResolveGemmaConfig.mockReturnValue({ configuredPort: 9379 });
  });
  afterEach(() => {
    vi.useRealTimers();
    vi.restoreAllMocks();
  });
  it('refuses to signal a pid that does not match the expected LiteRT server', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    mockIsProcessRunning.mockReturnValue(true);
    // PID is alive but its command line is something else entirely.
    mockIsExpectedLiteRtServerProcess.mockReturnValue(false);
    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
    await expect(stopServer(8123)).resolves.toBe('unexpected-process');
    expect(killSpy).not.toHaveBeenCalled();
  });
  it('stops the verified LiteRT server and removes the pid file', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    // Alive before SIGTERM, gone on the post-signal re-check.
    mockIsProcessRunning.mockReturnValueOnce(true).mockReturnValueOnce(false);
    mockIsExpectedLiteRtServerProcess.mockReturnValue(true);
    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
    const stopPromise = stopServer(8123);
    await vi.runAllTimersAsync();
    await expect(stopPromise).resolves.toBe('stopped');
    expect(killSpy).toHaveBeenCalledWith(1234, 'SIGTERM');
    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
  });
  it('cleans up a stale pid file when the recorded process is no longer running', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    mockIsProcessRunning.mockReturnValue(false);
    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
    await expect(stopServer(8123)).resolves.toBe('not-running');
    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
  });
});

View file

@ -0,0 +1,155 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import { DEFAULT_PORT, getPidFilePath } from './constants.js';
import {
getBinaryPath,
isExpectedLiteRtServerProcess,
isProcessRunning,
isServerRunning,
readServerPid,
readServerProcessInfo,
resolveGemmaConfig,
} from './platform.js';
/**
 * Outcome of a stopServer() attempt:
 * - 'stopped': server was verified, signaled, and has exited.
 * - 'not-running': no recorded server (stale pid files are cleaned up).
 * - 'unexpected-process': recorded PID is alive but is NOT our server;
 *   it is deliberately left untouched.
 * - 'failed': signal could not be sent or the process survived SIGKILL.
 */
export type StopServerResult =
  | 'stopped'
  | 'not-running'
  | 'unexpected-process'
  | 'failed';
/**
 * Stops the recorded LiteRT server, but only after verifying that the
 * recorded PID still belongs to the server we launched — a stale pid
 * file must never cause an unrelated process (PID reuse) to be killed.
 *
 * Escalation: SIGTERM, wait 1s; if still alive, SIGKILL, wait 0.5s.
 * The pid file is removed on success and on stale-entry cleanup.
 *
 * @param expectedPort Fallback port for verification when the pid file
 *   predates the JSON format and carries no port of its own.
 */
export async function stopServer(
  expectedPort?: number,
): Promise<StopServerResult> {
  const processInfo = readServerProcessInfo();
  const pidPath = getPidFilePath();
  if (!processInfo) {
    return 'not-running';
  }
  const { pid } = processInfo;
  if (!isProcessRunning(pid)) {
    // Stale entry: the process already exited; just clean up the file.
    debugLogger.log(
      `Stale PID file found (PID ${pid} is not running), removing ${pidPath}`,
    );
    try {
      fs.unlinkSync(pidPath);
    } catch {
      // ignore
    }
    return 'not-running';
  }
  // Verify identity before sending any signal (guards against PID reuse).
  const binaryPath = processInfo.binaryPath ?? getBinaryPath();
  const port = processInfo.port ?? expectedPort;
  if (!isExpectedLiteRtServerProcess(pid, { binaryPath, port })) {
    debugLogger.warn(
      `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
    );
    return 'unexpected-process';
  }
  try {
    process.kill(pid, 'SIGTERM');
  } catch {
    return 'failed';
  }
  // Grace period for a clean shutdown before escalating.
  await new Promise((resolve) => setTimeout(resolve, 1000));
  if (isProcessRunning(pid)) {
    try {
      process.kill(pid, 'SIGKILL');
    } catch {
      // ignore
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
    if (isProcessRunning(pid)) {
      return 'failed';
    }
  }
  try {
    fs.unlinkSync(pidPath);
  } catch {
    // ignore
  }
  return 'stopped';
}
/**
 * Yargs module for `gemini gemma stop`: stops the recorded server via
 * stopServer, and otherwise explains why nothing was stopped (foreign
 * process on the port, or nothing running at all).
 */
export const stopCommand: CommandModule = {
  command: 'stop',
  describe: 'Stop the LiteRT-LM server',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port where the LiteRT server is running',
    }),
  handler: async (argv) => {
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    // Fall back to the configured port when no (valid, non-zero) --port
    // was given.
    if (!port) {
      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
      port = configuredPort;
    }
    const processInfo = readServerProcessInfo();
    const pid = processInfo?.pid ?? readServerPid();
    if (pid !== null && isProcessRunning(pid)) {
      debugLogger.log(`Stopping LiteRT server (PID ${pid})...`);
      const result = await stopServer(port);
      if (result === 'stopped') {
        debugLogger.log(chalk.green('LiteRT server stopped.'));
        await exitCli(0);
      } else if (result === 'unexpected-process') {
        debugLogger.error(
          chalk.red(
            `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
          ),
        );
        debugLogger.error(
          chalk.dim(
            'Remove the stale pid file after verifying the process, or stop the process manually.',
          ),
        );
        await exitCli(1);
      } else {
        debugLogger.error(chalk.red('Failed to stop LiteRT server.'));
        await exitCli(1);
      }
      return;
    }
    // No recorded PID: something else may still own the port — report,
    // but never kill a process we did not start.
    const running = await isServerRunning(port);
    if (running) {
      debugLogger.log(
        chalk.yellow(
          `A server is responding on port ${port}, but it was not started by "gemini gemma start".`,
        ),
      );
      debugLogger.log(
        chalk.dim(
          'If you started it manually, stop it from the terminal where it is running.',
        ),
      );
      await exitCli(1);
    } else {
      debugLogger.log('No LiteRT server is currently running.');
      await exitCli(0);
    }
  },
};

View file

@ -338,6 +338,7 @@ describe('parseArguments', () => {
{ cmd: 'skill list', expected: true },
{ cmd: 'hooks migrate', expected: true },
{ cmd: 'hook migrate', expected: true },
{ cmd: 'gemma status', expected: true },
{ cmd: 'some query', expected: undefined },
{ cmd: 'hello world', expected: undefined },
])(
@ -758,6 +759,12 @@ describe('parseArguments', () => {
const argv = await parseArguments(settings);
expect(argv.isCommand).toBe(true);
});
it('should set isCommand to true for gemma command', async () => {
process.argv = ['node', 'script.js', 'gemma', 'status'];
const argv = await parseArguments(createTestMergedSettings());
expect(argv.isCommand).toBe(true);
});
});
describe('loadCliConfig', () => {
@ -3030,6 +3037,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
experimental: {
gemmaModelRouter: {
enabled: true,
autoStartServer: false,
binaryPath: '/custom/lit',
classifier: {
host: 'http://custom:1234',
model: 'custom-gemma',
@ -3040,6 +3049,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getGemmaModelRouterEnabled()).toBe(true);
const gemmaSettings = config.getGemmaModelRouterSettings();
expect(gemmaSettings.autoStartServer).toBe(false);
expect(gemmaSettings.binaryPath).toBe('/custom/lit');
expect(gemmaSettings.classifier?.host).toBe('http://custom:1234');
expect(gemmaSettings.classifier?.model).toBe('custom-gemma');
});
@ -3057,6 +3068,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getGemmaModelRouterEnabled()).toBe(true);
const gemmaSettings = config.getGemmaModelRouterSettings();
expect(gemmaSettings.autoStartServer).toBe(false);
expect(gemmaSettings.binaryPath).toBe('');
expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379');
expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});

View file

@ -13,6 +13,7 @@ import { mcpCommand } from '../commands/mcp.js';
import { extensionsCommand } from '../commands/extensions.js';
import { skillsCommand } from '../commands/skills.js';
import { hooksCommand } from '../commands/hooks.js';
import { gemmaCommand } from '../commands/gemma.js';
import {
setGeminiMdFilename as setServerGeminiMdFilename,
getCurrentGeminiMdFilename,
@ -181,6 +182,7 @@ export async function parseArguments(
extensionsCommand,
skillsCommand,
hooksCommand,
gemmaCommand,
];
const subcommands = commandModules.flatMap((mod) => {
@ -260,6 +262,7 @@ export async function parseArguments(
yargsInstance.command(extensionsCommand);
yargsInstance.command(skillsCommand);
yargsInstance.command(hooksCommand);
yargsInstance.command(gemmaCommand);
yargsInstance
.command('$0 [query..]', 'Launch Gemini CLI', (yargsInstance) =>

View file

@ -471,11 +471,33 @@ describe('SettingsSchema', () => {
expect(enabled.category).toBe('Experimental');
expect(enabled.default).toBe(false);
expect(enabled.requiresRestart).toBe(true);
expect(enabled.showInDialog).toBe(false);
expect(enabled.showInDialog).toBe(true);
expect(enabled.description).toBe(
'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
);
const autoStartServer = gemmaModelRouter.properties.autoStartServer;
expect(autoStartServer).toBeDefined();
expect(autoStartServer.type).toBe('boolean');
expect(autoStartServer.category).toBe('Experimental');
expect(autoStartServer.default).toBe(false);
expect(autoStartServer.requiresRestart).toBe(true);
expect(autoStartServer.showInDialog).toBe(true);
expect(autoStartServer.description).toBe(
'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
);
const binaryPath = gemmaModelRouter.properties.binaryPath;
expect(binaryPath).toBeDefined();
expect(binaryPath.type).toBe('string');
expect(binaryPath.category).toBe('Experimental');
expect(binaryPath.default).toBe('');
expect(binaryPath.requiresRestart).toBe(true);
expect(binaryPath.showInDialog).toBe(false);
expect(binaryPath.description).toBe(
'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
);
const classifier = gemmaModelRouter.properties.classifier;
expect(classifier).toBeDefined();
expect(classifier.type).toBe('object');

View file

@ -2169,6 +2169,26 @@ const SETTINGS_SCHEMA = {
default: false,
description:
'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
showInDialog: true,
},
autoStartServer: {
type: 'boolean',
label: 'Auto-start LiteRT Server',
category: 'Experimental',
requiresRestart: true,
default: false,
description:
'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
showInDialog: true,
},
binaryPath: {
type: 'string',
label: 'LiteRT Binary Path',
category: 'Experimental',
requiresRestart: true,
default: '',
description:
'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
showInDialog: false,
},
classifier: {

View file

@ -612,6 +612,23 @@ export async function main() {
const initializationResult = await initializeApp(config, settings);
initAppHandle?.end();
import('./services/liteRtServerManager.js')
.then(({ LiteRtServerManager }) => {
const mergedGemma = settings.merged.experimental?.gemmaModelRouter;
if (!mergedGemma) return;
// Security: binaryPath and autoStartServer must come from user-scoped
// settings only to prevent workspace configs from triggering arbitrary
// binary execution.
const userGemma = settings.forScope(SettingScope.User).settings
.experimental?.gemmaModelRouter;
return LiteRtServerManager.ensureRunning({
...mergedGemma,
binaryPath: userGemma?.binaryPath,
autoStartServer: userGemma?.autoStartServer,
});
})
.catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e));
if (
settings.merged.security.auth.selectedType ===
AuthType.LOGIN_WITH_GOOGLE &&

View file

@ -151,7 +151,7 @@ export async function startInteractiveUI(
isScreenReaderEnabled: config.getScreenReader(),
onRender: ({ renderTime }: { renderTime: number }) => {
if (renderTime > SLOW_RENDER_MS) {
recordSlowRender(config, renderTime);
recordSlowRender(config, Math.round(renderTime));
}
profiler.reportFrameRendered();
},

View file

@ -61,6 +61,7 @@ import { vimCommand } from '../ui/commands/vimCommand.js';
import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js';
import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js';
import { upgradeCommand } from '../ui/commands/upgradeCommand.js';
import { gemmaStatusCommand } from '../ui/commands/gemmaStatusCommand.js';
/**
* Loads the core, hard-coded slash commands that are an integral part
@ -221,6 +222,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
: [skillsCommand]
: []),
settingsCommand,
gemmaStatusCommand,
tasksCommand,
vimCommand,
setupGithubCommand,

View file

@ -0,0 +1,68 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
// vi.hoisted ensures these mock fns exist before the vi.mock factories
// below run (vitest hoists vi.mock calls to the top of the module).
const mockGetBinaryPath = vi.hoisted(() => vi.fn());
const mockIsServerRunning = vi.hoisted(() => vi.fn());
const mockStartServer = vi.hoisted(() => vi.fn());

// Stub the platform helpers so tests never touch the real filesystem or
// probe a real port.
vi.mock('../commands/gemma/platform.js', () => ({
  getBinaryPath: mockGetBinaryPath,
  isServerRunning: mockIsServerRunning,
}));

// start.js is dynamically imported by the manager under test; mock it too.
vi.mock('../commands/gemma/start.js', () => ({
  startServer: mockStartServer,
}));
import { LiteRtServerManager } from './liteRtServerManager.js';
describe('LiteRtServerManager', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Binary appears installed so ensureRunning proceeds past the check.
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    // No server listening yet; starting it reports success.
    mockIsServerRunning.mockResolvedValue(false);
    mockStartServer.mockResolvedValue(true);
  });

  // NOTE(review): getBinaryPath is mocked here, so this test only shows the
  // workspace-supplied `binaryPath` ('/workspace/evil') being ignored in
  // favor of getBinaryPath()'s result — it does not prove the user-configured
  // custom path flows through. Confirm getBinaryPath itself resolves the
  // user-scoped setting.
  it('uses the configured custom binary path when auto-starting', async () => {
    mockGetBinaryPath.mockReturnValue('/user/lit');
    const settings: GemmaModelRouterSettings = {
      enabled: true,
      binaryPath: '/workspace/evil',
      classifier: {
        // Port 8123 should be parsed out of this host URL.
        host: 'http://localhost:8123',
      },
    };
    await LiteRtServerManager.ensureRunning(settings);
    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
    expect(fs.existsSync).toHaveBeenCalledWith('/user/lit');
    expect(mockStartServer).toHaveBeenCalledWith('/user/lit', 8123);
  });

  it('falls back to the default binary path when no custom path is configured', async () => {
    mockGetBinaryPath.mockReturnValue('/default/lit');
    const settings: GemmaModelRouterSettings = {
      enabled: true,
      classifier: {
        host: 'http://localhost:9379',
      },
    };
    await LiteRtServerManager.ensureRunning(settings);
    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
    expect(fs.existsSync).toHaveBeenCalledWith('/default/lit');
    expect(mockStartServer).toHaveBeenCalledWith('/default/lit', 9379);
  });
});

View file

@ -0,0 +1,59 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { debugLogger } from '@google/gemini-cli-core';
import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
import { getBinaryPath, isServerRunning } from '../commands/gemma/platform.js';
import { DEFAULT_PORT } from '../commands/gemma/constants.js';
/**
 * Auto-starts the local LiteRT-LM server used by the Gemma model router.
 */
export class LiteRtServerManager {
  /**
   * Starts the LiteRT-LM server when all preconditions hold: the router is
   * enabled, auto-start has not been explicitly disabled, the binary is
   * installed, and no server is already listening on the configured port.
   *
   * Never throws: start-up failures are logged via debugLogger and
   * swallowed so CLI boot is not blocked.
   *
   * NOTE(review): `gemmaSettings.binaryPath` is never consulted here — the
   * path always comes from `getBinaryPath()`. Confirm whether that helper
   * reads the configured path itself; otherwise the setting is unused.
   */
  static async ensureRunning(
    gemmaSettings: GemmaModelRouterSettings | undefined,
  ): Promise<void> {
    // Router disabled entirely: nothing to do.
    if (!gemmaSettings?.enabled) return;
    // Only an explicit `false` opts out; `undefined` proceeds.
    if (gemmaSettings.autoStartServer === false) return;

    const binaryPath = getBinaryPath();
    const binaryMissing = !binaryPath || !fs.existsSync(binaryPath);
    if (binaryMissing) {
      debugLogger.log(
        '[LiteRtServerManager] Binary not installed, skipping auto-start. Run "gemini gemma setup".',
      );
      return;
    }

    // Pull the port out of the classifier host URL (first ":<digits>" run);
    // absent or unparsable values fall back to the default.
    // NOTE(review): this pattern would mis-parse IPv6 literal hosts —
    // confirm only hostname:port style hosts are expected.
    const hostPort = gemmaSettings.classifier?.host?.match(/:(\d+)/)?.[1];
    const port = Number.parseInt(hostPort ?? '', 10) || DEFAULT_PORT;

    if (await isServerRunning(port)) {
      debugLogger.log(
        `[LiteRtServerManager] Server already running on port ${port}`,
      );
      return;
    }

    debugLogger.log(
      `[LiteRtServerManager] Auto-starting LiteRT server on port ${port}...`,
    );
    try {
      // Imported lazily so start.js (and its dependencies) stay off the
      // critical startup path when no auto-start is needed.
      const { startServer } = await import('../commands/gemma/start.js');
      if (await startServer(binaryPath, port)) {
        debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`);
      } else {
        debugLogger.warn(
          `[LiteRtServerManager] Server may not have started correctly on port ${port}`,
        );
      }
    } catch (error) {
      debugLogger.warn('[LiteRtServerManager] Auto-start failed:', error);
    }
  }
}

View file

@ -0,0 +1,41 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { CommandKind, type SlashCommand } from './types.js';
import { MessageType, type HistoryItemGemmaStatus } from '../types.js';
import { checkGemmaStatus } from '../../commands/gemma/status.js';
import { GEMMA_MODEL_NAME } from '../../commands/gemma/constants.js';
/**
 * `/gemma` slash command: surfaces the local Gemma routing setup
 * (binary, model, server, settings) as a status history item.
 */
export const gemmaStatusCommand: SlashCommand = {
  name: 'gemma',
  description: 'Check local Gemma model routing status',
  kind: CommandKind.BUILT_IN,
  autoExecute: true,
  isSafeConcurrent: true,
  action: async (context) => {
    // Extract the port from the configured classifier host, if any; an
    // absent or unparsable port becomes `undefined` so checkGemmaStatus
    // applies its own default.
    const host =
      context.services.settings.merged.experimental?.gemmaModelRouter
        ?.classifier?.host;
    const portText = host?.match(/:(\d+)/)?.[1] ?? '';
    const port = Number.parseInt(portText, 10) || undefined;

    const status = await checkGemmaStatus(port);

    // Map the status check result onto a history item for the UI.
    const item: Omit<HistoryItemGemmaStatus, 'id'> = {
      type: MessageType.GEMMA_STATUS,
      binaryInstalled: status.binaryInstalled,
      binaryPath: status.binaryPath,
      modelName: GEMMA_MODEL_NAME,
      modelDownloaded: status.modelDownloaded,
      serverRunning: status.serverRunning,
      serverPid: status.serverPid,
      serverPort: status.port,
      settingsEnabled: status.settingsEnabled,
      allPassing: status.allPassing,
    };
    context.ui.addItem(item);
  },
};

View file

@ -32,6 +32,7 @@ import { ToolsList } from './views/ToolsList.js';
import { SkillsList } from './views/SkillsList.js';
import { AgentsStatus } from './views/AgentsStatus.js';
import { McpStatus } from './views/McpStatus.js';
import { GemmaStatus } from './views/GemmaStatus.js';
import { ChatList } from './views/ChatList.js';
import { ModelMessage } from './messages/ModelMessage.js';
import { ThinkingMessage } from './messages/ThinkingMessage.js';
@ -228,6 +229,9 @@ export const HistoryItemDisplay: React.FC<HistoryItemDisplayProps> = ({
{itemForDisplay.type === 'mcp_status' && (
<McpStatus {...itemForDisplay} serverStatus={getMCPServerStatus} />
)}
{itemForDisplay.type === 'gemma_status' && (
<GemmaStatus {...itemForDisplay} />
)}
{itemForDisplay.type === 'chat_list' && (
<ChatList chats={itemForDisplay.chats} />
)}

View file

@ -0,0 +1,120 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { Box, Text } from 'ink';
import type React from 'react';
import { theme } from '../../semantic-colors.js';
import type { HistoryItemGemmaStatus } from '../../types.js';
type GemmaStatusProps = Omit<HistoryItemGemmaStatus, 'id' | 'type'>;
const StatusDot: React.FC<{ ok: boolean }> = ({ ok }) => (
<Text color={ok ? theme.status.success : theme.status.error}>
{ok ? '\u25CF' : '\u25CB'}
</Text>
);
/**
 * Renders the `/gemma` status report: one checklist row per prerequisite
 * (binary, model, server, settings) plus a summary of whether local Gemma
 * routing is currently active.
 */
export const GemmaStatus: React.FC<GemmaStatusProps> = ({
  binaryInstalled,
  binaryPath,
  modelName,
  modelDownloaded,
  serverRunning,
  serverPid,
  serverPort,
  settingsEnabled,
  allPassing,
}) => (
  <Box flexDirection="column">
    <Text bold>Gemma Local Model Routing</Text>
    {/* Spacer row between the heading and the checklist. */}
    <Box height={1} />
    {/* Binary check: shows the resolved path when installed. */}
    <Box>
      <StatusDot ok={binaryInstalled} />
      <Text>
        {' '}
        <Text bold>Binary: </Text>
        {binaryInstalled ? (
          <Text color={theme.text.secondary}>{binaryPath}</Text>
        ) : (
          <Text color={theme.status.error}>Not installed</Text>
        )}
      </Text>
    </Box>
    {/* Model check. */}
    <Box>
      <StatusDot ok={modelDownloaded} />
      <Text>
        {' '}
        <Text bold>Model: </Text>
        {modelDownloaded ? (
          <Text>{modelName}</Text>
        ) : (
          <Text color={theme.status.error}>{modelName} not found</Text>
        )}
      </Text>
    </Box>
    {/* Server check: shows port, and PID when known. */}
    <Box>
      <StatusDot ok={serverRunning} />
      <Text>
        {' '}
        <Text bold>Server: </Text>
        {serverRunning ? (
          <Text>
            port {serverPort}
            {serverPid ? (
              <Text color={theme.text.secondary}> (PID {serverPid})</Text>
            ) : null}
          </Text>
        ) : (
          <Text color={theme.status.error}>
            not running on port {serverPort}
          </Text>
        )}
      </Text>
    </Box>
    {/* Settings check. */}
    <Box>
      <StatusDot ok={settingsEnabled} />
      <Text>
        {' '}
        <Text bold>Settings: </Text>
        {settingsEnabled ? (
          <Text>enabled</Text>
        ) : (
          <Text color={theme.status.error}>not enabled</Text>
        )}
      </Text>
    </Box>
    {/* Summary: routing is only active when every check above passed. */}
    <Box marginTop={1}>
      <Text bold>Active for: </Text>
      {allPassing ? (
        <Text color={theme.status.success}>[routing]</Text>
      ) : (
        <Text color={theme.text.secondary}>none</Text>
      )}
    </Box>
    <Box marginTop={1}>
      {allPassing ? (
        <Box flexDirection="column">
          <Text color={theme.text.secondary}>
            Simple requests route to Flash, complex requests to Pro.
          </Text>
          <Text color={theme.text.secondary}>
            This happens automatically on every request.
          </Text>
        </Box>
      ) : (
        <Text color={theme.status.warning}>
          Run &quot;gemini gemma setup&quot; to install and configure.
        </Text>
      )}
    </Box>
  </Box>
);

View file

@ -355,6 +355,19 @@ export interface JsonMcpResource {
description?: string;
}
/** History item carrying the result of the `/gemma` local routing status check. */
export type HistoryItemGemmaStatus = HistoryItemBase & {
  type: 'gemma_status';
  /** LiteRT-LM binary was found on disk. */
  binaryInstalled: boolean;
  /** Resolved binary location, or null when not installed. */
  binaryPath: string | null;
  /** Name of the local Gemma model being checked. */
  modelName: string;
  /** The model file was found locally. */
  modelDownloaded: boolean;
  /** A LiteRT server is listening on `serverPort`. */
  serverRunning: boolean;
  /** PID of the running server, when known. */
  serverPid: number | null;
  serverPort: number;
  /** The Gemma model router is enabled in settings. */
  settingsEnabled: boolean;
  /** All checks above passed, i.e. local routing is active. */
  allPassing: boolean;
};
export type HistoryItemMcpStatus = HistoryItemBase & {
type: 'mcp_status';
servers: Record<string, MCPServerConfig>;
@ -404,6 +417,7 @@ export type HistoryItemWithoutId =
| HistoryItemSkillsList
| HistoryItemAgentsList
| HistoryItemMcpStatus
| HistoryItemGemmaStatus
| HistoryItemChatList
| HistoryItemThinking
| HistoryItemHint
@ -430,6 +444,7 @@ export enum MessageType {
SKILLS_LIST = 'skills_list',
AGENTS_LIST = 'agents_list',
MCP_STATUS = 'mcp_status',
GEMMA_STATUS = 'gemma_status',
CHAT_LIST = 'chat_list',
HINT = 'hint',
}

View file

@ -710,6 +710,59 @@ describe('Server Config (config.ts)', () => {
);
});
  // Verifies getProModelNoAccessSync honors the PRO_MODEL_NO_ACCESS
  // experiment flag only for the LOGIN_WITH_GOOGLE and COMPUTE_ADC auth
  // types, and returns false for all other auth types.
  describe('getProModelNoAccessSync', () => {
    it('should return experiment value for AuthType.LOGIN_WITH_GOOGLE', async () => {
      vi.mocked(getExperiments).mockResolvedValue({
        experimentIds: [],
        flags: {
          [ExperimentFlags.PRO_MODEL_NO_ACCESS]: {
            boolValue: true,
          },
        },
      });
      const config = new Config(baseParams);
      vi.mocked(createContentGeneratorConfig).mockResolvedValue({
        authType: AuthType.LOGIN_WITH_GOOGLE,
      });
      // refreshAuth loads the experiments and records the auth type.
      await config.refreshAuth(AuthType.LOGIN_WITH_GOOGLE);
      expect(config.getProModelNoAccessSync()).toBe(true);
    });

    it('should return experiment value for AuthType.COMPUTE_ADC', async () => {
      vi.mocked(getExperiments).mockResolvedValue({
        experimentIds: [],
        flags: {
          [ExperimentFlags.PRO_MODEL_NO_ACCESS]: {
            boolValue: true,
          },
        },
      });
      const config = new Config(baseParams);
      vi.mocked(createContentGeneratorConfig).mockResolvedValue({
        authType: AuthType.COMPUTE_ADC,
      });
      await config.refreshAuth(AuthType.COMPUTE_ADC);
      expect(config.getProModelNoAccessSync()).toBe(true);
    });

    it('should return false for other auth types even if experiment is true', async () => {
      vi.mocked(getExperiments).mockResolvedValue({
        experimentIds: [],
        flags: {
          [ExperimentFlags.PRO_MODEL_NO_ACCESS]: {
            boolValue: true,
          },
        },
      });
      const config = new Config(baseParams);
      vi.mocked(createContentGeneratorConfig).mockResolvedValue({
        authType: AuthType.USE_GEMINI,
      });
      // Flag is true, but the auth type is not in the allowed set.
      await config.refreshAuth(AuthType.USE_GEMINI);
      expect(config.getProModelNoAccessSync()).toBe(false);
    });
  });
describe('getRequestTimeoutMs', () => {
it('should return undefined if the flag is not set', () => {
const config = new Config(baseParams);
@ -1922,6 +1975,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(baseParams);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(false);
expect(settings.autoStartServer).toBe(true);
expect(settings.binaryPath).toBe('');
expect(settings.classifier?.host).toBe('http://localhost:9379');
expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});
@ -1931,6 +1986,8 @@ describe('GemmaModelRouterSettings', () => {
...baseParams,
gemmaModelRouter: {
enabled: true,
autoStartServer: false,
binaryPath: '/custom/lit',
classifier: {
host: 'http://custom:1234',
model: 'custom-gemma',
@ -1940,6 +1997,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(params);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(true);
expect(settings.autoStartServer).toBe(false);
expect(settings.binaryPath).toBe('/custom/lit');
expect(settings.classifier?.host).toBe('http://custom:1234');
expect(settings.classifier?.model).toBe('custom-gemma');
});
@ -1954,6 +2013,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(params);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(true);
expect(settings.autoStartServer).toBe(true);
expect(settings.binaryPath).toBe('');
expect(settings.classifier?.host).toBe('http://localhost:9379');
expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});

View file

@ -219,6 +219,8 @@ export interface OutputSettings {
export interface GemmaModelRouterSettings {
enabled?: boolean;
autoStartServer?: boolean;
binaryPath?: string;
classifier?: {
host?: string;
model?: string;
@ -1323,6 +1325,8 @@ export class Config implements McpContext, AgentLoopContext {
};
this.gemmaModelRouter = {
enabled: params.gemmaModelRouter?.enabled ?? false,
autoStartServer: params.gemmaModelRouter?.autoStartServer ?? true,
binaryPath: params.gemmaModelRouter?.binaryPath ?? '',
classifier: {
host:
params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379',
@ -3160,7 +3164,10 @@ export class Config implements McpContext, AgentLoopContext {
* Note: This method should only be called after startup, once experiments have been loaded.
*/
getProModelNoAccessSync(): boolean {
if (this.contentGeneratorConfig?.authType !== AuthType.LOGIN_WITH_GOOGLE) {
if (
this.contentGeneratorConfig?.authType !== AuthType.LOGIN_WITH_GOOGLE &&
this.contentGeneratorConfig?.authType !== AuthType.COMPUTE_ADC
) {
return false;
}
return (

View file

@ -7,6 +7,8 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LocalLiteRtLmClient } from './localLiteRtLmClient.js';
import type { Config } from '../config/config.js';
import { GoogleGenAI } from '@google/genai';
const mockGenerateContent = vi.fn();
vi.mock('@google/genai', () => {
@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => {
const result = await client.generateJson([], 'test-instruction');
expect(result).toEqual({ key: 'value' });
expect(GoogleGenAI).toHaveBeenCalledWith(
expect.objectContaining({
apiVersion: 'v1beta',
httpOptions: expect.objectContaining({
baseUrl: 'http://test-host:1234',
}),
}),
);
expect(mockGenerateContent).toHaveBeenCalledWith(
expect.objectContaining({
model: 'gemma:latest',

View file

@ -25,6 +25,8 @@ export class LocalLiteRtLmClient {
this.client = new GoogleGenAI({
// The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication.
apiKey: 'no-api-key-needed',
apiVersion: 'v1beta',
vertexai: false,
httpOptions: {
baseUrl: this.host,
// If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).

View file

@ -2920,6 +2920,20 @@
"default": false,
"type": "boolean"
},
"autoStartServer": {
"title": "Auto-start LiteRT Server",
"description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.",
"markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
"default": false,
"type": "boolean"
},
"binaryPath": {
"title": "LiteRT Binary Path",
"description": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).",
"markdownDescription": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: ``",
"default": "",
"type": "string"
},
"classifier": {
"title": "Classifier",
"description": "Classifier configuration.",