feat(cli): add streamlined gemini gemma local model setup (#25498)

Co-authored-by: Abhijit Balaji <abhijitbalaji@google.com>
Co-authored-by: Samee Zahid <sameez@google.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Samee Zahid 2026-04-20 16:57:56 -07:00 committed by GitHub
parent 6afc47f81c
commit 1d383a4a8e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 2509 additions and 12 deletions

View file

@ -162,12 +162,14 @@ they appear in the UI.
### Experimental
| UI Label | Setting | Description | Default |
| ---------------------------------------------------- | ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` |
| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` |
| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` |
| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` |
| Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. | `false` |
| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` |
| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` |
| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` |

View file

@ -1711,6 +1711,18 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.autoStartServer`** (boolean):
- **Description:** Automatically start the LiteRT-LM server when Gemini CLI
starts and the Gemma router is enabled.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.binaryPath`** (string):
- **Description:** Custom path to the LiteRT-LM binary. Leave empty to use the
default location (~/.gemini/bin/litert/).
- **Default:** `""`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.classifier.host`** (string):
- **Description:** The host of the classifier.
- **Default:** `"http://localhost:9379"`

View file

@ -0,0 +1,33 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule, Argv } from 'yargs';
import { initializeOutputListenersAndFlush } from '../gemini.js';
import { defer } from '../deferred.js';
import { setupCommand } from './gemma/setup.js';
import { startCommand } from './gemma/start.js';
import { stopCommand } from './gemma/stop.js';
import { statusCommand } from './gemma/status.js';
import { logsCommand } from './gemma/logs.js';
/**
 * Top-level `gemini gemma` command group: manages the local Gemma model
 * routing stack (LiteRT-LM setup, server lifecycle, and logs).
 */
export const gemmaCommand: CommandModule = {
  command: 'gemma',
  describe: 'Manage local Gemma model routing',
  builder: (yargs: Argv) =>
    yargs
      .middleware((argv) => {
        // Runs before any subcommand handler: flush buffered output
        // listeners and mark this invocation as a CLI subcommand.
        initializeOutputListenersAndFlush();
        argv['isCommand'] = true;
      })
      // Subcommands are wrapped in defer(...) (see ../deferred.js) —
      // presumably to delay loading their modules; confirm in deferred.js.
      .command(defer(setupCommand, 'gemma'))
      .command(defer(startCommand, 'gemma'))
      .command(defer(stopCommand, 'gemma'))
      .command(defer(statusCommand, 'gemma'))
      .command(defer(logsCommand, 'gemma'))
      .demandCommand(1, 'You need at least one command before continuing.')
      .version(false),
  // No-op: demandCommand(1) guarantees a subcommand handler always runs.
  handler: () => {},
};

View file

@ -0,0 +1,45 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import path from 'node:path';
import { Storage } from '@google/gemini-cli-core';
// Pinned LiteRT-LM release used for binary downloads and checksums.
export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03';
export const LITERT_RELEASE_BASE_URL =
  'https://github.com/google-ai-edge/LiteRT-LM/releases/download';
// Model identifier the local server exposes via the Gemini API shim.
export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom';
// Default local server/classifier port (matches the settings default).
export const DEFAULT_PORT = 9379;
// Abort the health-check probe after this long.
export const HEALTH_CHECK_TIMEOUT_MS = 5000;
// API version path segment used when building server routes.
export const LITERT_API_VERSION = 'v1beta';
// NOTE(review): presumably the delay after spawning the server before
// probing it — confirm against start.ts.
export const SERVER_START_WAIT_MS = 3000;
// Maps `${process.platform}-${process.arch}` to its release binary name.
export const PLATFORM_BINARY_MAP: Record<string, string> = {
  'darwin-arm64': 'lit.macos_arm64',
  'linux-x64': 'lit.linux_x86_64',
  'win32-x64': 'lit.windows_x86_64.exe',
};
// SHA-256 hashes for the official LiteRT-LM v0.9.0-alpha03 release binaries.
export const PLATFORM_BINARY_SHA256: Record<string, string> = {
  'lit.macos_arm64':
    '9e826a2634f2e8b220ad0f1e1b5c139e0b47cb172326e3b7d46d31382f49478e',
  'lit.linux_x86_64':
    '66601df8a07f08244b188e9fcab0bf4a16562fe76d8d47e49f40273d57541ee8',
  'lit.windows_x86_64.exe':
    'de82d2829d2fb1cbdb318e2d8a78dc2f9659ff14cb11b2894d1f30e0bfde2bf6',
};
/** Default install directory for the LiteRT-LM binary under ~/.gemini. */
export function getLiteRtBinDir(): string {
  const globalDir = Storage.getGlobalGeminiDir();
  return path.join(globalDir, 'bin', 'litert');
}
/** Path of the PID file recording the launched LiteRT server process. */
export function getPidFilePath(): string {
  const tempDir = Storage.getGlobalTempDir();
  return path.join(tempDir, 'litert-server.pid');
}
/** Path of the LiteRT server log file in the global temp directory. */
export function getLogFilePath(): string {
  const tempDir = Storage.getGlobalTempDir();
  return path.join(tempDir, 'litert-server.log');
}

View file

@ -0,0 +1,186 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import type { ChildProcess } from 'node:child_process';
import { EventEmitter } from 'node:events';
import os from 'node:os';
import path from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { spawn } from 'node:child_process';
import { exitCli } from '../utils.js';
import { getLogFilePath } from './constants.js';
import { logsCommand, readLastLines } from './logs.js';
// Replace the core debug logger with the shared test double; ANSI codes
// are preserved (stripAnsi: false) so raw output can be asserted.
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const { mockCoreDebugLogger } = await import(
    '../../test-utils/mockDebugLogger.js'
  );
  return mockCoreDebugLogger(
    await importOriginal<typeof import('@google/gemini-cli-core')>(),
    {
      stripAnsi: false,
    },
  );
});
// Stub spawn so no real `tail` process is ever launched by the handler.
vi.mock('node:child_process', async (importOriginal) => {
  const actual = await importOriginal<typeof import('node:child_process')>();
  return {
    ...actual,
    spawn: vi.fn(),
  };
});
// Stub exitCli so handlers do not terminate the test process.
vi.mock('../utils.js', () => ({
  exitCli: vi.fn(),
}));
// Stub the log-path lookup so tests can pin it to a fixed value.
vi.mock('./constants.js', () => ({
  getLogFilePath: vi.fn(),
}));
// Minimal ChildProcess stand-in: an EventEmitter with a spy kill().
function createMockChild(): ChildProcess {
  return Object.assign(new EventEmitter(), {
    kill: vi.fn(),
  }) as unknown as ChildProcess;
}
// Let already-queued promise chains settle without advancing timers.
async function flushMicrotasks() {
  await Promise.resolve();
  await Promise.resolve();
}
describe('readLastLines', () => {
  const tempFiles: string[] = [];
  // Remove any fixture files created by the tests in this block.
  afterEach(async () => {
    await Promise.all(
      tempFiles
        .splice(0)
        .map((filePath) => fs.promises.rm(filePath, { force: true })),
    );
  });
  it('returns only the requested tail lines without reading the whole file eagerly', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
    );
    tempFiles.push(filePath);
    // 2000 numbered lines with a trailing newline; the tail is known.
    const content = Array.from({ length: 2000 }, (_, i) => `line-${i + 1}`)
      .join('\n')
      .concat('\n');
    await fs.promises.writeFile(filePath, content, 'utf-8');
    await expect(readLastLines(filePath, 3)).resolves.toBe(
      'line-1998\nline-1999\nline-2000\n',
    );
  });
  it('returns an empty string when zero lines are requested', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'line-1\nline-2\n', 'utf-8');
    await expect(readLastLines(filePath, 0)).resolves.toBe('');
  });
});
describe('logsCommand', () => {
  const originalPlatform = process.platform;
  beforeEach(() => {
    vi.clearAllMocks();
    // Force a POSIX platform so the `tail`-based code path is exercised.
    Object.defineProperty(process, 'platform', {
      value: 'linux',
      configurable: true,
    });
    vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log');
    // Pretend the log file exists so the handler proceeds to tail it.
    vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined);
  });
  afterEach(() => {
    Object.defineProperty(process, 'platform', {
      value: originalPlatform,
      configurable: true,
    });
    vi.restoreAllMocks();
  });
  it('waits for the tail process to close before exiting in follow mode', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    let resolved = false;
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({}).then(() => {
      resolved = true;
    });
    await flushMicrotasks();
    // No flags: defaults to follow mode with the last 20 lines.
    expect(spawn).toHaveBeenCalledWith(
      'tail',
      ['-f', '-n', '20', '/tmp/gemma.log'],
      { stdio: 'inherit' },
    );
    // The handler must still be pending while tail is running.
    expect(resolved).toBe(false);
    expect(exitCli).not.toHaveBeenCalled();
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });
  it('uses one-shot tail output when follow is disabled', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({ follow: false });
    await flushMicrotasks();
    // No '-f' flag when follow is explicitly disabled.
    expect(spawn).toHaveBeenCalledWith('tail', ['-n', '20', '/tmp/gemma.log'], {
      stdio: 'inherit',
    });
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });
  it('follows from the requested line count when both --lines and --follow are set', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({ lines: 5, follow: true });
    await flushMicrotasks();
    expect(spawn).toHaveBeenCalledWith(
      'tail',
      ['-f', '-n', '5', '/tmp/gemma.log'],
      { stdio: 'inherit' },
    );
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });
});

View file

@ -0,0 +1,200 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import { spawn, type ChildProcess } from 'node:child_process';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import { getLogFilePath } from './constants.js';
/**
 * Reads the last `count` lines of a file without loading the whole file:
 * the file is scanned backwards in 64 KiB chunks until enough newlines
 * have been seen. Returns '' for `count <= 0` or an empty file; a
 * non-empty result always ends with a trailing newline.
 */
export async function readLastLines(
  filePath: string,
  count: number,
): Promise<string> {
  if (count <= 0) {
    return '';
  }
  const CHUNK_SIZE = 64 * 1024;
  const fileHandle = await fs.promises.open(filePath, fs.constants.O_RDONLY);
  try {
    const stats = await fileHandle.stat();
    if (stats.size === 0) {
      return '';
    }
    const chunks: Buffer[] = [];
    let totalBytes = 0;
    let newlineCount = 0;
    let position = stats.size;
    // Read backwards until count+1 newlines are buffered, which
    // guarantees at least `count` complete lines are in memory.
    while (position > 0 && newlineCount <= count) {
      const readSize = Math.min(CHUNK_SIZE, position);
      position -= readSize;
      const buffer = Buffer.allocUnsafe(readSize);
      const { bytesRead } = await fileHandle.read(
        buffer,
        0,
        readSize,
        position,
      );
      if (bytesRead === 0) {
        break;
      }
      const chunk =
        bytesRead === readSize ? buffer : buffer.subarray(0, bytesRead);
      chunks.unshift(chunk);
      totalBytes += chunk.length;
      // Count LF bytes directly; 0x0a never occurs inside a UTF-8
      // multi-byte sequence, so byte-level counting is safe.
      for (const byte of chunk) {
        if (byte === 0x0a) {
          newlineCount += 1;
        }
      }
    }
    const content = Buffer.concat(chunks, totalBytes).toString('utf-8');
    const lines = content.split('\n');
    // If we stopped mid-file, the first buffered "line" may be a partial
    // fragment: inspect the byte just before our window and drop it
    // unless the window happens to start on a line boundary.
    if (position > 0 && lines.length > 0) {
      const boundary = Buffer.allocUnsafe(1);
      const { bytesRead } = await fileHandle.read(boundary, 0, 1, position - 1);
      if (bytesRead === 1 && boundary[0] !== 0x0a) {
        lines.shift();
      }
    }
    // A trailing newline yields one empty final element; drop it so it
    // is not counted as a line.
    if (lines.length > 0 && lines[lines.length - 1] === '') {
      lines.pop();
    }
    if (lines.length === 0) {
      return '';
    }
    return lines.slice(-count).join('\n') + '\n';
  } finally {
    await fileHandle.close();
  }
}
/** Parsed CLI flags for `gemini gemma logs`. */
interface LogsArgs {
  // Number of trailing lines to show; when omitted the command follows.
  lines?: number;
  // Force or suppress follow mode; defaults to true when --lines is omitted.
  follow?: boolean;
}
/**
 * Resolves with the child's exit code once it closes, or rejects when the
 * process emits 'error' (e.g. the executable could not be spawned). A null
 * exit code (terminated by signal) is reported as 1.
 */
function waitForChild(child: ChildProcess): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const onClose = (code: number | null) => {
      resolve(code === null ? 1 : code);
    };
    child.once('error', reject);
    child.once('close', onClose);
  });
}
/**
 * Runs `tail` against the log file, inheriting stdio so output streams
 * straight to the terminal. In follow mode, Ctrl+C (SIGINT) forwards a
 * SIGTERM to the tail child so it shuts down cleanly.
 *
 * @returns tail's exit code (1 when it closed without one).
 */
async function runTail(logPath: string, lines: number, follow: boolean) {
  const tailArgs = ['-n', String(lines), logPath];
  if (follow) {
    tailArgs.unshift('-f');
  }
  const child = spawn('tail', tailArgs, { stdio: 'inherit' });
  if (!follow) {
    return waitForChild(child);
  }
  const forwardTermination = () => {
    child.kill('SIGTERM');
  };
  process.once('SIGINT', forwardTermination);
  try {
    return await waitForChild(child);
  } finally {
    // Always detach the handler so later SIGINTs behave normally.
    process.off('SIGINT', forwardTermination);
  }
}
/**
 * `gemini gemma logs` — shows the LiteRT-LM server log, either as a
 * one-shot tail (--lines N) or by following live output via the system
 * `tail` command (live follow is unsupported on Windows).
 */
export const logsCommand: CommandModule<object, LogsArgs> = {
  command: 'logs',
  describe: 'View LiteRT-LM server logs',
  builder: (yargs) =>
    yargs
      .option('lines', {
        alias: 'n',
        type: 'number',
        description: 'Show the last N lines and exit (omit to follow live)',
      })
      .option('follow', {
        alias: 'f',
        type: 'boolean',
        description:
          'Follow log output (defaults to true when --lines is omitted)',
      }),
  handler: async (argv) => {
    const logPath = getLogFilePath();
    // Bail out early with guidance when no log file exists yet.
    try {
      await fs.promises.access(logPath, fs.constants.F_OK);
    } catch {
      debugLogger.log(`No log file found at ${logPath}`);
      debugLogger.log(
        'Is the LiteRT server running? Start it with: gemini gemma start',
      );
      await exitCli(1);
      return;
    }
    const lines = argv.lines;
    // Follow by default only when no fixed line count was requested.
    const follow = argv.follow ?? lines === undefined;
    const requestedLines = lines ?? 20;
    if (follow && process.platform === 'win32') {
      debugLogger.log(
        'Live log following is not supported on Windows. Use --lines N to view recent logs.',
      );
      await exitCli(1);
      return;
    }
    // Windows has no `tail`; read the file tail in-process instead.
    if (process.platform === 'win32') {
      process.stdout.write(await readLastLines(logPath, requestedLines));
      await exitCli(0);
      return;
    }
    try {
      if (follow) {
        debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
      }
      const exitCode = await runTail(logPath, requestedLines, follow);
      await exitCli(exitCode);
    } catch (error) {
      // ENOENT here means the `tail` executable itself is missing.
      if (
        error instanceof Error &&
        'code' in error &&
        error.code === 'ENOENT'
      ) {
        if (!follow) {
          // One-shot mode can still be served by the in-process reader.
          process.stdout.write(await readLastLines(logPath, requestedLines));
          await exitCli(0);
        } else {
          debugLogger.error(
            '"tail" command not found. Use --lines N to view recent logs without tail.',
          );
          await exitCli(1);
        }
      } else {
        debugLogger.error(
          `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`,
        );
        await exitCli(1);
      }
    }
  },
};

View file

@ -0,0 +1,162 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import path from 'node:path';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { SettingScope } from '../../config/settings.js';
import { getLiteRtBinDir } from './constants.js';
// loadSettings is hoisted-mocked so platform.js (imported below, after
// the mock) sees the stub; only the User scope enum value is exposed.
const mockLoadSettings = vi.hoisted(() => vi.fn());
vi.mock('../../config/settings.js', () => ({
  loadSettings: mockLoadSettings,
  SettingScope: {
    User: 'User',
  },
}));
import {
getBinaryPath,
isExpectedLiteRtServerCommand,
isBinaryInstalled,
readServerProcessInfo,
resolveGemmaConfig,
} from './platform.js';
describe('gemma platform helpers', () => {
  // Builds a settings stand-in with independent user-scope and merged
  // gemmaModelRouter values, so scope handling can be asserted.
  function createMockSettings(
    userGemmaSettings?: object,
    mergedGemmaSettings?: object,
  ) {
    return {
      merged: {
        experimental: {
          gemmaModelRouter: mergedGemmaSettings,
        },
      },
      forScope: vi.fn((scope: SettingScope) => {
        if (scope !== SettingScope.User) {
          throw new Error(`Unexpected scope ${scope}`);
        }
        return {
          settings: {
            experimental: {
              gemmaModelRouter: userGemmaSettings,
            },
          },
        };
      }),
    };
  }
  beforeEach(() => {
    vi.clearAllMocks();
    mockLoadSettings.mockReturnValue(createMockSettings());
  });
  it('prefers the configured binary path from settings', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings({ binaryPath: '/custom/lit' }),
    );
    expect(getBinaryPath('lit.test')).toBe('/custom/lit');
  });
  // Security property: only the user scope may override the binary path.
  it('ignores workspace overrides for the configured binary path', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings(
        { binaryPath: '/user/lit' },
        { binaryPath: '/workspace/evil' },
      ),
    );
    expect(getBinaryPath('lit.test')).toBe('/user/lit');
  });
  it('falls back to the default install location when no custom path is set', () => {
    expect(getBinaryPath('lit.test')).toBe(
      path.join(getLiteRtBinDir(), 'lit.test'),
    );
  });
  it('resolves the configured port and binary path from settings', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings(
        { binaryPath: '/custom/lit' },
        {
          enabled: true,
          classifier: {
            host: 'http://localhost:8123/v1beta',
          },
        },
      ),
    );
    expect(resolveGemmaConfig(9379)).toEqual({
      settingsEnabled: true,
      configuredPort: 8123,
      configuredBinaryPath: '/custom/lit',
    });
  });
  it('checks binary installation using the resolved binary path', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings({ binaryPath: '/custom/lit' }),
    );
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    expect(isBinaryInstalled()).toBe(true);
    expect(fs.existsSync).toHaveBeenCalledWith('/custom/lit');
  });
  it('parses structured server process info from the pid file', () => {
    vi.spyOn(fs, 'readFileSync').mockReturnValue(
      JSON.stringify({
        pid: 1234,
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    );
    expect(readServerProcessInfo()).toEqual({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
  });
  // Older versions wrote a bare PID; the reader must still accept it.
  it('parses legacy pid-only files for backward compatibility', () => {
    vi.spyOn(fs, 'readFileSync').mockReturnValue('4321');
    expect(readServerProcessInfo()).toEqual({
      pid: 4321,
    });
  });
  it('matches only the expected LiteRT serve command', () => {
    expect(
      isExpectedLiteRtServerCommand('/custom/lit serve --port=8123 --verbose', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(true);
    expect(
      isExpectedLiteRtServerCommand('/custom/lit run --port=8123', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(false);
    expect(
      isExpectedLiteRtServerCommand('/custom/lit serve --port=9000', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(false);
  });
});

View file

@ -0,0 +1,316 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { loadSettings, SettingScope } from '../../config/settings.js';
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync } from 'node:child_process';
import {
PLATFORM_BINARY_MAP,
LITERT_RELEASE_BASE_URL,
LITERT_RELEASE_VERSION,
getLiteRtBinDir,
GEMMA_MODEL_NAME,
HEALTH_CHECK_TIMEOUT_MS,
LITERT_API_VERSION,
getPidFilePath,
} from './constants.js';
/** Platform key (e.g. "linux-x64") and its matching release binary name. */
export interface PlatformInfo {
  key: string;
  binaryName: string;
}
/** Effective Gemma router configuration resolved from settings. */
export interface GemmaConfigStatus {
  // True when experimental.gemmaModelRouter.enabled is set in merged settings.
  settingsEnabled: boolean;
  // Port parsed from the classifier host, else the caller's fallback.
  configuredPort: number;
  // User-scoped binaryPath override, when configured.
  configuredBinaryPath?: string;
}
/** Structured contents of the server PID file. */
export interface LiteRtServerProcessInfo {
  pid: number;
  // Binary used to launch the server (absent in legacy pid-only files).
  binaryPath?: string;
  // Port the server was started on (absent in legacy pid-only files).
  port?: number;
}
/**
 * Reads the binaryPath override from the User settings scope only (the
 * merged/workspace view is deliberately not consulted — see the
 * "ignores workspace overrides" test). Returns undefined when unset,
 * blank, or when settings fail to load.
 */
function getUserConfiguredBinaryPath(
  workspaceDir = process.cwd(),
): string | undefined {
  try {
    const userScope = loadSettings(workspaceDir).forScope(SettingScope.User);
    const configured =
      userScope.settings.experimental?.gemmaModelRouter?.binaryPath?.trim();
    return configured ? configured : undefined;
  } catch {
    return undefined;
  }
}
/**
 * Extracts a port number from a classifier host value.
 *
 * Accepts full URLs ("http://localhost:8123/v1beta") as well as bare
 * host:port strings ("localhost:9379"). Note that a scheme-less
 * "host:port" string parses as a *valid* URL (with "host:" as the
 * protocol and an empty port), so when URL parsing yields no usable
 * port we fall through to the regex scan instead of returning early —
 * the previous early return made "localhost:9379" resolve to the
 * fallback port.
 *
 * @param host - The configured classifier host, if any.
 * @param fallbackPort - Returned when no valid port can be extracted.
 */
function parsePortFromHost(
  host: string | undefined,
  fallbackPort: number,
): number {
  if (!host) {
    return fallbackPort;
  }
  try {
    const url = new URL(host);
    const port = Number(url.port);
    if (Number.isFinite(port) && port > 0) {
      return port;
    }
  } catch {
    // Not parseable as a URL at all; fall through to the regex scan.
  }
  const match = host.match(/:(\d+)/);
  if (!match) {
    return fallbackPort;
  }
  const port = parseInt(match[1], 10);
  return Number.isFinite(port) && port > 0 ? port : fallbackPort;
}
export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
let settingsEnabled = false;
let configuredPort = fallbackPort;
const configuredBinaryPath = getUserConfiguredBinaryPath();
try {
const settings = loadSettings(process.cwd());
const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
settingsEnabled = gemmaSettings?.enabled === true;
configuredPort = parsePortFromHost(
gemmaSettings?.classifier?.host,
fallbackPort,
);
} catch {
// ignore — settings may fail to load outside a workspace
}
return { settingsEnabled, configuredPort, configuredBinaryPath };
}
/**
 * Maps the current platform/arch pair to its LiteRT-LM release binary.
 * Returns null when no prebuilt binary exists for this platform.
 */
export function detectPlatform(): PlatformInfo | null {
  const key = `${process.platform}-${process.arch}`;
  const binaryName = PLATFORM_BINARY_MAP[key];
  return binaryName ? { key, binaryName } : null;
}
export function getBinaryPath(binaryName?: string): string | null {
const configuredBinaryPath = getUserConfiguredBinaryPath();
if (configuredBinaryPath) {
return configuredBinaryPath;
}
const name = binaryName ?? detectPlatform()?.binaryName;
if (!name) return null;
return path.join(getLiteRtBinDir(), name);
}
/** Builds the GitHub release download URL for a LiteRT-LM binary. */
export function getBinaryDownloadUrl(binaryName: string): string {
  return [LITERT_RELEASE_BASE_URL, LITERT_RELEASE_VERSION, binaryName].join(
    '/',
  );
}
/** True when the resolved (or explicitly given) binary exists on disk. */
export function isBinaryInstalled(binaryPath = getBinaryPath()): boolean {
  return binaryPath ? fs.existsSync(binaryPath) : false;
}
/**
 * Checks whether the Gemma model is present by asking the LiteRT binary
 * to list installed models. Any failure (missing binary, timeout,
 * non-zero exit) is treated as "not downloaded".
 */
export function isModelDownloaded(binaryPath: string): boolean {
  try {
    const listing = execFileSync(binaryPath, ['list'], {
      encoding: 'utf-8',
      timeout: 10000,
    });
    return listing.includes(GEMMA_MODEL_NAME);
  } catch {
    return false;
  }
}
/**
 * Probes the local LiteRT server by POSTing to the Gemma model's
 * generateContent route with a bounded timeout. Any status other than
 * 404 counts as running: a 400 still proves the route exists, while a
 * 404 means a different server (or model) owns the port. Network errors
 * and timeouts report false.
 */
export async function isServerRunning(port: number): Promise<boolean> {
  const url = `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}:generateContent`;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS);
  try {
    const response = await fetch(url, {
      method: 'POST',
      signal: controller.signal,
    });
    return response.status !== 404;
  } catch {
    return false;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Runtime type guard for data read back from the PID file. Only own
 * data properties are inspected (via property descriptors), so
 * inherited or getter-backed values on a crafted object are not
 * trusted.
 */
function isLiteRtServerProcessInfo(
  value: unknown,
): value is LiteRtServerProcessInfo {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  const ownValue = (key: string): unknown =>
    Object.getOwnPropertyDescriptor(value, key)?.value;
  const isPositiveInteger = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' &&
    Number.isInteger(candidate) &&
    candidate > 0;
  const isNonEmptyString = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.length > 0;
  if (!isPositiveInteger(ownValue('pid'))) {
    return false;
  }
  const binaryPath = ownValue('binaryPath');
  if (binaryPath !== undefined && !isNonEmptyString(binaryPath)) {
    return false;
  }
  const port = ownValue('port');
  if (port !== undefined && !isPositiveInteger(port)) {
    return false;
  }
  return true;
}
/**
 * Loads the recorded server process info from the PID file. Supports
 * both the structured JSON format and legacy files holding a bare PID.
 * Returns null for missing, empty, or malformed files.
 */
export function readServerProcessInfo(): LiteRtServerProcessInfo | null {
  try {
    const raw = fs.readFileSync(getPidFilePath(), 'utf-8').trim();
    if (raw.length === 0) {
      return null;
    }
    // Legacy format: the file contains only the PID digits.
    if (/^\d+$/.test(raw)) {
      return { pid: parseInt(raw, 10) };
    }
    const parsed: unknown = JSON.parse(raw);
    return isLiteRtServerProcessInfo(parsed) ? parsed : null;
  } catch {
    return null;
  }
}
/** Persists the launched server's process info to the PID file as JSON. */
export function writeServerProcessInfo(
  processInfo: LiteRtServerProcessInfo,
): void {
  const serialized = JSON.stringify(processInfo);
  fs.writeFileSync(getPidFilePath(), serialized, 'utf-8');
}
/** Convenience accessor: the recorded server PID, or null when absent. */
export function readServerPid(): number | null {
  const info = readServerProcessInfo();
  return info === null ? null : info.pid;
}
/**
 * Canonicalizes a command-line string for comparison: NUL separators
 * (as produced by /proc/<pid>/cmdline) become spaces, and runs of
 * whitespace collapse to one space. On Windows the result is also made
 * case- and slash-insensitive.
 */
function normalizeProcessValue(value: string): string {
  const despaced = value.replace(/\0/g, ' ').trim();
  if (process.platform !== 'win32') {
    return despaced.replace(/\s+/g, ' ');
  }
  return despaced.replace(/\\/g, '/').replace(/\s+/g, ' ').toLowerCase();
}
/**
 * Best-effort lookup of a process's full command line, per platform:
 * /proc on Linux, Get-CimInstance via PowerShell on Windows, and
 * `ps -o command=` elsewhere (macOS/BSD). Returns null when the process
 * is gone or the lookup fails.
 */
function readProcessCommandLine(pid: number): string | null {
  try {
    switch (process.platform) {
      case 'linux': {
        // Returned untrimmed: cmdline is NUL-separated and the caller's
        // normalization converts NULs to spaces.
        const cmdline = fs.readFileSync(`/proc/${pid}/cmdline`, 'utf-8');
        return cmdline.trim() ? cmdline : null;
      }
      case 'win32': {
        const output = execFileSync(
          'powershell.exe',
          [
            '-NoProfile',
            '-Command',
            `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}").CommandLine`,
          ],
          {
            encoding: 'utf-8',
            timeout: 5000,
          },
        );
        return output.trim() || null;
      }
      default: {
        const output = execFileSync(
          'ps',
          ['-p', String(pid), '-o', 'command='],
          {
            encoding: 'utf-8',
            timeout: 5000,
          },
        );
        return output.trim() || null;
      }
    }
  } catch {
    return null;
  }
}
/**
 * Verifies that a command line looks like the LiteRT server we started:
 * a standalone `serve` token is required, plus the expected `--port=N`
 * argument when a port is known, and — when a binary path is known —
 * either the full path or its basename. Presumably used so callers
 * avoid acting on an unrelated process that reused the recorded PID.
 */
export function isExpectedLiteRtServerCommand(
  commandLine: string,
  options: {
    binaryPath?: string | null;
    port?: number;
  },
): boolean {
  const normalized = normalizeProcessValue(commandLine);
  if (normalized.length === 0) {
    return false;
  }
  const hasServeToken = /(^|\s|")serve(\s|$)/.test(normalized);
  if (!hasServeToken) {
    return false;
  }
  const { port, binaryPath } = options;
  if (port !== undefined && !normalized.includes(`--port=${port}`)) {
    return false;
  }
  if (!binaryPath) {
    return true;
  }
  const fullPath = normalizeProcessValue(binaryPath);
  const baseName = normalizeProcessValue(path.basename(binaryPath));
  return normalized.includes(fullPath) || normalized.includes(baseName);
}
/**
 * Resolves the live command line for `pid` and checks it against the
 * expected LiteRT serve invocation. Reports false when the command line
 * cannot be read (process exited or lookup unsupported).
 */
export function isExpectedLiteRtServerProcess(
  pid: number,
  options: {
    binaryPath?: string | null;
    port?: number;
  },
): boolean {
  const commandLine = readProcessCommandLine(pid);
  return !commandLine
    ? false
    : isExpectedLiteRtServerCommand(commandLine, options);
}
/**
 * Checks whether a process with the given PID exists. Signal 0 performs
 * the existence/permission check without delivering a signal; any
 * failure (including permission errors) reports false here.
 */
export function isProcessRunning(pid: number): boolean {
  try {
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}

View file

@ -0,0 +1,60 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { afterEach, describe, expect, it } from 'vitest';
import { PLATFORM_BINARY_MAP, PLATFORM_BINARY_SHA256 } from './constants.js';
import { computeFileSha256, verifyFileSha256 } from './setup.js';
describe('gemma setup checksum helpers', () => {
  const tempFiles: string[] = [];
  // Remove any fixture files created by the tests in this block.
  afterEach(async () => {
    await Promise.all(
      tempFiles
        .splice(0)
        .map((filePath) => fs.promises.rm(filePath, { force: true })),
    );
  });
  // Guards against adding a platform binary without pinning its hash.
  it('has a pinned checksum for every supported LiteRT binary', () => {
    expect(Object.keys(PLATFORM_BINARY_SHA256).sort()).toEqual(
      Object.values(PLATFORM_BINARY_MAP).sort(),
    );
  });
  // The expected value is the well-known SHA-256 of "hello world".
  it('computes the sha256 for a downloaded file', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
    await expect(computeFileSha256(filePath)).resolves.toBe(
      'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
    );
  });
  it('verifies whether a file matches the expected sha256', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
    await expect(
      verifyFileSha256(
        filePath,
        'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
      ),
    ).resolves.toBe(true);
    await expect(verifyFileSha256(filePath, 'deadbeef')).resolves.toBe(false);
  });
});

View file

@ -0,0 +1,504 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import { createHash } from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync, spawn as nodeSpawn } from 'node:child_process';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { loadSettings, SettingScope } from '../../config/settings.js';
import { exitCli } from '../utils.js';
import {
DEFAULT_PORT,
GEMMA_MODEL_NAME,
PLATFORM_BINARY_SHA256,
} from './constants.js';
import {
detectPlatform,
getBinaryDownloadUrl,
getBinaryPath,
isBinaryInstalled,
isModelDownloaded,
} from './platform.js';
import { startServer } from './start.js';
import readline from 'node:readline';
// Short aliases for the shared debug logger used throughout this file.
const log = (msg: string) => debugLogger.log(msg);
const logError = (msg: string) => debugLogger.error(msg);
/**
 * Asks a yes/no question on stdin. Only an explicit "y"/"yes"
 * (case-insensitive) counts as acceptance, matching the (y/N) default.
 */
async function promptYesNo(question: string): Promise<boolean> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  return new Promise((resolve) => {
    rl.question(`${question} (y/N): `, (answer) => {
      rl.close();
      const normalized = answer.trim().toLowerCase();
      resolve(normalized === 'y' || normalized === 'yes');
    });
  });
}
/**
 * Formats a byte count for human-readable progress output, choosing the
 * largest fitting unit. A GB tier is included because the Gemma model
 * download is ~1 GB — without it, values render as e.g. "1024.0 MB".
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  if (bytes < 1024 * 1024 * 1024) {
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  }
  return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
}
/**
 * Draws a single-line progress bar on stderr using carriage-return
 * overwrite. When the total size is unknown, only the running byte
 * count is shown.
 */
function renderProgress(downloaded: number, total: number | null): void {
  const barWidth = 30;
  if (!total || total <= 0) {
    process.stderr.write(`\r Downloaded ${formatBytes(downloaded)}`);
    return;
  }
  const pct = Math.min(downloaded / total, 1);
  const filled = Math.round(barWidth * pct);
  const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled);
  const pctStr = (pct * 100).toFixed(0).padStart(3);
  process.stderr.write(
    `\r [${bar}] ${pctStr}% ${formatBytes(downloaded)} / ${formatBytes(total)}`,
  );
}
/**
 * Streams a download to `destPath`, writing to a temporary
 * `.downloading` file first and renaming into place only on success, so
 * an interrupted download never leaves a partial file at the final
 * path. Progress is rendered to stderr as chunks arrive.
 *
 * Fixes over the previous version: the write stream's 'finish'/'error'
 * listeners are registered before any data is written — previously a
 * stream 'error' emitted mid-download had no listener and would crash
 * the process as an unhandled 'error' event — and the temp file is
 * removed when the download fails.
 *
 * @throws Error when the HTTP response is not OK, has no body, or the
 *   network/stream fails.
 */
async function downloadFile(url: string, destPath: string): Promise<void> {
  const tmpPath = destPath + '.downloading';
  if (fs.existsSync(tmpPath)) {
    fs.unlinkSync(tmpPath);
  }
  const response = await fetch(url, { redirect: 'follow' });
  if (!response.ok) {
    throw new Error(
      `Download failed: HTTP ${response.status} ${response.statusText}`,
    );
  }
  if (!response.body) {
    throw new Error('Download failed: No response body');
  }
  const contentLength = response.headers.get('content-length');
  const totalBytes = contentLength ? parseInt(contentLength, 10) : null;
  let downloadedBytes = 0;
  const fileStream = fs.createWriteStream(tmpPath);
  // Register completion handlers up front so an early 'error' event is
  // captured by this promise instead of crashing as an unhandled event.
  const streamDone = new Promise<void>((resolve, reject) => {
    fileStream.on('finish', resolve);
    fileStream.on('error', reject);
  });
  const reader = response.body.getReader();
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      const writeOk = fileStream.write(value);
      if (!writeOk) {
        // Respect backpressure: wait for the buffer to drain.
        await new Promise<void>((resolve) => fileStream.once('drain', resolve));
      }
      downloadedBytes += value.byteLength;
      renderProgress(downloadedBytes, totalBytes);
    }
    fileStream.end();
    await streamDone;
  } catch (error) {
    fileStream.destroy();
    // The completion promise may reject after destroy; swallow it so the
    // original failure is what propagates.
    await streamDone.catch(() => {});
    fs.rmSync(tmpPath, { force: true });
    throw error;
  } finally {
    // Clear the progress line regardless of outcome.
    process.stderr.write('\r' + ' '.repeat(80) + '\r');
  }
  fs.renameSync(tmpPath, destPath);
}
/**
 * Computes the hex-encoded SHA-256 digest of a file by streaming it, so
 * large binaries are never buffered fully in memory. Rejects on read
 * errors (missing file, permissions).
 */
export async function computeFileSha256(filePath: string): Promise<string> {
  const hash = createHash('sha256');
  for await (const chunk of fs.createReadStream(filePath)) {
    hash.update(chunk as Buffer);
  }
  return hash.digest('hex');
}
/**
 * Compares a file's SHA-256 digest against an expected hex hash.
 * The comparison is an exact string match against the lowercase hex
 * digest produced by createHash.
 */
export async function verifyFileSha256(
  filePath: string,
  expectedHash: string,
): Promise<boolean> {
  return (await computeFileSha256(filePath)) === expectedHash;
}
/**
 * Spawns a command with stdio inherited from the current process and
 * resolves with its exit code (1 when it closed without one). Rejects
 * when the process fails to spawn.
 */
function spawnInherited(command: string, args: string[]): Promise<number> {
  const child = nodeSpawn(command, args, { stdio: 'inherit' });
  return new Promise<number>((resolve, reject) => {
    child.on('error', reject);
    child.on('close', (code) => {
      resolve(code === null ? 1 : code);
    });
  });
}
/** Parsed CLI flags for `gemini gemma setup`. */
interface SetupArgs {
  // Port the local LiteRT server should use.
  port: number;
  // NOTE(review): presumably skips the model download (binary only) —
  // confirm against the setup handler.
  skipModel: boolean;
  // NOTE(review): presumably starts the server after setup — confirm.
  start: boolean;
  // Re-download the binary even when one is already installed.
  force: boolean;
  // Skip the interactive Gemma terms-of-use prompt.
  consent: boolean;
}
/**
 * Implements `gemini gemma setup`: installs the LiteRT-LM binary, verifies
 * its checksum, downloads the Gemma model, writes the router settings, and
 * optionally starts the local server.
 *
 * @param argv Parsed setup flags.
 * @returns Process exit code: 0 on success (or user cancellation), 1 when
 *   any required step fails.
 */
async function handleSetup(argv: SetupArgs): Promise<number> {
  const { port, force } = argv;
  let settingsUpdated = false;
  let serverStarted = false;
  let autoStartServer = true;
  log('');
  log(chalk.bold('Gemma Local Model Routing Setup'));
  log(chalk.dim('─'.repeat(40)));
  log('');
  // Fail fast on platforms with no published LiteRT-LM binary.
  const platform = detectPlatform();
  if (!platform) {
    logError(
      chalk.red(`Unsupported platform: ${process.platform}-${process.arch}`),
    );
    logError(
      'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)',
    );
    return 1;
  }
  log(chalk.dim(` Platform: ${platform.key}${platform.binaryName}`));
  // Require explicit acceptance of the Gemma Terms of Use unless the user
  // pre-consented with --consent.
  if (!argv.consent) {
    log('');
    log('This will download and install the LiteRT-LM runtime and the');
    log(
      `Gemma model (${GEMMA_MODEL_NAME}, ~1 GB). By proceeding, you agree to the`,
    );
    log('Gemma Terms of Use: https://ai.google.dev/gemma/terms');
    log('');
    const accepted = await promptYesNo('Do you want to continue?');
    if (!accepted) {
      log('Setup cancelled.');
      return 0;
    }
  }
  const binaryPath = getBinaryPath(platform.binaryName)!;
  const alreadyInstalled = isBinaryInstalled();
  // Install (or, with --force, reinstall) the LiteRT-LM binary.
  if (alreadyInstalled && !force) {
    log('');
    log(chalk.green(' ✓ LiteRT-LM binary already installed at:'));
    log(chalk.dim(` ${binaryPath}`));
  } else {
    log('');
    log(' Downloading LiteRT-LM binary...');
    const downloadUrl = getBinaryDownloadUrl(platform.binaryName);
    debugLogger.log(`Downloading from: ${downloadUrl}`);
    try {
      const binDir = path.dirname(binaryPath);
      fs.mkdirSync(binDir, { recursive: true });
      await downloadFile(downloadUrl, binaryPath);
      log(chalk.green(' ✓ Binary downloaded successfully'));
    } catch (error) {
      logError(
        chalk.red(
          ` ✗ Failed to download binary: ${error instanceof Error ? error.message : String(error)}`,
        ),
      );
      logError(' Check your internet connection and try again.');
      return 1;
    }
    // Verify the download against the pinned release checksum; a binary
    // that cannot be verified is deleted rather than installed.
    const expectedHash = PLATFORM_BINARY_SHA256[platform.binaryName];
    if (!expectedHash) {
      logError(
        chalk.red(
          ` ✗ No checksum is configured for ${platform.binaryName}. Refusing to install the binary.`,
        ),
      );
      try {
        fs.rmSync(binaryPath, { force: true });
      } catch {
        // ignore
      }
      return 1;
    }
    try {
      const checksumVerified = await verifyFileSha256(binaryPath, expectedHash);
      if (!checksumVerified) {
        logError(
          chalk.red(
            ' ✗ Downloaded binary checksum did not match the expected release hash.',
          ),
        );
        try {
          fs.rmSync(binaryPath, { force: true });
        } catch {
          // ignore
        }
        return 1;
      }
      log(chalk.green(' ✓ Binary checksum verified'));
    } catch (error) {
      logError(
        chalk.red(
          ` ✗ Failed to verify binary checksum: ${error instanceof Error ? error.message : String(error)}`,
        ),
      );
      try {
        fs.rmSync(binaryPath, { force: true });
      } catch {
        // ignore
      }
      return 1;
    }
    // The downloaded binary must be executable on POSIX systems.
    if (process.platform !== 'win32') {
      try {
        fs.chmodSync(binaryPath, 0o755);
      } catch (error) {
        logError(
          chalk.red(
            ` ✗ Failed to set executable permission: ${error instanceof Error ? error.message : String(error)}`,
          ),
        );
        return 1;
      }
    }
    // Clear the Gatekeeper quarantine attribute so macOS will run the binary.
    if (process.platform === 'darwin') {
      try {
        execFileSync('xattr', ['-d', 'com.apple.quarantine', binaryPath], {
          stdio: 'ignore',
        });
        log(chalk.green(' ✓ macOS quarantine attribute removed'));
      } catch {
        // Expected if the attribute doesn't exist.
      }
    }
  }
  // Pull the Gemma model via the LiteRT binary unless --skip-model was given.
  if (!argv.skipModel) {
    const modelAlreadyDownloaded = isModelDownloaded(binaryPath);
    if (modelAlreadyDownloaded && !force) {
      log('');
      log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} already downloaded`));
    } else {
      log('');
      log(` Downloading model ${GEMMA_MODEL_NAME}...`);
      log(chalk.dim(' You may be prompted to accept the Gemma Terms of Use.'));
      log('');
      // Run `<binary> pull <model>` with inherited stdio so the user can
      // answer any prompts from the LiteRT tool itself.
      const exitCode = await spawnInherited(binaryPath, [
        'pull',
        GEMMA_MODEL_NAME,
      ]);
      if (exitCode !== 0) {
        logError('');
        logError(
          chalk.red(` ✗ Model download failed (exit code ${exitCode})`),
        );
        return 1;
      }
      log('');
      log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} downloaded`));
    }
  }
  log('');
  log(' Configuring settings...');
  try {
    const settings = loadSettings(process.cwd());
    // User scope: security-sensitive settings that must not be overridable
    // by workspace configs (prevents arbitrary binary execution).
    const existingUserGemma =
      settings.forScope(SettingScope.User).settings.experimental
        ?.gemmaModelRouter ?? {};
    // Preserve a previously-chosen autoStartServer; default to true for
    // fresh setups.
    autoStartServer = existingUserGemma.autoStartServer ?? true;
    const existingUserExperimental =
      settings.forScope(SettingScope.User).settings.experimental ?? {};
    settings.setValue(SettingScope.User, 'experimental', {
      ...existingUserExperimental,
      gemmaModelRouter: {
        autoStartServer,
        ...(existingUserGemma.binaryPath !== undefined
          ? { binaryPath: existingUserGemma.binaryPath }
          : {}),
      },
    });
    // Workspace scope: project-isolated settings so the local model only
    // runs for this specific project, saving resources globally.
    const existingWorkspaceGemma =
      settings.forScope(SettingScope.Workspace).settings.experimental
        ?.gemmaModelRouter ?? {};
    const existingWorkspaceExperimental =
      settings.forScope(SettingScope.Workspace).settings.experimental ?? {};
    settings.setValue(SettingScope.Workspace, 'experimental', {
      ...existingWorkspaceExperimental,
      gemmaModelRouter: {
        ...existingWorkspaceGemma,
        enabled: true,
        classifier: {
          ...existingWorkspaceGemma.classifier,
          host: `http://localhost:${port}`,
          model: GEMMA_MODEL_NAME,
        },
      },
    });
    log(chalk.green(' ✓ Settings updated'));
    log(chalk.dim(' User (~/.gemini/settings.json): autoStartServer'));
    log(
      chalk.dim(' Workspace (.gemini/settings.json): enabled, classifier'),
    );
    settingsUpdated = true;
  } catch (error) {
    // A settings failure is not fatal here: the summary below tells the
    // user what still needs manual attention.
    logError(
      chalk.red(
        ` ✗ Failed to update settings: ${error instanceof Error ? error.message : String(error)}`,
      ),
    );
    logError(
      ' You can manually add the configuration to ~/.gemini/settings.json',
    );
  }
  // Optionally start the server now (the default for `setup`).
  if (argv.start) {
    log('');
    log(' Starting LiteRT server...');
    serverStarted = await startServer(binaryPath, port);
    if (serverStarted) {
      log(chalk.green(` ✓ Server started on port ${port}`));
    } else {
      log(
        chalk.yellow(
          ` ! Server may not have started correctly. Check: gemini gemma status`,
        ),
      );
    }
  }
  // Summary: routing is "active" only when settings were written AND the
  // server is up; setup "succeeds" when settings were written and, if a
  // start was requested, the server actually started.
  const routingActive = settingsUpdated && serverStarted;
  const setupSucceeded = settingsUpdated && (!argv.start || serverStarted);
  log('');
  log(chalk.dim('─'.repeat(40)));
  if (routingActive) {
    log(chalk.bold.green(' Setup complete! Local model routing is active.'));
  } else if (settingsUpdated) {
    log(
      chalk.bold.green(' Setup complete! Local model routing is configured.'),
    );
  } else {
    log(
      chalk.bold.yellow(
        ' Setup incomplete. Manual settings changes are still required.',
      ),
    );
  }
  log('');
  log(' How it works: Every request is classified by the local Gemma model.');
  log(
    ' Simple tasks (file reads, quick edits) route to ' +
      chalk.cyan('Flash') +
      ' for speed.',
  );
  log(
    ' Complex tasks (debugging, architecture) route to ' +
      chalk.cyan('Pro') +
      ' for quality.',
  );
  log(' This happens automatically — just use the CLI as usual.');
  log('');
  // Targeted next-step hints for whichever part did not complete.
  if (!settingsUpdated) {
    log(
      chalk.yellow(
        ' Fix the settings update above, then rerun "gemini gemma status".',
      ),
    );
    log('');
  } else if (!argv.start) {
    log(chalk.yellow(' Note: Run "gemini gemma start" to start the server.'));
    if (autoStartServer) {
      log(
        chalk.yellow(
          ' Or restart the CLI to auto-start it on the next launch.',
        ),
      );
    }
    log('');
  } else if (!serverStarted) {
    log(
      chalk.yellow(
        ' Review the server logs and rerun "gemini gemma start" after fixing the issue.',
      ),
    );
    log('');
  }
  log(' Useful commands:');
  log(chalk.dim(' gemini gemma status Check routing status'));
  log(chalk.dim(' gemini gemma start Start the LiteRT server'));
  log(chalk.dim(' gemini gemma stop Stop the LiteRT server'));
  log(chalk.dim(' /gemma Check status inside a session'));
  log('');
  return setupSucceeded ? 0 : 1;
}
/** `gemini gemma setup` — download and configure Gemma local model routing. */
export const setupCommand: CommandModule = {
  command: 'setup',
  describe: 'Download and configure Gemma local model routing',
  builder: (yargs) =>
    yargs
      .option('port', {
        type: 'number',
        default: DEFAULT_PORT,
        description: 'Port for the LiteRT server',
      })
      .option('skip-model', {
        type: 'boolean',
        default: false,
        description: 'Skip model download (binary only)',
      })
      .option('start', {
        type: 'boolean',
        default: true,
        description: 'Start the server after setup',
      })
      .option('force', {
        type: 'boolean',
        default: false,
        description: 'Re-download binary and model even if already present',
      })
      .option('consent', {
        type: 'boolean',
        default: false,
        description: 'Skip interactive consent prompt (implies acceptance)',
      }),
  handler: async (argv) => {
    // yargs camelizes `--skip-model` to `skipModel`.
    const exitCode = await handleSetup({
      port: Number(argv['port']),
      skipModel: Boolean(argv['skipModel']),
      start: Boolean(argv['start']),
      force: Boolean(argv['force']),
      consent: Boolean(argv['consent']),
    });
    await exitCli(exitCode);
  },
};

View file

@ -0,0 +1,123 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import {
DEFAULT_PORT,
getPidFilePath,
getLogFilePath,
getLiteRtBinDir,
SERVER_START_WAIT_MS,
} from './constants.js';
import {
getBinaryPath,
isBinaryInstalled,
isServerRunning,
resolveGemmaConfig,
writeServerProcessInfo,
} from './platform.js';
/**
 * Spawns the LiteRT-LM binary as a detached background server process.
 *
 * Server output is appended to the shared log file, and the child's
 * PID/binary/port are recorded so later commands can identify and stop it.
 *
 * @param binaryPath Path of the LiteRT-LM executable.
 * @param port Port the server should listen on.
 * @returns True when a server responds on `port` after a short startup
 *   wait; false otherwise.
 */
export async function startServer(
  binaryPath: string,
  port: number,
): Promise<boolean> {
  // Idempotent: a server already listening on the port counts as success.
  if (await isServerRunning(port)) {
    debugLogger.log(`LiteRT server already running on port ${port}`);
    return true;
  }
  const logPath = getLogFilePath();
  fs.mkdirSync(getLiteRtBinDir(), { recursive: true });
  fs.mkdirSync(path.dirname(getPidFilePath()), { recursive: true });
  // Open the log in append mode; the fd is handed to the child directly.
  const logFd = fs.openSync(logPath, 'a');
  try {
    const serverProcess = spawn(
      binaryPath,
      ['serve', `--port=${port}`, '--verbose'],
      {
        detached: true,
        stdio: ['ignore', logFd, logFd],
      },
    );
    if (serverProcess.pid) {
      writeServerProcessInfo({ pid: serverProcess.pid, binaryPath, port });
    }
    // Detach so the CLI process can exit while the server keeps running.
    serverProcess.unref();
  } finally {
    // The child holds its own reference to the fd; the parent's copy can go.
    fs.closeSync(logFd);
  }
  // Give the server a moment to bind before probing it.
  await new Promise<void>((resolve) =>
    setTimeout(resolve, SERVER_START_WAIT_MS),
  );
  return isServerRunning(port);
}
/** `gemini gemma start` — start the LiteRT-LM server in the background. */
export const startCommand: CommandModule = {
  command: 'start',
  describe: 'Start the LiteRT-LM server',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port for the LiteRT server',
    }),
  handler: async (argv) => {
    // Explicit --port wins; otherwise use the configured (or default) port.
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    if (!port) {
      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
      port = configuredPort;
    }
    const binaryPath = getBinaryPath();
    if (!binaryPath || !isBinaryInstalled(binaryPath)) {
      debugLogger.error(
        chalk.red(
          'LiteRT-LM binary not found. Run "gemini gemma setup" first.',
        ),
      );
      await exitCli(1);
      return;
    }
    // Starting twice is a no-op: report success and exit.
    const alreadyRunning = await isServerRunning(port);
    if (alreadyRunning) {
      debugLogger.log(
        chalk.green(`LiteRT server is already running on port ${port}.`),
      );
      await exitCli(0);
      return;
    }
    debugLogger.log(`Starting LiteRT server on port ${port}...`);
    const started = await startServer(binaryPath, port);
    if (started) {
      debugLogger.log(chalk.green(`LiteRT server started on port ${port}.`));
      debugLogger.log(chalk.dim(`Logs: ${getLogFilePath()}`));
      await exitCli(0);
    } else {
      debugLogger.error(
        chalk.red('Server may not have started correctly. Check logs:'),
      );
      debugLogger.error(chalk.dim(` ${getLogFilePath()}`));
      await exitCli(1);
    }
  },
};

View file

@ -0,0 +1,165 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import chalk from 'chalk';
import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
import {
detectPlatform,
getBinaryPath,
isBinaryInstalled,
isModelDownloaded,
isServerRunning,
readServerPid,
isProcessRunning,
resolveGemmaConfig,
} from './platform.js';
import { exitCli } from '../utils.js';
/** Snapshot of the health checks behind `gemini gemma status` and `/gemma`. */
export interface GemmaStatusResult {
  /** Whether the LiteRT-LM binary is installed. */
  binaryInstalled: boolean;
  /** Resolved path to the binary, or null when it cannot be determined. */
  binaryPath: string | null;
  /** Whether the Gemma model has been pulled for the installed binary. */
  modelDownloaded: boolean;
  /** Whether a server responded on the effective port. */
  serverRunning: boolean;
  /** PID from the PID file when that process is still alive, else null. */
  serverPid: number | null;
  /** Whether the Gemma router is enabled in settings. */
  settingsEnabled: boolean;
  /** Port that was checked (explicit override or configured port). */
  port: number;
  /** True only when every check above passes. */
  allPassing: boolean;
}
export async function checkGemmaStatus(
port?: number,
): Promise<GemmaStatusResult> {
const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
const effectivePort = port ?? configuredPort;
const binaryPath = getBinaryPath();
const binaryInstalled = isBinaryInstalled(binaryPath);
const modelDownloaded =
binaryInstalled && binaryPath ? isModelDownloaded(binaryPath) : false;
const serverRunning = await isServerRunning(effectivePort);
const pid = readServerPid();
const serverPid = pid && isProcessRunning(pid) ? pid : null;
const allPassing =
binaryInstalled && modelDownloaded && serverRunning && settingsEnabled;
return {
binaryInstalled,
binaryPath,
modelDownloaded,
serverRunning,
serverPid,
settingsEnabled,
port: effectivePort,
allPassing,
};
}
/**
 * Renders a {@link GemmaStatusResult} as a human-readable, colorized report.
 * Each failing check is followed by a dimmed remediation hint.
 *
 * @param status Snapshot produced by checkGemmaStatus().
 * @returns The multi-line report, ready to write to stdout.
 */
export function formatGemmaStatus(status: GemmaStatusResult): string {
  const check = (ok: boolean) => (ok ? chalk.green('✓') : chalk.red('✗'));
  const lines: string[] = [
    '',
    chalk.bold('Gemma Local Model Routing Status'),
    chalk.dim('─'.repeat(40)),
    '',
  ];
  // Binary check.
  if (status.binaryInstalled) {
    lines.push(` Binary: ${check(true)} Installed (${status.binaryPath})`);
  } else {
    const platform = detectPlatform();
    if (platform) {
      lines.push(` Binary: ${check(false)} Not installed`);
      lines.push(chalk.dim(` Run: gemini gemma setup`));
    } else {
      // No remediation hint: there is no binary for this platform at all.
      lines.push(
        ` Binary: ${check(false)} Unsupported platform (${process.platform}-${process.arch})`,
      );
    }
  }
  // Model check.
  if (status.modelDownloaded) {
    lines.push(` Model: ${check(true)} ${GEMMA_MODEL_NAME} downloaded`);
  } else {
    lines.push(` Model: ${check(false)} ${GEMMA_MODEL_NAME} not found`);
    if (status.binaryInstalled) {
      lines.push(
        chalk.dim(
          ` Run: ${status.binaryPath} pull ${GEMMA_MODEL_NAME}`,
        ),
      );
    } else {
      lines.push(chalk.dim(` Run: gemini gemma setup`));
    }
  }
  // Server check.
  if (status.serverRunning) {
    const pidInfo = status.serverPid ? ` (PID ${status.serverPid})` : '';
    lines.push(
      ` Server: ${check(true)} Running on port ${status.port}${pidInfo}`,
    );
  } else {
    lines.push(
      ` Server: ${check(false)} Not running on port ${status.port}`,
    );
    lines.push(chalk.dim(` Run: gemini gemma start`));
  }
  // Settings check.
  if (status.settingsEnabled) {
    lines.push(` Settings: ${check(true)} Enabled in settings.json`);
  } else {
    lines.push(` Settings: ${check(false)} Not enabled in settings.json`);
    lines.push(
      chalk.dim(
        ` Run: gemini gemma setup (auto-configures settings)`,
      ),
    );
  }
  lines.push('');
  // Overall verdict.
  if (status.allPassing) {
    lines.push(chalk.green(' Routing is active — no action needed.'));
    lines.push('');
    lines.push(
      chalk.dim(
        ' Simple requests → Flash (fast) | Complex requests → Pro (powerful)',
      ),
    );
    lines.push(chalk.dim(' This happens automatically on every request.'));
  } else {
    lines.push(
      chalk.yellow(
        ' Some checks failed. Run "gemini gemma setup" for guided installation.',
      ),
    );
  }
  lines.push('');
  return lines.join('\n');
}
/** `gemini gemma status` — print routing status; exits 0 only if all pass. */
export const statusCommand: CommandModule = {
  command: 'status',
  describe: 'Check Gemma local model routing status',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port to check for the LiteRT server',
    }),
  handler: async (argv) => {
    // Explicit --port overrides the configured classifier port.
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    const status = await checkGemmaStatus(port);
    const output = formatGemmaStatus(status);
    process.stdout.write(output);
    // Exit code mirrors overall health so scripts can gate on it.
    await exitCli(status.allPassing ? 0 : 1);
  },
};

View file

@ -0,0 +1,112 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
// vi.hoisted() creates these before the hoisted vi.mock factories below run,
// so those factories can safely close over them.
const mockGetBinaryPath = vi.hoisted(() => vi.fn());
const mockIsExpectedLiteRtServerProcess = vi.hoisted(() => vi.fn());
const mockIsProcessRunning = vi.hoisted(() => vi.fn());
const mockIsServerRunning = vi.hoisted(() => vi.fn());
const mockReadServerPid = vi.hoisted(() => vi.fn());
const mockReadServerProcessInfo = vi.hoisted(() => vi.fn());
const mockResolveGemmaConfig = vi.hoisted(() => vi.fn());
// Wrap the real core module so only its debug logger is replaced.
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const { mockCoreDebugLogger } = await import(
    '../../test-utils/mockDebugLogger.js'
  );
  return mockCoreDebugLogger(
    await importOriginal<typeof import('@google/gemini-cli-core')>(),
    {
      stripAnsi: false,
    },
  );
});
// Pin the PID-file path so assertions can reference a stable location.
vi.mock('./constants.js', () => ({
  DEFAULT_PORT: 9379,
  getPidFilePath: vi.fn(() => '/tmp/litert-server.pid'),
}));
// Replace every platform helper stopServer touches with a controllable mock.
vi.mock('./platform.js', () => ({
  getBinaryPath: mockGetBinaryPath,
  isExpectedLiteRtServerProcess: mockIsExpectedLiteRtServerProcess,
  isProcessRunning: mockIsProcessRunning,
  isServerRunning: mockIsServerRunning,
  readServerPid: mockReadServerPid,
  readServerProcessInfo: mockReadServerProcessInfo,
  resolveGemmaConfig: mockResolveGemmaConfig,
}));
// Prevent tests from actually exiting the process.
vi.mock('../utils.js', () => ({
  exitCli: vi.fn(),
}));
import { stopServer } from './stop.js';
describe('gemma stop command', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Fake timers let stopServer's SIGTERM/SIGKILL wait delays resolve
    // instantly via runAllTimersAsync().
    vi.useFakeTimers();
    mockGetBinaryPath.mockReturnValue('/custom/lit');
    mockResolveGemmaConfig.mockReturnValue({ configuredPort: 9379 });
  });
  afterEach(() => {
    vi.useRealTimers();
    vi.restoreAllMocks();
  });
  it('refuses to signal a pid that does not match the expected LiteRT server', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    mockIsProcessRunning.mockReturnValue(true);
    // The PID is alive but is NOT our server — stopServer must not kill it.
    mockIsExpectedLiteRtServerProcess.mockReturnValue(false);
    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
    await expect(stopServer(8123)).resolves.toBe('unexpected-process');
    expect(killSpy).not.toHaveBeenCalled();
  });
  it('stops the verified LiteRT server and removes the pid file', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    // Alive before SIGTERM, gone afterwards.
    mockIsProcessRunning.mockReturnValueOnce(true).mockReturnValueOnce(false);
    mockIsExpectedLiteRtServerProcess.mockReturnValue(true);
    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
    const stopPromise = stopServer(8123);
    await vi.runAllTimersAsync();
    await expect(stopPromise).resolves.toBe('stopped');
    expect(killSpy).toHaveBeenCalledWith(1234, 'SIGTERM');
    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
  });
  it('cleans up a stale pid file when the recorded process is no longer running', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    mockIsProcessRunning.mockReturnValue(false);
    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
    await expect(stopServer(8123)).resolves.toBe('not-running');
    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
  });
});

View file

@ -0,0 +1,155 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import { DEFAULT_PORT, getPidFilePath } from './constants.js';
import {
getBinaryPath,
isExpectedLiteRtServerProcess,
isProcessRunning,
isServerRunning,
readServerPid,
readServerProcessInfo,
resolveGemmaConfig,
} from './platform.js';
/**
 * Outcome of {@link stopServer}:
 * - 'stopped': the server was terminated and the PID file removed.
 * - 'not-running': no PID file, or its recorded process is already dead.
 * - 'unexpected-process': the recorded PID belongs to some other process.
 * - 'failed': the SIGTERM could not be sent, or the process survived SIGKILL.
 */
export type StopServerResult =
  | 'stopped'
  | 'not-running'
  | 'unexpected-process'
  | 'failed';
/**
 * Stops the LiteRT server recorded in the PID file.
 *
 * The recorded PID is verified against the expected LiteRT server identity
 * before being signalled, so a recycled PID belonging to an unrelated
 * process is never killed. Termination escalates SIGTERM → SIGKILL.
 *
 * @param expectedPort Fallback port used for identity verification when the
 *   PID file did not record one.
 * @returns See {@link StopServerResult}.
 */
export async function stopServer(
  expectedPort?: number,
): Promise<StopServerResult> {
  const processInfo = readServerProcessInfo();
  const pidPath = getPidFilePath();
  if (!processInfo) {
    return 'not-running';
  }
  const { pid } = processInfo;
  // A PID file can outlive its process; remove the stale file instead of
  // signalling a dead PID.
  if (!isProcessRunning(pid)) {
    debugLogger.log(
      `Stale PID file found (PID ${pid} is not running), removing ${pidPath}`,
    );
    try {
      fs.unlinkSync(pidPath);
    } catch {
      // ignore
    }
    return 'not-running';
  }
  const binaryPath = processInfo.binaryPath ?? getBinaryPath();
  const port = processInfo.port ?? expectedPort;
  // Safety check: never signal a PID the OS may have reassigned to some
  // other process.
  if (!isExpectedLiteRtServerProcess(pid, { binaryPath, port })) {
    debugLogger.warn(
      `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
    );
    return 'unexpected-process';
  }
  // Ask for a graceful shutdown first.
  try {
    process.kill(pid, 'SIGTERM');
  } catch {
    return 'failed';
  }
  await new Promise((resolve) => setTimeout(resolve, 1000));
  // Escalate to SIGKILL if the server ignored SIGTERM.
  if (isProcessRunning(pid)) {
    try {
      process.kill(pid, 'SIGKILL');
    } catch {
      // ignore
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
    if (isProcessRunning(pid)) {
      return 'failed';
    }
  }
  try {
    fs.unlinkSync(pidPath);
  } catch {
    // ignore
  }
  return 'stopped';
}
/** `gemini gemma stop` — stop the background LiteRT-LM server. */
export const stopCommand: CommandModule = {
  command: 'stop',
  describe: 'Stop the LiteRT-LM server',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port where the LiteRT server is running',
    }),
  handler: async (argv) => {
    // Explicit --port wins; otherwise use the configured (or default) port.
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    if (!port) {
      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
      port = configuredPort;
    }
    const processInfo = readServerProcessInfo();
    const pid = processInfo?.pid ?? readServerPid();
    if (pid !== null && isProcessRunning(pid)) {
      debugLogger.log(`Stopping LiteRT server (PID ${pid})...`);
      const result = await stopServer(port);
      if (result === 'stopped') {
        debugLogger.log(chalk.green('LiteRT server stopped.'));
        await exitCli(0);
      } else if (result === 'unexpected-process') {
        debugLogger.error(
          chalk.red(
            `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
          ),
        );
        debugLogger.error(
          chalk.dim(
            'Remove the stale pid file after verifying the process, or stop the process manually.',
          ),
        );
        await exitCli(1);
      } else {
        debugLogger.error(chalk.red('Failed to stop LiteRT server.'));
        await exitCli(1);
      }
      return;
    }
    // No live recorded PID: check whether something else owns the port.
    const running = await isServerRunning(port);
    if (running) {
      debugLogger.log(
        chalk.yellow(
          `A server is responding on port ${port}, but it was not started by "gemini gemma start".`,
        ),
      );
      debugLogger.log(
        chalk.dim(
          'If you started it manually, stop it from the terminal where it is running.',
        ),
      );
      await exitCli(1);
    } else {
      debugLogger.log('No LiteRT server is currently running.');
      await exitCli(0);
    }
  },
};

View file

@ -338,6 +338,7 @@ describe('parseArguments', () => {
{ cmd: 'skill list', expected: true },
{ cmd: 'hooks migrate', expected: true },
{ cmd: 'hook migrate', expected: true },
{ cmd: 'gemma status', expected: true },
{ cmd: 'some query', expected: undefined },
{ cmd: 'hello world', expected: undefined },
])(
@ -758,6 +759,12 @@ describe('parseArguments', () => {
const argv = await parseArguments(settings);
expect(argv.isCommand).toBe(true);
});
it('should set isCommand to true for gemma command', async () => {
process.argv = ['node', 'script.js', 'gemma', 'status'];
const argv = await parseArguments(createTestMergedSettings());
expect(argv.isCommand).toBe(true);
});
});
describe('loadCliConfig', () => {
@ -3030,6 +3037,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
experimental: {
gemmaModelRouter: {
enabled: true,
autoStartServer: false,
binaryPath: '/custom/lit',
classifier: {
host: 'http://custom:1234',
model: 'custom-gemma',
@ -3040,6 +3049,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getGemmaModelRouterEnabled()).toBe(true);
const gemmaSettings = config.getGemmaModelRouterSettings();
expect(gemmaSettings.autoStartServer).toBe(false);
expect(gemmaSettings.binaryPath).toBe('/custom/lit');
expect(gemmaSettings.classifier?.host).toBe('http://custom:1234');
expect(gemmaSettings.classifier?.model).toBe('custom-gemma');
});
@ -3057,6 +3068,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getGemmaModelRouterEnabled()).toBe(true);
const gemmaSettings = config.getGemmaModelRouterSettings();
expect(gemmaSettings.autoStartServer).toBe(false);
expect(gemmaSettings.binaryPath).toBe('');
expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379');
expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});

View file

@ -13,6 +13,7 @@ import { mcpCommand } from '../commands/mcp.js';
import { extensionsCommand } from '../commands/extensions.js';
import { skillsCommand } from '../commands/skills.js';
import { hooksCommand } from '../commands/hooks.js';
import { gemmaCommand } from '../commands/gemma.js';
import {
setGeminiMdFilename as setServerGeminiMdFilename,
getCurrentGeminiMdFilename,
@ -181,6 +182,7 @@ export async function parseArguments(
extensionsCommand,
skillsCommand,
hooksCommand,
gemmaCommand,
];
const subcommands = commandModules.flatMap((mod) => {
@ -260,6 +262,7 @@ export async function parseArguments(
yargsInstance.command(extensionsCommand);
yargsInstance.command(skillsCommand);
yargsInstance.command(hooksCommand);
yargsInstance.command(gemmaCommand);
yargsInstance
.command('$0 [query..]', 'Launch Gemini CLI', (yargsInstance) =>

View file

@ -471,11 +471,33 @@ describe('SettingsSchema', () => {
expect(enabled.category).toBe('Experimental');
expect(enabled.default).toBe(false);
expect(enabled.requiresRestart).toBe(true);
expect(enabled.showInDialog).toBe(false);
expect(enabled.showInDialog).toBe(true);
expect(enabled.description).toBe(
'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
);
const autoStartServer = gemmaModelRouter.properties.autoStartServer;
expect(autoStartServer).toBeDefined();
expect(autoStartServer.type).toBe('boolean');
expect(autoStartServer.category).toBe('Experimental');
expect(autoStartServer.default).toBe(false);
expect(autoStartServer.requiresRestart).toBe(true);
expect(autoStartServer.showInDialog).toBe(true);
expect(autoStartServer.description).toBe(
'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
);
const binaryPath = gemmaModelRouter.properties.binaryPath;
expect(binaryPath).toBeDefined();
expect(binaryPath.type).toBe('string');
expect(binaryPath.category).toBe('Experimental');
expect(binaryPath.default).toBe('');
expect(binaryPath.requiresRestart).toBe(true);
expect(binaryPath.showInDialog).toBe(false);
expect(binaryPath.description).toBe(
'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
);
const classifier = gemmaModelRouter.properties.classifier;
expect(classifier).toBeDefined();
expect(classifier.type).toBe('object');

View file

@ -2169,6 +2169,26 @@ const SETTINGS_SCHEMA = {
default: false,
description:
'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
showInDialog: true,
},
autoStartServer: {
type: 'boolean',
label: 'Auto-start LiteRT Server',
category: 'Experimental',
requiresRestart: true,
default: false,
description:
'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
showInDialog: true,
},
binaryPath: {
type: 'string',
label: 'LiteRT Binary Path',
category: 'Experimental',
requiresRestart: true,
default: '',
description:
'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
showInDialog: false,
},
classifier: {

View file

@ -612,6 +612,23 @@ export async function main() {
const initializationResult = await initializeApp(config, settings);
initAppHandle?.end();
import('./services/liteRtServerManager.js')
.then(({ LiteRtServerManager }) => {
const mergedGemma = settings.merged.experimental?.gemmaModelRouter;
if (!mergedGemma) return;
// Security: binaryPath and autoStartServer must come from user-scoped
// settings only to prevent workspace configs from triggering arbitrary
// binary execution.
const userGemma = settings.forScope(SettingScope.User).settings
.experimental?.gemmaModelRouter;
return LiteRtServerManager.ensureRunning({
...mergedGemma,
binaryPath: userGemma?.binaryPath,
autoStartServer: userGemma?.autoStartServer,
});
})
.catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e));
if (
settings.merged.security.auth.selectedType ===
AuthType.LOGIN_WITH_GOOGLE &&

View file

@ -61,6 +61,7 @@ import { vimCommand } from '../ui/commands/vimCommand.js';
import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js';
import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js';
import { upgradeCommand } from '../ui/commands/upgradeCommand.js';
import { gemmaStatusCommand } from '../ui/commands/gemmaStatusCommand.js';
/**
* Loads the core, hard-coded slash commands that are an integral part
@ -221,6 +222,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
: [skillsCommand]
: []),
settingsCommand,
gemmaStatusCommand,
tasksCommand,
vimCommand,
setupGithubCommand,

View file

@ -0,0 +1,68 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
// vi.hoisted() creates the mock fns before the hoisted vi.mock factories run.
const mockGetBinaryPath = vi.hoisted(() => vi.fn());
const mockIsServerRunning = vi.hoisted(() => vi.fn());
const mockStartServer = vi.hoisted(() => vi.fn());
// Replace the platform helpers and the server starter with mocks so the
// manager can be exercised without touching the real binary or network.
vi.mock('../commands/gemma/platform.js', () => ({
  getBinaryPath: mockGetBinaryPath,
  isServerRunning: mockIsServerRunning,
}));
vi.mock('../commands/gemma/start.js', () => ({
  startServer: mockStartServer,
}));
import { LiteRtServerManager } from './liteRtServerManager.js';
describe('LiteRtServerManager', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Pretend the binary exists on disk so auto-start proceeds.
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    mockIsServerRunning.mockResolvedValue(false);
    mockStartServer.mockResolvedValue(true);
  });
  it('uses the configured custom binary path when auto-starting', async () => {
    mockGetBinaryPath.mockReturnValue('/user/lit');
    // The settings-supplied binaryPath is not what gets executed; the path
    // handed to startServer must come from getBinaryPath().
    const settings: GemmaModelRouterSettings = {
      enabled: true,
      binaryPath: '/workspace/evil',
      classifier: {
        host: 'http://localhost:8123',
      },
    };
    await LiteRtServerManager.ensureRunning(settings);
    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
    expect(fs.existsSync).toHaveBeenCalledWith('/user/lit');
    expect(mockStartServer).toHaveBeenCalledWith('/user/lit', 8123);
  });
  it('falls back to the default binary path when no custom path is configured', async () => {
    mockGetBinaryPath.mockReturnValue('/default/lit');
    const settings: GemmaModelRouterSettings = {
      enabled: true,
      classifier: {
        host: 'http://localhost:9379',
      },
    };
    await LiteRtServerManager.ensureRunning(settings);
    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
    expect(fs.existsSync).toHaveBeenCalledWith('/default/lit');
    expect(mockStartServer).toHaveBeenCalledWith('/default/lit', 9379);
  });
});

View file

@ -0,0 +1,59 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { debugLogger } from '@google/gemini-cli-core';
import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
import { getBinaryPath, isServerRunning } from '../commands/gemma/platform.js';
import { DEFAULT_PORT } from '../commands/gemma/constants.js';
/**
 * Best-effort auto-start of the local LiteRT-LM server when the Gemma
 * model router is enabled. All failures are logged and swallowed so the
 * CLI never fails to launch because of the local server.
 */
export class LiteRtServerManager {
  /**
   * Starts the LiteRT server for the configured classifier port if it is
   * not already running. No-ops when routing is disabled, auto-start is
   * explicitly turned off, or the binary is not installed.
   *
   * @param gemmaSettings The gemmaModelRouter settings, if any.
   */
  static async ensureRunning(
    gemmaSettings: GemmaModelRouterSettings | undefined,
  ): Promise<void> {
    if (!gemmaSettings?.enabled) return;
    // NOTE(review): only an explicit `false` disables auto-start, so an
    // absent autoStartServer behaves as enabled — confirm this matches the
    // intended settings-schema default.
    if (gemmaSettings.autoStartServer === false) return;
    const binaryPath = getBinaryPath();
    if (!binaryPath || !fs.existsSync(binaryPath)) {
      debugLogger.log(
        '[LiteRtServerManager] Binary not installed, skipping auto-start. Run "gemini gemma setup".',
      );
      return;
    }
    // Derive the port from the classifier host URL (e.g.
    // "http://localhost:9379"); fall back to the default when unparsable.
    const port =
      parseInt(
        gemmaSettings.classifier?.host?.match(/:(\d+)/)?.[1] ?? '',
        10,
      ) || DEFAULT_PORT;
    const running = await isServerRunning(port);
    if (running) {
      debugLogger.log(
        `[LiteRtServerManager] Server already running on port ${port}`,
      );
      return;
    }
    debugLogger.log(
      `[LiteRtServerManager] Auto-starting LiteRT server on port ${port}...`,
    );
    try {
      // start.js is loaded lazily, only when a server actually needs starting.
      const { startServer } = await import('../commands/gemma/start.js');
      const started = await startServer(binaryPath, port);
      if (started) {
        debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`);
      } else {
        debugLogger.warn(
          `[LiteRtServerManager] Server may not have started correctly on port ${port}`,
        );
      }
    } catch (error) {
      debugLogger.warn('[LiteRtServerManager] Auto-start failed:', error);
    }
  }
}

View file

@ -0,0 +1,41 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { CommandKind, type SlashCommand } from './types.js';
import { MessageType, type HistoryItemGemmaStatus } from '../types.js';
import { checkGemmaStatus } from '../../commands/gemma/status.js';
import { GEMMA_MODEL_NAME } from '../../commands/gemma/constants.js';
/** `/gemma` slash command: reports local Gemma model routing status. */
export const gemmaStatusCommand: SlashCommand = {
  name: 'gemma',
  description: 'Check local Gemma model routing status',
  kind: CommandKind.BUILT_IN,
  autoExecute: true,
  isSafeConcurrent: true,
  action: async (context) => {
    // Extract an explicit port from the configured classifier host, if any;
    // leave it undefined so the status check falls back to its default.
    const host =
      context.services.settings.merged.experimental?.gemmaModelRouter
        ?.classifier?.host;
    const portText = host?.match(/:(\d+)/)?.[1];
    const port = Number.parseInt(portText ?? '', 10) || undefined;

    const status = await checkGemmaStatus(port);

    const historyItem: Omit<HistoryItemGemmaStatus, 'id'> = {
      type: MessageType.GEMMA_STATUS,
      binaryInstalled: status.binaryInstalled,
      binaryPath: status.binaryPath,
      modelName: GEMMA_MODEL_NAME,
      modelDownloaded: status.modelDownloaded,
      serverRunning: status.serverRunning,
      serverPid: status.serverPid,
      serverPort: status.port,
      settingsEnabled: status.settingsEnabled,
      allPassing: status.allPassing,
    };
    context.ui.addItem(historyItem);
  },
};

View file

@ -32,6 +32,7 @@ import { ToolsList } from './views/ToolsList.js';
import { SkillsList } from './views/SkillsList.js';
import { AgentsStatus } from './views/AgentsStatus.js';
import { McpStatus } from './views/McpStatus.js';
import { GemmaStatus } from './views/GemmaStatus.js';
import { ChatList } from './views/ChatList.js';
import { ModelMessage } from './messages/ModelMessage.js';
import { ThinkingMessage } from './messages/ThinkingMessage.js';
@ -228,6 +229,9 @@ export const HistoryItemDisplay: React.FC<HistoryItemDisplayProps> = ({
{itemForDisplay.type === 'mcp_status' && (
<McpStatus {...itemForDisplay} serverStatus={getMCPServerStatus} />
)}
{itemForDisplay.type === 'gemma_status' && (
<GemmaStatus {...itemForDisplay} />
)}
{itemForDisplay.type === 'chat_list' && (
<ChatList chats={itemForDisplay.chats} />
)}

View file

@ -0,0 +1,120 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { Box, Text } from 'ink';
import type React from 'react';
import { theme } from '../../semantic-colors.js';
import type { HistoryItemGemmaStatus } from '../../types.js';
type GemmaStatusProps = Omit<HistoryItemGemmaStatus, 'id' | 'type'>;

/** Filled green dot when `ok`, hollow red circle otherwise. */
const StatusDot: React.FC<{ ok: boolean }> = ({ ok }) => {
  const color = ok ? theme.status.success : theme.status.error;
  const glyph = ok ? '\u25CF' : '\u25CB';
  return <Text color={color}>{glyph}</Text>;
};
/**
 * Renders the `/gemma` status report: one dot-prefixed row per check
 * (binary, model, server, settings) plus a summary of whether routing is
 * active and a next-step hint when it is not.
 */
export const GemmaStatus: React.FC<GemmaStatusProps> = ({
  binaryInstalled,
  binaryPath,
  modelName,
  modelDownloaded,
  serverRunning,
  serverPid,
  serverPort,
  settingsEnabled,
  allPassing,
}) => (
  <Box flexDirection="column">
    <Text bold>Gemma Local Model Routing</Text>
    <Box height={1} />
    {/* Binary row: shows the install path on success, error text otherwise. */}
    <Box>
      <StatusDot ok={binaryInstalled} />
      <Text>
        {' '}
        <Text bold>Binary: </Text>
        {binaryInstalled ? (
          <Text color={theme.text.secondary}>{binaryPath}</Text>
        ) : (
          <Text color={theme.status.error}>Not installed</Text>
        )}
      </Text>
    </Box>
    {/* Model row: model name is shown in both states. */}
    <Box>
      <StatusDot ok={modelDownloaded} />
      <Text>
        {' '}
        <Text bold>Model: </Text>
        {modelDownloaded ? (
          <Text>{modelName}</Text>
        ) : (
          <Text color={theme.status.error}>{modelName} not found</Text>
        )}
      </Text>
    </Box>
    {/* Server row: PID is optional and only appended when known. */}
    <Box>
      <StatusDot ok={serverRunning} />
      <Text>
        {' '}
        <Text bold>Server: </Text>
        {serverRunning ? (
          <Text>
            port {serverPort}
            {serverPid ? (
              <Text color={theme.text.secondary}> (PID {serverPid})</Text>
            ) : null}
          </Text>
        ) : (
          <Text color={theme.status.error}>
            not running on port {serverPort}
          </Text>
        )}
      </Text>
    </Box>
    <Box>
      <StatusDot ok={settingsEnabled} />
      <Text>
        {' '}
        <Text bold>Settings: </Text>
        {settingsEnabled ? (
          <Text>enabled</Text>
        ) : (
          <Text color={theme.status.error}>not enabled</Text>
        )}
      </Text>
    </Box>
    {/* Summary: routing is active only when every check above passed. */}
    <Box marginTop={1}>
      <Text bold>Active for: </Text>
      {allPassing ? (
        <Text color={theme.status.success}>[routing]</Text>
      ) : (
        <Text color={theme.text.secondary}>none</Text>
      )}
    </Box>
    <Box marginTop={1}>
      {allPassing ? (
        <Box flexDirection="column">
          <Text color={theme.text.secondary}>
            Simple requests route to Flash, complex requests to Pro.
          </Text>
          <Text color={theme.text.secondary}>
            This happens automatically on every request.
          </Text>
        </Box>
      ) : (
        <Text color={theme.status.warning}>
          Run &quot;gemini gemma setup&quot; to install and configure.
        </Text>
      )}
    </Box>
  </Box>
);

View file

@ -355,6 +355,19 @@ export interface JsonMcpResource {
description?: string;
}
/**
 * History entry produced by the `/gemma` slash command and rendered by the
 * GemmaStatus view. Mirrors the fields reported by checkGemmaStatus.
 */
export type HistoryItemGemmaStatus = HistoryItemBase & {
  type: 'gemma_status';
  // Whether the LiteRT-LM binary exists on disk.
  binaryInstalled: boolean;
  // Resolved binary path; null when not installed.
  binaryPath: string | null;
  // Display name of the Gemma model (GEMMA_MODEL_NAME).
  modelName: string;
  modelDownloaded: boolean;
  serverRunning: boolean;
  // PID of the running server, when known; null otherwise.
  serverPid: number | null;
  // Port the server was checked on.
  serverPort: number;
  // Whether the Gemma router is enabled in settings.
  settingsEnabled: boolean;
  // True only when every check above passed (routing is active).
  allPassing: boolean;
};
export type HistoryItemMcpStatus = HistoryItemBase & {
type: 'mcp_status';
servers: Record<string, MCPServerConfig>;
@ -404,6 +417,7 @@ export type HistoryItemWithoutId =
| HistoryItemSkillsList
| HistoryItemAgentsList
| HistoryItemMcpStatus
| HistoryItemGemmaStatus
| HistoryItemChatList
| HistoryItemThinking
| HistoryItemHint
@ -430,6 +444,7 @@ export enum MessageType {
SKILLS_LIST = 'skills_list',
AGENTS_LIST = 'agents_list',
MCP_STATUS = 'mcp_status',
GEMMA_STATUS = 'gemma_status',
CHAT_LIST = 'chat_list',
HINT = 'hint',
}

View file

@ -1975,6 +1975,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(baseParams);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(false);
expect(settings.autoStartServer).toBe(true);
expect(settings.binaryPath).toBe('');
expect(settings.classifier?.host).toBe('http://localhost:9379');
expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});
@ -1984,6 +1986,8 @@ describe('GemmaModelRouterSettings', () => {
...baseParams,
gemmaModelRouter: {
enabled: true,
autoStartServer: false,
binaryPath: '/custom/lit',
classifier: {
host: 'http://custom:1234',
model: 'custom-gemma',
@ -1993,6 +1997,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(params);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(true);
expect(settings.autoStartServer).toBe(false);
expect(settings.binaryPath).toBe('/custom/lit');
expect(settings.classifier?.host).toBe('http://custom:1234');
expect(settings.classifier?.model).toBe('custom-gemma');
});
@ -2007,6 +2013,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(params);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(true);
expect(settings.autoStartServer).toBe(true);
expect(settings.binaryPath).toBe('');
expect(settings.classifier?.host).toBe('http://localhost:9379');
expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});

View file

@ -219,6 +219,8 @@ export interface OutputSettings {
export interface GemmaModelRouterSettings {
enabled?: boolean;
autoStartServer?: boolean;
binaryPath?: string;
classifier?: {
host?: string;
model?: string;
@ -1323,6 +1325,8 @@ export class Config implements McpContext, AgentLoopContext {
};
this.gemmaModelRouter = {
enabled: params.gemmaModelRouter?.enabled ?? false,
autoStartServer: params.gemmaModelRouter?.autoStartServer ?? true,
binaryPath: params.gemmaModelRouter?.binaryPath ?? '',
classifier: {
host:
params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379',

View file

@ -7,6 +7,8 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LocalLiteRtLmClient } from './localLiteRtLmClient.js';
import type { Config } from '../config/config.js';
import { GoogleGenAI } from '@google/genai';
const mockGenerateContent = vi.fn();
vi.mock('@google/genai', () => {
@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => {
const result = await client.generateJson([], 'test-instruction');
expect(result).toEqual({ key: 'value' });
expect(GoogleGenAI).toHaveBeenCalledWith(
expect.objectContaining({
apiVersion: 'v1beta',
httpOptions: expect.objectContaining({
baseUrl: 'http://test-host:1234',
}),
}),
);
expect(mockGenerateContent).toHaveBeenCalledWith(
expect.objectContaining({
model: 'gemma:latest',

View file

@ -25,6 +25,8 @@ export class LocalLiteRtLmClient {
this.client = new GoogleGenAI({
// The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication.
apiKey: 'no-api-key-needed',
apiVersion: 'v1beta',
vertexai: false,
httpOptions: {
baseUrl: this.host,
// If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).

View file

@ -2920,6 +2920,20 @@
"default": false,
"type": "boolean"
},
"autoStartServer": {
"title": "Auto-start LiteRT Server",
"description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.",
"markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
"default": true,
"type": "boolean"
},
"binaryPath": {
"title": "LiteRT Binary Path",
"description": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).",
"markdownDescription": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: ``",
"default": "",
"type": "string"
},
"classifier": {
"title": "Classifier",
"description": "Classifier configuration.",