feat(cli): add streamlined gemini gemma local model setup (#25498)

Co-authored-by: Abhijit Balaji <abhijitbalaji@google.com>
Co-authored-by: Samee Zahid <sameez@google.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Samee Zahid 2026-04-20 16:57:56 -07:00 committed by GitHub
parent 6afc47f81c
commit 1d383a4a8e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 2509 additions and 12 deletions

View file

@ -162,12 +162,14 @@ they appear in the UI.
### Experimental
| UI Label | Setting | Description | Default |
| ---------------------------------------------------- | ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` |
| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` |
| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` |
| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` |
| Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. | `false` |
| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` |
| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` |
| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` |

View file

@ -1711,6 +1711,18 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.autoStartServer`** (boolean):
- **Description:** Automatically start the LiteRT-LM server when Gemini CLI
starts and the Gemma router is enabled.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.binaryPath`** (string):
- **Description:** Custom path to the LiteRT-LM binary. Leave empty to use the
default location (~/.gemini/bin/litert/).
- **Default:** `""`
- **Requires restart:** Yes
- **`experimental.gemmaModelRouter.classifier.host`** (string):
- **Description:** The host of the classifier.
- **Default:** `"http://localhost:9379"`

View file

@ -0,0 +1,33 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule, Argv } from 'yargs';
import { initializeOutputListenersAndFlush } from '../gemini.js';
import { defer } from '../deferred.js';
import { setupCommand } from './gemma/setup.js';
import { startCommand } from './gemma/start.js';
import { stopCommand } from './gemma/stop.js';
import { statusCommand } from './gemma/status.js';
import { logsCommand } from './gemma/logs.js';
/**
 * Top-level `gemini gemma` command group: manages the local Gemma model
 * routing stack (LiteRT-LM setup, server lifecycle, and logs).
 */
export const gemmaCommand: CommandModule = {
  command: 'gemma',
  describe: 'Manage local Gemma model routing',
  builder: (yargs: Argv) =>
    yargs
      .middleware((argv) => {
        // Runs before any subcommand handler: flush buffered output
        // listeners and mark this invocation as a CLI subcommand.
        initializeOutputListenersAndFlush();
        argv['isCommand'] = true;
      })
      // Subcommands are wrapped in defer(...) (see ../deferred.js) —
      // presumably to delay loading their modules; confirm in deferred.js.
      .command(defer(setupCommand, 'gemma'))
      .command(defer(startCommand, 'gemma'))
      .command(defer(stopCommand, 'gemma'))
      .command(defer(statusCommand, 'gemma'))
      .command(defer(logsCommand, 'gemma'))
      .demandCommand(1, 'You need at least one command before continuing.')
      .version(false),
  // No-op: demandCommand(1) guarantees a subcommand handler always runs.
  handler: () => {},
};

View file

@ -0,0 +1,45 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import path from 'node:path';
import { Storage } from '@google/gemini-cli-core';
// Pinned LiteRT-LM release used for binary downloads and checksums.
export const LITERT_RELEASE_VERSION = 'v0.9.0-alpha03';
export const LITERT_RELEASE_BASE_URL =
  'https://github.com/google-ai-edge/LiteRT-LM/releases/download';
// Model identifier the local server exposes via the Gemini API shim.
export const GEMMA_MODEL_NAME = 'gemma3-1b-gpu-custom';
// Default local server/classifier port (matches the settings default).
export const DEFAULT_PORT = 9379;
// Abort the health-check probe after this long.
export const HEALTH_CHECK_TIMEOUT_MS = 5000;
// API version path segment used when building server routes.
export const LITERT_API_VERSION = 'v1beta';
// NOTE(review): presumably the delay after spawning the server before
// probing it — confirm against start.ts.
export const SERVER_START_WAIT_MS = 3000;
// Maps `${process.platform}-${process.arch}` to its release binary name.
export const PLATFORM_BINARY_MAP: Record<string, string> = {
  'darwin-arm64': 'lit.macos_arm64',
  'linux-x64': 'lit.linux_x86_64',
  'win32-x64': 'lit.windows_x86_64.exe',
};
// SHA-256 hashes for the official LiteRT-LM v0.9.0-alpha03 release binaries.
export const PLATFORM_BINARY_SHA256: Record<string, string> = {
  'lit.macos_arm64':
    '9e826a2634f2e8b220ad0f1e1b5c139e0b47cb172326e3b7d46d31382f49478e',
  'lit.linux_x86_64':
    '66601df8a07f08244b188e9fcab0bf4a16562fe76d8d47e49f40273d57541ee8',
  'lit.windows_x86_64.exe':
    'de82d2829d2fb1cbdb318e2d8a78dc2f9659ff14cb11b2894d1f30e0bfde2bf6',
};
/** Default install directory for the LiteRT-LM binary under ~/.gemini. */
export function getLiteRtBinDir(): string {
  const globalDir = Storage.getGlobalGeminiDir();
  return path.join(globalDir, 'bin', 'litert');
}
/** Path of the PID file recording the launched LiteRT server process. */
export function getPidFilePath(): string {
  const tempDir = Storage.getGlobalTempDir();
  return path.join(tempDir, 'litert-server.pid');
}
/** Path of the LiteRT server log file in the global temp directory. */
export function getLogFilePath(): string {
  const tempDir = Storage.getGlobalTempDir();
  return path.join(tempDir, 'litert-server.log');
}

View file

@ -0,0 +1,186 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import type { ChildProcess } from 'node:child_process';
import { EventEmitter } from 'node:events';
import os from 'node:os';
import path from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { spawn } from 'node:child_process';
import { exitCli } from '../utils.js';
import { getLogFilePath } from './constants.js';
import { logsCommand, readLastLines } from './logs.js';
// Replace the core debug logger with the shared test double; ANSI codes
// are preserved (stripAnsi: false) so raw output can be asserted.
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const { mockCoreDebugLogger } = await import(
    '../../test-utils/mockDebugLogger.js'
  );
  return mockCoreDebugLogger(
    await importOriginal<typeof import('@google/gemini-cli-core')>(),
    {
      stripAnsi: false,
    },
  );
});
// Stub spawn so no real `tail` process is ever launched by the handler.
vi.mock('node:child_process', async (importOriginal) => {
  const actual = await importOriginal<typeof import('node:child_process')>();
  return {
    ...actual,
    spawn: vi.fn(),
  };
});
// Stub exitCli so handlers do not terminate the test process.
vi.mock('../utils.js', () => ({
  exitCli: vi.fn(),
}));
// Stub the log-path lookup so tests can pin it to a fixed value.
vi.mock('./constants.js', () => ({
  getLogFilePath: vi.fn(),
}));
// Minimal ChildProcess stand-in: an EventEmitter with a spy kill().
function createMockChild(): ChildProcess {
  return Object.assign(new EventEmitter(), {
    kill: vi.fn(),
  }) as unknown as ChildProcess;
}
// Let already-queued promise chains settle without advancing timers.
async function flushMicrotasks() {
  await Promise.resolve();
  await Promise.resolve();
}
describe('readLastLines', () => {
  const tempFiles: string[] = [];
  // Remove any fixture files created by the tests in this block.
  afterEach(async () => {
    await Promise.all(
      tempFiles
        .splice(0)
        .map((filePath) => fs.promises.rm(filePath, { force: true })),
    );
  });
  it('returns only the requested tail lines without reading the whole file eagerly', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
    );
    tempFiles.push(filePath);
    // 2000 numbered lines with a trailing newline; the tail is known.
    const content = Array.from({ length: 2000 }, (_, i) => `line-${i + 1}`)
      .join('\n')
      .concat('\n');
    await fs.promises.writeFile(filePath, content, 'utf-8');
    await expect(readLastLines(filePath, 3)).resolves.toBe(
      'line-1998\nline-1999\nline-2000\n',
    );
  });
  it('returns an empty string when zero lines are requested', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-logs-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'line-1\nline-2\n', 'utf-8');
    await expect(readLastLines(filePath, 0)).resolves.toBe('');
  });
});
describe('logsCommand', () => {
  const originalPlatform = process.platform;
  beforeEach(() => {
    vi.clearAllMocks();
    // Force a POSIX platform so the `tail`-based code path is exercised.
    Object.defineProperty(process, 'platform', {
      value: 'linux',
      configurable: true,
    });
    vi.mocked(getLogFilePath).mockReturnValue('/tmp/gemma.log');
    // Pretend the log file exists so the handler proceeds to tail it.
    vi.spyOn(fs.promises, 'access').mockResolvedValue(undefined);
  });
  afterEach(() => {
    Object.defineProperty(process, 'platform', {
      value: originalPlatform,
      configurable: true,
    });
    vi.restoreAllMocks();
  });
  it('waits for the tail process to close before exiting in follow mode', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    let resolved = false;
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({}).then(() => {
      resolved = true;
    });
    await flushMicrotasks();
    // No flags: defaults to follow mode with the last 20 lines.
    expect(spawn).toHaveBeenCalledWith(
      'tail',
      ['-f', '-n', '20', '/tmp/gemma.log'],
      { stdio: 'inherit' },
    );
    // The handler must still be pending while tail is running.
    expect(resolved).toBe(false);
    expect(exitCli).not.toHaveBeenCalled();
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });
  it('uses one-shot tail output when follow is disabled', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({ follow: false });
    await flushMicrotasks();
    // No '-f' flag when follow is explicitly disabled.
    expect(spawn).toHaveBeenCalledWith('tail', ['-n', '20', '/tmp/gemma.log'], {
      stdio: 'inherit',
    });
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });
  it('follows from the requested line count when both --lines and --follow are set', async () => {
    const child = createMockChild();
    vi.mocked(spawn).mockReturnValue(child);
    const handlerPromise = (
      logsCommand.handler as (argv: Record<string, unknown>) => Promise<void>
    )({ lines: 5, follow: true });
    await flushMicrotasks();
    expect(spawn).toHaveBeenCalledWith(
      'tail',
      ['-f', '-n', '5', '/tmp/gemma.log'],
      { stdio: 'inherit' },
    );
    child.emit('close', 0);
    await handlerPromise;
    expect(exitCli).toHaveBeenCalledWith(0);
  });
});

View file

@ -0,0 +1,200 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import { spawn, type ChildProcess } from 'node:child_process';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import { getLogFilePath } from './constants.js';
/**
 * Reads the last `count` lines of a file without loading the whole file:
 * the file is scanned backwards in 64 KiB chunks until enough newlines
 * have been seen. Returns '' for `count <= 0` or an empty file; a
 * non-empty result always ends with a trailing newline.
 */
export async function readLastLines(
  filePath: string,
  count: number,
): Promise<string> {
  if (count <= 0) {
    return '';
  }
  const CHUNK_SIZE = 64 * 1024;
  const fileHandle = await fs.promises.open(filePath, fs.constants.O_RDONLY);
  try {
    const stats = await fileHandle.stat();
    if (stats.size === 0) {
      return '';
    }
    const chunks: Buffer[] = [];
    let totalBytes = 0;
    let newlineCount = 0;
    let position = stats.size;
    // Read backwards until count+1 newlines are buffered, which
    // guarantees at least `count` complete lines are in memory.
    while (position > 0 && newlineCount <= count) {
      const readSize = Math.min(CHUNK_SIZE, position);
      position -= readSize;
      const buffer = Buffer.allocUnsafe(readSize);
      const { bytesRead } = await fileHandle.read(
        buffer,
        0,
        readSize,
        position,
      );
      if (bytesRead === 0) {
        break;
      }
      const chunk =
        bytesRead === readSize ? buffer : buffer.subarray(0, bytesRead);
      chunks.unshift(chunk);
      totalBytes += chunk.length;
      // Count LF bytes directly; 0x0a never occurs inside a UTF-8
      // multi-byte sequence, so byte-level counting is safe.
      for (const byte of chunk) {
        if (byte === 0x0a) {
          newlineCount += 1;
        }
      }
    }
    const content = Buffer.concat(chunks, totalBytes).toString('utf-8');
    const lines = content.split('\n');
    // If we stopped mid-file, the first buffered "line" may be a partial
    // fragment: inspect the byte just before our window and drop it
    // unless the window happens to start on a line boundary.
    if (position > 0 && lines.length > 0) {
      const boundary = Buffer.allocUnsafe(1);
      const { bytesRead } = await fileHandle.read(boundary, 0, 1, position - 1);
      if (bytesRead === 1 && boundary[0] !== 0x0a) {
        lines.shift();
      }
    }
    // A trailing newline yields one empty final element; drop it so it
    // is not counted as a line.
    if (lines.length > 0 && lines[lines.length - 1] === '') {
      lines.pop();
    }
    if (lines.length === 0) {
      return '';
    }
    return lines.slice(-count).join('\n') + '\n';
  } finally {
    await fileHandle.close();
  }
}
/** Parsed CLI flags for `gemini gemma logs`. */
interface LogsArgs {
  // Number of trailing lines to show; when omitted the command follows.
  lines?: number;
  // Force or suppress follow mode; defaults to true when --lines is omitted.
  follow?: boolean;
}
/**
 * Resolves with the child's exit code once it closes, or rejects when the
 * process emits 'error' (e.g. the executable could not be spawned). A null
 * exit code (terminated by signal) is reported as 1.
 */
function waitForChild(child: ChildProcess): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const onClose = (code: number | null) => {
      resolve(code === null ? 1 : code);
    };
    child.once('error', reject);
    child.once('close', onClose);
  });
}
/**
 * Runs `tail` against the log file, inheriting stdio so output streams
 * straight to the terminal. In follow mode, Ctrl+C (SIGINT) forwards a
 * SIGTERM to the tail child so it shuts down cleanly.
 *
 * @returns tail's exit code (1 when it closed without one).
 */
async function runTail(logPath: string, lines: number, follow: boolean) {
  const tailArgs = ['-n', String(lines), logPath];
  if (follow) {
    tailArgs.unshift('-f');
  }
  const child = spawn('tail', tailArgs, { stdio: 'inherit' });
  if (!follow) {
    return waitForChild(child);
  }
  const forwardTermination = () => {
    child.kill('SIGTERM');
  };
  process.once('SIGINT', forwardTermination);
  try {
    return await waitForChild(child);
  } finally {
    // Always detach the handler so later SIGINTs behave normally.
    process.off('SIGINT', forwardTermination);
  }
}
/**
 * `gemini gemma logs` — shows the LiteRT-LM server log, either as a
 * one-shot tail (--lines N) or by following live output via the system
 * `tail` command (live follow is unsupported on Windows).
 */
export const logsCommand: CommandModule<object, LogsArgs> = {
  command: 'logs',
  describe: 'View LiteRT-LM server logs',
  builder: (yargs) =>
    yargs
      .option('lines', {
        alias: 'n',
        type: 'number',
        description: 'Show the last N lines and exit (omit to follow live)',
      })
      .option('follow', {
        alias: 'f',
        type: 'boolean',
        description:
          'Follow log output (defaults to true when --lines is omitted)',
      }),
  handler: async (argv) => {
    const logPath = getLogFilePath();
    // Bail out early with guidance when no log file exists yet.
    try {
      await fs.promises.access(logPath, fs.constants.F_OK);
    } catch {
      debugLogger.log(`No log file found at ${logPath}`);
      debugLogger.log(
        'Is the LiteRT server running? Start it with: gemini gemma start',
      );
      await exitCli(1);
      return;
    }
    const lines = argv.lines;
    // Follow by default only when no fixed line count was requested.
    const follow = argv.follow ?? lines === undefined;
    const requestedLines = lines ?? 20;
    if (follow && process.platform === 'win32') {
      debugLogger.log(
        'Live log following is not supported on Windows. Use --lines N to view recent logs.',
      );
      await exitCli(1);
      return;
    }
    // Windows has no `tail`; read the file tail in-process instead.
    if (process.platform === 'win32') {
      process.stdout.write(await readLastLines(logPath, requestedLines));
      await exitCli(0);
      return;
    }
    try {
      if (follow) {
        debugLogger.log(`Tailing ${logPath} (Ctrl+C to stop)\n`);
      }
      const exitCode = await runTail(logPath, requestedLines, follow);
      await exitCli(exitCode);
    } catch (error) {
      // ENOENT here means the `tail` executable itself is missing.
      if (
        error instanceof Error &&
        'code' in error &&
        error.code === 'ENOENT'
      ) {
        if (!follow) {
          // One-shot mode can still be served by the in-process reader.
          process.stdout.write(await readLastLines(logPath, requestedLines));
          await exitCli(0);
        } else {
          debugLogger.error(
            '"tail" command not found. Use --lines N to view recent logs without tail.',
          );
          await exitCli(1);
        }
      } else {
        debugLogger.error(
          `Failed to read log output: ${error instanceof Error ? error.message : String(error)}`,
        );
        await exitCli(1);
      }
    }
  },
};

View file

@ -0,0 +1,162 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import path from 'node:path';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { SettingScope } from '../../config/settings.js';
import { getLiteRtBinDir } from './constants.js';
// loadSettings is hoisted-mocked so platform.js (imported below, after
// the mock) sees the stub; only the User scope enum value is exposed.
const mockLoadSettings = vi.hoisted(() => vi.fn());
vi.mock('../../config/settings.js', () => ({
  loadSettings: mockLoadSettings,
  SettingScope: {
    User: 'User',
  },
}));
import {
getBinaryPath,
isExpectedLiteRtServerCommand,
isBinaryInstalled,
readServerProcessInfo,
resolveGemmaConfig,
} from './platform.js';
describe('gemma platform helpers', () => {
  // Builds a settings stand-in with independent user-scope and merged
  // gemmaModelRouter values, so scope handling can be asserted.
  function createMockSettings(
    userGemmaSettings?: object,
    mergedGemmaSettings?: object,
  ) {
    return {
      merged: {
        experimental: {
          gemmaModelRouter: mergedGemmaSettings,
        },
      },
      forScope: vi.fn((scope: SettingScope) => {
        if (scope !== SettingScope.User) {
          throw new Error(`Unexpected scope ${scope}`);
        }
        return {
          settings: {
            experimental: {
              gemmaModelRouter: userGemmaSettings,
            },
          },
        };
      }),
    };
  }
  beforeEach(() => {
    vi.clearAllMocks();
    mockLoadSettings.mockReturnValue(createMockSettings());
  });
  it('prefers the configured binary path from settings', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings({ binaryPath: '/custom/lit' }),
    );
    expect(getBinaryPath('lit.test')).toBe('/custom/lit');
  });
  // Security property: only the user scope may override the binary path.
  it('ignores workspace overrides for the configured binary path', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings(
        { binaryPath: '/user/lit' },
        { binaryPath: '/workspace/evil' },
      ),
    );
    expect(getBinaryPath('lit.test')).toBe('/user/lit');
  });
  it('falls back to the default install location when no custom path is set', () => {
    expect(getBinaryPath('lit.test')).toBe(
      path.join(getLiteRtBinDir(), 'lit.test'),
    );
  });
  it('resolves the configured port and binary path from settings', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings(
        { binaryPath: '/custom/lit' },
        {
          enabled: true,
          classifier: {
            host: 'http://localhost:8123/v1beta',
          },
        },
      ),
    );
    expect(resolveGemmaConfig(9379)).toEqual({
      settingsEnabled: true,
      configuredPort: 8123,
      configuredBinaryPath: '/custom/lit',
    });
  });
  it('checks binary installation using the resolved binary path', () => {
    mockLoadSettings.mockReturnValue(
      createMockSettings({ binaryPath: '/custom/lit' }),
    );
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    expect(isBinaryInstalled()).toBe(true);
    expect(fs.existsSync).toHaveBeenCalledWith('/custom/lit');
  });
  it('parses structured server process info from the pid file', () => {
    vi.spyOn(fs, 'readFileSync').mockReturnValue(
      JSON.stringify({
        pid: 1234,
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    );
    expect(readServerProcessInfo()).toEqual({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
  });
  // Older versions wrote a bare PID; the reader must still accept it.
  it('parses legacy pid-only files for backward compatibility', () => {
    vi.spyOn(fs, 'readFileSync').mockReturnValue('4321');
    expect(readServerProcessInfo()).toEqual({
      pid: 4321,
    });
  });
  it('matches only the expected LiteRT serve command', () => {
    expect(
      isExpectedLiteRtServerCommand('/custom/lit serve --port=8123 --verbose', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(true);
    expect(
      isExpectedLiteRtServerCommand('/custom/lit run --port=8123', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(false);
    expect(
      isExpectedLiteRtServerCommand('/custom/lit serve --port=9000', {
        binaryPath: '/custom/lit',
        port: 8123,
      }),
    ).toBe(false);
  });
});

View file

@ -0,0 +1,316 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { loadSettings, SettingScope } from '../../config/settings.js';
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync } from 'node:child_process';
import {
PLATFORM_BINARY_MAP,
LITERT_RELEASE_BASE_URL,
LITERT_RELEASE_VERSION,
getLiteRtBinDir,
GEMMA_MODEL_NAME,
HEALTH_CHECK_TIMEOUT_MS,
LITERT_API_VERSION,
getPidFilePath,
} from './constants.js';
/** Platform key (e.g. "linux-x64") and its matching release binary name. */
export interface PlatformInfo {
  key: string;
  binaryName: string;
}
/** Effective Gemma router configuration resolved from settings. */
export interface GemmaConfigStatus {
  // True when experimental.gemmaModelRouter.enabled is set in merged settings.
  settingsEnabled: boolean;
  // Port parsed from the classifier host, else the caller's fallback.
  configuredPort: number;
  // User-scoped binaryPath override, when configured.
  configuredBinaryPath?: string;
}
/** Structured contents of the server PID file. */
export interface LiteRtServerProcessInfo {
  pid: number;
  // Binary used to launch the server (absent in legacy pid-only files).
  binaryPath?: string;
  // Port the server was started on (absent in legacy pid-only files).
  port?: number;
}
/**
 * Reads the binaryPath override from the User settings scope only (the
 * merged/workspace view is deliberately not consulted — see the
 * "ignores workspace overrides" test). Returns undefined when unset,
 * blank, or when settings fail to load.
 */
function getUserConfiguredBinaryPath(
  workspaceDir = process.cwd(),
): string | undefined {
  try {
    const userScope = loadSettings(workspaceDir).forScope(SettingScope.User);
    const configured =
      userScope.settings.experimental?.gemmaModelRouter?.binaryPath?.trim();
    return configured ? configured : undefined;
  } catch {
    return undefined;
  }
}
/**
 * Extracts a port number from a classifier host value.
 *
 * Accepts full URLs ("http://localhost:8123/v1beta") as well as bare
 * host:port strings ("localhost:9379"). Note that a scheme-less
 * "host:port" string parses as a *valid* URL (with "host:" as the
 * protocol and an empty port), so when URL parsing yields no usable
 * port we fall through to the regex scan instead of returning early —
 * the previous early return made "localhost:9379" resolve to the
 * fallback port.
 *
 * @param host - The configured classifier host, if any.
 * @param fallbackPort - Returned when no valid port can be extracted.
 */
function parsePortFromHost(
  host: string | undefined,
  fallbackPort: number,
): number {
  if (!host) {
    return fallbackPort;
  }
  try {
    const url = new URL(host);
    const port = Number(url.port);
    if (Number.isFinite(port) && port > 0) {
      return port;
    }
  } catch {
    // Not parseable as a URL at all; fall through to the regex scan.
  }
  const match = host.match(/:(\d+)/);
  if (!match) {
    return fallbackPort;
  }
  const port = parseInt(match[1], 10);
  return Number.isFinite(port) && port > 0 ? port : fallbackPort;
}
export function resolveGemmaConfig(fallbackPort: number): GemmaConfigStatus {
let settingsEnabled = false;
let configuredPort = fallbackPort;
const configuredBinaryPath = getUserConfiguredBinaryPath();
try {
const settings = loadSettings(process.cwd());
const gemmaSettings = settings.merged.experimental?.gemmaModelRouter;
settingsEnabled = gemmaSettings?.enabled === true;
configuredPort = parsePortFromHost(
gemmaSettings?.classifier?.host,
fallbackPort,
);
} catch {
// ignore — settings may fail to load outside a workspace
}
return { settingsEnabled, configuredPort, configuredBinaryPath };
}
/**
 * Maps the current platform/arch pair to its LiteRT-LM release binary.
 * Returns null when no prebuilt binary exists for this platform.
 */
export function detectPlatform(): PlatformInfo | null {
  const key = `${process.platform}-${process.arch}`;
  const binaryName = PLATFORM_BINARY_MAP[key];
  return binaryName ? { key, binaryName } : null;
}
export function getBinaryPath(binaryName?: string): string | null {
const configuredBinaryPath = getUserConfiguredBinaryPath();
if (configuredBinaryPath) {
return configuredBinaryPath;
}
const name = binaryName ?? detectPlatform()?.binaryName;
if (!name) return null;
return path.join(getLiteRtBinDir(), name);
}
/** Builds the GitHub release download URL for a LiteRT-LM binary. */
export function getBinaryDownloadUrl(binaryName: string): string {
  return [LITERT_RELEASE_BASE_URL, LITERT_RELEASE_VERSION, binaryName].join(
    '/',
  );
}
/** True when the resolved (or explicitly given) binary exists on disk. */
export function isBinaryInstalled(binaryPath = getBinaryPath()): boolean {
  return binaryPath ? fs.existsSync(binaryPath) : false;
}
/**
 * Checks whether the Gemma model is present by asking the LiteRT binary
 * to list installed models. Any failure (missing binary, timeout,
 * non-zero exit) is treated as "not downloaded".
 */
export function isModelDownloaded(binaryPath: string): boolean {
  try {
    const listing = execFileSync(binaryPath, ['list'], {
      encoding: 'utf-8',
      timeout: 10000,
    });
    return listing.includes(GEMMA_MODEL_NAME);
  } catch {
    return false;
  }
}
/**
 * Probes the local LiteRT server by POSTing to the Gemma model's
 * generateContent route with a bounded timeout. Any status other than
 * 404 counts as running: a 400 still proves the route exists, while a
 * 404 means a different server (or model) owns the port. Network errors
 * and timeouts report false.
 */
export async function isServerRunning(port: number): Promise<boolean> {
  const url = `http://localhost:${port}/${LITERT_API_VERSION}/models/${GEMMA_MODEL_NAME}:generateContent`;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS);
  try {
    const response = await fetch(url, {
      method: 'POST',
      signal: controller.signal,
    });
    return response.status !== 404;
  } catch {
    return false;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Runtime type guard for data read back from the PID file. Only own
 * data properties are inspected (via property descriptors), so
 * inherited or getter-backed values on a crafted object are not
 * trusted.
 */
function isLiteRtServerProcessInfo(
  value: unknown,
): value is LiteRtServerProcessInfo {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  const ownValue = (key: string): unknown =>
    Object.getOwnPropertyDescriptor(value, key)?.value;
  const isPositiveInteger = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' &&
    Number.isInteger(candidate) &&
    candidate > 0;
  const isNonEmptyString = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.length > 0;
  if (!isPositiveInteger(ownValue('pid'))) {
    return false;
  }
  const binaryPath = ownValue('binaryPath');
  if (binaryPath !== undefined && !isNonEmptyString(binaryPath)) {
    return false;
  }
  const port = ownValue('port');
  if (port !== undefined && !isPositiveInteger(port)) {
    return false;
  }
  return true;
}
/**
 * Loads the recorded server process info from the PID file. Supports
 * both the structured JSON format and legacy files holding a bare PID.
 * Returns null for missing, empty, or malformed files.
 */
export function readServerProcessInfo(): LiteRtServerProcessInfo | null {
  try {
    const raw = fs.readFileSync(getPidFilePath(), 'utf-8').trim();
    if (raw.length === 0) {
      return null;
    }
    // Legacy format: the file contains only the PID digits.
    if (/^\d+$/.test(raw)) {
      return { pid: parseInt(raw, 10) };
    }
    const parsed: unknown = JSON.parse(raw);
    return isLiteRtServerProcessInfo(parsed) ? parsed : null;
  } catch {
    return null;
  }
}
/** Persists the launched server's process info to the PID file as JSON. */
export function writeServerProcessInfo(
  processInfo: LiteRtServerProcessInfo,
): void {
  const serialized = JSON.stringify(processInfo);
  fs.writeFileSync(getPidFilePath(), serialized, 'utf-8');
}
/** Convenience accessor: the recorded server PID, or null when absent. */
export function readServerPid(): number | null {
  const info = readServerProcessInfo();
  return info === null ? null : info.pid;
}
/**
 * Canonicalizes a command-line string for comparison: NUL separators
 * (as produced by /proc/<pid>/cmdline) become spaces, and runs of
 * whitespace collapse to one space. On Windows the result is also made
 * case- and slash-insensitive.
 */
function normalizeProcessValue(value: string): string {
  const despaced = value.replace(/\0/g, ' ').trim();
  if (process.platform !== 'win32') {
    return despaced.replace(/\s+/g, ' ');
  }
  return despaced.replace(/\\/g, '/').replace(/\s+/g, ' ').toLowerCase();
}
/**
 * Best-effort lookup of a process's full command line, per platform:
 * /proc on Linux, Get-CimInstance via PowerShell on Windows, and
 * `ps -o command=` elsewhere (macOS/BSD). Returns null when the process
 * is gone or the lookup fails.
 */
function readProcessCommandLine(pid: number): string | null {
  try {
    switch (process.platform) {
      case 'linux': {
        // Returned untrimmed: cmdline is NUL-separated and the caller's
        // normalization converts NULs to spaces.
        const cmdline = fs.readFileSync(`/proc/${pid}/cmdline`, 'utf-8');
        return cmdline.trim() ? cmdline : null;
      }
      case 'win32': {
        const output = execFileSync(
          'powershell.exe',
          [
            '-NoProfile',
            '-Command',
            `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}").CommandLine`,
          ],
          {
            encoding: 'utf-8',
            timeout: 5000,
          },
        );
        return output.trim() || null;
      }
      default: {
        const output = execFileSync(
          'ps',
          ['-p', String(pid), '-o', 'command='],
          {
            encoding: 'utf-8',
            timeout: 5000,
          },
        );
        return output.trim() || null;
      }
    }
  } catch {
    return null;
  }
}
/**
 * Verifies that a command line looks like the LiteRT server we started:
 * a standalone `serve` token is required, plus the expected `--port=N`
 * argument when a port is known, and — when a binary path is known —
 * either the full path or its basename. Presumably used so callers
 * avoid acting on an unrelated process that reused the recorded PID.
 */
export function isExpectedLiteRtServerCommand(
  commandLine: string,
  options: {
    binaryPath?: string | null;
    port?: number;
  },
): boolean {
  const normalized = normalizeProcessValue(commandLine);
  if (normalized.length === 0) {
    return false;
  }
  const hasServeToken = /(^|\s|")serve(\s|$)/.test(normalized);
  if (!hasServeToken) {
    return false;
  }
  const { port, binaryPath } = options;
  if (port !== undefined && !normalized.includes(`--port=${port}`)) {
    return false;
  }
  if (!binaryPath) {
    return true;
  }
  const fullPath = normalizeProcessValue(binaryPath);
  const baseName = normalizeProcessValue(path.basename(binaryPath));
  return normalized.includes(fullPath) || normalized.includes(baseName);
}
/**
 * Resolves the live command line for `pid` and checks it against the
 * expected LiteRT serve invocation. Reports false when the command line
 * cannot be read (process exited or lookup unsupported).
 */
export function isExpectedLiteRtServerProcess(
  pid: number,
  options: {
    binaryPath?: string | null;
    port?: number;
  },
): boolean {
  const commandLine = readProcessCommandLine(pid);
  return !commandLine
    ? false
    : isExpectedLiteRtServerCommand(commandLine, options);
}
/**
 * Checks whether a process with the given PID exists. Signal 0 performs
 * the existence/permission check without delivering a signal; any
 * failure (including permission errors) reports false here.
 */
export function isProcessRunning(pid: number): boolean {
  try {
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}

View file

@ -0,0 +1,60 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { afterEach, describe, expect, it } from 'vitest';
import { PLATFORM_BINARY_MAP, PLATFORM_BINARY_SHA256 } from './constants.js';
import { computeFileSha256, verifyFileSha256 } from './setup.js';
describe('gemma setup checksum helpers', () => {
  const tempFiles: string[] = [];
  // Remove any fixture files created by the tests in this block.
  afterEach(async () => {
    await Promise.all(
      tempFiles
        .splice(0)
        .map((filePath) => fs.promises.rm(filePath, { force: true })),
    );
  });
  // Guards against adding a platform binary without pinning its hash.
  it('has a pinned checksum for every supported LiteRT binary', () => {
    expect(Object.keys(PLATFORM_BINARY_SHA256).sort()).toEqual(
      Object.values(PLATFORM_BINARY_MAP).sort(),
    );
  });
  // The expected value is the well-known SHA-256 of "hello world".
  it('computes the sha256 for a downloaded file', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
    await expect(computeFileSha256(filePath)).resolves.toBe(
      'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
    );
  });
  it('verifies whether a file matches the expected sha256', async () => {
    const filePath = path.join(
      os.tmpdir(),
      `gemma-setup-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    tempFiles.push(filePath);
    await fs.promises.writeFile(filePath, 'hello world', 'utf-8');
    await expect(
      verifyFileSha256(
        filePath,
        'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
      ),
    ).resolves.toBe(true);
    await expect(verifyFileSha256(filePath, 'deadbeef')).resolves.toBe(false);
  });
});

View file

@ -0,0 +1,504 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import { createHash } from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync, spawn as nodeSpawn } from 'node:child_process';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { loadSettings, SettingScope } from '../../config/settings.js';
import { exitCli } from '../utils.js';
import {
DEFAULT_PORT,
GEMMA_MODEL_NAME,
PLATFORM_BINARY_SHA256,
} from './constants.js';
import {
detectPlatform,
getBinaryDownloadUrl,
getBinaryPath,
isBinaryInstalled,
isModelDownloaded,
} from './platform.js';
import { startServer } from './start.js';
import readline from 'node:readline';
// Short aliases for the shared debug logger used throughout this file.
const log = (msg: string) => debugLogger.log(msg);
const logError = (msg: string) => debugLogger.error(msg);
/**
 * Asks a yes/no question on stdin. Only an explicit "y"/"yes"
 * (case-insensitive) counts as acceptance, matching the (y/N) default.
 */
async function promptYesNo(question: string): Promise<boolean> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  return new Promise((resolve) => {
    rl.question(`${question} (y/N): `, (answer) => {
      rl.close();
      const normalized = answer.trim().toLowerCase();
      resolve(normalized === 'y' || normalized === 'yes');
    });
  });
}
/**
 * Formats a byte count for human-readable progress output, choosing the
 * largest fitting unit. A GB tier is included because the Gemma model
 * download is ~1 GB — without it, values render as e.g. "1024.0 MB".
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  if (bytes < 1024 * 1024 * 1024) {
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  }
  return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
}
/**
 * Draws a single-line progress bar on stderr using carriage-return
 * overwrite. When the total size is unknown, only the running byte
 * count is shown.
 */
function renderProgress(downloaded: number, total: number | null): void {
  const barWidth = 30;
  if (!total || total <= 0) {
    process.stderr.write(`\r Downloaded ${formatBytes(downloaded)}`);
    return;
  }
  const pct = Math.min(downloaded / total, 1);
  const filled = Math.round(barWidth * pct);
  const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled);
  const pctStr = (pct * 100).toFixed(0).padStart(3);
  process.stderr.write(
    `\r [${bar}] ${pctStr}% ${formatBytes(downloaded)} / ${formatBytes(total)}`,
  );
}
/**
 * Streams a download to `destPath`, writing to a temporary
 * `.downloading` file first and renaming into place only on success, so
 * an interrupted download never leaves a partial file at the final
 * path. Progress is rendered to stderr as chunks arrive.
 *
 * Fixes over the previous version: the write stream's 'finish'/'error'
 * listeners are registered before any data is written — previously a
 * stream 'error' emitted mid-download had no listener and would crash
 * the process as an unhandled 'error' event — and the temp file is
 * removed when the download fails.
 *
 * @throws Error when the HTTP response is not OK, has no body, or the
 *   network/stream fails.
 */
async function downloadFile(url: string, destPath: string): Promise<void> {
  const tmpPath = destPath + '.downloading';
  if (fs.existsSync(tmpPath)) {
    fs.unlinkSync(tmpPath);
  }
  const response = await fetch(url, { redirect: 'follow' });
  if (!response.ok) {
    throw new Error(
      `Download failed: HTTP ${response.status} ${response.statusText}`,
    );
  }
  if (!response.body) {
    throw new Error('Download failed: No response body');
  }
  const contentLength = response.headers.get('content-length');
  const totalBytes = contentLength ? parseInt(contentLength, 10) : null;
  let downloadedBytes = 0;
  const fileStream = fs.createWriteStream(tmpPath);
  // Register completion handlers up front so an early 'error' event is
  // captured by this promise instead of crashing as an unhandled event.
  const streamDone = new Promise<void>((resolve, reject) => {
    fileStream.on('finish', resolve);
    fileStream.on('error', reject);
  });
  const reader = response.body.getReader();
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      const writeOk = fileStream.write(value);
      if (!writeOk) {
        // Respect backpressure: wait for the buffer to drain.
        await new Promise<void>((resolve) => fileStream.once('drain', resolve));
      }
      downloadedBytes += value.byteLength;
      renderProgress(downloadedBytes, totalBytes);
    }
    fileStream.end();
    await streamDone;
  } catch (error) {
    fileStream.destroy();
    // The completion promise may reject after destroy; swallow it so the
    // original failure is what propagates.
    await streamDone.catch(() => {});
    fs.rmSync(tmpPath, { force: true });
    throw error;
  } finally {
    // Clear the progress line regardless of outcome.
    process.stderr.write('\r' + ' '.repeat(80) + '\r');
  }
  fs.renameSync(tmpPath, destPath);
}
/**
 * Computes the hex-encoded SHA-256 digest of a file by streaming it, so
 * large binaries are never buffered fully in memory. Rejects on read
 * errors (missing file, permissions).
 */
export async function computeFileSha256(filePath: string): Promise<string> {
  const hash = createHash('sha256');
  for await (const chunk of fs.createReadStream(filePath)) {
    hash.update(chunk as Buffer);
  }
  return hash.digest('hex');
}
/**
 * Compares a file's SHA-256 digest against an expected hex hash.
 * The comparison is an exact string match against the lowercase hex
 * digest produced by createHash.
 */
export async function verifyFileSha256(
  filePath: string,
  expectedHash: string,
): Promise<boolean> {
  return (await computeFileSha256(filePath)) === expectedHash;
}
/**
 * Spawns a command with stdio inherited from the current process and
 * resolves with its exit code (1 when it closed without one). Rejects
 * when the process fails to spawn.
 */
function spawnInherited(command: string, args: string[]): Promise<number> {
  const child = nodeSpawn(command, args, { stdio: 'inherit' });
  return new Promise<number>((resolve, reject) => {
    child.on('error', reject);
    child.on('close', (code) => {
      resolve(code === null ? 1 : code);
    });
  });
}
/** Parsed CLI flags for `gemini gemma setup`. */
interface SetupArgs {
  // Port the local LiteRT server should use.
  port: number;
  // NOTE(review): presumably skips the model download (binary only) —
  // confirm against the setup handler.
  skipModel: boolean;
  // NOTE(review): presumably starts the server after setup — confirm.
  start: boolean;
  // Re-download the binary even when one is already installed.
  force: boolean;
  // Skip the interactive Gemma terms-of-use prompt.
  consent: boolean;
}
/**
 * Implements `gemini gemma setup`: installs the LiteRT-LM binary, verifies
 * its checksum, downloads the Gemma model, writes the router settings, and
 * optionally starts the local server.
 *
 * @param argv Parsed setup flags.
 * @returns Process exit code: 0 on success (or user cancellation), 1 when
 *   any required step fails.
 */
async function handleSetup(argv: SetupArgs): Promise<number> {
  const { port, force } = argv;
  let settingsUpdated = false;
  let serverStarted = false;
  let autoStartServer = true;
  log('');
  log(chalk.bold('Gemma Local Model Routing Setup'));
  log(chalk.dim('─'.repeat(40)));
  log('');
  // Fail fast on platforms with no published LiteRT-LM binary.
  const platform = detectPlatform();
  if (!platform) {
    logError(
      chalk.red(`Unsupported platform: ${process.platform}-${process.arch}`),
    );
    logError(
      'LiteRT-LM binaries are available for: macOS (ARM64), Linux (x86_64), Windows (x86_64)',
    );
    return 1;
  }
  log(chalk.dim(` Platform: ${platform.key}${platform.binaryName}`));
  // Require explicit acceptance of the Gemma Terms of Use unless the user
  // pre-consented with --consent.
  if (!argv.consent) {
    log('');
    log('This will download and install the LiteRT-LM runtime and the');
    log(
      `Gemma model (${GEMMA_MODEL_NAME}, ~1 GB). By proceeding, you agree to the`,
    );
    log('Gemma Terms of Use: https://ai.google.dev/gemma/terms');
    log('');
    const accepted = await promptYesNo('Do you want to continue?');
    if (!accepted) {
      log('Setup cancelled.');
      return 0;
    }
  }
  const binaryPath = getBinaryPath(platform.binaryName)!;
  const alreadyInstalled = isBinaryInstalled();
  // Install (or, with --force, reinstall) the LiteRT-LM binary.
  if (alreadyInstalled && !force) {
    log('');
    log(chalk.green(' ✓ LiteRT-LM binary already installed at:'));
    log(chalk.dim(` ${binaryPath}`));
  } else {
    log('');
    log(' Downloading LiteRT-LM binary...');
    const downloadUrl = getBinaryDownloadUrl(platform.binaryName);
    debugLogger.log(`Downloading from: ${downloadUrl}`);
    try {
      const binDir = path.dirname(binaryPath);
      fs.mkdirSync(binDir, { recursive: true });
      await downloadFile(downloadUrl, binaryPath);
      log(chalk.green(' ✓ Binary downloaded successfully'));
    } catch (error) {
      logError(
        chalk.red(
          ` ✗ Failed to download binary: ${error instanceof Error ? error.message : String(error)}`,
        ),
      );
      logError(' Check your internet connection and try again.');
      return 1;
    }
    // Verify the download against the pinned release checksum; a binary
    // that cannot be verified is deleted rather than installed.
    const expectedHash = PLATFORM_BINARY_SHA256[platform.binaryName];
    if (!expectedHash) {
      logError(
        chalk.red(
          ` ✗ No checksum is configured for ${platform.binaryName}. Refusing to install the binary.`,
        ),
      );
      try {
        fs.rmSync(binaryPath, { force: true });
      } catch {
        // ignore
      }
      return 1;
    }
    try {
      const checksumVerified = await verifyFileSha256(binaryPath, expectedHash);
      if (!checksumVerified) {
        logError(
          chalk.red(
            ' ✗ Downloaded binary checksum did not match the expected release hash.',
          ),
        );
        try {
          fs.rmSync(binaryPath, { force: true });
        } catch {
          // ignore
        }
        return 1;
      }
      log(chalk.green(' ✓ Binary checksum verified'));
    } catch (error) {
      logError(
        chalk.red(
          ` ✗ Failed to verify binary checksum: ${error instanceof Error ? error.message : String(error)}`,
        ),
      );
      try {
        fs.rmSync(binaryPath, { force: true });
      } catch {
        // ignore
      }
      return 1;
    }
    // The downloaded binary must be executable on POSIX systems.
    if (process.platform !== 'win32') {
      try {
        fs.chmodSync(binaryPath, 0o755);
      } catch (error) {
        logError(
          chalk.red(
            ` ✗ Failed to set executable permission: ${error instanceof Error ? error.message : String(error)}`,
          ),
        );
        return 1;
      }
    }
    // Clear the Gatekeeper quarantine attribute so macOS will run the binary.
    if (process.platform === 'darwin') {
      try {
        execFileSync('xattr', ['-d', 'com.apple.quarantine', binaryPath], {
          stdio: 'ignore',
        });
        log(chalk.green(' ✓ macOS quarantine attribute removed'));
      } catch {
        // Expected if the attribute doesn't exist.
      }
    }
  }
  // Pull the Gemma model via the LiteRT binary unless --skip-model was given.
  if (!argv.skipModel) {
    const modelAlreadyDownloaded = isModelDownloaded(binaryPath);
    if (modelAlreadyDownloaded && !force) {
      log('');
      log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} already downloaded`));
    } else {
      log('');
      log(` Downloading model ${GEMMA_MODEL_NAME}...`);
      log(chalk.dim(' You may be prompted to accept the Gemma Terms of Use.'));
      log('');
      // Run `<binary> pull <model>` with inherited stdio so the user can
      // answer any prompts from the LiteRT tool itself.
      const exitCode = await spawnInherited(binaryPath, [
        'pull',
        GEMMA_MODEL_NAME,
      ]);
      if (exitCode !== 0) {
        logError('');
        logError(
          chalk.red(` ✗ Model download failed (exit code ${exitCode})`),
        );
        return 1;
      }
      log('');
      log(chalk.green(` ✓ Model ${GEMMA_MODEL_NAME} downloaded`));
    }
  }
  log('');
  log(' Configuring settings...');
  try {
    const settings = loadSettings(process.cwd());
    // User scope: security-sensitive settings that must not be overridable
    // by workspace configs (prevents arbitrary binary execution).
    const existingUserGemma =
      settings.forScope(SettingScope.User).settings.experimental
        ?.gemmaModelRouter ?? {};
    // Preserve a previously-chosen autoStartServer; default to true for
    // fresh setups.
    autoStartServer = existingUserGemma.autoStartServer ?? true;
    const existingUserExperimental =
      settings.forScope(SettingScope.User).settings.experimental ?? {};
    settings.setValue(SettingScope.User, 'experimental', {
      ...existingUserExperimental,
      gemmaModelRouter: {
        autoStartServer,
        ...(existingUserGemma.binaryPath !== undefined
          ? { binaryPath: existingUserGemma.binaryPath }
          : {}),
      },
    });
    // Workspace scope: project-isolated settings so the local model only
    // runs for this specific project, saving resources globally.
    const existingWorkspaceGemma =
      settings.forScope(SettingScope.Workspace).settings.experimental
        ?.gemmaModelRouter ?? {};
    const existingWorkspaceExperimental =
      settings.forScope(SettingScope.Workspace).settings.experimental ?? {};
    settings.setValue(SettingScope.Workspace, 'experimental', {
      ...existingWorkspaceExperimental,
      gemmaModelRouter: {
        ...existingWorkspaceGemma,
        enabled: true,
        classifier: {
          ...existingWorkspaceGemma.classifier,
          host: `http://localhost:${port}`,
          model: GEMMA_MODEL_NAME,
        },
      },
    });
    log(chalk.green(' ✓ Settings updated'));
    log(chalk.dim(' User (~/.gemini/settings.json): autoStartServer'));
    log(
      chalk.dim(' Workspace (.gemini/settings.json): enabled, classifier'),
    );
    settingsUpdated = true;
  } catch (error) {
    // A settings failure is not fatal here: the summary below tells the
    // user what still needs manual attention.
    logError(
      chalk.red(
        ` ✗ Failed to update settings: ${error instanceof Error ? error.message : String(error)}`,
      ),
    );
    logError(
      ' You can manually add the configuration to ~/.gemini/settings.json',
    );
  }
  // Optionally start the server now (the default for `setup`).
  if (argv.start) {
    log('');
    log(' Starting LiteRT server...');
    serverStarted = await startServer(binaryPath, port);
    if (serverStarted) {
      log(chalk.green(` ✓ Server started on port ${port}`));
    } else {
      log(
        chalk.yellow(
          ` ! Server may not have started correctly. Check: gemini gemma status`,
        ),
      );
    }
  }
  // Summary: routing is "active" only when settings were written AND the
  // server is up; setup "succeeds" when settings were written and, if a
  // start was requested, the server actually started.
  const routingActive = settingsUpdated && serverStarted;
  const setupSucceeded = settingsUpdated && (!argv.start || serverStarted);
  log('');
  log(chalk.dim('─'.repeat(40)));
  if (routingActive) {
    log(chalk.bold.green(' Setup complete! Local model routing is active.'));
  } else if (settingsUpdated) {
    log(
      chalk.bold.green(' Setup complete! Local model routing is configured.'),
    );
  } else {
    log(
      chalk.bold.yellow(
        ' Setup incomplete. Manual settings changes are still required.',
      ),
    );
  }
  log('');
  log(' How it works: Every request is classified by the local Gemma model.');
  log(
    ' Simple tasks (file reads, quick edits) route to ' +
      chalk.cyan('Flash') +
      ' for speed.',
  );
  log(
    ' Complex tasks (debugging, architecture) route to ' +
      chalk.cyan('Pro') +
      ' for quality.',
  );
  log(' This happens automatically — just use the CLI as usual.');
  log('');
  // Targeted next-step hints for whichever part did not complete.
  if (!settingsUpdated) {
    log(
      chalk.yellow(
        ' Fix the settings update above, then rerun "gemini gemma status".',
      ),
    );
    log('');
  } else if (!argv.start) {
    log(chalk.yellow(' Note: Run "gemini gemma start" to start the server.'));
    if (autoStartServer) {
      log(
        chalk.yellow(
          ' Or restart the CLI to auto-start it on the next launch.',
        ),
      );
    }
    log('');
  } else if (!serverStarted) {
    log(
      chalk.yellow(
        ' Review the server logs and rerun "gemini gemma start" after fixing the issue.',
      ),
    );
    log('');
  }
  log(' Useful commands:');
  log(chalk.dim(' gemini gemma status Check routing status'));
  log(chalk.dim(' gemini gemma start Start the LiteRT server'));
  log(chalk.dim(' gemini gemma stop Stop the LiteRT server'));
  log(chalk.dim(' /gemma Check status inside a session'));
  log('');
  return setupSucceeded ? 0 : 1;
}
/** `gemini gemma setup` — download and configure Gemma local model routing. */
export const setupCommand: CommandModule = {
  command: 'setup',
  describe: 'Download and configure Gemma local model routing',
  builder: (yargs) =>
    yargs
      .option('port', {
        type: 'number',
        default: DEFAULT_PORT,
        description: 'Port for the LiteRT server',
      })
      .option('skip-model', {
        type: 'boolean',
        default: false,
        description: 'Skip model download (binary only)',
      })
      .option('start', {
        type: 'boolean',
        default: true,
        description: 'Start the server after setup',
      })
      .option('force', {
        type: 'boolean',
        default: false,
        description: 'Re-download binary and model even if already present',
      })
      .option('consent', {
        type: 'boolean',
        default: false,
        description: 'Skip interactive consent prompt (implies acceptance)',
      }),
  handler: async (argv) => {
    // yargs camelizes `--skip-model` to `skipModel`.
    const exitCode = await handleSetup({
      port: Number(argv['port']),
      skipModel: Boolean(argv['skipModel']),
      start: Boolean(argv['start']),
      force: Boolean(argv['force']),
      consent: Boolean(argv['consent']),
    });
    await exitCli(exitCode);
  },
};

View file

@ -0,0 +1,123 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import {
DEFAULT_PORT,
getPidFilePath,
getLogFilePath,
getLiteRtBinDir,
SERVER_START_WAIT_MS,
} from './constants.js';
import {
getBinaryPath,
isBinaryInstalled,
isServerRunning,
resolveGemmaConfig,
writeServerProcessInfo,
} from './platform.js';
/**
 * Spawns the LiteRT-LM binary as a detached background server process.
 *
 * Server output is appended to the shared log file, and the child's
 * PID/binary/port are recorded so later commands can identify and stop it.
 *
 * @param binaryPath Path of the LiteRT-LM executable.
 * @param port Port the server should listen on.
 * @returns True when a server responds on `port` after a short startup
 *   wait; false otherwise.
 */
export async function startServer(
  binaryPath: string,
  port: number,
): Promise<boolean> {
  // Idempotent: a server already listening on the port counts as success.
  if (await isServerRunning(port)) {
    debugLogger.log(`LiteRT server already running on port ${port}`);
    return true;
  }
  const logPath = getLogFilePath();
  fs.mkdirSync(getLiteRtBinDir(), { recursive: true });
  fs.mkdirSync(path.dirname(getPidFilePath()), { recursive: true });
  // Open the log in append mode; the fd is handed to the child directly.
  const logFd = fs.openSync(logPath, 'a');
  try {
    const serverProcess = spawn(
      binaryPath,
      ['serve', `--port=${port}`, '--verbose'],
      {
        detached: true,
        stdio: ['ignore', logFd, logFd],
      },
    );
    if (serverProcess.pid) {
      writeServerProcessInfo({ pid: serverProcess.pid, binaryPath, port });
    }
    // Detach so the CLI process can exit while the server keeps running.
    serverProcess.unref();
  } finally {
    // The child holds its own reference to the fd; the parent's copy can go.
    fs.closeSync(logFd);
  }
  // Give the server a moment to bind before probing it.
  await new Promise<void>((resolve) =>
    setTimeout(resolve, SERVER_START_WAIT_MS),
  );
  return isServerRunning(port);
}
/** `gemini gemma start` — start the LiteRT-LM server in the background. */
export const startCommand: CommandModule = {
  command: 'start',
  describe: 'Start the LiteRT-LM server',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port for the LiteRT server',
    }),
  handler: async (argv) => {
    // Explicit --port wins; otherwise use the configured (or default) port.
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    if (!port) {
      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
      port = configuredPort;
    }
    const binaryPath = getBinaryPath();
    if (!binaryPath || !isBinaryInstalled(binaryPath)) {
      debugLogger.error(
        chalk.red(
          'LiteRT-LM binary not found. Run "gemini gemma setup" first.',
        ),
      );
      await exitCli(1);
      return;
    }
    // Starting twice is a no-op: report success and exit.
    const alreadyRunning = await isServerRunning(port);
    if (alreadyRunning) {
      debugLogger.log(
        chalk.green(`LiteRT server is already running on port ${port}.`),
      );
      await exitCli(0);
      return;
    }
    debugLogger.log(`Starting LiteRT server on port ${port}...`);
    const started = await startServer(binaryPath, port);
    if (started) {
      debugLogger.log(chalk.green(`LiteRT server started on port ${port}.`));
      debugLogger.log(chalk.dim(`Logs: ${getLogFilePath()}`));
      await exitCli(0);
    } else {
      debugLogger.error(
        chalk.red('Server may not have started correctly. Check logs:'),
      );
      debugLogger.error(chalk.dim(` ${getLogFilePath()}`));
      await exitCli(1);
    }
  },
};

View file

@ -0,0 +1,165 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import chalk from 'chalk';
import { DEFAULT_PORT, GEMMA_MODEL_NAME } from './constants.js';
import {
detectPlatform,
getBinaryPath,
isBinaryInstalled,
isModelDownloaded,
isServerRunning,
readServerPid,
isProcessRunning,
resolveGemmaConfig,
} from './platform.js';
import { exitCli } from '../utils.js';
/** Snapshot of the health checks behind `gemini gemma status` and `/gemma`. */
export interface GemmaStatusResult {
  /** Whether the LiteRT-LM binary is installed. */
  binaryInstalled: boolean;
  /** Resolved path to the binary, or null when it cannot be determined. */
  binaryPath: string | null;
  /** Whether the Gemma model has been pulled for the installed binary. */
  modelDownloaded: boolean;
  /** Whether a server responded on the effective port. */
  serverRunning: boolean;
  /** PID from the PID file when that process is still alive, else null. */
  serverPid: number | null;
  /** Whether the Gemma router is enabled in settings. */
  settingsEnabled: boolean;
  /** Port that was checked (explicit override or configured port). */
  port: number;
  /** True only when every check above passes. */
  allPassing: boolean;
}
export async function checkGemmaStatus(
port?: number,
): Promise<GemmaStatusResult> {
const { settingsEnabled, configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
const effectivePort = port ?? configuredPort;
const binaryPath = getBinaryPath();
const binaryInstalled = isBinaryInstalled(binaryPath);
const modelDownloaded =
binaryInstalled && binaryPath ? isModelDownloaded(binaryPath) : false;
const serverRunning = await isServerRunning(effectivePort);
const pid = readServerPid();
const serverPid = pid && isProcessRunning(pid) ? pid : null;
const allPassing =
binaryInstalled && modelDownloaded && serverRunning && settingsEnabled;
return {
binaryInstalled,
binaryPath,
modelDownloaded,
serverRunning,
serverPid,
settingsEnabled,
port: effectivePort,
allPassing,
};
}
/**
 * Renders a {@link GemmaStatusResult} as a human-readable, colorized report.
 * Each failing check is followed by a dimmed remediation hint.
 *
 * @param status Snapshot produced by checkGemmaStatus().
 * @returns The multi-line report, ready to write to stdout.
 */
export function formatGemmaStatus(status: GemmaStatusResult): string {
  const check = (ok: boolean) => (ok ? chalk.green('✓') : chalk.red('✗'));
  const lines: string[] = [
    '',
    chalk.bold('Gemma Local Model Routing Status'),
    chalk.dim('─'.repeat(40)),
    '',
  ];
  // Binary check.
  if (status.binaryInstalled) {
    lines.push(` Binary: ${check(true)} Installed (${status.binaryPath})`);
  } else {
    const platform = detectPlatform();
    if (platform) {
      lines.push(` Binary: ${check(false)} Not installed`);
      lines.push(chalk.dim(` Run: gemini gemma setup`));
    } else {
      // No remediation hint: there is no binary for this platform at all.
      lines.push(
        ` Binary: ${check(false)} Unsupported platform (${process.platform}-${process.arch})`,
      );
    }
  }
  // Model check.
  if (status.modelDownloaded) {
    lines.push(` Model: ${check(true)} ${GEMMA_MODEL_NAME} downloaded`);
  } else {
    lines.push(` Model: ${check(false)} ${GEMMA_MODEL_NAME} not found`);
    if (status.binaryInstalled) {
      lines.push(
        chalk.dim(
          ` Run: ${status.binaryPath} pull ${GEMMA_MODEL_NAME}`,
        ),
      );
    } else {
      lines.push(chalk.dim(` Run: gemini gemma setup`));
    }
  }
  // Server check.
  if (status.serverRunning) {
    const pidInfo = status.serverPid ? ` (PID ${status.serverPid})` : '';
    lines.push(
      ` Server: ${check(true)} Running on port ${status.port}${pidInfo}`,
    );
  } else {
    lines.push(
      ` Server: ${check(false)} Not running on port ${status.port}`,
    );
    lines.push(chalk.dim(` Run: gemini gemma start`));
  }
  // Settings check.
  if (status.settingsEnabled) {
    lines.push(` Settings: ${check(true)} Enabled in settings.json`);
  } else {
    lines.push(` Settings: ${check(false)} Not enabled in settings.json`);
    lines.push(
      chalk.dim(
        ` Run: gemini gemma setup (auto-configures settings)`,
      ),
    );
  }
  lines.push('');
  // Overall verdict.
  if (status.allPassing) {
    lines.push(chalk.green(' Routing is active — no action needed.'));
    lines.push('');
    lines.push(
      chalk.dim(
        ' Simple requests → Flash (fast) | Complex requests → Pro (powerful)',
      ),
    );
    lines.push(chalk.dim(' This happens automatically on every request.'));
  } else {
    lines.push(
      chalk.yellow(
        ' Some checks failed. Run "gemini gemma setup" for guided installation.',
      ),
    );
  }
  lines.push('');
  return lines.join('\n');
}
/** `gemini gemma status` — print routing status; exits 0 only if all pass. */
export const statusCommand: CommandModule = {
  command: 'status',
  describe: 'Check Gemma local model routing status',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port to check for the LiteRT server',
    }),
  handler: async (argv) => {
    // Explicit --port overrides the configured classifier port.
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    const status = await checkGemmaStatus(port);
    const output = formatGemmaStatus(status);
    process.stdout.write(output);
    // Exit code mirrors overall health so scripts can gate on it.
    await exitCli(status.allPassing ? 0 : 1);
  },
};

View file

@ -0,0 +1,112 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
// vi.hoisted() creates these before the hoisted vi.mock factories below run,
// so those factories can safely close over them.
const mockGetBinaryPath = vi.hoisted(() => vi.fn());
const mockIsExpectedLiteRtServerProcess = vi.hoisted(() => vi.fn());
const mockIsProcessRunning = vi.hoisted(() => vi.fn());
const mockIsServerRunning = vi.hoisted(() => vi.fn());
const mockReadServerPid = vi.hoisted(() => vi.fn());
const mockReadServerProcessInfo = vi.hoisted(() => vi.fn());
const mockResolveGemmaConfig = vi.hoisted(() => vi.fn());
// Wrap the real core module so only its debug logger is replaced.
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
  const { mockCoreDebugLogger } = await import(
    '../../test-utils/mockDebugLogger.js'
  );
  return mockCoreDebugLogger(
    await importOriginal<typeof import('@google/gemini-cli-core')>(),
    {
      stripAnsi: false,
    },
  );
});
// Pin the PID-file path so assertions can reference a stable location.
vi.mock('./constants.js', () => ({
  DEFAULT_PORT: 9379,
  getPidFilePath: vi.fn(() => '/tmp/litert-server.pid'),
}));
// Replace every platform helper stopServer touches with a controllable mock.
vi.mock('./platform.js', () => ({
  getBinaryPath: mockGetBinaryPath,
  isExpectedLiteRtServerProcess: mockIsExpectedLiteRtServerProcess,
  isProcessRunning: mockIsProcessRunning,
  isServerRunning: mockIsServerRunning,
  readServerPid: mockReadServerPid,
  readServerProcessInfo: mockReadServerProcessInfo,
  resolveGemmaConfig: mockResolveGemmaConfig,
}));
// Prevent tests from actually exiting the process.
vi.mock('../utils.js', () => ({
  exitCli: vi.fn(),
}));
import { stopServer } from './stop.js';
describe('gemma stop command', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Fake timers let stopServer's SIGTERM/SIGKILL wait delays resolve
    // instantly via runAllTimersAsync().
    vi.useFakeTimers();
    mockGetBinaryPath.mockReturnValue('/custom/lit');
    mockResolveGemmaConfig.mockReturnValue({ configuredPort: 9379 });
  });
  afterEach(() => {
    vi.useRealTimers();
    vi.restoreAllMocks();
  });
  it('refuses to signal a pid that does not match the expected LiteRT server', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    mockIsProcessRunning.mockReturnValue(true);
    // The PID is alive but is NOT our server — stopServer must not kill it.
    mockIsExpectedLiteRtServerProcess.mockReturnValue(false);
    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
    await expect(stopServer(8123)).resolves.toBe('unexpected-process');
    expect(killSpy).not.toHaveBeenCalled();
  });
  it('stops the verified LiteRT server and removes the pid file', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    // Alive before SIGTERM, gone afterwards.
    mockIsProcessRunning.mockReturnValueOnce(true).mockReturnValueOnce(false);
    mockIsExpectedLiteRtServerProcess.mockReturnValue(true);
    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
    const stopPromise = stopServer(8123);
    await vi.runAllTimersAsync();
    await expect(stopPromise).resolves.toBe('stopped');
    expect(killSpy).toHaveBeenCalledWith(1234, 'SIGTERM');
    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
  });
  it('cleans up a stale pid file when the recorded process is no longer running', async () => {
    mockReadServerProcessInfo.mockReturnValue({
      pid: 1234,
      binaryPath: '/custom/lit',
      port: 8123,
    });
    mockIsProcessRunning.mockReturnValue(false);
    const unlinkSpy = vi.spyOn(fs, 'unlinkSync').mockImplementation(() => {});
    await expect(stopServer(8123)).resolves.toBe('not-running');
    expect(unlinkSpy).toHaveBeenCalledWith('/tmp/litert-server.pid');
  });
});

View file

@ -0,0 +1,155 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { CommandModule } from 'yargs';
import fs from 'node:fs';
import chalk from 'chalk';
import { debugLogger } from '@google/gemini-cli-core';
import { exitCli } from '../utils.js';
import { DEFAULT_PORT, getPidFilePath } from './constants.js';
import {
getBinaryPath,
isExpectedLiteRtServerProcess,
isProcessRunning,
isServerRunning,
readServerPid,
readServerProcessInfo,
resolveGemmaConfig,
} from './platform.js';
/**
 * Outcome of {@link stopServer}:
 * - 'stopped': the server was terminated and the PID file removed.
 * - 'not-running': no PID file, or its recorded process is already dead.
 * - 'unexpected-process': the recorded PID belongs to some other process.
 * - 'failed': the SIGTERM could not be sent, or the process survived SIGKILL.
 */
export type StopServerResult =
  | 'stopped'
  | 'not-running'
  | 'unexpected-process'
  | 'failed';
/**
 * Stops the LiteRT server recorded in the PID file.
 *
 * The recorded PID is verified against the expected LiteRT server identity
 * before being signalled, so a recycled PID belonging to an unrelated
 * process is never killed. Termination escalates SIGTERM → SIGKILL.
 *
 * @param expectedPort Fallback port used for identity verification when the
 *   PID file did not record one.
 * @returns See {@link StopServerResult}.
 */
export async function stopServer(
  expectedPort?: number,
): Promise<StopServerResult> {
  const processInfo = readServerProcessInfo();
  const pidPath = getPidFilePath();
  if (!processInfo) {
    return 'not-running';
  }
  const { pid } = processInfo;
  // A PID file can outlive its process; remove the stale file instead of
  // signalling a dead PID.
  if (!isProcessRunning(pid)) {
    debugLogger.log(
      `Stale PID file found (PID ${pid} is not running), removing ${pidPath}`,
    );
    try {
      fs.unlinkSync(pidPath);
    } catch {
      // ignore
    }
    return 'not-running';
  }
  const binaryPath = processInfo.binaryPath ?? getBinaryPath();
  const port = processInfo.port ?? expectedPort;
  // Safety check: never signal a PID the OS may have reassigned to some
  // other process.
  if (!isExpectedLiteRtServerProcess(pid, { binaryPath, port })) {
    debugLogger.warn(
      `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
    );
    return 'unexpected-process';
  }
  // Ask for a graceful shutdown first.
  try {
    process.kill(pid, 'SIGTERM');
  } catch {
    return 'failed';
  }
  await new Promise((resolve) => setTimeout(resolve, 1000));
  // Escalate to SIGKILL if the server ignored SIGTERM.
  if (isProcessRunning(pid)) {
    try {
      process.kill(pid, 'SIGKILL');
    } catch {
      // ignore
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
    if (isProcessRunning(pid)) {
      return 'failed';
    }
  }
  try {
    fs.unlinkSync(pidPath);
  } catch {
    // ignore
  }
  return 'stopped';
}
/** `gemini gemma stop` — stop the background LiteRT-LM server. */
export const stopCommand: CommandModule = {
  command: 'stop',
  describe: 'Stop the LiteRT-LM server',
  builder: (yargs) =>
    yargs.option('port', {
      type: 'number',
      description: 'Port where the LiteRT server is running',
    }),
  handler: async (argv) => {
    // Explicit --port wins; otherwise use the configured (or default) port.
    let port: number | undefined;
    if (argv['port'] !== undefined) {
      port = Number(argv['port']);
    }
    if (!port) {
      const { configuredPort } = resolveGemmaConfig(DEFAULT_PORT);
      port = configuredPort;
    }
    const processInfo = readServerProcessInfo();
    const pid = processInfo?.pid ?? readServerPid();
    if (pid !== null && isProcessRunning(pid)) {
      debugLogger.log(`Stopping LiteRT server (PID ${pid})...`);
      const result = await stopServer(port);
      if (result === 'stopped') {
        debugLogger.log(chalk.green('LiteRT server stopped.'));
        await exitCli(0);
      } else if (result === 'unexpected-process') {
        debugLogger.error(
          chalk.red(
            `Refusing to stop PID ${pid} because it does not match the expected LiteRT server process.`,
          ),
        );
        debugLogger.error(
          chalk.dim(
            'Remove the stale pid file after verifying the process, or stop the process manually.',
          ),
        );
        await exitCli(1);
      } else {
        debugLogger.error(chalk.red('Failed to stop LiteRT server.'));
        await exitCli(1);
      }
      return;
    }
    // No live recorded PID: check whether something else owns the port.
    const running = await isServerRunning(port);
    if (running) {
      debugLogger.log(
        chalk.yellow(
          `A server is responding on port ${port}, but it was not started by "gemini gemma start".`,
        ),
      );
      debugLogger.log(
        chalk.dim(
          'If you started it manually, stop it from the terminal where it is running.',
        ),
      );
      await exitCli(1);
    } else {
      debugLogger.log('No LiteRT server is currently running.');
      await exitCli(0);
    }
  },
};

View file

@ -338,6 +338,7 @@ describe('parseArguments', () => {
{ cmd: 'skill list', expected: true },
{ cmd: 'hooks migrate', expected: true },
{ cmd: 'hook migrate', expected: true },
{ cmd: 'gemma status', expected: true },
{ cmd: 'some query', expected: undefined },
{ cmd: 'hello world', expected: undefined },
])(
@ -758,6 +759,12 @@ describe('parseArguments', () => {
const argv = await parseArguments(settings);
expect(argv.isCommand).toBe(true);
});
it('should set isCommand to true for gemma command', async () => {
process.argv = ['node', 'script.js', 'gemma', 'status'];
const argv = await parseArguments(createTestMergedSettings());
expect(argv.isCommand).toBe(true);
});
});
describe('loadCliConfig', () => {
@ -3030,6 +3037,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
experimental: {
gemmaModelRouter: {
enabled: true,
autoStartServer: false,
binaryPath: '/custom/lit',
classifier: {
host: 'http://custom:1234',
model: 'custom-gemma',
@ -3040,6 +3049,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getGemmaModelRouterEnabled()).toBe(true);
const gemmaSettings = config.getGemmaModelRouterSettings();
expect(gemmaSettings.autoStartServer).toBe(false);
expect(gemmaSettings.binaryPath).toBe('/custom/lit');
expect(gemmaSettings.classifier?.host).toBe('http://custom:1234');
expect(gemmaSettings.classifier?.model).toBe('custom-gemma');
});
@ -3057,6 +3068,8 @@ describe('loadCliConfig gemmaModelRouter', () => {
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getGemmaModelRouterEnabled()).toBe(true);
const gemmaSettings = config.getGemmaModelRouterSettings();
expect(gemmaSettings.autoStartServer).toBe(false);
expect(gemmaSettings.binaryPath).toBe('');
expect(gemmaSettings.classifier?.host).toBe('http://localhost:9379');
expect(gemmaSettings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});

View file

@ -13,6 +13,7 @@ import { mcpCommand } from '../commands/mcp.js';
import { extensionsCommand } from '../commands/extensions.js';
import { skillsCommand } from '../commands/skills.js';
import { hooksCommand } from '../commands/hooks.js';
import { gemmaCommand } from '../commands/gemma.js';
import {
setGeminiMdFilename as setServerGeminiMdFilename,
getCurrentGeminiMdFilename,
@ -181,6 +182,7 @@ export async function parseArguments(
extensionsCommand,
skillsCommand,
hooksCommand,
gemmaCommand,
];
const subcommands = commandModules.flatMap((mod) => {
@ -260,6 +262,7 @@ export async function parseArguments(
yargsInstance.command(extensionsCommand);
yargsInstance.command(skillsCommand);
yargsInstance.command(hooksCommand);
yargsInstance.command(gemmaCommand);
yargsInstance
.command('$0 [query..]', 'Launch Gemini CLI', (yargsInstance) =>

View file

@ -471,11 +471,33 @@ describe('SettingsSchema', () => {
expect(enabled.category).toBe('Experimental');
expect(enabled.default).toBe(false);
expect(enabled.requiresRestart).toBe(true);
expect(enabled.showInDialog).toBe(false);
expect(enabled.showInDialog).toBe(true);
expect(enabled.description).toBe(
'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
);
const autoStartServer = gemmaModelRouter.properties.autoStartServer;
expect(autoStartServer).toBeDefined();
expect(autoStartServer.type).toBe('boolean');
expect(autoStartServer.category).toBe('Experimental');
expect(autoStartServer.default).toBe(false);
expect(autoStartServer.requiresRestart).toBe(true);
expect(autoStartServer.showInDialog).toBe(true);
expect(autoStartServer.description).toBe(
'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
);
const binaryPath = gemmaModelRouter.properties.binaryPath;
expect(binaryPath).toBeDefined();
expect(binaryPath.type).toBe('string');
expect(binaryPath.category).toBe('Experimental');
expect(binaryPath.default).toBe('');
expect(binaryPath.requiresRestart).toBe(true);
expect(binaryPath.showInDialog).toBe(false);
expect(binaryPath.description).toBe(
'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
);
const classifier = gemmaModelRouter.properties.classifier;
expect(classifier).toBeDefined();
expect(classifier.type).toBe('object');

View file

@ -2169,6 +2169,26 @@ const SETTINGS_SCHEMA = {
default: false,
description:
'Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim.',
showInDialog: true,
},
autoStartServer: {
type: 'boolean',
label: 'Auto-start LiteRT Server',
category: 'Experimental',
requiresRestart: true,
default: false,
description:
'Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.',
showInDialog: true,
},
binaryPath: {
type: 'string',
label: 'LiteRT Binary Path',
category: 'Experimental',
requiresRestart: true,
default: '',
description:
'Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).',
showInDialog: false,
},
classifier: {

View file

@ -612,6 +612,23 @@ export async function main() {
const initializationResult = await initializeApp(config, settings);
initAppHandle?.end();
import('./services/liteRtServerManager.js')
.then(({ LiteRtServerManager }) => {
const mergedGemma = settings.merged.experimental?.gemmaModelRouter;
if (!mergedGemma) return;
// Security: binaryPath and autoStartServer must come from user-scoped
// settings only to prevent workspace configs from triggering arbitrary
// binary execution.
const userGemma = settings.forScope(SettingScope.User).settings
.experimental?.gemmaModelRouter;
return LiteRtServerManager.ensureRunning({
...mergedGemma,
binaryPath: userGemma?.binaryPath,
autoStartServer: userGemma?.autoStartServer,
});
})
.catch((e) => debugLogger.warn('LiteRT auto-start import failed:', e));
if (
settings.merged.security.auth.selectedType ===
AuthType.LOGIN_WITH_GOOGLE &&

View file

@ -61,6 +61,7 @@ import { vimCommand } from '../ui/commands/vimCommand.js';
import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js';
import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js';
import { upgradeCommand } from '../ui/commands/upgradeCommand.js';
import { gemmaStatusCommand } from '../ui/commands/gemmaStatusCommand.js';
/**
* Loads the core, hard-coded slash commands that are an integral part
@ -221,6 +222,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
: [skillsCommand]
: []),
settingsCommand,
gemmaStatusCommand,
tasksCommand,
vimCommand,
setupGithubCommand,

View file

@ -0,0 +1,68 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
// vi.hoisted() creates the mock fns before the hoisted vi.mock factories run.
const mockGetBinaryPath = vi.hoisted(() => vi.fn());
const mockIsServerRunning = vi.hoisted(() => vi.fn());
const mockStartServer = vi.hoisted(() => vi.fn());
// Replace the platform helpers and the server starter with mocks so the
// manager can be exercised without touching the real binary or network.
vi.mock('../commands/gemma/platform.js', () => ({
  getBinaryPath: mockGetBinaryPath,
  isServerRunning: mockIsServerRunning,
}));
vi.mock('../commands/gemma/start.js', () => ({
  startServer: mockStartServer,
}));
import { LiteRtServerManager } from './liteRtServerManager.js';
describe('LiteRtServerManager', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Pretend the binary exists on disk so auto-start proceeds.
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    mockIsServerRunning.mockResolvedValue(false);
    mockStartServer.mockResolvedValue(true);
  });
  it('uses the configured custom binary path when auto-starting', async () => {
    mockGetBinaryPath.mockReturnValue('/user/lit');
    // The settings-supplied binaryPath is not what gets executed; the path
    // handed to startServer must come from getBinaryPath().
    const settings: GemmaModelRouterSettings = {
      enabled: true,
      binaryPath: '/workspace/evil',
      classifier: {
        host: 'http://localhost:8123',
      },
    };
    await LiteRtServerManager.ensureRunning(settings);
    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
    expect(fs.existsSync).toHaveBeenCalledWith('/user/lit');
    expect(mockStartServer).toHaveBeenCalledWith('/user/lit', 8123);
  });
  it('falls back to the default binary path when no custom path is configured', async () => {
    mockGetBinaryPath.mockReturnValue('/default/lit');
    const settings: GemmaModelRouterSettings = {
      enabled: true,
      classifier: {
        host: 'http://localhost:9379',
      },
    };
    await LiteRtServerManager.ensureRunning(settings);
    expect(mockGetBinaryPath).toHaveBeenCalledTimes(1);
    expect(fs.existsSync).toHaveBeenCalledWith('/default/lit');
    expect(mockStartServer).toHaveBeenCalledWith('/default/lit', 9379);
  });
});

View file

@ -0,0 +1,59 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import { debugLogger } from '@google/gemini-cli-core';
import type { GemmaModelRouterSettings } from '@google/gemini-cli-core';
import { getBinaryPath, isServerRunning } from '../commands/gemma/platform.js';
import { DEFAULT_PORT } from '../commands/gemma/constants.js';
/**
 * Best-effort auto-start of the local LiteRT-LM server when the Gemma
 * model router is enabled. All failures are logged and swallowed so the
 * CLI never fails to launch because of the local server.
 */
export class LiteRtServerManager {
  /**
   * Starts the LiteRT server for the configured classifier port if it is
   * not already running. No-ops when routing is disabled, auto-start is
   * explicitly turned off, or the binary is not installed.
   *
   * @param gemmaSettings The gemmaModelRouter settings, if any.
   */
  static async ensureRunning(
    gemmaSettings: GemmaModelRouterSettings | undefined,
  ): Promise<void> {
    if (!gemmaSettings?.enabled) return;
    // NOTE(review): only an explicit `false` disables auto-start, so an
    // absent autoStartServer behaves as enabled — confirm this matches the
    // intended settings-schema default.
    if (gemmaSettings.autoStartServer === false) return;
    const binaryPath = getBinaryPath();
    if (!binaryPath || !fs.existsSync(binaryPath)) {
      debugLogger.log(
        '[LiteRtServerManager] Binary not installed, skipping auto-start. Run "gemini gemma setup".',
      );
      return;
    }
    // Derive the port from the classifier host URL (e.g.
    // "http://localhost:9379"); fall back to the default when unparsable.
    const port =
      parseInt(
        gemmaSettings.classifier?.host?.match(/:(\d+)/)?.[1] ?? '',
        10,
      ) || DEFAULT_PORT;
    const running = await isServerRunning(port);
    if (running) {
      debugLogger.log(
        `[LiteRtServerManager] Server already running on port ${port}`,
      );
      return;
    }
    debugLogger.log(
      `[LiteRtServerManager] Auto-starting LiteRT server on port ${port}...`,
    );
    try {
      // start.js is loaded lazily, only when a server actually needs starting.
      const { startServer } = await import('../commands/gemma/start.js');
      const started = await startServer(binaryPath, port);
      if (started) {
        debugLogger.log(`[LiteRtServerManager] Server started on port ${port}`);
      } else {
        debugLogger.warn(
          `[LiteRtServerManager] Server may not have started correctly on port ${port}`,
        );
      }
    } catch (error) {
      debugLogger.warn('[LiteRtServerManager] Auto-start failed:', error);
    }
  }
}

View file

@ -0,0 +1,41 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { CommandKind, type SlashCommand } from './types.js';
import { MessageType, type HistoryItemGemmaStatus } from '../types.js';
import { checkGemmaStatus } from '../../commands/gemma/status.js';
import { GEMMA_MODEL_NAME } from '../../commands/gemma/constants.js';
/** `/gemma` slash command: reports local Gemma model routing status. */
export const gemmaStatusCommand: SlashCommand = {
  name: 'gemma',
  description: 'Check local Gemma model routing status',
  kind: CommandKind.BUILT_IN,
  autoExecute: true,
  isSafeConcurrent: true,
  action: async (context) => {
    // Extract an explicit port from the configured classifier host, if any;
    // leave it undefined so the status check falls back to its default.
    const host =
      context.services.settings.merged.experimental?.gemmaModelRouter
        ?.classifier?.host;
    const portText = host?.match(/:(\d+)/)?.[1];
    const port = Number.parseInt(portText ?? '', 10) || undefined;

    const status = await checkGemmaStatus(port);

    const historyItem: Omit<HistoryItemGemmaStatus, 'id'> = {
      type: MessageType.GEMMA_STATUS,
      binaryInstalled: status.binaryInstalled,
      binaryPath: status.binaryPath,
      modelName: GEMMA_MODEL_NAME,
      modelDownloaded: status.modelDownloaded,
      serverRunning: status.serverRunning,
      serverPid: status.serverPid,
      serverPort: status.port,
      settingsEnabled: status.settingsEnabled,
      allPassing: status.allPassing,
    };
    context.ui.addItem(historyItem);
  },
};

View file

@ -32,6 +32,7 @@ import { ToolsList } from './views/ToolsList.js';
import { SkillsList } from './views/SkillsList.js';
import { AgentsStatus } from './views/AgentsStatus.js';
import { McpStatus } from './views/McpStatus.js';
import { GemmaStatus } from './views/GemmaStatus.js';
import { ChatList } from './views/ChatList.js';
import { ModelMessage } from './messages/ModelMessage.js';
import { ThinkingMessage } from './messages/ThinkingMessage.js';
@ -228,6 +229,9 @@ export const HistoryItemDisplay: React.FC<HistoryItemDisplayProps> = ({
{itemForDisplay.type === 'mcp_status' && (
<McpStatus {...itemForDisplay} serverStatus={getMCPServerStatus} />
)}
{itemForDisplay.type === 'gemma_status' && (
<GemmaStatus {...itemForDisplay} />
)}
{itemForDisplay.type === 'chat_list' && (
<ChatList chats={itemForDisplay.chats} />
)}

View file

@ -0,0 +1,120 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { Box, Text } from 'ink';
import type React from 'react';
import { theme } from '../../semantic-colors.js';
import type { HistoryItemGemmaStatus } from '../../types.js';
type GemmaStatusProps = Omit<HistoryItemGemmaStatus, 'id' | 'type'>;

/** Filled green dot when `ok`, hollow red circle otherwise. */
const StatusDot: React.FC<{ ok: boolean }> = ({ ok }) => {
  const color = ok ? theme.status.success : theme.status.error;
  const glyph = ok ? '\u25CF' : '\u25CB';
  return <Text color={color}>{glyph}</Text>;
};
/**
 * Renders the `/gemma` status report: one dot-prefixed row per check
 * (binary, model, server, settings) plus a summary of whether routing is
 * active and a next-step hint when it is not.
 */
export const GemmaStatus: React.FC<GemmaStatusProps> = ({
  binaryInstalled,
  binaryPath,
  modelName,
  modelDownloaded,
  serverRunning,
  serverPid,
  serverPort,
  settingsEnabled,
  allPassing,
}) => (
  <Box flexDirection="column">
    <Text bold>Gemma Local Model Routing</Text>
    <Box height={1} />
    {/* Binary row: shows the install path on success, error text otherwise. */}
    <Box>
      <StatusDot ok={binaryInstalled} />
      <Text>
        {' '}
        <Text bold>Binary: </Text>
        {binaryInstalled ? (
          <Text color={theme.text.secondary}>{binaryPath}</Text>
        ) : (
          <Text color={theme.status.error}>Not installed</Text>
        )}
      </Text>
    </Box>
    {/* Model row: model name is shown in both states. */}
    <Box>
      <StatusDot ok={modelDownloaded} />
      <Text>
        {' '}
        <Text bold>Model: </Text>
        {modelDownloaded ? (
          <Text>{modelName}</Text>
        ) : (
          <Text color={theme.status.error}>{modelName} not found</Text>
        )}
      </Text>
    </Box>
    {/* Server row: PID is optional and only appended when known. */}
    <Box>
      <StatusDot ok={serverRunning} />
      <Text>
        {' '}
        <Text bold>Server: </Text>
        {serverRunning ? (
          <Text>
            port {serverPort}
            {serverPid ? (
              <Text color={theme.text.secondary}> (PID {serverPid})</Text>
            ) : null}
          </Text>
        ) : (
          <Text color={theme.status.error}>
            not running on port {serverPort}
          </Text>
        )}
      </Text>
    </Box>
    <Box>
      <StatusDot ok={settingsEnabled} />
      <Text>
        {' '}
        <Text bold>Settings: </Text>
        {settingsEnabled ? (
          <Text>enabled</Text>
        ) : (
          <Text color={theme.status.error}>not enabled</Text>
        )}
      </Text>
    </Box>
    {/* Summary: routing is active only when every check above passed. */}
    <Box marginTop={1}>
      <Text bold>Active for: </Text>
      {allPassing ? (
        <Text color={theme.status.success}>[routing]</Text>
      ) : (
        <Text color={theme.text.secondary}>none</Text>
      )}
    </Box>
    <Box marginTop={1}>
      {allPassing ? (
        <Box flexDirection="column">
          <Text color={theme.text.secondary}>
            Simple requests route to Flash, complex requests to Pro.
          </Text>
          <Text color={theme.text.secondary}>
            This happens automatically on every request.
          </Text>
        </Box>
      ) : (
        <Text color={theme.status.warning}>
          Run &quot;gemini gemma setup&quot; to install and configure.
        </Text>
      )}
    </Box>
  </Box>
);

View file

@ -355,6 +355,19 @@ export interface JsonMcpResource {
description?: string;
}
/**
 * History entry produced by the `/gemma` slash command and rendered by the
 * GemmaStatus view. Mirrors the fields reported by checkGemmaStatus.
 */
export type HistoryItemGemmaStatus = HistoryItemBase & {
  type: 'gemma_status';
  // Whether the LiteRT-LM binary exists on disk.
  binaryInstalled: boolean;
  // Resolved binary path; null when not installed.
  binaryPath: string | null;
  // Display name of the Gemma model (GEMMA_MODEL_NAME).
  modelName: string;
  modelDownloaded: boolean;
  serverRunning: boolean;
  // PID of the running server, when known; null otherwise.
  serverPid: number | null;
  // Port the server was checked on.
  serverPort: number;
  // Whether the Gemma router is enabled in settings.
  settingsEnabled: boolean;
  // True only when every check above passed (routing is active).
  allPassing: boolean;
};
export type HistoryItemMcpStatus = HistoryItemBase & {
type: 'mcp_status';
servers: Record<string, MCPServerConfig>;
@ -404,6 +417,7 @@ export type HistoryItemWithoutId =
| HistoryItemSkillsList
| HistoryItemAgentsList
| HistoryItemMcpStatus
| HistoryItemGemmaStatus
| HistoryItemChatList
| HistoryItemThinking
| HistoryItemHint
@ -430,6 +444,7 @@ export enum MessageType {
SKILLS_LIST = 'skills_list',
AGENTS_LIST = 'agents_list',
MCP_STATUS = 'mcp_status',
GEMMA_STATUS = 'gemma_status',
CHAT_LIST = 'chat_list',
HINT = 'hint',
}

View file

@ -1975,6 +1975,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(baseParams);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(false);
expect(settings.autoStartServer).toBe(true);
expect(settings.binaryPath).toBe('');
expect(settings.classifier?.host).toBe('http://localhost:9379');
expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});
@ -1984,6 +1986,8 @@ describe('GemmaModelRouterSettings', () => {
...baseParams,
gemmaModelRouter: {
enabled: true,
autoStartServer: false,
binaryPath: '/custom/lit',
classifier: {
host: 'http://custom:1234',
model: 'custom-gemma',
@ -1993,6 +1997,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(params);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(true);
expect(settings.autoStartServer).toBe(false);
expect(settings.binaryPath).toBe('/custom/lit');
expect(settings.classifier?.host).toBe('http://custom:1234');
expect(settings.classifier?.model).toBe('custom-gemma');
});
@ -2007,6 +2013,8 @@ describe('GemmaModelRouterSettings', () => {
const config = new Config(params);
const settings = config.getGemmaModelRouterSettings();
expect(settings.enabled).toBe(true);
expect(settings.autoStartServer).toBe(true);
expect(settings.binaryPath).toBe('');
expect(settings.classifier?.host).toBe('http://localhost:9379');
expect(settings.classifier?.model).toBe('gemma3-1b-gpu-custom');
});

View file

@ -219,6 +219,8 @@ export interface OutputSettings {
export interface GemmaModelRouterSettings {
enabled?: boolean;
autoStartServer?: boolean;
binaryPath?: string;
classifier?: {
host?: string;
model?: string;
@ -1323,6 +1325,8 @@ export class Config implements McpContext, AgentLoopContext {
};
this.gemmaModelRouter = {
enabled: params.gemmaModelRouter?.enabled ?? false,
autoStartServer: params.gemmaModelRouter?.autoStartServer ?? true,
binaryPath: params.gemmaModelRouter?.binaryPath ?? '',
classifier: {
host:
params.gemmaModelRouter?.classifier?.host ?? 'http://localhost:9379',

View file

@ -7,6 +7,8 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LocalLiteRtLmClient } from './localLiteRtLmClient.js';
import type { Config } from '../config/config.js';
import { GoogleGenAI } from '@google/genai';
const mockGenerateContent = vi.fn();
vi.mock('@google/genai', () => {
@ -44,6 +46,14 @@ describe('LocalLiteRtLmClient', () => {
const result = await client.generateJson([], 'test-instruction');
expect(result).toEqual({ key: 'value' });
expect(GoogleGenAI).toHaveBeenCalledWith(
expect.objectContaining({
apiVersion: 'v1beta',
httpOptions: expect.objectContaining({
baseUrl: 'http://test-host:1234',
}),
}),
);
expect(mockGenerateContent).toHaveBeenCalledWith(
expect.objectContaining({
model: 'gemma:latest',

View file

@ -25,6 +25,8 @@ export class LocalLiteRtLmClient {
this.client = new GoogleGenAI({
// The LiteRT-LM server does not require an API key, but the SDK requires one to be set even for local endpoints. This is a dummy value and is not used for authentication.
apiKey: 'no-api-key-needed',
apiVersion: 'v1beta',
vertexai: false,
httpOptions: {
baseUrl: this.host,
// If the LiteRT-LM server is started but the wrong port is set, there will be a lengthy TCP timeout (here fixed to be 10 seconds).

View file

@ -2920,6 +2920,20 @@
"default": false,
"type": "boolean"
},
"autoStartServer": {
"title": "Auto-start LiteRT Server",
"description": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.",
"markdownDescription": "Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `true`",
"default": true,
"type": "boolean"
},
"binaryPath": {
"title": "LiteRT Binary Path",
"description": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).",
"markdownDescription": "Custom path to the LiteRT-LM binary. Leave empty to use the default location (~/.gemini/bin/litert/).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: ``",
"default": "",
"type": "string"
},
"classifier": {
"title": "Classifier",
"description": "Classifier configuration.",