From 6b13f6453c3f3ba77b6eb468a876dbc777f54eb9 Mon Sep 17 00:00:00 2001 From: Sandy Tao Date: Mon, 20 Apr 2026 15:16:05 -0700 Subject: [PATCH] feat(core): let skill extractor propose GEMINI.md patches Extends the background skill-extraction agent (`confucius`) so it can propose patches against loaded GEMINI.md memory files in addition to SKILL.md updates. Patches are gated through the existing inbox flow and remain user-reviewed before they touch any file on disk. - memoryPatchUtils: add `getAllowedGeminiMdPatchTargets` and `isGeminiMdPatchTarget`. The allow-list is the union of global + project memory files reported by `config.getGeminiMdFilePaths()` minus extension-supplied context files (which would be overwritten on extension upgrades). `resolveAllowedSkillPatchTarget` now accepts either a subpath under a skills root OR an exact match against the GEMINI.md allow-list. `/dev/null` patches against GEMINI.md are rejected naturally because new files are never in the allow-list. - memoryService: build a scope-tagged GEMINI.md targeting context (Global / Workspace root / Workspace subdir / Workspace ancestor) and pass it to the agent. Classify each new patch as skill vs GEMINI.md and surface "project memory update" feedback separately. - SkillExtractionAgent: new `geminiMdContext` parameter, new "UPDATING GEMINI.md (PATCHES)" prompt section with strict gating, and a refreshed combined cap of 0-2 artifacts per run total. - SkillInboxDialog: split patch list into "Skill Updates" and "Project Memory Updates" sections. Tests: 3 new validatePatches cases (positive, /dev/null rejection, extension-file rejection) and 5 new prompt-shape unit tests, plus a new component-level eval suite (evals/gemini_md_extraction.eval.ts) covering the positive case, the duplication gate, and the recurrence gate. --- evals/gemini_md_extraction.eval.ts | 398 ++++++++++++++++++ .../ui/components/SkillInboxDialog.test.tsx | 4 + .../src/ui/components/SkillInboxDialog.tsx | 71 +++- .../src/agents/skill-extraction-agent.test.ts | 48 +++ .../core/src/agents/skill-extraction-agent.ts | 107 ++++- packages/core/src/index.ts | 6 +- .../core/src/services/memoryPatchUtils.ts | 78 +++- .../core/src/services/memoryService.test.ts | 125 ++++++ packages/core/src/services/memoryService.ts | 164 +++++++- 9 files changed, 974 insertions(+), 27 deletions(-) create mode 100644 evals/gemini_md_extraction.eval.ts diff --git a/evals/gemini_md_extraction.eval.ts b/evals/gemini_md_extraction.eval.ts new file mode 100644 index 0000000000..7183bdf8b3 --- /dev/null +++ b/evals/gemini_md_extraction.eval.ts @@ -0,0 +1,398 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fsp from 'node:fs/promises'; +import path from 'node:path'; +import { describe, expect } from 'vitest'; +import { + type Config, + ApprovalMode, + SESSION_FILE_PREFIX, + getProjectHash, + listInboxPatches, + normalizePath, + refreshServerHierarchicalMemory, + startMemoryService, +} from '@google/gemini-cli-core'; +import { componentEvalTest } from './component-test-helper.js'; + +interface SeedSession { + sessionId: string; + summary: string; + userTurns: string[]; + timestampOffsetMinutes: number; +} + +interface MessageRecord { + id: string; + timestamp: string; + type: string; + content: Array<{ text: string }>; +} + +const PACKAGE_JSON = JSON.stringify( + { + name: 'gemini-md-extraction-eval', + private: true, + scripts: { + build: 'echo build', + lint: 'echo lint', + test: 'echo test', + preflight: 'echo preflight', + }, + }, + null, + 2, +); + +/** + * A baseline GEMINI.md that establishes the project as a real one with some + * existing rules. Tests that expect a patch verify the agent ADDS a new rule + * not already present here. Tests that expect NO patch include the candidate + * rule already, exercising the "do not duplicate" gate. + */ +const BASELINE_GEMINI_MD = `# Project Conventions + +## Build + +- Run \`npm run build\` to compile the project. + +## Linting + +- Run \`npm run lint\` to lint the project. +`; + +const GEMINI_MD_WITH_PREFLIGHT_RULE = `${BASELINE_GEMINI_MD} +## Preflight + +- Always run \`npm run preflight\` before opening a PR. +`; + +function workspaceFiles(geminiMdContent: string): Record { + return { + 'package.json': PACKAGE_JSON, + 'README.md': '# GEMINI.md Extraction Eval\n', + 'GEMINI.md': geminiMdContent, + }; +} + +function buildMessages(userTurns: string[]): MessageRecord[] { + const baseTime = new Date(Date.now() - 6 * 60 * 60 * 1000).toISOString(); + return userTurns.flatMap((text, index) => [ + { + id: `u${index + 1}`, + timestamp: baseTime, + type: 'user', + content: [{ text }], + }, + { + id: `a${index + 1}`, + timestamp: baseTime, + type: 'gemini', + content: [{ text: `Acknowledged: ${index + 1}` }], + }, + ]); +} + +async function seedSessions( + config: Config, + sessions: SeedSession[], +): Promise { + const chatsDir = path.join(config.storage.getProjectTempDir(), 'chats'); + await fsp.mkdir(chatsDir, { recursive: true }); + + const projectRoot = config.storage.getProjectRoot(); + + for (const session of sessions) { + const timestamp = new Date( + Date.now() - session.timestampOffsetMinutes * 60 * 1000, + ) + .toISOString() + .slice(0, 16) + .replace(/:/g, '-'); + const filename = `${SESSION_FILE_PREFIX}${timestamp}-${session.sessionId.slice(0, 8)}.json`; + const conversation = { + sessionId: session.sessionId, + projectHash: getProjectHash(projectRoot), + summary: session.summary, + startTime: new Date(Date.now() - 7 * 60 * 60 * 1000).toISOString(), + lastUpdated: new Date(Date.now() - 4 * 60 * 60 * 1000).toISOString(), + messages: buildMessages(session.userTurns), + }; + + await fsp.writeFile( + path.join(chatsDir, filename), + JSON.stringify(conversation, null, 2), + ); + } +} + +/** + * Refreshes hierarchical memory so the workspace `GEMINI.md` is loaded into + * `config.getGeminiMdFilePaths()`. Without this, the extraction agent has no + * targeting context and the GEMINI.md patching capability is inert. + */ +async function loadGeminiMdContext(config: Config): Promise { + await refreshServerHierarchicalMemory(config); +} + +/** + * Returns the comparison key for the workspace GEMINI.md, in the form the + * agent emits in patch diff headers. We use `normalizePath` (which lowercases + * on darwin and Windows and uses forward slashes) so the comparison is robust + * across platforms and against macOS's `/var/folders` -> `/private/var/folders` + * symlink resolution. Patch entries are compared via `normalizePath` too. + */ +async function workspaceGeminiMdPath(config: Config): Promise { + const projectRoot = await fsp.realpath(config.storage.getProjectRoot()); + return normalizePath(path.join(projectRoot, 'GEMINI.md')); +} + +const EXTRACTION_CONFIG_OVERRIDES = { + experimentalAutoMemory: true, + approvalMode: ApprovalMode.YOLO, +}; + +describe('GEMINI.md Extraction', () => { + componentEvalTest('USUALLY_PASSES', { + suiteName: 'gemini-md-extraction', + suiteType: 'component-level', + name: 'patches workspace GEMINI.md when a project rule recurs across sessions', + files: workspaceFiles(BASELINE_GEMINI_MD), + timeout: 180000, + configOverrides: EXTRACTION_CONFIG_OVERRIDES, + setup: async (config) => { + await loadGeminiMdContext(config); + await seedSessions(config, [ + { + sessionId: 'preflight-correction-1', + summary: 'User corrects agent on preflight before pushing', + timestampOffsetMinutes: 420, + userTurns: [ + 'Stop. Before opening any PR in this project you must run npm run preflight first.', + 'I keep telling you this — npm run preflight is mandatory before pushing any branch.', + 'Yes, npm run preflight again. It runs clean, install, build, lint, typecheck, and tests.', + 'Please remember: npm run preflight before every PR in this repo, no exceptions.', + 'The project has its own preflight command for a reason; do not skip it.', + 'Confirmed: npm run preflight finished and the branch is ready to push.', + 'This is a standing project rule, not a one-off — apply it every time.', + 'After preflight passes, then we open the PR.', + 'I should not have to repeat npm run preflight every session.', + 'Acknowledged that npm run preflight is the required pre-PR step.', + ], + }, + { + sessionId: 'preflight-correction-2', + summary: 'User corrects agent again about preflight before PR', + timestampOffsetMinutes: 360, + userTurns: [ + 'You forgot the preflight again — run npm run preflight before this PR too.', + 'I have said this multiple times: npm run preflight is mandatory in this project.', + 'It is the standard pre-PR check that catches lint, type, and test regressions.', + 'Please make this part of your default workflow for this repo.', + 'Yes, run npm run preflight now and then open the PR after it passes.', + 'This is a recurring project workflow, not specific to one branch.', + 'The preflight script exists exactly so we never push broken code.', + 'Confirmed: npm run preflight passed and the PR is ready.', + 'Future sessions in this project should also run npm run preflight before any PR.', + 'Treat npm run preflight as a hard requirement before every PR in this repo.', + ], + }, + ]); + }, + assert: async (config) => { + await startMemoryService(config); + const inboxPatches = await listInboxPatches(config); + + // Expect at least one patch in the inbox. + expect(inboxPatches.length).toBeGreaterThanOrEqual(1); + + // At least one patch should target the workspace GEMINI.md and add a + // preflight rule. We check the diff content directly because the + // filename is agent-chosen. + const workspaceGeminiMd = await workspaceGeminiMdPath(config); + const matchingPatch = inboxPatches.find((patch) => + patch.entries.some( + (entry) => + normalizePath(entry.targetPath) === workspaceGeminiMd && + /preflight/i.test(entry.diffContent), + ), + ); + + expect( + matchingPatch, + `Expected a patch targeting ${workspaceGeminiMd} with a preflight rule. ` + + `Got patches: ${JSON.stringify( + inboxPatches.map((p) => ({ + file: p.fileName, + targets: p.entries.map((e) => e.targetPath), + })), + null, + 2, + )}`, + ).toBeDefined(); + }, + }); + + componentEvalTest('USUALLY_PASSES', { + suiteName: 'gemini-md-extraction', + suiteType: 'component-level', + name: 'does not patch GEMINI.md when the rule is already present', + files: workspaceFiles(GEMINI_MD_WITH_PREFLIGHT_RULE), + timeout: 180000, + configOverrides: EXTRACTION_CONFIG_OVERRIDES, + setup: async (config) => { + await loadGeminiMdContext(config); + // Same recurring preflight signal as the positive case — but the + // workspace GEMINI.md ALREADY documents the rule, so the agent must not + // propose a duplicate patch. + await seedSessions(config, [ + { + sessionId: 'preflight-already-documented-1', + summary: 'User reminds agent about preflight', + timestampOffsetMinutes: 420, + userTurns: [ + 'Run npm run preflight before opening this PR.', + 'You should always run npm run preflight before pushing in this project.', + 'Preflight runs clean, install, build, lint, typecheck, and tests.', + 'Confirmed: npm run preflight finished, ready to push.', + 'This is the standard pre-PR check we use here.', + 'Please apply this every time in this repo.', + 'Yes, npm run preflight again.', + 'It is the documented project workflow.', + 'After it passes we open the PR.', + 'Acknowledged: npm run preflight is the pre-PR step.', + ], + }, + { + sessionId: 'preflight-already-documented-2', + summary: 'User reminds agent about preflight again', + timestampOffsetMinutes: 360, + userTurns: [ + 'Same drill: run npm run preflight before opening this PR too.', + 'It catches lint, type, and test regressions before review.', + 'It is the project standard for every PR in this repo.', + 'Confirmed: npm run preflight passed, PR is ready.', + 'Use it every time in this project.', + 'No exceptions.', + 'Run npm run preflight now.', + 'Then open the PR.', + 'Done.', + 'Standard project workflow.', + ], + }, + ]); + }, + assert: async (config) => { + await startMemoryService(config); + const inboxPatches = await listInboxPatches(config); + + const workspaceGeminiMd = await workspaceGeminiMdPath(config); + + // The agent may legitimately produce no patches at all, OR it may + // produce a skill patch — but it must NOT produce a GEMINI.md patch + // that just restates the preflight rule that's already there. + const duplicatePatch = inboxPatches.find((patch) => + patch.entries.some( + (entry) => + normalizePath(entry.targetPath) === workspaceGeminiMd && + /preflight/i.test(entry.diffContent), + ), + ); + + expect( + duplicatePatch, + `Expected NO GEMINI.md preflight patch (rule already present). ` + + `Got: ${JSON.stringify( + inboxPatches.map((p) => ({ + file: p.fileName, + targets: p.entries.map((e) => e.targetPath), + })), + null, + 2, + )}`, + ).toBeUndefined(); + }, + }); + + componentEvalTest('USUALLY_PASSES', { + suiteName: 'gemini-md-extraction', + suiteType: 'component-level', + name: 'does not propose GEMINI.md patches from a single one-off correction', + files: workspaceFiles(BASELINE_GEMINI_MD), + timeout: 180000, + configOverrides: EXTRACTION_CONFIG_OVERRIDES, + setup: async (config) => { + await loadGeminiMdContext(config); + // Two sessions, but they describe a one-off branch-specific incident + // tied to a specific ticket and date — not a durable project rule. The + // agent should NOT promote this to a GEMINI.md patch. + await seedSessions(config, [ + { + sessionId: 'one-off-incident-1', + summary: 'Debug staging cookie issue for INC-7733', + timestampOffsetMinutes: 420, + userTurns: [ + 'For incident INC-7733 only, clear the staging cookie before retrying the login.', + 'This workaround is just for the 2026-04-12 staging rollout.', + 'Do not generalize this — it is incident-specific to INC-7733.', + 'The branch hotfix/inc-7733 needs this exact one-off cookie clear.', + 'After the cookie clear, the login worked again for this incident.', + 'This is not a recurring project workflow.', + 'Do not turn this into a documented rule.', + 'It only applies to this one staging rollout.', + 'Confirmed: INC-7733 is resolved with the one-off cookie clear.', + 'Close out INC-7733; we do not need to remember this.', + ], + }, + { + sessionId: 'one-off-incident-2', + summary: 'Follow-up cleanup for INC-7733', + timestampOffsetMinutes: 360, + userTurns: [ + 'INC-7733 follow-up: just remove the temporary cookie-clear branch.', + 'This is incident cleanup, not a workflow.', + 'No documentation changes needed for INC-7733.', + 'It was a one-time staging issue tied to that specific rollout.', + 'Confirmed: hotfix branch deleted.', + 'Do not record this as a project convention.', + 'It is not a reusable procedure.', + 'INC-7733 is fully closed.', + 'No durable rule to add to GEMINI.md.', + 'This is purely incident response.', + ], + }, + ]); + }, + assert: async (config) => { + await startMemoryService(config); + const inboxPatches = await listInboxPatches(config); + + const workspaceGeminiMd = await workspaceGeminiMdPath(config); + + const incidentPatch = inboxPatches.find((patch) => + patch.entries.some( + (entry) => + normalizePath(entry.targetPath) === workspaceGeminiMd && + /(INC-7733|cookie|staging)/i.test(entry.diffContent), + ), + ); + + expect( + incidentPatch, + `Expected NO GEMINI.md patch for one-off incident INC-7733. ` + + `Got: ${JSON.stringify( + inboxPatches.map((p) => ({ + file: p.fileName, + targets: p.entries.map((e) => e.targetPath), + })), + null, + 2, + )}`, + ).toBeUndefined(); + }, + }); +}); diff --git a/packages/cli/src/ui/components/SkillInboxDialog.test.tsx b/packages/cli/src/ui/components/SkillInboxDialog.test.tsx index 7121960021..eed60a923f 100644 --- a/packages/cli/src/ui/components/SkillInboxDialog.test.tsx +++ b/packages/cli/src/ui/components/SkillInboxDialog.test.tsx @@ -14,6 +14,7 @@ import { moveInboxSkill, applyInboxPatch, dismissInboxPatch, + isGeminiMdPatchTarget, isProjectSkillPatchTarget, } from '@google/gemini-cli-core'; import { waitFor } from '../../test-utils/async.js'; @@ -32,6 +33,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { moveInboxSkill: vi.fn(), applyInboxPatch: vi.fn(), dismissInboxPatch: vi.fn(), + isGeminiMdPatchTarget: vi.fn(), isProjectSkillPatchTarget: vi.fn(), getErrorMessage: vi.fn((error: unknown) => error instanceof Error ? error.message : String(error), @@ -45,6 +47,7 @@ const mockMoveInboxSkill = vi.mocked(moveInboxSkill); const mockDismissInboxSkill = vi.mocked(dismissInboxSkill); const mockApplyInboxPatch = vi.mocked(applyInboxPatch); const mockDismissInboxPatch = vi.mocked(dismissInboxPatch); +const mockIsGeminiMdPatchTarget = vi.mocked(isGeminiMdPatchTarget); const mockIsProjectSkillPatchTarget = vi.mocked(isProjectSkillPatchTarget); const inboxSkill: InboxSkill = { @@ -170,6 +173,7 @@ describe('SkillInboxDialog', () => { : false; }, ); + mockIsGeminiMdPatchTarget.mockResolvedValue(false); }); afterEach(() => { diff --git a/packages/cli/src/ui/components/SkillInboxDialog.tsx b/packages/cli/src/ui/components/SkillInboxDialog.tsx index 509022c4ff..583d3c5246 100644 --- a/packages/cli/src/ui/components/SkillInboxDialog.tsx +++ b/packages/cli/src/ui/components/SkillInboxDialog.tsx @@ -28,6 +28,7 @@ import { dismissInboxSkill, applyInboxPatch, dismissInboxPatch, + isGeminiMdPatchTarget, isProjectSkillPatchTarget, } from '@google/gemini-cli-core'; @@ -35,7 +36,12 @@ type Phase = 'list' | 'skill-preview' | 'skill-action' | 'patch-preview'; type InboxItem = | { type: 'skill'; skill: InboxSkill } - | { type: 'patch'; patch: InboxPatch; targetsProjectSkills: boolean } + | { + type: 'patch'; + patch: InboxPatch; + targetsProjectSkills: boolean; + targetsGeminiMd: boolean; + } | { type: 'header'; label: string }; interface DestinationChoice { @@ -117,6 +123,18 @@ async function patchTargetsProjectSkills( return entryTargetsProjectSkills.some(Boolean); } +async function patchTargetsGeminiMd( + patch: InboxPatch, + config: Config, +): Promise { + const entryTargetsGeminiMd = await Promise.all( + patch.entries.map((entry) => + isGeminiMdPatchTarget(entry.targetPath, config), + ), + ); + return entryTargetsGeminiMd.some(Boolean); +} + /** * Derives a bracketed origin tag from a skill file path, * matching the existing [Built-in] convention in SkillsList. @@ -208,20 +226,16 @@ export const SkillInboxDialog: React.FC = ({ ]); const patchItems = await Promise.all( patches.map(async (patch): Promise => { - let targetsProjectSkills = false; - try { - targetsProjectSkills = await patchTargetsProjectSkills( - patch, - config, - ); - } catch { - targetsProjectSkills = false; - } + const [targetsProjectSkills, targetsGeminiMd] = await Promise.all([ + patchTargetsProjectSkills(patch, config).catch(() => false), + patchTargetsGeminiMd(patch, config).catch(() => false), + ]); return { type: 'patch', patch, targetsProjectSkills, + targetsGeminiMd, }; }), ); @@ -257,13 +271,22 @@ export const SkillInboxDialog: React.FC = ({ const listItems: Array> = useMemo(() => { const skills = items.filter((i) => i.type === 'skill'); - const patches = items.filter((i) => i.type === 'patch'); + const skillPatches = items.filter( + (i) => i.type === 'patch' && !i.targetsGeminiMd, + ); + const geminiMdPatches = items.filter( + (i) => i.type === 'patch' && i.targetsGeminiMd, + ); const result: Array> = []; - // Only show section headers when both types are present - const showHeaders = skills.length > 0 && patches.length > 0; + // Show section headers whenever more than one category is present so + // GEMINI.md patches stay visually distinct from skill artifacts. + const categoryCount = [skills, skillPatches, geminiMdPatches].filter( + (group) => group.length > 0, + ).length; + const showHeaders = categoryCount > 1; - if (showHeaders) { + if (showHeaders && skills.length > 0) { const header: InboxItem = { type: 'header', label: 'New Skills' }; result.push({ key: 'header:new-skills', @@ -276,7 +299,7 @@ export const SkillInboxDialog: React.FC = ({ result.push({ key: getItemKey(item), value: item }); } - if (showHeaders) { + if (showHeaders && skillPatches.length > 0) { const header: InboxItem = { type: 'header', label: 'Skill Updates' }; result.push({ key: 'header:skill-updates', @@ -285,7 +308,23 @@ export const SkillInboxDialog: React.FC = ({ hideNumber: true, }); } - for (const item of patches) { + for (const item of skillPatches) { + result.push({ key: getItemKey(item), value: item }); + } + + if (showHeaders && geminiMdPatches.length > 0) { + const header: InboxItem = { + type: 'header', + label: 'Project Memory Updates', + }; + result.push({ + key: 'header:project-memory-updates', + value: header, + disabled: true, + hideNumber: true, + }); + } + for (const item of geminiMdPatches) { result.push({ key: getItemKey(item), value: item }); } diff --git a/packages/core/src/agents/skill-extraction-agent.test.ts b/packages/core/src/agents/skill-extraction-agent.test.ts index a67c7db270..3d4b200a7f 100644 --- a/packages/core/src/agents/skill-extraction-agent.test.ts +++ b/packages/core/src/agents/skill-extraction-agent.test.ts @@ -87,4 +87,52 @@ describe('SkillExtractionAgent', () => { 'If recurrence or future reuse is unclear, create no skill and explain why.', ); }); + + it('should always include the GEMINI.md patching section with strict gating', () => { + const prompt = agent.promptConfig.systemPrompt; + + expect(prompt).toContain('UPDATING GEMINI.md (PATCHES)'); + expect(prompt).toContain('NEVER create a new GEMINI.md file'); + expect(prompt).toContain( + 'NEVER duplicate, restate, elaborate on, strengthen, rephrase, or expand', + ); + expect(prompt).toContain( + 'NEVER patch a file that is not in the supplied targeting context', + ); + expect(prompt).toContain('pick the most-specific scope that fits the rule'); + }); + + it('should describe a combined volume cap across skills and patches', () => { + const prompt = agent.promptConfig.systemPrompt; + + expect(prompt).toContain( + 'Aim for 0-2 artifacts per run TOTAL — counting new skills, skill patches,', + ); + expect(prompt).toContain( + 'Prefer fewer, higher-quality artifacts. 0-2 TOTAL per run', + ); + }); + + it('should embed the GEMINI.md targeting context in the query when provided', () => { + const geminiMdContext = + '### Workspace root\n- /repo/GEMINI.md\n\n### Workspace subdirectory\n- /repo/packages/cli/GEMINI.md'; + const agentWithContext = SkillExtractionAgent( + skillsDir, + sessionIndex, + existingSkillsSummary, + geminiMdContext, + ); + + const query = agentWithContext.promptConfig.query ?? ''; + expect(query).toContain('# GEMINI.md Targeting Context'); + expect(query).toContain('/repo/GEMINI.md'); + expect(query).toContain('/repo/packages/cli/GEMINI.md'); + expect(query).toContain('Pick the most-specific scope that'); + }); + + it('should omit the GEMINI.md targeting header when no context is provided', () => { + const query = agent.promptConfig.query ?? ''; + + expect(query).not.toContain('# GEMINI.md Targeting Context'); + }); }); diff --git a/packages/core/src/agents/skill-extraction-agent.ts b/packages/core/src/agents/skill-extraction-agent.ts index 4aa18af388..4cee4b9cfe 100644 --- a/packages/core/src/agents/skill-extraction-agent.ts +++ b/packages/core/src/agents/skill-extraction-agent.ts @@ -119,7 +119,9 @@ function buildSystemPrompt(skillsDir: string): string { '- A code review or investigation with no durable takeaway', '- Output-style preferences that do not materially change procedure', '', - 'Aim for 0-2 skills per run. Quality over quantity.', + 'Aim for 0-2 artifacts per run TOTAL — counting new skills, skill patches,', + 'and GEMINI.md patches together. Quality over quantity. 0 is the most', + 'common outcome.', '', '============================================================', 'HOW TO READ SESSION TRANSCRIPTS', @@ -191,6 +193,90 @@ function buildSystemPrompt(skillsDir: string): string { 'The same quality bar applies: only propose updates backed by evidence from sessions.', '', '============================================================', + 'UPDATING GEMINI.md (PATCHES)', + '============================================================', + '', + 'GEMINI.md (and equivalently named) files are project-memory files loaded', + 'into every agent session in this workspace. They steer FUTURE agents on', + 'project conventions, tech stack, build/test commands, and recurring', + 'workflows. You can propose surgical edits to them using the same .patch', + 'mechanism as for skill files.', + '', + 'The list of files you are allowed to patch is provided below as', + '"GEMINI.md Targeting Context". If no such section appears, no GEMINI.md', + 'files are loaded and you MUST NOT propose any GEMINI.md patches.', + '', + 'Strict gating — propose a GEMINI.md patch ONLY when ALL of these hold:', + '', + '1. **The user repeatedly corrected or restated the rule** across sessions', + ' (or once but with strong durability signal — e.g. it is a stable', + ' project convention). One-off corrections do NOT qualify.', + '2. **The rule is project- or user-specific**, not generic engineering', + ' advice an agent already knows.', + '3. **The rule is concrete and actionable** ("run `npm run preflight`', + ' before opening a PR", "prefer Vitest over Jest", "lint with', + ' `npm run lint -- --fix`"). Vague taste statements do NOT qualify.', + '4. **The rule is NOT already present in the target file.** You MUST', + ' read_file the target before proposing a patch and confirm the rule', + ' is genuinely missing or materially weaker than what you would add.', + '5. **There is a clear future trigger** — the rule will keep mattering', + ' for agents working in this scope.', + '', + 'Targeting — pick the most-specific scope that fits the rule:', + '', + '- **Workspace subdirectory**: rule only applies inside that directory', + ' (e.g. UI conventions for `packages/cli`).', + '- **Workspace root**: rule applies to the whole project.', + '- **Global (~/.gemini/GEMINI.md)**: user-wide preference that should', + ' apply across every project the user works on.', + '- **Workspace ancestor**: avoid unless the rule clearly belongs to that', + ' broader parent project — patching an ancestor from a sub-project is', + ' almost always wrong.', + '', + 'Hard prohibitions:', + '', + '- NEVER create a new GEMINI.md file. /dev/null patches against any', + ' GEMINI.md target will be rejected.', + '- NEVER patch a file that is not in the supplied targeting context.', + ' Extension-supplied context files are intentionally excluded — they are', + ' shipped artifacts and would be overwritten on extension upgrades.', + '- NEVER duplicate, restate, elaborate on, strengthen, rephrase, or expand', + ' a rule that already exists in the target. ONLY propose patches that add', + ' a genuinely NEW rule the file does not yet cover. If the rule is', + ' present in any form — even as a one-line bullet, even without emphasis,', + ' even without an explanation — DO NOT patch.', + '- NEVER include secrets, tokens, or environment-specific identifiers.', + '- NEVER restate generic engineering advice or transient incident details.', + '', + 'Examples of what counts as duplication (DO NOT patch):', + '', + '- Target already says: "- Run `npm run preflight` before opening a PR."', + ' You wanted to add: "**MANDATORY**: Always run `npm run preflight` before', + ' pushing or opening a Pull Request. This runs clean, install, build,', + ' lint, typecheck, and tests."', + ' -> DUPLICATION. The rule is already present; you are just adding', + ' emphasis and elaboration. DO NOT patch.', + '- Target already says: "- Use Vitest for tests."', + ' You wanted to add: "- Prefer Vitest over Jest for all testing."', + ' -> DUPLICATION. Rephrasing the same rule. DO NOT patch.', + '', + 'Examples of what counts as a NEW rule (patch is allowed if other gates pass):', + '', + '- Target has no mention of preflight at all, sessions show users', + ' repeatedly correcting the agent to run `npm run preflight` before PRs.', + ' -> NEW rule. Patch is allowed.', + '- Target documents the build command but not the lint command, sessions', + ' show users repeatedly asking for `npm run lint -- --fix` to be run.', + ' -> NEW rule (lint command not yet documented). Patch is allowed.', + '', + 'Format: identical to skill patches — write a .patch file in your skills', + 'directory using strict unified diff with absolute paths in BOTH the', + '--- and +++ headers, 3 lines of context per hunk, accurate @@ counts.', + 'A single .patch file may contain hunks for multiple GEMINI.md files in', + 'the same scope, but do not mix GEMINI.md hunks and skill hunks in one', + 'file — keep them separate so the user can review each category cleanly.', + '', + '============================================================', 'QUALITY RULES (STRICT)', '============================================================', '', @@ -202,7 +288,8 @@ function buildSystemPrompt(skillsDir: string): string { ' bug/ticket, do NOT create it.', '- Do not create skills for generic advice, output-style preferences, or ephemeral', ' choices that any competent agent would already know or adapt to on the fly.', - '- Prefer fewer, higher-quality skills. 0-2 skills per run is typical. 3+ is unusual.', + '- Prefer fewer, higher-quality artifacts. 0-2 TOTAL per run (counting new', + ' skills, skill patches, and GEMINI.md patches) is typical. 3+ is unusual.', '', '============================================================', 'WORKFLOW', @@ -253,6 +340,7 @@ export const SkillExtractionAgent = ( skillsDir: string, sessionIndex: string, existingSkillsSummary: string, + geminiMdContext: string = '', ): LocalAgentDefinition => ({ kind: 'local', name: 'confucius', @@ -298,6 +386,21 @@ export const SkillExtractionAgent = ( contextParts.push(`# Existing Skills\n\n${existingSkillsSummary}`); } + if (geminiMdContext) { + contextParts.push( + [ + '# GEMINI.md Targeting Context', + '', + 'These are the GEMINI.md (or equivalently named) project-memory files', + 'currently loaded in this workspace. They are the ONLY files you may', + 'propose GEMINI.md patches against. Pick the most-specific scope that', + 'fits the rule. Read the file first before proposing any patch.', + '', + geminiMdContext, + ].join('\n'), + ); + } + contextParts.push( [ '# Session Index', diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 62a0b127bd..98b424e3a0 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -144,7 +144,11 @@ export { startMemoryService, validatePatches, } from './services/memoryService.js'; -export { isProjectSkillPatchTarget } from './services/memoryPatchUtils.js'; +export { + getAllowedGeminiMdPatchTargets, + isGeminiMdPatchTarget, + isProjectSkillPatchTarget, +} from './services/memoryPatchUtils.js'; export * from './context/memoryContextManager.js'; export * from './services/trackerService.js'; export * from './services/trackerTypes.js'; diff --git a/packages/core/src/services/memoryPatchUtils.ts b/packages/core/src/services/memoryPatchUtils.ts index 44b87353fe..2381ba6702 100644 --- a/packages/core/src/services/memoryPatchUtils.ts +++ b/packages/core/src/services/memoryPatchUtils.ts @@ -12,7 +12,8 @@ import type { Config } from '../config/config.js'; import { Storage } from '../config/storage.js'; import { isNodeError } from '../utils/errors.js'; import { debugLogger } from '../utils/debugLogger.js'; -import { isSubpath } from '../utils/paths.js'; +import { isSubpath, normalizePath } from '../utils/paths.js'; +import { getExtensionMemoryPaths } from '../utils/memoryDiscovery.js'; export function getAllowedSkillPatchRoots(config: Config): string[] { return Array.from( @@ -68,6 +69,55 @@ async function getCanonicalAllowedSkillPatchRoots( ); } +/** + * Returns the absolute paths of the GEMINI.md (and equivalently named) memory + * files that are eligible to be patched by the skill-extraction agent. + * + * The set is the user/project memory files reported by the loaded + * configuration, minus any files that originate from installed extensions. + * Extension-supplied context files belong to the extension package and would + * be overwritten on extension upgrades, so they are intentionally excluded. + */ +export function getAllowedGeminiMdPatchTargets(config: Config): string[] { + const allPaths = config.getGeminiMdFilePaths?.() ?? []; + if (allPaths.length === 0) { + return []; + } + // `getExtensionMemoryPaths` returns paths normalized via `normalizePath`, + // which lowercases on darwin and Windows. Compare on the normalized form so + // case differences between the two sources do not let an extension path + // slip through the filter, but return the absolute (non-lowercased) path so + // downstream `realpath`-based resolution still works on case-sensitive + // filesystems. + const extensionPaths = new Set( + getExtensionMemoryPaths(config.getExtensionLoader()), + ); + return Array.from( + new Set( + allPaths + .filter((p) => !extensionPaths.has(normalizePath(p))) + .map((p) => path.resolve(p)), + ), + ); +} + +async function getCanonicalAllowedGeminiMdPatchTargets( + config: Config, +): Promise { + const canonicalTargets = await Promise.all( + getAllowedGeminiMdPatchTargets(config).map((target) => + resolvePathWithExistingAncestors(target), + ), + ); + return Array.from( + new Set( + canonicalTargets.filter( + (target): target is string => typeof target === 'string', + ), + ), + ); +} + export async function resolveAllowedSkillPatchTarget( targetPath: string, config: Config, @@ -83,6 +133,12 @@ export async function resolveAllowedSkillPatchTarget( return canonicalTargetPath; } + const allowedGeminiMdTargets = + await getCanonicalAllowedGeminiMdPatchTargets(config); + if (allowedGeminiMdTargets.includes(canonicalTargetPath)) { + return canonicalTargetPath; + } + return undefined; } @@ -212,6 +268,26 @@ export async function isProjectSkillPatchTarget( return isSubpath(canonicalProjectSkillsDir, canonicalTargetPath); } +/** + * Returns true when `targetPath` resolves to one of the loaded GEMINI.md + * memory files that the skill-extraction agent is allowed to patch. + * Used by the inbox UI to distinguish project-memory patches from skill patches. + */ +export async function isGeminiMdPatchTarget( + targetPath: string, + config: Config, +): Promise { + const canonicalTargetPath = + await resolvePathWithExistingAncestors(targetPath); + if (!canonicalTargetPath) { + return false; + } + + const allowedGeminiMdTargets = + await getCanonicalAllowedGeminiMdPatchTargets(config); + return allowedGeminiMdTargets.includes(canonicalTargetPath); +} + export function hasParsedPatchHunks(parsedPatches: StructuredPatch[]): boolean { return ( parsedPatches.length > 0 && diff --git a/packages/core/src/services/memoryService.test.ts b/packages/core/src/services/memoryService.test.ts index 69d7183ece..ac949459ee 100644 --- a/packages/core/src/services/memoryService.test.ts +++ b/packages/core/src/services/memoryService.test.ts @@ -518,6 +518,10 @@ describe('memoryService', () => { getMessageBus: vi.fn(), getGeminiClient: vi.fn(), getSkillManager: vi.fn().mockReturnValue({ getSkills: () => [] }), + getGeminiMdFilePaths: vi.fn().mockReturnValue([]), + getExtensionLoader: vi + .fn() + .mockReturnValue({ getExtensions: () => [] }), modelConfigService: { registerRuntimeModelConfig: vi.fn(), }, @@ -902,6 +906,8 @@ describe('memoryService', () => { storage: { getProjectSkillsDir: () => projectSkillsDir, }, + getGeminiMdFilePaths: () => [] as string[], + getExtensionLoader: () => ({ getExtensions: () => [] }), } as unknown as Config; }); @@ -1162,6 +1168,117 @@ describe('memoryService', () => { await expect(fs.access(patchPath)).rejects.toThrow(); expect(await fs.readFile(outsideFile, 'utf-8')).not.toContain('line2.5'); }); + + it('keeps patches that target a loaded GEMINI.md file', async () => { + const { validatePatches } = await import('./memoryService.js'); + + await fs.mkdir(skillsDir, { recursive: true }); + const repoDir = path.join(tmpDir, 'repo-with-gemini-md'); + await fs.mkdir(repoDir, { recursive: true }); + const geminiMdFile = path.join(repoDir, 'GEMINI.md'); + await fs.writeFile(geminiMdFile, 'rule one\nrule two\nrule three\n'); + + const allowingConfig = Object.assign({}, validateConfig, { + getGeminiMdFilePaths: () => [geminiMdFile], + }) as unknown as Config; + + const patchPath = path.join(skillsDir, 'gemini-md-update.patch'); + await fs.writeFile( + patchPath, + [ + `--- ${geminiMdFile}`, + `+++ ${geminiMdFile}`, + '@@ -1,3 +1,4 @@', + ' rule one', + ' rule two', + '+rule two-and-a-half', + ' rule three', + '', + ].join('\n'), + ); + + const result = await validatePatches(skillsDir, allowingConfig); + + expect(result).toEqual(['gemini-md-update.patch']); + await expect(fs.access(patchPath)).resolves.toBeUndefined(); + }); + + it('rejects /dev/null patches that try to create a new GEMINI.md', async () => { + const { validatePatches } = await import('./memoryService.js'); + + await fs.mkdir(skillsDir, { recursive: true }); + const newGeminiMdPath = path.join(tmpDir, 'NEW_GEMINI.md'); + // The path is not in the allow-list because it isn't a loaded file. + const allowingConfig = Object.assign({}, validateConfig, { + getGeminiMdFilePaths: () => [] as string[], + }) as unknown as Config; + + const patchPath = path.join(skillsDir, 'create-gemini-md.patch'); + await fs.writeFile( + patchPath, + [ + '--- /dev/null', + `+++ ${newGeminiMdPath}`, + '@@ -0,0 +1 @@', + '+brand new rule', + '', + ].join('\n'), + ); + + const result = await validatePatches(skillsDir, allowingConfig); + + expect(result).toEqual([]); + await expect(fs.access(patchPath)).rejects.toThrow(); + await expect(fs.access(newGeminiMdPath)).rejects.toThrow(); + }); + + it('rejects patches that target an extension-supplied GEMINI.md', async () => { + const { validatePatches } = await import('./memoryService.js'); + + await fs.mkdir(skillsDir, { recursive: true }); + const extensionDir = path.join(tmpDir, 'extension'); + await fs.mkdir(extensionDir, { recursive: true }); + const extensionContextFile = path.join(extensionDir, 'GEMINI.md'); + await fs.writeFile( + extensionContextFile, + 'extension rule one\nextension rule two\n', + ); + + // Note: the file appears in getGeminiMdFilePaths (it's loaded into + // memory at runtime), but it's also reported by the extension loader, + // so the allow-list must subtract it. + const blockingConfig = Object.assign({}, validateConfig, { + getGeminiMdFilePaths: () => [extensionContextFile], + getExtensionLoader: () => ({ + getExtensions: () => [ + { isActive: true, contextFiles: [extensionContextFile] }, + ], + }), + }) as unknown as Config; + + const patchPath = path.join(skillsDir, 'patch-extension.patch'); + await fs.writeFile( + patchPath, + [ + `--- ${extensionContextFile}`, + `+++ ${extensionContextFile}`, + '@@ -1,2 +1,3 @@', + ' extension rule one', + ' extension rule two', + '+sneaky added rule', + '', + ].join('\n'), + ); + + const result = await validatePatches(skillsDir, blockingConfig); + + expect(result).toEqual([]); + await expect(fs.access(patchPath)).rejects.toThrow(); + // Confirm the extension file was NOT modified. + expect(await fs.readFile(extensionContextFile, 'utf-8')).toBe( + 'extension rule one\nextension rule two\n', + ); + }); }); describe('startMemoryService feedback for patch-only runs', () => { @@ -1231,6 +1348,10 @@ describe('memoryService', () => { getMessageBus: vi.fn(), getGeminiClient: vi.fn(), getSkillManager: vi.fn().mockReturnValue({ getSkills: () => [] }), + getGeminiMdFilePaths: vi.fn().mockReturnValue([]), + getExtensionLoader: vi + .fn() + .mockReturnValue({ getExtensions: () => [] }), modelConfigService: { registerRuntimeModelConfig: vi.fn(), }, @@ -1312,6 +1433,10 @@ describe('memoryService', () => { getMessageBus: vi.fn(), getGeminiClient: vi.fn(), getSkillManager: vi.fn().mockReturnValue({ getSkills: () => [] }), + getGeminiMdFilePaths: vi.fn().mockReturnValue([]), + getExtensionLoader: vi + .fn() + .mockReturnValue({ getExtensions: () => [] }), modelConfigService: { registerRuntimeModelConfig: vi.fn(), }, diff --git a/packages/core/src/services/memoryService.ts b/packages/core/src/services/memoryService.ts index 29b2b18701..f2210fa079 100644 --- a/packages/core/src/services/memoryService.ts +++ b/packages/core/src/services/memoryService.ts @@ -31,7 +31,9 @@ import { Storage } from '../config/storage.js'; import type { AgentLoopContext } from '../config/agent-loop-context.js'; import { applyParsedSkillPatches, + getAllowedGeminiMdPatchTargets, hasParsedPatchHunks, + isGeminiMdPatchTarget, } from './memoryPatchUtils.js'; const LOCK_FILENAME = '.extraction.lock'; @@ -468,6 +470,106 @@ async function buildExistingSkillsSummary( return sections.join('\n\n'); } +/** + * Builds a markdown summary of the GEMINI.md (and equivalently named) memory + * files the extraction agent is allowed to patch, grouped by scope so the + * agent can pick the most-specific target. Returns an empty string when no + * eligible files are loaded. + * + * Scopes: + * - **Global**: files under the user's `~/.gemini/` directory. + * - **Workspace root**: files at a workspace root directory. + * - **Workspace subdir**: files inside a workspace root. + * - **Workspace ancestor**: files above a workspace root (typically discovered + * via the upward traversal stopping at a `.git` boundary). + */ +function buildGeminiMdContext(config: Config): string { + const allowedPaths = getAllowedGeminiMdPatchTargets(config); + if (allowedPaths.length === 0) { + return ''; + } + + const userGeminiDir = path.resolve(Storage.getGlobalGeminiDir()); + const workspaceRoots = (() => { + try { + return config + .getWorkspaceContext() + .getDirectories() + .map((d) => path.resolve(d)); + } catch { + return [] as string[]; + } + })(); + + const buckets = { + global: [] as string[], + workspaceRoot: [] as string[], + workspaceSubdir: [] as string[], + workspaceAncestor: [] as string[], + }; + + for (const target of allowedPaths) { + const targetDir = path.dirname(target); + if ( + targetDir === userGeminiDir || + targetDir.startsWith(userGeminiDir + path.sep) + ) { + buckets.global.push(target); + continue; + } + + const matchedRoot = workspaceRoots.find((root) => targetDir === root); + if (matchedRoot) { + buckets.workspaceRoot.push(target); + continue; + } + + const containingRoot = workspaceRoots.find((root) => + targetDir.startsWith(root + path.sep), + ); + if (containingRoot) { + buckets.workspaceSubdir.push(target); + continue; + } + + const ancestorRoot = workspaceRoots.find( + (root) => root === targetDir || root.startsWith(targetDir + path.sep), + ); + if (ancestorRoot) { + buckets.workspaceAncestor.push(target); + continue; + } + + // Path does not fit any known scope (e.g. an extra include-directory + // outside the workspace). Treat as ancestor-equivalent for prompt clarity. + buckets.workspaceAncestor.push(target); + } + + const sections: string[] = []; + if (buckets.global.length > 0) { + sections.push( + `### Global (~/.gemini)\nUser-wide preferences. Patch only when the rule applies to every project.\n${buckets.global.map((p) => `- ${p}`).join('\n')}`, + ); + } + if (buckets.workspaceRoot.length > 0) { + sections.push( + `### Workspace root\nProject-wide conventions. The most common patch target.\n${buckets.workspaceRoot.map((p) => `- ${p}`).join('\n')}`, + ); + } + if (buckets.workspaceSubdir.length > 0) { + sections.push( + `### Workspace subdirectory\nScoped rules — prefer these when the convention only applies inside this directory.\n${buckets.workspaceSubdir.map((p) => `- ${p}`).join('\n')}`, + ); + } + if (buckets.workspaceAncestor.length > 0) { + sections.push( + `### Workspace ancestor\nLikely owned by a parent project. Patch only when the rule clearly belongs to that broader scope.\n${buckets.workspaceAncestor.map((p) => `- ${p}`).join('\n')}`, + ); + } + + return sections.join('\n\n'); +} + /** * Builds an AgentLoopContext from a Config for background agent execution. */ @@ -684,11 +786,21 @@ export async function startMemoryService(config: Config): Promise { ); } + // Build the GEMINI.md targeting context (loaded user/project memory files, + // grouped by scope). Extension-supplied context files are excluded so the + // agent cannot propose patches that would be overwritten on extension + // upgrades. + const geminiMdContext = buildGeminiMdContext(config); + if (geminiMdContext) { + debugLogger.log(`[MemoryService] GEMINI.md context:\n${geminiMdContext}`); + } + // Build agent definition and context const agentDefinition = SkillExtractionAgent( skillsDir, sessionIndex, existingSkillsSummary, + geminiMdContext, ); const context = buildAgentLoopContext(config); @@ -728,6 +840,8 @@ export async function startMemoryService(config: Config): Promise { // Validate any .patch files the agent generated const validPatches = await validatePatches(skillsDir, config); const patchesCreatedThisRun: string[] = []; + const skillPatchesThisRun: string[] = []; + const geminiMdPatchesThisRun: string[] = []; for (const patchFile of validPatches) { const patchPath = path.join(skillsDir, patchFile); let currentContent: string; @@ -736,13 +850,39 @@ export async function startMemoryService(config: Config): Promise { } catch { continue; } - if (patchContentsBefore.get(patchFile) !== currentContent) { - patchesCreatedThisRun.push(patchFile); + if (patchContentsBefore.get(patchFile) === currentContent) { + continue; + } + patchesCreatedThisRun.push(patchFile); + + // Classify by inspecting the parsed targets — a single patch file may + // contain hunks for multiple files, so we check all of them. + let targetsGeminiMd = false; + try { + const parsed = Diff.parsePatch(currentContent); + for (const entry of parsed) { + const targetPath = entry.newFileName ?? entry.oldFileName ?? ''; + if (!targetPath || targetPath === '/dev/null') continue; + if (await isGeminiMdPatchTarget(targetPath, config)) { + targetsGeminiMd = true; + break; + } + } + } catch { + // Parsing already passed validatePatches, so this is unexpected, but + // fall back to treating the patch as a skill patch if classification + // fails. + } + + if (targetsGeminiMd) { + geminiMdPatchesThisRun.push(patchFile); + } else { + skillPatchesThisRun.push(patchFile); } } if (validPatches.length > 0) { debugLogger.log( - `[MemoryService] ${validPatches.length} valid patch(es) currently in inbox; ${patchesCreatedThisRun.length} created or updated this run`, + `[MemoryService] ${validPatches.length} valid patch(es) currently in inbox; ${patchesCreatedThisRun.length} created or updated this run (${skillPatchesThisRun.length} skill, ${geminiMdPatchesThisRun.length} GEMINI.md)`, ); } @@ -764,9 +904,14 @@ export async function startMemoryService(config: Config): Promise { `created ${skillsCreated.length} skill(s): ${skillsCreated.join(', ')}`, ); } - if (patchesCreatedThisRun.length > 0) { + if (skillPatchesThisRun.length > 0) { completionParts.push( - `prepared ${patchesCreatedThisRun.length} patch(es): ${patchesCreatedThisRun.join(', ')}`, + `prepared ${skillPatchesThisRun.length} skill patch(es): ${skillPatchesThisRun.join(', ')}`, + ); + } + if (geminiMdPatchesThisRun.length > 0) { + completionParts.push( + `prepared ${geminiMdPatchesThisRun.length} GEMINI.md patch(es): ${geminiMdPatchesThisRun.join(', ')}`, ); } debugLogger.log( @@ -778,9 +923,14 @@ export async function startMemoryService(config: Config): Promise { `${skillsCreated.length} new skill${skillsCreated.length > 1 ? 's' : ''} extracted from past sessions: ${skillsCreated.join(', ')}`, ); } - if (patchesCreatedThisRun.length > 0) { + if (skillPatchesThisRun.length > 0) { feedbackParts.push( - `${patchesCreatedThisRun.length} skill update${patchesCreatedThisRun.length > 1 ? 's' : ''} extracted from past sessions`, + `${skillPatchesThisRun.length} skill update${skillPatchesThisRun.length > 1 ? 's' : ''} extracted from past sessions`, + ); + } + if (geminiMdPatchesThisRun.length > 0) { + feedbackParts.push( + `${geminiMdPatchesThisRun.length} project memory update${geminiMdPatchesThisRun.length > 1 ? 's' : ''} extracted from past sessions`, ); } coreEvents.emitFeedback(