feat(core): let skill extractor propose GEMINI.md patches

Extends the background skill-extraction agent (`confucius`) so it can
propose patches against loaded GEMINI.md memory files in addition to
SKILL.md updates. Patches are gated through the existing inbox flow and
remain user-reviewed before they touch any file on disk.

- memoryPatchUtils: add `getAllowedGeminiMdPatchTargets` and
  `isGeminiMdPatchTarget`. The allow-list is the union of global +
  project memory files reported by `config.getGeminiMdFilePaths()`
  minus extension-supplied context files (which would be overwritten
  on extension upgrades). `resolveAllowedSkillPatchTarget` now accepts
  either a subpath under a skills root OR an exact match against the
  GEMINI.md allow-list. `/dev/null` patches against GEMINI.md are
  rejected naturally because new files are never in the allow-list.
- memoryService: build a scope-tagged GEMINI.md targeting context
  (Global / Workspace root / Workspace subdir / Workspace ancestor)
  and pass it to the agent. Classify each new patch as skill vs
  GEMINI.md and surface "project memory update" feedback separately.
- SkillExtractionAgent: new `geminiMdContext` parameter, new
  "UPDATING GEMINI.md (PATCHES)" prompt section with strict gating,
  and a refreshed combined cap of 0-2 artifacts per run total.
- SkillInboxDialog: split patch list into "Skill Updates" and
  "Project Memory Updates" sections.

Tests: 3 new validatePatches cases (positive, /dev/null rejection,
extension-file rejection) and 5 new prompt-shape unit tests, plus a
new component-level eval suite (evals/gemini_md_extraction.eval.ts)
covering the positive case, the duplication gate, and the recurrence
gate.
This commit is contained in:
Sandy Tao 2026-04-20 15:16:05 -07:00
parent 4b2091d402
commit 6b13f6453c
9 changed files with 974 additions and 27 deletions

View file

@ -0,0 +1,398 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fsp from 'node:fs/promises';
import path from 'node:path';
import { describe, expect } from 'vitest';
import {
type Config,
ApprovalMode,
SESSION_FILE_PREFIX,
getProjectHash,
listInboxPatches,
normalizePath,
refreshServerHierarchicalMemory,
startMemoryService,
} from '@google/gemini-cli-core';
import { componentEvalTest } from './component-test-helper.js';
interface SeedSession {
sessionId: string;
summary: string;
userTurns: string[];
timestampOffsetMinutes: number;
}
interface MessageRecord {
id: string;
timestamp: string;
type: string;
content: Array<{ text: string }>;
}
const PACKAGE_JSON = JSON.stringify(
{
name: 'gemini-md-extraction-eval',
private: true,
scripts: {
build: 'echo build',
lint: 'echo lint',
test: 'echo test',
preflight: 'echo preflight',
},
},
null,
2,
);
/**
* A baseline GEMINI.md that establishes the project as a real one with some
* existing rules. Tests that expect a patch verify the agent ADDS a new rule
* not already present here. Tests that expect NO patch include the candidate
* rule already, exercising the "do not duplicate" gate.
*/
const BASELINE_GEMINI_MD = `# Project Conventions
## Build
- Run \`npm run build\` to compile the project.
## Linting
- Run \`npm run lint\` to lint the project.
`;
const GEMINI_MD_WITH_PREFLIGHT_RULE = `${BASELINE_GEMINI_MD}
## Preflight
- Always run \`npm run preflight\` before opening a PR.
`;
function workspaceFiles(geminiMdContent: string): Record<string, string> {
return {
'package.json': PACKAGE_JSON,
'README.md': '# GEMINI.md Extraction Eval\n',
'GEMINI.md': geminiMdContent,
};
}
function buildMessages(userTurns: string[]): MessageRecord[] {
const baseTime = new Date(Date.now() - 6 * 60 * 60 * 1000).toISOString();
return userTurns.flatMap((text, index) => [
{
id: `u${index + 1}`,
timestamp: baseTime,
type: 'user',
content: [{ text }],
},
{
id: `a${index + 1}`,
timestamp: baseTime,
type: 'gemini',
content: [{ text: `Acknowledged: ${index + 1}` }],
},
]);
}
async function seedSessions(
config: Config,
sessions: SeedSession[],
): Promise<void> {
const chatsDir = path.join(config.storage.getProjectTempDir(), 'chats');
await fsp.mkdir(chatsDir, { recursive: true });
const projectRoot = config.storage.getProjectRoot();
for (const session of sessions) {
const timestamp = new Date(
Date.now() - session.timestampOffsetMinutes * 60 * 1000,
)
.toISOString()
.slice(0, 16)
.replace(/:/g, '-');
const filename = `${SESSION_FILE_PREFIX}${timestamp}-${session.sessionId.slice(0, 8)}.json`;
const conversation = {
sessionId: session.sessionId,
projectHash: getProjectHash(projectRoot),
summary: session.summary,
startTime: new Date(Date.now() - 7 * 60 * 60 * 1000).toISOString(),
lastUpdated: new Date(Date.now() - 4 * 60 * 60 * 1000).toISOString(),
messages: buildMessages(session.userTurns),
};
await fsp.writeFile(
path.join(chatsDir, filename),
JSON.stringify(conversation, null, 2),
);
}
}
/**
* Refreshes hierarchical memory so the workspace `GEMINI.md` is loaded into
* `config.getGeminiMdFilePaths()`. Without this, the extraction agent has no
* targeting context and the GEMINI.md patching capability is inert.
*/
async function loadGeminiMdContext(config: Config): Promise<void> {
await refreshServerHierarchicalMemory(config);
}
/**
* Returns the comparison key for the workspace GEMINI.md, in the form the
* agent emits in patch diff headers. We use `normalizePath` (which lowercases
* on darwin and Windows and uses forward slashes) so the comparison is robust
* across platforms and against macOS's `/var/folders` -> `/private/var/folders`
* symlink resolution. Patch entries are compared via `normalizePath` too.
*/
async function workspaceGeminiMdPath(config: Config): Promise<string> {
const projectRoot = await fsp.realpath(config.storage.getProjectRoot());
return normalizePath(path.join(projectRoot, 'GEMINI.md'));
}
const EXTRACTION_CONFIG_OVERRIDES = {
experimentalAutoMemory: true,
approvalMode: ApprovalMode.YOLO,
};
describe('GEMINI.md Extraction', () => {
componentEvalTest('USUALLY_PASSES', {
suiteName: 'gemini-md-extraction',
suiteType: 'component-level',
name: 'patches workspace GEMINI.md when a project rule recurs across sessions',
files: workspaceFiles(BASELINE_GEMINI_MD),
timeout: 180000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
await loadGeminiMdContext(config);
await seedSessions(config, [
{
sessionId: 'preflight-correction-1',
summary: 'User corrects agent on preflight before pushing',
timestampOffsetMinutes: 420,
userTurns: [
'Stop. Before opening any PR in this project you must run npm run preflight first.',
'I keep telling you this — npm run preflight is mandatory before pushing any branch.',
'Yes, npm run preflight again. It runs clean, install, build, lint, typecheck, and tests.',
'Please remember: npm run preflight before every PR in this repo, no exceptions.',
'The project has its own preflight command for a reason; do not skip it.',
'Confirmed: npm run preflight finished and the branch is ready to push.',
'This is a standing project rule, not a one-off — apply it every time.',
'After preflight passes, then we open the PR.',
'I should not have to repeat npm run preflight every session.',
'Acknowledged that npm run preflight is the required pre-PR step.',
],
},
{
sessionId: 'preflight-correction-2',
summary: 'User corrects agent again about preflight before PR',
timestampOffsetMinutes: 360,
userTurns: [
'You forgot the preflight again — run npm run preflight before this PR too.',
'I have said this multiple times: npm run preflight is mandatory in this project.',
'It is the standard pre-PR check that catches lint, type, and test regressions.',
'Please make this part of your default workflow for this repo.',
'Yes, run npm run preflight now and then open the PR after it passes.',
'This is a recurring project workflow, not specific to one branch.',
'The preflight script exists exactly so we never push broken code.',
'Confirmed: npm run preflight passed and the PR is ready.',
'Future sessions in this project should also run npm run preflight before any PR.',
'Treat npm run preflight as a hard requirement before every PR in this repo.',
],
},
]);
},
assert: async (config) => {
await startMemoryService(config);
const inboxPatches = await listInboxPatches(config);
// Expect at least one patch in the inbox.
expect(inboxPatches.length).toBeGreaterThanOrEqual(1);
// At least one patch should target the workspace GEMINI.md and add a
// preflight rule. We check the diff content directly because the
// filename is agent-chosen.
const workspaceGeminiMd = await workspaceGeminiMdPath(config);
const matchingPatch = inboxPatches.find((patch) =>
patch.entries.some(
(entry) =>
normalizePath(entry.targetPath) === workspaceGeminiMd &&
/preflight/i.test(entry.diffContent),
),
);
expect(
matchingPatch,
`Expected a patch targeting ${workspaceGeminiMd} with a preflight rule. ` +
`Got patches: ${JSON.stringify(
inboxPatches.map((p) => ({
file: p.fileName,
targets: p.entries.map((e) => e.targetPath),
})),
null,
2,
)}`,
).toBeDefined();
},
});
componentEvalTest('USUALLY_PASSES', {
suiteName: 'gemini-md-extraction',
suiteType: 'component-level',
name: 'does not patch GEMINI.md when the rule is already present',
files: workspaceFiles(GEMINI_MD_WITH_PREFLIGHT_RULE),
timeout: 180000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
await loadGeminiMdContext(config);
// Same recurring preflight signal as the positive case — but the
// workspace GEMINI.md ALREADY documents the rule, so the agent must not
// propose a duplicate patch.
await seedSessions(config, [
{
sessionId: 'preflight-already-documented-1',
summary: 'User reminds agent about preflight',
timestampOffsetMinutes: 420,
userTurns: [
'Run npm run preflight before opening this PR.',
'You should always run npm run preflight before pushing in this project.',
'Preflight runs clean, install, build, lint, typecheck, and tests.',
'Confirmed: npm run preflight finished, ready to push.',
'This is the standard pre-PR check we use here.',
'Please apply this every time in this repo.',
'Yes, npm run preflight again.',
'It is the documented project workflow.',
'After it passes we open the PR.',
'Acknowledged: npm run preflight is the pre-PR step.',
],
},
{
sessionId: 'preflight-already-documented-2',
summary: 'User reminds agent about preflight again',
timestampOffsetMinutes: 360,
userTurns: [
'Same drill: run npm run preflight before opening this PR too.',
'It catches lint, type, and test regressions before review.',
'It is the project standard for every PR in this repo.',
'Confirmed: npm run preflight passed, PR is ready.',
'Use it every time in this project.',
'No exceptions.',
'Run npm run preflight now.',
'Then open the PR.',
'Done.',
'Standard project workflow.',
],
},
]);
},
assert: async (config) => {
await startMemoryService(config);
const inboxPatches = await listInboxPatches(config);
const workspaceGeminiMd = await workspaceGeminiMdPath(config);
// The agent may legitimately produce no patches at all, OR it may
// produce a skill patch — but it must NOT produce a GEMINI.md patch
// that just restates the preflight rule that's already there.
const duplicatePatch = inboxPatches.find((patch) =>
patch.entries.some(
(entry) =>
normalizePath(entry.targetPath) === workspaceGeminiMd &&
/preflight/i.test(entry.diffContent),
),
);
expect(
duplicatePatch,
`Expected NO GEMINI.md preflight patch (rule already present). ` +
`Got: ${JSON.stringify(
inboxPatches.map((p) => ({
file: p.fileName,
targets: p.entries.map((e) => e.targetPath),
})),
null,
2,
)}`,
).toBeUndefined();
},
});
componentEvalTest('USUALLY_PASSES', {
suiteName: 'gemini-md-extraction',
suiteType: 'component-level',
name: 'does not propose GEMINI.md patches from a single one-off correction',
files: workspaceFiles(BASELINE_GEMINI_MD),
timeout: 180000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
await loadGeminiMdContext(config);
// Two sessions, but they describe a one-off branch-specific incident
// tied to a specific ticket and date — not a durable project rule. The
// agent should NOT promote this to a GEMINI.md patch.
await seedSessions(config, [
{
sessionId: 'one-off-incident-1',
summary: 'Debug staging cookie issue for INC-7733',
timestampOffsetMinutes: 420,
userTurns: [
'For incident INC-7733 only, clear the staging cookie before retrying the login.',
'This workaround is just for the 2026-04-12 staging rollout.',
'Do not generalize this — it is incident-specific to INC-7733.',
'The branch hotfix/inc-7733 needs this exact one-off cookie clear.',
'After the cookie clear, the login worked again for this incident.',
'This is not a recurring project workflow.',
'Do not turn this into a documented rule.',
'It only applies to this one staging rollout.',
'Confirmed: INC-7733 is resolved with the one-off cookie clear.',
'Close out INC-7733; we do not need to remember this.',
],
},
{
sessionId: 'one-off-incident-2',
summary: 'Follow-up cleanup for INC-7733',
timestampOffsetMinutes: 360,
userTurns: [
'INC-7733 follow-up: just remove the temporary cookie-clear branch.',
'This is incident cleanup, not a workflow.',
'No documentation changes needed for INC-7733.',
'It was a one-time staging issue tied to that specific rollout.',
'Confirmed: hotfix branch deleted.',
'Do not record this as a project convention.',
'It is not a reusable procedure.',
'INC-7733 is fully closed.',
'No durable rule to add to GEMINI.md.',
'This is purely incident response.',
],
},
]);
},
assert: async (config) => {
await startMemoryService(config);
const inboxPatches = await listInboxPatches(config);
const workspaceGeminiMd = await workspaceGeminiMdPath(config);
const incidentPatch = inboxPatches.find((patch) =>
patch.entries.some(
(entry) =>
normalizePath(entry.targetPath) === workspaceGeminiMd &&
/(INC-7733|cookie|staging)/i.test(entry.diffContent),
),
);
expect(
incidentPatch,
`Expected NO GEMINI.md patch for one-off incident INC-7733. ` +
`Got: ${JSON.stringify(
inboxPatches.map((p) => ({
file: p.fileName,
targets: p.entries.map((e) => e.targetPath),
})),
null,
2,
)}`,
).toBeUndefined();
},
});
});

View file

@ -14,6 +14,7 @@ import {
moveInboxSkill,
applyInboxPatch,
dismissInboxPatch,
isGeminiMdPatchTarget,
isProjectSkillPatchTarget,
} from '@google/gemini-cli-core';
import { waitFor } from '../../test-utils/async.js';
@ -32,6 +33,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => {
moveInboxSkill: vi.fn(),
applyInboxPatch: vi.fn(),
dismissInboxPatch: vi.fn(),
isGeminiMdPatchTarget: vi.fn(),
isProjectSkillPatchTarget: vi.fn(),
getErrorMessage: vi.fn((error: unknown) =>
error instanceof Error ? error.message : String(error),
@ -45,6 +47,7 @@ const mockMoveInboxSkill = vi.mocked(moveInboxSkill);
const mockDismissInboxSkill = vi.mocked(dismissInboxSkill);
const mockApplyInboxPatch = vi.mocked(applyInboxPatch);
const mockDismissInboxPatch = vi.mocked(dismissInboxPatch);
const mockIsGeminiMdPatchTarget = vi.mocked(isGeminiMdPatchTarget);
const mockIsProjectSkillPatchTarget = vi.mocked(isProjectSkillPatchTarget);
const inboxSkill: InboxSkill = {
@ -170,6 +173,7 @@ describe('SkillInboxDialog', () => {
: false;
},
);
mockIsGeminiMdPatchTarget.mockResolvedValue(false);
});
afterEach(() => {

View file

@ -28,6 +28,7 @@ import {
dismissInboxSkill,
applyInboxPatch,
dismissInboxPatch,
isGeminiMdPatchTarget,
isProjectSkillPatchTarget,
} from '@google/gemini-cli-core';
@ -35,7 +36,12 @@ type Phase = 'list' | 'skill-preview' | 'skill-action' | 'patch-preview';
type InboxItem =
| { type: 'skill'; skill: InboxSkill }
| { type: 'patch'; patch: InboxPatch; targetsProjectSkills: boolean }
| {
type: 'patch';
patch: InboxPatch;
targetsProjectSkills: boolean;
targetsGeminiMd: boolean;
}
| { type: 'header'; label: string };
interface DestinationChoice {
@ -117,6 +123,18 @@ async function patchTargetsProjectSkills(
return entryTargetsProjectSkills.some(Boolean);
}
async function patchTargetsGeminiMd(
patch: InboxPatch,
config: Config,
): Promise<boolean> {
const entryTargetsGeminiMd = await Promise.all(
patch.entries.map((entry) =>
isGeminiMdPatchTarget(entry.targetPath, config),
),
);
return entryTargetsGeminiMd.some(Boolean);
}
/**
* Derives a bracketed origin tag from a skill file path,
* matching the existing [Built-in] convention in SkillsList.
@ -208,20 +226,16 @@ export const SkillInboxDialog: React.FC<SkillInboxDialogProps> = ({
]);
const patchItems = await Promise.all(
patches.map(async (patch): Promise<InboxItem> => {
let targetsProjectSkills = false;
try {
targetsProjectSkills = await patchTargetsProjectSkills(
patch,
config,
);
} catch {
targetsProjectSkills = false;
}
const [targetsProjectSkills, targetsGeminiMd] = await Promise.all([
patchTargetsProjectSkills(patch, config).catch(() => false),
patchTargetsGeminiMd(patch, config).catch(() => false),
]);
return {
type: 'patch',
patch,
targetsProjectSkills,
targetsGeminiMd,
};
}),
);
@ -257,13 +271,22 @@ export const SkillInboxDialog: React.FC<SkillInboxDialogProps> = ({
const listItems: Array<SelectionListItem<InboxItem>> = useMemo(() => {
const skills = items.filter((i) => i.type === 'skill');
const patches = items.filter((i) => i.type === 'patch');
const skillPatches = items.filter(
(i) => i.type === 'patch' && !i.targetsGeminiMd,
);
const geminiMdPatches = items.filter(
(i) => i.type === 'patch' && i.targetsGeminiMd,
);
const result: Array<SelectionListItem<InboxItem>> = [];
// Only show section headers when both types are present
const showHeaders = skills.length > 0 && patches.length > 0;
// Show section headers whenever more than one category is present so
// GEMINI.md patches stay visually distinct from skill artifacts.
const categoryCount = [skills, skillPatches, geminiMdPatches].filter(
(group) => group.length > 0,
).length;
const showHeaders = categoryCount > 1;
if (showHeaders) {
if (showHeaders && skills.length > 0) {
const header: InboxItem = { type: 'header', label: 'New Skills' };
result.push({
key: 'header:new-skills',
@ -276,7 +299,7 @@ export const SkillInboxDialog: React.FC<SkillInboxDialogProps> = ({
result.push({ key: getItemKey(item), value: item });
}
if (showHeaders) {
if (showHeaders && skillPatches.length > 0) {
const header: InboxItem = { type: 'header', label: 'Skill Updates' };
result.push({
key: 'header:skill-updates',
@ -285,7 +308,23 @@ export const SkillInboxDialog: React.FC<SkillInboxDialogProps> = ({
hideNumber: true,
});
}
for (const item of patches) {
for (const item of skillPatches) {
result.push({ key: getItemKey(item), value: item });
}
if (showHeaders && geminiMdPatches.length > 0) {
const header: InboxItem = {
type: 'header',
label: 'Project Memory Updates',
};
result.push({
key: 'header:project-memory-updates',
value: header,
disabled: true,
hideNumber: true,
});
}
for (const item of geminiMdPatches) {
result.push({ key: getItemKey(item), value: item });
}

View file

@ -87,4 +87,52 @@ describe('SkillExtractionAgent', () => {
'If recurrence or future reuse is unclear, create no skill and explain why.',
);
});
it('should always include the GEMINI.md patching section with strict gating', () => {
const prompt = agent.promptConfig.systemPrompt;
expect(prompt).toContain('UPDATING GEMINI.md (PATCHES)');
expect(prompt).toContain('NEVER create a new GEMINI.md file');
expect(prompt).toContain(
'NEVER duplicate, restate, elaborate on, strengthen, rephrase, or expand',
);
expect(prompt).toContain(
'NEVER patch a file that is not in the supplied targeting context',
);
expect(prompt).toContain('pick the most-specific scope that fits the rule');
});
it('should describe a combined volume cap across skills and patches', () => {
const prompt = agent.promptConfig.systemPrompt;
expect(prompt).toContain(
'Aim for 0-2 artifacts per run TOTAL — counting new skills, skill patches,',
);
expect(prompt).toContain(
'Prefer fewer, higher-quality artifacts. 0-2 TOTAL per run',
);
});
it('should embed the GEMINI.md targeting context in the query when provided', () => {
const geminiMdContext =
'### Workspace root\n- /repo/GEMINI.md\n\n### Workspace subdirectory\n- /repo/packages/cli/GEMINI.md';
const agentWithContext = SkillExtractionAgent(
skillsDir,
sessionIndex,
existingSkillsSummary,
geminiMdContext,
);
const query = agentWithContext.promptConfig.query ?? '';
expect(query).toContain('# GEMINI.md Targeting Context');
expect(query).toContain('/repo/GEMINI.md');
expect(query).toContain('/repo/packages/cli/GEMINI.md');
expect(query).toContain('Pick the most-specific scope that');
});
it('should omit the GEMINI.md targeting header when no context is provided', () => {
const query = agent.promptConfig.query ?? '';
expect(query).not.toContain('# GEMINI.md Targeting Context');
});
});

View file

@ -119,7 +119,9 @@ function buildSystemPrompt(skillsDir: string): string {
'- A code review or investigation with no durable takeaway',
'- Output-style preferences that do not materially change procedure',
'',
'Aim for 0-2 skills per run. Quality over quantity.',
'Aim for 0-2 artifacts per run TOTAL — counting new skills, skill patches,',
'and GEMINI.md patches together. Quality over quantity. 0 is the most',
'common outcome.',
'',
'============================================================',
'HOW TO READ SESSION TRANSCRIPTS',
@ -191,6 +193,90 @@ function buildSystemPrompt(skillsDir: string): string {
'The same quality bar applies: only propose updates backed by evidence from sessions.',
'',
'============================================================',
'UPDATING GEMINI.md (PATCHES)',
'============================================================',
'',
'GEMINI.md (and equivalently named) files are project-memory files loaded',
'into every agent session in this workspace. They steer FUTURE agents on',
'project conventions, tech stack, build/test commands, and recurring',
'workflows. You can propose surgical edits to them using the same .patch',
'mechanism as for skill files.',
'',
'The list of files you are allowed to patch is provided below as',
'"GEMINI.md Targeting Context". If no such section appears, no GEMINI.md',
'files are loaded and you MUST NOT propose any GEMINI.md patches.',
'',
'Strict gating — propose a GEMINI.md patch ONLY when ALL of these hold:',
'',
'1. **The user repeatedly corrected or restated the rule** across sessions',
' (or once but with strong durability signal — e.g. it is a stable',
' project convention). One-off corrections do NOT qualify.',
'2. **The rule is project- or user-specific**, not generic engineering',
' advice an agent already knows.',
'3. **The rule is concrete and actionable** ("run `npm run preflight`',
' before opening a PR", "prefer Vitest over Jest", "lint with',
' `npm run lint -- --fix`"). Vague taste statements do NOT qualify.',
'4. **The rule is NOT already present in the target file.** You MUST',
' read_file the target before proposing a patch and confirm the rule',
' is genuinely missing or materially weaker than what you would add.',
'5. **There is a clear future trigger** — the rule will keep mattering',
' for agents working in this scope.',
'',
'Targeting — pick the most-specific scope that fits the rule:',
'',
'- **Workspace subdirectory**: rule only applies inside that directory',
' (e.g. UI conventions for `packages/cli`).',
'- **Workspace root**: rule applies to the whole project.',
'- **Global (~/.gemini/GEMINI.md)**: user-wide preference that should',
' apply across every project the user works on.',
'- **Workspace ancestor**: avoid unless the rule clearly belongs to that',
' broader parent project — patching an ancestor from a sub-project is',
' almost always wrong.',
'',
'Hard prohibitions:',
'',
'- NEVER create a new GEMINI.md file. /dev/null patches against any',
' GEMINI.md target will be rejected.',
'- NEVER patch a file that is not in the supplied targeting context.',
' Extension-supplied context files are intentionally excluded — they are',
' shipped artifacts and would be overwritten on extension upgrades.',
'- NEVER duplicate, restate, elaborate on, strengthen, rephrase, or expand',
' a rule that already exists in the target. ONLY propose patches that add',
' a genuinely NEW rule the file does not yet cover. If the rule is',
' present in any form — even as a one-line bullet, even without emphasis,',
' even without an explanation — DO NOT patch.',
'- NEVER include secrets, tokens, or environment-specific identifiers.',
'- NEVER restate generic engineering advice or transient incident details.',
'',
'Examples of what counts as duplication (DO NOT patch):',
'',
'- Target already says: "- Run `npm run preflight` before opening a PR."',
' You wanted to add: "**MANDATORY**: Always run `npm run preflight` before',
' pushing or opening a Pull Request. This runs clean, install, build,',
' lint, typecheck, and tests."',
' -> DUPLICATION. The rule is already present; you are just adding',
' emphasis and elaboration. DO NOT patch.',
'- Target already says: "- Use Vitest for tests."',
' You wanted to add: "- Prefer Vitest over Jest for all testing."',
' -> DUPLICATION. Rephrasing the same rule. DO NOT patch.',
'',
'Examples of what counts as a NEW rule (patch is allowed if other gates pass):',
'',
'- Target has no mention of preflight at all, sessions show users',
' repeatedly correcting the agent to run `npm run preflight` before PRs.',
' -> NEW rule. Patch is allowed.',
'- Target documents the build command but not the lint command, sessions',
' show users repeatedly asking for `npm run lint -- --fix` to be run.',
' -> NEW rule (lint command not yet documented). Patch is allowed.',
'',
'Format: identical to skill patches — write a .patch file in your skills',
'directory using strict unified diff with absolute paths in BOTH the',
'--- and +++ headers, 3 lines of context per hunk, accurate @@ counts.',
'A single .patch file may contain hunks for multiple GEMINI.md files in',
'the same scope, but do not mix GEMINI.md hunks and skill hunks in one',
'file — keep them separate so the user can review each category cleanly.',
'',
'============================================================',
'QUALITY RULES (STRICT)',
'============================================================',
'',
@ -202,7 +288,8 @@ function buildSystemPrompt(skillsDir: string): string {
' bug/ticket, do NOT create it.',
'- Do not create skills for generic advice, output-style preferences, or ephemeral',
' choices that any competent agent would already know or adapt to on the fly.',
'- Prefer fewer, higher-quality skills. 0-2 skills per run is typical. 3+ is unusual.',
'- Prefer fewer, higher-quality artifacts. 0-2 TOTAL per run (counting new',
' skills, skill patches, and GEMINI.md patches) is typical. 3+ is unusual.',
'',
'============================================================',
'WORKFLOW',
@ -253,6 +340,7 @@ export const SkillExtractionAgent = (
skillsDir: string,
sessionIndex: string,
existingSkillsSummary: string,
geminiMdContext: string = '',
): LocalAgentDefinition<typeof SkillExtractionSchema> => ({
kind: 'local',
name: 'confucius',
@ -298,6 +386,21 @@ export const SkillExtractionAgent = (
contextParts.push(`# Existing Skills\n\n${existingSkillsSummary}`);
}
if (geminiMdContext) {
contextParts.push(
[
'# GEMINI.md Targeting Context',
'',
'These are the GEMINI.md (or equivalently named) project-memory files',
'currently loaded in this workspace. They are the ONLY files you may',
'propose GEMINI.md patches against. Pick the most-specific scope that',
'fits the rule. Read the file first before proposing any patch.',
'',
geminiMdContext,
].join('\n'),
);
}
contextParts.push(
[
'# Session Index',

View file

@ -144,7 +144,11 @@ export {
startMemoryService,
validatePatches,
} from './services/memoryService.js';
export { isProjectSkillPatchTarget } from './services/memoryPatchUtils.js';
export {
getAllowedGeminiMdPatchTargets,
isGeminiMdPatchTarget,
isProjectSkillPatchTarget,
} from './services/memoryPatchUtils.js';
export * from './context/memoryContextManager.js';
export * from './services/trackerService.js';
export * from './services/trackerTypes.js';

View file

@ -12,7 +12,8 @@ import type { Config } from '../config/config.js';
import { Storage } from '../config/storage.js';
import { isNodeError } from '../utils/errors.js';
import { debugLogger } from '../utils/debugLogger.js';
import { isSubpath } from '../utils/paths.js';
import { isSubpath, normalizePath } from '../utils/paths.js';
import { getExtensionMemoryPaths } from '../utils/memoryDiscovery.js';
export function getAllowedSkillPatchRoots(config: Config): string[] {
return Array.from(
@ -68,6 +69,55 @@ async function getCanonicalAllowedSkillPatchRoots(
);
}
/**
* Returns the absolute paths of the GEMINI.md (and equivalently named) memory
* files that are eligible to be patched by the skill-extraction agent.
*
* The set is the user/project memory files reported by the loaded
* configuration, minus any files that originate from installed extensions.
* Extension-supplied context files belong to the extension package and would
* be overwritten on extension upgrades, so they are intentionally excluded.
*/
export function getAllowedGeminiMdPatchTargets(config: Config): string[] {
const allPaths = config.getGeminiMdFilePaths?.() ?? [];
if (allPaths.length === 0) {
return [];
}
// `getExtensionMemoryPaths` returns paths normalized via `normalizePath`,
// which lowercases on darwin and Windows. Compare on the normalized form so
// case differences between the two sources do not let an extension path
// slip through the filter, but return the absolute (non-lowercased) path so
// downstream `realpath`-based resolution still works on case-sensitive
// filesystems.
const extensionPaths = new Set(
getExtensionMemoryPaths(config.getExtensionLoader()),
);
return Array.from(
new Set(
allPaths
.filter((p) => !extensionPaths.has(normalizePath(p)))
.map((p) => path.resolve(p)),
),
);
}
async function getCanonicalAllowedGeminiMdPatchTargets(
config: Config,
): Promise<string[]> {
const canonicalTargets = await Promise.all(
getAllowedGeminiMdPatchTargets(config).map((target) =>
resolvePathWithExistingAncestors(target),
),
);
return Array.from(
new Set(
canonicalTargets.filter(
(target): target is string => typeof target === 'string',
),
),
);
}
export async function resolveAllowedSkillPatchTarget(
targetPath: string,
config: Config,
@ -83,6 +133,12 @@ export async function resolveAllowedSkillPatchTarget(
return canonicalTargetPath;
}
const allowedGeminiMdTargets =
await getCanonicalAllowedGeminiMdPatchTargets(config);
if (allowedGeminiMdTargets.includes(canonicalTargetPath)) {
return canonicalTargetPath;
}
return undefined;
}
@ -212,6 +268,26 @@ export async function isProjectSkillPatchTarget(
return isSubpath(canonicalProjectSkillsDir, canonicalTargetPath);
}
/**
* Returns true when `targetPath` resolves to one of the loaded GEMINI.md
* memory files that the skill-extraction agent is allowed to patch.
* Used by the inbox UI to distinguish project-memory patches from skill patches.
*/
export async function isGeminiMdPatchTarget(
targetPath: string,
config: Config,
): Promise<boolean> {
const canonicalTargetPath =
await resolvePathWithExistingAncestors(targetPath);
if (!canonicalTargetPath) {
return false;
}
const allowedGeminiMdTargets =
await getCanonicalAllowedGeminiMdPatchTargets(config);
return allowedGeminiMdTargets.includes(canonicalTargetPath);
}
export function hasParsedPatchHunks(parsedPatches: StructuredPatch[]): boolean {
return (
parsedPatches.length > 0 &&

View file

@ -518,6 +518,10 @@ describe('memoryService', () => {
getMessageBus: vi.fn(),
getGeminiClient: vi.fn(),
getSkillManager: vi.fn().mockReturnValue({ getSkills: () => [] }),
getGeminiMdFilePaths: vi.fn().mockReturnValue([]),
getExtensionLoader: vi
.fn()
.mockReturnValue({ getExtensions: () => [] }),
modelConfigService: {
registerRuntimeModelConfig: vi.fn(),
},
@ -902,6 +906,8 @@ describe('memoryService', () => {
storage: {
getProjectSkillsDir: () => projectSkillsDir,
},
getGeminiMdFilePaths: () => [] as string[],
getExtensionLoader: () => ({ getExtensions: () => [] }),
} as unknown as Config;
});
@ -1162,6 +1168,117 @@ describe('memoryService', () => {
await expect(fs.access(patchPath)).rejects.toThrow();
expect(await fs.readFile(outsideFile, 'utf-8')).not.toContain('line2.5');
});
it('keeps patches that target a loaded GEMINI.md file', async () => {
const { validatePatches } = await import('./memoryService.js');
await fs.mkdir(skillsDir, { recursive: true });
const repoDir = path.join(tmpDir, 'repo-with-gemini-md');
await fs.mkdir(repoDir, { recursive: true });
const geminiMdFile = path.join(repoDir, 'GEMINI.md');
await fs.writeFile(geminiMdFile, 'rule one\nrule two\nrule three\n');
const allowingConfig = Object.assign({}, validateConfig, {
getGeminiMdFilePaths: () => [geminiMdFile],
}) as unknown as Config;
const patchPath = path.join(skillsDir, 'gemini-md-update.patch');
await fs.writeFile(
patchPath,
[
`--- ${geminiMdFile}`,
`+++ ${geminiMdFile}`,
'@@ -1,3 +1,4 @@',
' rule one',
' rule two',
'+rule two-and-a-half',
' rule three',
'',
].join('\n'),
);
const result = await validatePatches(skillsDir, allowingConfig);
expect(result).toEqual(['gemini-md-update.patch']);
await expect(fs.access(patchPath)).resolves.toBeUndefined();
});
it('rejects /dev/null patches that try to create a new GEMINI.md', async () => {
const { validatePatches } = await import('./memoryService.js');
await fs.mkdir(skillsDir, { recursive: true });
const newGeminiMdPath = path.join(tmpDir, 'NEW_GEMINI.md');
// The path is not in the allow-list because it isn't a loaded file.
const allowingConfig = Object.assign({}, validateConfig, {
getGeminiMdFilePaths: () => [] as string[],
}) as unknown as Config;
const patchPath = path.join(skillsDir, 'create-gemini-md.patch');
await fs.writeFile(
patchPath,
[
'--- /dev/null',
`+++ ${newGeminiMdPath}`,
'@@ -0,0 +1 @@',
'+brand new rule',
'',
].join('\n'),
);
const result = await validatePatches(skillsDir, allowingConfig);
expect(result).toEqual([]);
await expect(fs.access(patchPath)).rejects.toThrow();
await expect(fs.access(newGeminiMdPath)).rejects.toThrow();
});
it('rejects patches that target an extension-supplied GEMINI.md', async () => {
const { validatePatches } = await import('./memoryService.js');
await fs.mkdir(skillsDir, { recursive: true });
const extensionDir = path.join(tmpDir, 'extension');
await fs.mkdir(extensionDir, { recursive: true });
const extensionContextFile = path.join(extensionDir, 'GEMINI.md');
await fs.writeFile(
extensionContextFile,
'extension rule one\nextension rule two\n',
);
// Note: the file appears in getGeminiMdFilePaths (it's loaded into
// memory at runtime), but it's also reported by the extension loader,
// so the allow-list must subtract it.
const blockingConfig = Object.assign({}, validateConfig, {
getGeminiMdFilePaths: () => [extensionContextFile],
getExtensionLoader: () => ({
getExtensions: () => [
{ isActive: true, contextFiles: [extensionContextFile] },
],
}),
}) as unknown as Config;
const patchPath = path.join(skillsDir, 'patch-extension.patch');
await fs.writeFile(
patchPath,
[
`--- ${extensionContextFile}`,
`+++ ${extensionContextFile}`,
'@@ -1,2 +1,3 @@',
' extension rule one',
' extension rule two',
'+sneaky added rule',
'',
].join('\n'),
);
const result = await validatePatches(skillsDir, blockingConfig);
expect(result).toEqual([]);
await expect(fs.access(patchPath)).rejects.toThrow();
// Confirm the extension file was NOT modified.
expect(await fs.readFile(extensionContextFile, 'utf-8')).toBe(
'extension rule one\nextension rule two\n',
);
});
});
describe('startMemoryService feedback for patch-only runs', () => {
@ -1231,6 +1348,10 @@ describe('memoryService', () => {
getMessageBus: vi.fn(),
getGeminiClient: vi.fn(),
getSkillManager: vi.fn().mockReturnValue({ getSkills: () => [] }),
getGeminiMdFilePaths: vi.fn().mockReturnValue([]),
getExtensionLoader: vi
.fn()
.mockReturnValue({ getExtensions: () => [] }),
modelConfigService: {
registerRuntimeModelConfig: vi.fn(),
},
@ -1312,6 +1433,10 @@ describe('memoryService', () => {
getMessageBus: vi.fn(),
getGeminiClient: vi.fn(),
getSkillManager: vi.fn().mockReturnValue({ getSkills: () => [] }),
getGeminiMdFilePaths: vi.fn().mockReturnValue([]),
getExtensionLoader: vi
.fn()
.mockReturnValue({ getExtensions: () => [] }),
modelConfigService: {
registerRuntimeModelConfig: vi.fn(),
},

View file

@ -31,7 +31,9 @@ import { Storage } from '../config/storage.js';
import type { AgentLoopContext } from '../config/agent-loop-context.js';
import {
applyParsedSkillPatches,
getAllowedGeminiMdPatchTargets,
hasParsedPatchHunks,
isGeminiMdPatchTarget,
} from './memoryPatchUtils.js';
const LOCK_FILENAME = '.extraction.lock';
@ -468,6 +470,106 @@ async function buildExistingSkillsSummary(
return sections.join('\n\n');
}
/**
* Builds a markdown summary of the GEMINI.md (and equivalently named) memory
* files the extraction agent is allowed to patch, grouped by scope so the
* agent can pick the most-specific target. Returns an empty string when no
* eligible files are loaded.
*
* Scopes:
* - **Global**: files under the user's `~/.gemini/` directory.
* - **Workspace root**: files at a workspace root directory.
* - **Workspace subdir**: files inside a workspace root.
* - **Workspace ancestor**: files above a workspace root (typically discovered
* via the upward traversal stopping at a `.git` boundary).
*/
function buildGeminiMdContext(config: Config): string {
const allowedPaths = getAllowedGeminiMdPatchTargets(config);
if (allowedPaths.length === 0) {
return '';
}
const userGeminiDir = path.resolve(Storage.getGlobalGeminiDir());
const workspaceRoots = (() => {
try {
return config
.getWorkspaceContext()
.getDirectories()
.map((d) => path.resolve(d));
} catch {
return [] as string[];
}
})();
const buckets = {
global: [] as string[],
workspaceRoot: [] as string[],
workspaceSubdir: [] as string[],
workspaceAncestor: [] as string[],
};
for (const target of allowedPaths) {
const targetDir = path.dirname(target);
if (
targetDir === userGeminiDir ||
targetDir.startsWith(userGeminiDir + path.sep)
) {
buckets.global.push(target);
continue;
}
const matchedRoot = workspaceRoots.find((root) => targetDir === root);
if (matchedRoot) {
buckets.workspaceRoot.push(target);
continue;
}
const containingRoot = workspaceRoots.find((root) =>
targetDir.startsWith(root + path.sep),
);
if (containingRoot) {
buckets.workspaceSubdir.push(target);
continue;
}
const ancestorRoot = workspaceRoots.find(
(root) => root === targetDir || root.startsWith(targetDir + path.sep),
);
if (ancestorRoot) {
buckets.workspaceAncestor.push(target);
continue;
}
// Path does not fit any known scope (e.g. an extra include-directory
// outside the workspace). Treat as ancestor-equivalent for prompt clarity.
buckets.workspaceAncestor.push(target);
}
const sections: string[] = [];
if (buckets.global.length > 0) {
sections.push(
`### Global (~/.gemini)\nUser-wide preferences. Patch only when the rule applies to every project.\n${buckets.global.map((p) => `- ${p}`).join('\n')}`,
);
}
if (buckets.workspaceRoot.length > 0) {
sections.push(
`### Workspace root\nProject-wide conventions. The most common patch target.\n${buckets.workspaceRoot.map((p) => `- ${p}`).join('\n')}`,
);
}
if (buckets.workspaceSubdir.length > 0) {
sections.push(
`### Workspace subdirectory\nScoped rules — prefer these when the convention only applies inside this directory.\n${buckets.workspaceSubdir.map((p) => `- ${p}`).join('\n')}`,
);
}
if (buckets.workspaceAncestor.length > 0) {
sections.push(
`### Workspace ancestor\nLikely owned by a parent project. Patch only when the rule clearly belongs to that broader scope.\n${buckets.workspaceAncestor.map((p) => `- ${p}`).join('\n')}`,
);
}
return sections.join('\n\n');
}
/**
* Builds an AgentLoopContext from a Config for background agent execution.
*/
@ -684,11 +786,21 @@ export async function startMemoryService(config: Config): Promise<void> {
);
}
// Build the GEMINI.md targeting context (loaded user/project memory files,
// grouped by scope). Extension-supplied context files are excluded so the
// agent cannot propose patches that would be overwritten on extension
// upgrades.
const geminiMdContext = buildGeminiMdContext(config);
if (geminiMdContext) {
debugLogger.log(`[MemoryService] GEMINI.md context:\n${geminiMdContext}`);
}
// Build agent definition and context
const agentDefinition = SkillExtractionAgent(
skillsDir,
sessionIndex,
existingSkillsSummary,
geminiMdContext,
);
const context = buildAgentLoopContext(config);
@ -728,6 +840,8 @@ export async function startMemoryService(config: Config): Promise<void> {
// Validate any .patch files the agent generated
const validPatches = await validatePatches(skillsDir, config);
const patchesCreatedThisRun: string[] = [];
const skillPatchesThisRun: string[] = [];
const geminiMdPatchesThisRun: string[] = [];
for (const patchFile of validPatches) {
const patchPath = path.join(skillsDir, patchFile);
let currentContent: string;
@ -736,13 +850,39 @@ export async function startMemoryService(config: Config): Promise<void> {
} catch {
continue;
}
if (patchContentsBefore.get(patchFile) !== currentContent) {
patchesCreatedThisRun.push(patchFile);
if (patchContentsBefore.get(patchFile) === currentContent) {
continue;
}
patchesCreatedThisRun.push(patchFile);
// Classify by inspecting the parsed targets — a single patch file may
// contain hunks for multiple files, so we check all of them.
let targetsGeminiMd = false;
try {
const parsed = Diff.parsePatch(currentContent);
for (const entry of parsed) {
const targetPath = entry.newFileName ?? entry.oldFileName ?? '';
if (!targetPath || targetPath === '/dev/null') continue;
if (await isGeminiMdPatchTarget(targetPath, config)) {
targetsGeminiMd = true;
break;
}
}
} catch {
// Parsing already passed validatePatches, so this is unexpected, but
// fall back to treating the patch as a skill patch if classification
// fails.
}
if (targetsGeminiMd) {
geminiMdPatchesThisRun.push(patchFile);
} else {
skillPatchesThisRun.push(patchFile);
}
}
if (validPatches.length > 0) {
debugLogger.log(
`[MemoryService] ${validPatches.length} valid patch(es) currently in inbox; ${patchesCreatedThisRun.length} created or updated this run`,
`[MemoryService] ${validPatches.length} valid patch(es) currently in inbox; ${patchesCreatedThisRun.length} created or updated this run (${skillPatchesThisRun.length} skill, ${geminiMdPatchesThisRun.length} GEMINI.md)`,
);
}
@ -764,9 +904,14 @@ export async function startMemoryService(config: Config): Promise<void> {
`created ${skillsCreated.length} skill(s): ${skillsCreated.join(', ')}`,
);
}
if (patchesCreatedThisRun.length > 0) {
if (skillPatchesThisRun.length > 0) {
completionParts.push(
`prepared ${patchesCreatedThisRun.length} patch(es): ${patchesCreatedThisRun.join(', ')}`,
`prepared ${skillPatchesThisRun.length} skill patch(es): ${skillPatchesThisRun.join(', ')}`,
);
}
if (geminiMdPatchesThisRun.length > 0) {
completionParts.push(
`prepared ${geminiMdPatchesThisRun.length} GEMINI.md patch(es): ${geminiMdPatchesThisRun.join(', ')}`,
);
}
debugLogger.log(
@ -778,9 +923,14 @@ export async function startMemoryService(config: Config): Promise<void> {
`${skillsCreated.length} new skill${skillsCreated.length > 1 ? 's' : ''} extracted from past sessions: ${skillsCreated.join(', ')}`,
);
}
if (patchesCreatedThisRun.length > 0) {
if (skillPatchesThisRun.length > 0) {
feedbackParts.push(
`${patchesCreatedThisRun.length} skill update${patchesCreatedThisRun.length > 1 ? 's' : ''} extracted from past sessions`,
`${skillPatchesThisRun.length} skill update${skillPatchesThisRun.length > 1 ? 's' : ''} extracted from past sessions`,
);
}
if (geminiMdPatchesThisRun.length > 0) {
feedbackParts.push(
`${geminiMdPatchesThisRun.length} project memory update${geminiMdPatchesThisRun.length > 1 ? 's' : ''} extracted from past sessions`,
);
}
coreEvents.emitFeedback(