feat(onboarding): structured hunk ops for updateDocument (#13989)

*  feat(onboarding): structured hunk ops for updateDocument

Extend `updateDocument` (and the underlying `@lobechat/markdown-patch`) with
explicit hunk modes so agents can unambiguously express deletes and inserts
instead of encoding them as clever search/replace pairs.

Modes: `replace` (default, backward-compatible), `delete`, `deleteLines`,
`insertAt`, `replaceLines`. Line-based modes use 1-based inclusive ranges
and are applied after content-based hunks, sorted by anchor line descending
so earlier lines stay stable. New error codes: `LINE_OUT_OF_RANGE`,
`INVALID_LINE_RANGE`, `LINE_OVERLAP`.

Onboarding document injection now prefixes each line with its 1-based number
(cat -n style) so the agent can cite line numbers when issuing line-based
hunks. Tool description, system role, and per-phase action hints updated to
teach the new shape.

* 🐛 fix(onboarding): align patchOnboardingDocument zod schema with structured hunks

The tRPC input schema still accepted only the legacy `{search, replace}` shape,
so agent calls using the new `insertAt`/`delete`/`deleteLines`/`replaceLines`
hunk modes were rejected before reaching `applyMarkdownPatch`. Switch to a
z.union matching MarkdownPatchHunk.

* 🐛 fix(markdown-patch): validate line ranges before overlap detection

Previously the overlap loop ran before per-hunk range validation, so an
invalid range (e.g. startLine=0 or endLine<startLine) combined with another
line hunk would be misreported as LINE_OVERLAP instead of the real
LINE_OUT_OF_RANGE / INVALID_LINE_RANGE. Validate each line hunk against the
baseline line count first, then run overlap detection on valid ranges only.
This commit is contained in:
Innei 2026-04-20 21:17:28 +08:00 committed by GitHub
parent a939962fa1
commit a59a9c4943
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 603 additions and 65 deletions

View file

@ -110,31 +110,102 @@ export const WebOnboardingManifest: BuiltinToolManifest = {
},
{
description:
"Update an existing document by applying byte-exact SEARCH/REPLACE hunks. This is the preferred way to persist new information incrementally — it is cheaper, safer, and less error-prone than rewriting the full document with writeDocument. Each hunk's search must match the current document exactly (whitespace, punctuation, casing). If the search appears multiple times, add surrounding context to make it unique or set replaceAll=true. On failure (HUNK_NOT_FOUND / HUNK_AMBIGUOUS), adjust the search string and retry; do not fall back to writeDocument unless most of the document must change.",
'Update an existing document by applying structured hunks. Preferred over writeDocument for every incremental edit — cheaper, safer, less error-prone. Each hunk picks ONE mode:\n' +
'- `replace` (default): byte-exact SEARCH → REPLACE. For small textual tweaks.\n' +
'- `delete`: remove the byte-exact SEARCH region.\n' +
'- `deleteLines`: drop lines [startLine, endLine] (1-based, inclusive). Use the line numbers shown in <current_*_document>.\n' +
'- `insertAt`: insert `content` before `line`. Use `line = totalLines + 1` to append to the end; `line = 1` to prepend.\n' +
'- `replaceLines`: replace lines [startLine, endLine] with `content`.\n' +
'Line-based hunks REQUIRE the line numbers from the injected <current_soul_document> / <current_user_persona> view. On failure (HUNK_NOT_FOUND / HUNK_AMBIGUOUS / LINE_OUT_OF_RANGE / LINE_OVERLAP), re-check the injected document and retry with corrected hunks; do NOT fall back to writeDocument unless most of the document must change.',
name: WebOnboardingApiName.updateDocument,
parameters: {
properties: {
hunks: {
description: 'Ordered list of SEARCH/REPLACE hunks applied sequentially.',
description:
'Ordered list of hunks. Content-based hunks (replace/delete) run first in order; line-based hunks (deleteLines/insertAt/replaceLines) run afterward, highest line first.',
items: {
additionalProperties: false,
properties: {
replace: {
description: 'Replacement text; may be empty to delete the matched region.',
type: 'string',
oneOf: [
{
additionalProperties: false,
properties: {
mode: { const: 'replace', type: 'string' },
replace: {
description: 'Replacement text; may be empty to delete the matched region.',
type: 'string',
},
replaceAll: {
description: 'Replace every occurrence of search. Defaults to false.',
type: 'boolean',
},
search: {
description: 'Byte-exact substring to locate in the current document.',
type: 'string',
},
},
required: ['search', 'replace'],
type: 'object',
},
replaceAll: {
description:
'Replace every occurrence of search. Defaults to false; leave unset unless you explicitly want a global replace.',
type: 'boolean',
{
additionalProperties: false,
properties: {
mode: { const: 'delete', type: 'string' },
replaceAll: { type: 'boolean' },
search: {
description: 'Byte-exact substring to remove.',
type: 'string',
},
},
required: ['mode', 'search'],
type: 'object',
},
search: {
description: 'Byte-exact substring to locate in the current document.',
type: 'string',
{
additionalProperties: false,
properties: {
endLine: {
description: 'Inclusive 1-based end line.',
type: 'integer',
},
mode: { const: 'deleteLines', type: 'string' },
startLine: {
description: 'Inclusive 1-based start line.',
type: 'integer',
},
},
required: ['mode', 'startLine', 'endLine'],
type: 'object',
},
},
required: ['search', 'replace'],
type: 'object',
{
additionalProperties: false,
properties: {
content: {
description: 'Text to insert; may span multiple lines (use \\n).',
type: 'string',
},
line: {
description:
'1-based line to insert before. Use `totalLines + 1` to append to the end.',
type: 'integer',
},
mode: { const: 'insertAt', type: 'string' },
},
required: ['mode', 'line', 'content'],
type: 'object',
},
{
additionalProperties: false,
properties: {
content: {
description: 'Replacement text; may be empty to delete the range.',
type: 'string',
},
endLine: { type: 'integer' },
mode: { const: 'replaceLines', type: 'string' },
startLine: { type: 'integer' },
},
required: ['mode', 'startLine', 'endLine', 'content'],
type: 'object',
},
],
},
minItems: 1,
type: 'array',

View file

@ -14,8 +14,14 @@ Turn protocol:
Persistence rules:
1. Use saveUserQuestion only for these structured onboarding fields: agentName, agentEmoji, fullName, interests, and responseLanguage. Use it only when that information emerges naturally in conversation.
2. saveUserQuestion updates lightweight onboarding state; it never writes markdown content.
3. Use writeDocument **only for the very first write** when the document is empty (or for a rare full structural rewrite). For every subsequent edit even adding a single line use **updateDocument** with SEARCH/REPLACE hunks. updateDocument is cheaper, safer, and less error-prone than rewriting the full document. The current contents of SOUL.md and User Persona are automatically injected into your context (in <current_soul_document> and <current_user_persona> tags), so you do not need to call readDocument to read them. Use readDocument only if you suspect the injected content may be stale.
4. updateDocument takes an ordered list of SEARCH/REPLACE hunks. Each search must match the current document byte-exact (whitespace, punctuation, casing); hunks are applied sequentially so later hunks see earlier results. If a hunk reports HUNK_NOT_FOUND, re-check the injected document against your search string; if HUNK_AMBIGUOUS, add surrounding context to make it unique (or pass replaceAll=true only when a global replace is intended).
3. Use writeDocument **only for the very first write** when the document is empty (or for a rare full structural rewrite). For every subsequent edit — even adding a single line — use **updateDocument**. updateDocument is cheaper, safer, and less error-prone than rewriting the full document. The current contents of SOUL.md and User Persona are automatically injected into your context (in <current_soul_document> and <current_user_persona> tags, each line prefixed with its 1-based line number and a \`→\` separator), so you do not need to call readDocument to read them. Use readDocument only if you suspect the injected content may be stale.
4. updateDocument takes an ordered list of structured hunks. Pick the hunk mode that best fits the edit:
- \`replace\` (default): byte-exact SEARCH → REPLACE. Use for small textual tweaks.
- \`delete\`: byte-exact SEARCH removed. Use to cut a block you can uniquely identify by its text.
- \`deleteLines\`: \`{ mode: "deleteLines", startLine, endLine }\` to remove a line range (inclusive, 1-based, from the injected line numbers).
- \`insertAt\`: \`{ mode: "insertAt", line, content }\` to insert before \`line\`. Use \`line = totalLines + 1\` to append to the end.
- \`replaceLines\`: \`{ mode: "replaceLines", startLine, endLine, content }\` to swap a line range with new content.
Prefer the line-based modes whenever you can read the target lines from the injected document — they are the most robust. Fall back to \`replace\`/\`delete\` for fuzzy textual edits. Content-based hunks run first in order; line-based hunks run afterward (highest line first), so mixing them in one call is safe. On errors (HUNK_NOT_FOUND / HUNK_AMBIGUOUS / LINE_OUT_OF_RANGE / INVALID_LINE_RANGE / LINE_OVERLAP), re-check the injected document and retry with corrected hunks.
5. Document tools are the only markdown persistence path.
6. Keep a working copy of each document in memory (seeded from the injected content), and merge new information into that copy before each writeDocument or updateDocument call.
7. SOUL.md (type: "soul") is for agent identity only: name, creature or nature, vibe, emoji, and the base template structure.

View file

@ -52,18 +52,18 @@ export class OnboardingActionHintInjector extends BaseVirtualLastUserContentProv
// Phase-specific persistence reminders
if (phase.includes('Agent Identity')) {
hints.push(
'When the user settles on a name and emoji: call saveUserQuestion with agentName and agentEmoji, then persist SOUL.md. If SOUL.md is already non-empty, call updateDocument(type="soul", hunks=[{search, replace}]) to amend only the changed lines; if empty, use writeDocument(type="soul") for the initial write.',
'When the user settles on a name and emoji: call saveUserQuestion with agentName and agentEmoji, then persist SOUL.md. If SOUL.md is already non-empty, call updateDocument(type="soul") with the hunk mode that matches your edit — `insertAt`/`replaceLines`/`deleteLines` when you can read the line numbers from <current_soul_document>, or `replace` for a textual tweak. If empty, use writeDocument(type="soul") for the initial write.',
);
} else if (phase.includes('User Identity')) {
hints.push(
'THIS TURN, as soon as the user tells you their name, call saveUserQuestion with fullName — do NOT wait until you also know their role. Persist the name immediately.',
);
hints.push(
'Seed the persona document the moment you have ANY useful fact about the user (just a name, just a role, or both). If empty, call writeDocument(type="persona") with a short initial draft containing whatever you know so far (even one line). If already non-empty, call updateDocument(type="persona", hunks=[{search, replace}]) to add the new lines. Do NOT defer persistence until more facts arrive.',
'Seed the persona document the moment you have ANY useful fact about the user (just a name, just a role, or both). If empty, call writeDocument(type="persona") with a short initial draft containing whatever you know so far (even one line). If already non-empty, call updateDocument(type="persona") with `insertAt` at the end of the right section (use `line = totalLines + 1` to append) or `replace` for a textual tweak. Do NOT defer persistence until more facts arrive.',
);
} else if (phase.includes('Discovery')) {
hints.push(
'Each turn where you learn a new fact (pain point, goal, preference, workflow detail, interest), call updateDocument(type="persona", hunks=[{search, replace}]) to append that fact to User Persona BEFORE replying. This is the default every turn — not an end-of-phase action. Do NOT save facts only in memory waiting for a final full write. After sufficient discovery (5-6 exchanges), also call saveUserQuestion with interests and responseLanguage. Use writeDocument(type="persona") only if the document is still empty.',
'Each turn where you learn a new fact (pain point, goal, preference, workflow detail, interest), call updateDocument(type="persona") BEFORE replying. Preferred shape: `{ mode: "insertAt", line: <line shown in <current_user_persona>>, content: "- new fact" }`. This is the default every turn — not an end-of-phase action. Do NOT save facts only in memory waiting for a final full write. After sufficient discovery (5-6 exchanges), also call saveUserQuestion with interests and responseLanguage. Use writeDocument(type="persona") only if the document is still empty.',
);
hints.push(
'EARLY EXIT: If the user signals they want to finish (e.g., "好了", "谢谢", "行", "Done", asking for summary, or any completion signal), STOP exploring immediately. Save whatever fields you have (call saveUserQuestion with interests even if partial), present a brief summary, then call finishOnboarding. Do NOT continue asking questions after a completion signal.',
@ -75,7 +75,7 @@ export class OnboardingActionHintInjector extends BaseVirtualLastUserContentProv
}
hints.push(
'PERSISTENCE RULE: Call the persistence tools (saveUserQuestion, writeDocument, updateDocument) to save information as you collect it — simply acknowledging in conversation is NOT enough. For document writes: use writeDocument only for the first write when the document is empty; for every subsequent edit use updateDocument with SEARCH/REPLACE hunks.',
'PERSISTENCE RULE: Call the persistence tools (saveUserQuestion, writeDocument, updateDocument) to save information as you collect it — simply acknowledging in conversation is NOT enough. For document writes: use writeDocument only for the first write when the document is empty; for every subsequent edit use updateDocument with the appropriate hunk mode (`insertAt` / `replaceLines` / `deleteLines` for line-based edits, `replace` / `delete` for byte-exact textual edits). The injected <current_*_document> view shows each line prefixed with its 1-based number and `→` — use those numbers for line-based hunks.',
);
hints.push(
'REMINDER: If the user says "好了", "谢谢", "行", "Done", "Thanks", or gives any completion signal at ANY phase, you MUST wrap up immediately and call finishOnboarding. This overrides all other phase rules.',

View file

@ -55,16 +55,29 @@ export class OnboardingContextInjector extends BaseFirstUserContentProvider {
if (onboardingContext.soulContent) {
parts.push(
`<current_soul_document>\n${onboardingContext.soulContent}\n</current_soul_document>`,
`<current_soul_document>\n${numberLines(onboardingContext.soulContent)}\n</current_soul_document>`,
);
}
if (onboardingContext.personaContent) {
parts.push(
`<current_user_persona>\n${onboardingContext.personaContent}\n</current_user_persona>`,
`<current_user_persona>\n${numberLines(onboardingContext.personaContent)}\n</current_user_persona>`,
);
}
return `<onboarding_context>\n${parts.join('\n\n')}\n</onboarding_context>`;
}
}
/**
 * Render a document with a line-number gutter for injection into the prompt.
 *
 * Every line is prefixed with its 1-based line number (right-aligned, padded
 * to at least two columns) followed by a `→` separator, e.g. ` 1→# SOUL`.
 * These are the numbers the updateDocument line-based hunks (`deleteLines`,
 * `insertAt`, `replaceLines`) refer to.
 *
 * A single trailing newline is treated as a terminator for the last line, not
 * as an extra empty line, so `'a\nb\n'` and `'a\nb'` both produce two numbered
 * lines.
 *
 * @param source - Raw document text.
 * @returns The numbered text, joined with `\n` and without a trailing newline.
 */
const numberLines = (source: string): string => {
  const normalized = source.endsWith('\n') ? source.slice(0, -1) : source;
  // ''.split('\n') already yields [''], so an empty document still renders as
  // a single (empty) numbered line.
  const lines = normalized.split('\n');
  const width = Math.max(String(lines.length).length, 2);
  return lines.map((line, i) => `${String(i + 1).padStart(width, ' ')}→${line}`).join('\n');
};

View file

@ -40,6 +40,33 @@ describe('OnboardingContextInjector', () => {
expect(result.messages[2].content).toBe('Hello');
});
// Verifies that the injected soul/persona documents carry a line-number gutter
// in the form `<padded number>→<line>`. Both fixtures end with a trailing
// newline so the "no phantom last line" rule is exercised as well.
it('should prefix soul and persona content with 1-based line numbers', async () => {
const provider = new OnboardingContextInjector({
enabled: true,
onboardingContext: {
personaContent: 'Line A\nLine B\n',
phaseGuidance: '<phase>collect-profile</phase>',
soulContent: '# SOUL\n\n## Identity\nname: Cat\n',
},
});
const result = await provider.process(
createContext([
{ content: 'System role', role: 'system' },
{ content: 'Hello', role: 'user' },
]),
);
// The assertions read the message at index 1 as the injected context.
const injected = result.messages[1].content as string;
expect(injected).toMatch(/ 1→# SOUL/);
expect(injected).toMatch(/ 3→## Identity/);
expect(injected).toMatch(/ 4→name: Cat/);
expect(injected).toMatch(/ 1→Line A/);
expect(injected).toMatch(/ 2→Line B/);
// Should not contain trailing phantom empty numbered line
expect(injected).not.toMatch(/ 5→\n<\/current_soul_document>/);
});
it('should skip reinjection when onboarding context already exists in messages', async () => {
const provider = new OnboardingContextInjector({
enabled: true,

View file

@ -141,4 +141,170 @@ describe('formatMarkdownPatchError', () => {
const msg = formatMarkdownPatchError({ code: 'EMPTY_HUNKS', hunkIndex: -1 });
expect(msg).toMatch(/No hunks/);
});
// Message-format checks for the three line-based error codes.
it('formats INVALID_LINE_RANGE', () => {
const msg = formatMarkdownPatchError({ code: 'INVALID_LINE_RANGE', hunkIndex: 0 });
expect(msg).toMatch(/endLine < startLine/);
});
it('formats LINE_OUT_OF_RANGE', () => {
const msg = formatMarkdownPatchError({
code: 'LINE_OUT_OF_RANGE',
hunkIndex: 1,
totalLines: 4,
});
// The message quotes the valid range and the extra append position
// (totalLines + 1) that insertAt accepts.
expect(msg).toMatch(/\[1, 4\]/);
expect(msg).toMatch(/line 5/);
});
it('formats LINE_OVERLAP', () => {
const msg = formatMarkdownPatchError({ code: 'LINE_OVERLAP', hunkIndex: 2 });
expect(msg).toMatch(/Hunk #2/);
expect(msg).toMatch(/overlaps/);
});
});
// Behavioural suite for the structured hunk modes of applyMarkdownPatch:
// content-based `delete`, the line-based `deleteLines` / `insertAt` /
// `replaceLines`, their ordering when mixed, and the line-related error codes.
describe('applyMarkdownPatch - structured ops', () => {
// --- delete (content-based) ---
it('delete mode removes matched region', () => {
const source = 'one\ntwo\nthree\n';
const result = applyMarkdownPatch(source, [{ mode: 'delete', search: 'two\n' }]);
expect(result.ok).toBe(true);
if (result.ok) {
expect(result.content).toBe('one\nthree\n');
expect(result.applied).toBe(1);
}
});
it('delete mode rejects ambiguous without replaceAll', () => {
const result = applyMarkdownPatch('x\nx\n', [{ mode: 'delete', search: 'x\n' }]);
expect(result.ok).toBe(false);
if (!result.ok) expect(result.error.code).toBe('HUNK_AMBIGUOUS');
});
// --- deleteLines ---
it('deleteLines removes inclusive range', () => {
const source = 'a\nb\nc\nd\n';
const result = applyMarkdownPatch(source, [{ endLine: 3, mode: 'deleteLines', startLine: 2 }]);
expect(result.ok).toBe(true);
if (result.ok) expect(result.content).toBe('a\nd\n');
});
it('deleteLines rejects endLine < startLine', () => {
const source = 'a\nb\n';
const result = applyMarkdownPatch(source, [{ endLine: 1, mode: 'deleteLines', startLine: 2 }]);
expect(result.ok).toBe(false);
if (!result.ok) expect(result.error.code).toBe('INVALID_LINE_RANGE');
});
it('deleteLines rejects out-of-range', () => {
const source = 'a\nb\n';
const result = applyMarkdownPatch(source, [{ endLine: 5, mode: 'deleteLines', startLine: 1 }]);
expect(result.ok).toBe(false);
if (!result.ok) expect(result.error.code).toBe('LINE_OUT_OF_RANGE');
});
// --- insertAt ---
it('insertAt inserts before given line (1-based)', () => {
const source = 'a\nb\nc\n';
const result = applyMarkdownPatch(source, [{ content: 'X', line: 2, mode: 'insertAt' }]);
expect(result.ok).toBe(true);
if (result.ok) expect(result.content).toBe('a\nX\nb\nc\n');
});
// Source has no trailing newline here, and the result must not gain one.
it('insertAt at totalLines + 1 appends to end', () => {
const source = 'a\nb\nc';
const result = applyMarkdownPatch(source, [{ content: 'Z', line: 4, mode: 'insertAt' }]);
expect(result.ok).toBe(true);
if (result.ok) expect(result.content).toBe('a\nb\nc\nZ');
});
it('insertAt at line 1 prepends', () => {
const source = 'a\nb\n';
const result = applyMarkdownPatch(source, [{ content: 'HEAD', line: 1, mode: 'insertAt' }]);
expect(result.ok).toBe(true);
if (result.ok) expect(result.content).toBe('HEAD\na\nb\n');
});
it('insertAt rejects line out of range', () => {
const source = 'a\nb\n';
const result = applyMarkdownPatch(source, [{ content: 'X', line: 5, mode: 'insertAt' }]);
expect(result.ok).toBe(false);
if (!result.ok) expect(result.error.code).toBe('LINE_OUT_OF_RANGE');
});
// --- replaceLines ---
it('replaceLines swaps inclusive range', () => {
const source = 'one\ntwo\nthree\nfour\n';
const result = applyMarkdownPatch(source, [
{ content: 'TWO\nTHREE', endLine: 3, mode: 'replaceLines', startLine: 2 },
]);
expect(result.ok).toBe(true);
if (result.ok) expect(result.content).toBe('one\nTWO\nTHREE\nfour\n');
});
// --- ordering when modes are mixed ---
// The line hunk is declared first but must still run after the content hunk.
it('applies content-based hunks before line-based hunks', () => {
const source = 'header\nbody\nfoot\n';
const result = applyMarkdownPatch(source, [
{ endLine: 2, mode: 'deleteLines', startLine: 2 },
{ mode: 'replace', replace: 'HEADER', search: 'header' },
]);
expect(result.ok).toBe(true);
if (result.ok) expect(result.content).toBe('HEADER\nfoot\n');
});
it('applies multiple line hunks in descending order so lower-line hunks stay correct', () => {
const source = 'L1\nL2\nL3\nL4\nL5\n';
const result = applyMarkdownPatch(source, [
{ endLine: 2, mode: 'deleteLines', startLine: 2 },
{ content: 'INS', line: 4, mode: 'insertAt' },
]);
expect(result.ok).toBe(true);
if (result.ok) {
// insertAt 4 applied against original (before deleteLines) -> between L3 and L4
// deleteLines [2,2] applied after -> removes L2
expect(result.content).toBe('L1\nL3\nINS\nL4\nL5\n');
}
});
// --- overlap and validation precedence ---
it('rejects overlapping line hunks', () => {
const source = 'a\nb\nc\nd\n';
const result = applyMarkdownPatch(source, [
{ endLine: 3, mode: 'deleteLines', startLine: 2 },
{ content: 'X', line: 3, mode: 'insertAt' },
]);
expect(result.ok).toBe(false);
if (!result.ok) expect(result.error.code).toBe('LINE_OVERLAP');
});
// The two cases below pair an invalid range with a second line hunk and
// expect the range error itself, not LINE_OVERLAP.
it('reports invalid line range before overlap when a hunk has startLine < 1', () => {
const source = 'a\nb\nc\n';
const result = applyMarkdownPatch(source, [
{ endLine: 1, mode: 'deleteLines', startLine: 0 },
{ content: 'X', line: 1, mode: 'insertAt' },
]);
expect(result.ok).toBe(false);
if (!result.ok) expect(result.error.code).toBe('LINE_OUT_OF_RANGE');
});
it('reports invalid line range before overlap when endLine < startLine', () => {
const source = 'a\nb\nc\n';
const result = applyMarkdownPatch(source, [
{ endLine: 1, mode: 'deleteLines', startLine: 3 },
{ content: 'X', line: 2, mode: 'insertAt' },
]);
expect(result.ok).toBe(false);
if (!result.ok) expect(result.error.code).toBe('INVALID_LINE_RANGE');
});
// --- backward compatibility ---
it('defaults mode to replace when omitted (backward compat)', () => {
const source = 'hello';
const result = applyMarkdownPatch(source, [{ replace: 'world', search: 'hello' }]);
expect(result.ok).toBe(true);
if (result.ok) expect(result.content).toBe('world');
});
});

View file

@ -1,4 +1,13 @@
import type { MarkdownPatchHunk, MarkdownPatchResult } from './types';
import type {
MarkdownPatchDeleteHunk,
MarkdownPatchDeleteLinesHunk,
MarkdownPatchErrorDetail,
MarkdownPatchHunk,
MarkdownPatchInsertAtHunk,
MarkdownPatchReplaceHunk,
MarkdownPatchReplaceLinesHunk,
MarkdownPatchResult,
} from './types';
const countOccurrences = (source: string, needle: string): number => {
if (!needle) return 0;
@ -14,16 +23,167 @@ const countOccurrences = (source: string, needle: string): number => {
return count;
};
/**
 * Resolve the effective mode of a hunk. A hunk that carries no `mode` is a
 * legacy `{ search, replace }` hunk and is treated as `'replace'`.
 */
const getMode = (hunk: MarkdownPatchHunk) => {
  const { mode } = hunk;
  return mode ?? 'replace';
};
/**
 * Type guard: true when the hunk addresses the document by line number
 * (`deleteLines`, `insertAt`, `replaceLines`) rather than by matching text.
 */
const isLineBased = (
  hunk: MarkdownPatchHunk,
): hunk is
  | MarkdownPatchDeleteLinesHunk
  | MarkdownPatchInsertAtHunk
  | MarkdownPatchReplaceLinesHunk => {
  switch (getMode(hunk)) {
    case 'deleteLines':
    case 'insertAt':
    case 'replaceLines': {
      return true;
    }
    default: {
      return false;
    }
  }
};
/**
 * Apply one content-based hunk (`replace` or `delete`) to `source`.
 *
 * The hunk's `search` must occur in `source`. When it occurs more than once
 * the hunk must set `replaceAll`, otherwise it is rejected as ambiguous. A
 * `delete` hunk behaves like a `replace` hunk with an empty replacement.
 *
 * @param source - Current document text.
 * @param hunk - The replace/delete hunk to apply.
 * @param hunkIndex - Position of the hunk in the caller's list, echoed back in
 *   error details.
 * @returns `{ content, count }` on success, where `count` is the number of
 *   occurrences rewritten; otherwise an error detail with code `EMPTY_SEARCH`,
 *   `HUNK_NOT_FOUND` or `HUNK_AMBIGUOUS`.
 */
const applyContentHunk = (
  source: string,
  hunk: MarkdownPatchReplaceHunk | MarkdownPatchDeleteHunk,
  hunkIndex: number,
): { content: string; count: number } | MarkdownPatchErrorDetail => {
  const { search, replaceAll } = hunk;
  if (!search) {
    return { code: 'EMPTY_SEARCH', hunkIndex };
  }

  const occurrences = countOccurrences(source, search);
  if (occurrences === 0) {
    return { code: 'HUNK_NOT_FOUND', hunkIndex, search };
  }
  if (occurrences > 1 && !replaceAll) {
    return { code: 'HUNK_AMBIGUOUS', hunkIndex, occurrences };
  }

  // Narrowing on the `mode` discriminant picks the right variant without a cast.
  const replacement = hunk.mode === 'delete' ? '' : hunk.replace;

  if (replaceAll) {
    // split/join inserts the replacement literally.
    return { content: source.split(search).join(replacement), count: occurrences };
  }

  // Use a replacer function so the replacement is inserted verbatim. With a
  // plain string, String.prototype.replace would expand `$&`, `$$`, `` $` ``
  // and `$'`, silently corrupting replacement text that contains `$`.
  return { content: source.replace(search, () => replacement), count: 1 };
};
/**
 * Split a document into lines the way the numbered injected view counts them:
 * a single trailing `\n` terminates the last line instead of introducing one
 * more empty line, and the empty string has zero lines.
 */
const splitLines = (source: string): string[] => {
  if (source.length === 0) return [];
  const pieces = source.split('\n');
  // A trailing newline leaves one empty piece at the end; drop exactly that one.
  if (source.endsWith('\n')) pieces.pop();
  return pieces;
};
/**
 * Join lines with `\n`, appending a final `\n` only when
 * `preserveTrailingNewline` is set and at least one line exists.
 */
const joinLines = (lines: string[], preserveTrailingNewline: boolean): string => {
  const body = lines.join('\n');
  if (!preserveTrailingNewline || lines.length === 0) return body;
  return `${body}\n`;
};
/**
 * Check a line-based hunk against a document of `totalLines` lines.
 *
 * - `insertAt`: `line` must be an integer in `[1, totalLines + 1]`
 *   (`totalLines + 1` means "append").
 * - `deleteLines` / `replaceLines`: both bounds must be integers, with
 *   `endLine >= startLine` and the whole range inside `[1, totalLines]`.
 *   A reversed range is reported as `INVALID_LINE_RANGE` before the bounds
 *   check runs.
 *
 * @param hunk - The line-based hunk to validate.
 * @param totalLines - Line count of the document the hunk refers to.
 * @param hunkIndex - Position of the hunk in the caller's list, echoed back in
 *   the error detail.
 * @returns `null` when the hunk is valid, otherwise the error detail.
 */
const validateLineHunk = (
  hunk: MarkdownPatchDeleteLinesHunk | MarkdownPatchInsertAtHunk | MarkdownPatchReplaceLinesHunk,
  totalLines: number,
  hunkIndex: number,
): MarkdownPatchErrorDetail | null => {
  // Narrow on the `mode` discriminant instead of asserting the variant.
  if (hunk.mode === 'insertAt') {
    const { line } = hunk;
    const inRange = Number.isInteger(line) && line >= 1 && line <= totalLines + 1;
    return inRange ? null : { code: 'LINE_OUT_OF_RANGE', hunkIndex, line, totalLines };
  }

  const { startLine, endLine } = hunk;
  if (!Number.isInteger(startLine) || !Number.isInteger(endLine)) {
    return { code: 'LINE_OUT_OF_RANGE', hunkIndex, totalLines };
  }
  if (endLine < startLine) {
    return { code: 'INVALID_LINE_RANGE', hunkIndex };
  }
  if (startLine < 1 || endLine > totalLines) {
    return { code: 'LINE_OUT_OF_RANGE', hunkIndex, totalLines };
  }
  return null;
};
/**
 * A line-based hunk paired with its position in the caller's original hunk
 * list, so the position survives re-sorting and can be reported in errors.
 */
interface IndexedLineHunk {
// The line-based hunk itself.
hunk: MarkdownPatchDeleteLinesHunk | MarkdownPatchInsertAtHunk | MarkdownPatchReplaceLinesHunk;
// Index of the hunk in the list passed to applyMarkdownPatch.
index: number;
}
/**
 * Line number used to order line-based hunks: `line` for `insertAt`,
 * `startLine` for `deleteLines` / `replaceLines`.
 */
const getAnchor = (h: IndexedLineHunk) => {
  const { hunk } = h;
  // Discriminant narrowing gives each branch the right fields without a cast.
  return hunk.mode === 'insertAt' ? hunk.line : hunk.startLine;
};
/**
 * Report whether two line-based hunks claim at least one common line number.
 *
 * Each hunk is reduced to an inclusive `[start, end]` range: `insertAt` becomes
 * `[line, line]`, range hunks become `[startLine, endLine]`. Two `insertAt`
 * hunks at the same line, or an `insertAt` whose line falls inside a range
 * hunk, therefore count as overlapping.
 *
 * Despite the name, ranges that are merely adjacent (for example `[1, 2]` and
 * an `insertAt` at line 3) share no line number and are NOT reported.
 */
const rangeOverlapsOrTouches = (a: IndexedLineHunk, b: IndexedLineHunk): boolean => {
  // Discriminant narrowing selects the right fields without a cast.
  const toRange = ({ hunk }: IndexedLineHunk): [number, number] =>
    hunk.mode === 'insertAt' ? [hunk.line, hunk.line] : [hunk.startLine, hunk.endLine];

  const [aStart, aEnd] = toRange(a);
  const [bStart, bEnd] = toRange(b);
  // Standard inclusive-interval intersection test.
  return aStart <= bEnd && bStart <= aEnd;
};
/**
 * Apply one already-validated line-based hunk to an array of lines.
 *
 * - `insertAt`: inserts `content` (split on `\n`) before 1-based `line`; empty
 *   content inserts a single empty line.
 * - `deleteLines`: removes the inclusive range `[startLine, endLine]`.
 * - `replaceLines`: replaces that range with `content` split on `\n`; empty
 *   content removes the range without inserting anything.
 *
 * No bounds checking happens here; callers are expected to have validated the
 * hunk first.
 *
 * @param lines - Current document lines; not mutated.
 * @param hunk - The line-based hunk to apply.
 * @returns A new array with the hunk applied.
 */
const applyLineHunk = (
  lines: string[],
  hunk: MarkdownPatchDeleteLinesHunk | MarkdownPatchInsertAtHunk | MarkdownPatchReplaceLinesHunk,
): string[] => {
  const next = lines.slice();

  // Narrow on the `mode` discriminant instead of asserting the variant.
  if (hunk.mode === 'insertAt') {
    // ''.split('\n') is [''], so empty content still inserts one blank line.
    next.splice(hunk.line - 1, 0, ...hunk.content.split('\n'));
    return next;
  }

  const removeCount = hunk.endLine - hunk.startLine + 1;
  if (hunk.mode === 'deleteLines') {
    next.splice(hunk.startLine - 1, removeCount);
    return next;
  }

  // replaceLines: empty content means "delete the range", not "insert a blank line".
  const replacement = hunk.content === '' ? [] : hunk.content.split('\n');
  next.splice(hunk.startLine - 1, removeCount, ...replacement);
  return next;
};
/**
* Apply a list of byte-exact SEARCH/REPLACE hunks to a markdown document.
* Apply a list of hunks to a markdown document.
*
* Semantics:
* - Each hunk's `search` must appear verbatim in the current document.
* - Whitespace, punctuation, casing differences are not tolerated.
* - If `search` appears multiple times, caller must set `replaceAll: true`,
* otherwise the hunk is rejected as ambiguous.
* - Hunks are applied sequentially; later hunks see earlier results.
* - First error aborts the whole patch; no partial application is committed.
* Modes:
* - `replace` (default): byte-exact SEARCH → REPLACE.
* - `delete`: byte-exact SEARCH removed from document.
* - `deleteLines`: remove lines `[startLine, endLine]` (1-based, inclusive).
* - `insertAt`: insert `content` before `line`; `line = totalLines + 1` appends.
* - `replaceLines`: replace `[startLine, endLine]` with `content`.
*
* Execution order:
* 1. Content-based hunks (`replace`, `delete`) run in declaration order.
* 2. Line-based hunks run afterward, sorted by anchor line descending, so
* earlier-line hunks are unaffected by shifts caused by later-line hunks.
* 3. Line-based hunks whose ranges overlap are rejected as `LINE_OVERLAP`.
*
* First error aborts the whole patch; no partial application is committed.
*/
export const applyMarkdownPatch = (
source: string,
@ -36,33 +196,54 @@ export const applyMarkdownPatch = (
let current = source;
let applied = 0;
const lineHunks: IndexedLineHunk[] = [];
for (const [hunkIndex, hunk] of hunks.entries()) {
if (!hunk.search) {
return { error: { code: 'EMPTY_SEARCH', hunkIndex }, ok: false };
if (isLineBased(hunk)) {
lineHunks.push({ hunk, index: hunkIndex });
continue;
}
const occurrences = countOccurrences(current, hunk.search);
if (occurrences === 0) {
return {
error: { code: 'HUNK_NOT_FOUND', hunkIndex, search: hunk.search },
ok: false,
};
const result = applyContentHunk(current, hunk, hunkIndex);
if ('code' in result) {
return { error: result, ok: false };
}
if (occurrences > 1 && !hunk.replaceAll) {
return {
error: { code: 'HUNK_AMBIGUOUS', hunkIndex, occurrences },
ok: false,
};
}
current = hunk.replaceAll
? current.split(hunk.search).join(hunk.replace)
: current.replace(hunk.search, hunk.replace);
applied += hunk.replaceAll ? occurrences : 1;
current = result.content;
applied += result.count;
}
return { applied, content: current, ok: true };
if (lineHunks.length === 0) {
return { applied, content: current, ok: true };
}
const sorted = lineHunks.slice().sort((a, b) => getAnchor(b) - getAnchor(a));
const preserveTrailingNewline = current.endsWith('\n');
let lines = splitLines(current);
const baselineTotalLines = lines.length;
// Validate each hunk against the baseline first, so invalid ranges surface
// as LINE_OUT_OF_RANGE / INVALID_LINE_RANGE instead of being misreported as
// LINE_OVERLAP by the overlap check below.
for (const { hunk, index } of sorted) {
const error = validateLineHunk(hunk, baselineTotalLines, index);
if (error) {
return { error, ok: false };
}
}
for (let i = 0; i < sorted.length - 1; i += 1) {
for (let j = i + 1; j < sorted.length; j += 1) {
if (rangeOverlapsOrTouches(sorted[i], sorted[j])) {
return { error: { code: 'LINE_OVERLAP', hunkIndex: sorted[i].index }, ok: false };
}
}
}
for (const { hunk } of sorted) {
lines = applyLineHunk(lines, hunk);
applied += 1;
}
return { applied, content: joinLines(lines, preserveTrailingNewline), ok: true };
};

View file

@ -4,7 +4,7 @@ export const formatMarkdownPatchError = (error: MarkdownPatchErrorDetail): strin
const idx = error.hunkIndex;
switch (error.code) {
case 'EMPTY_HUNKS': {
return 'No hunks provided. Include at least one { search, replace } entry.';
return 'No hunks provided. Include at least one hunk (replace / delete / deleteLines / insertAt / replaceLines).';
}
case 'EMPTY_SEARCH': {
return `Hunk #${idx} has empty search. Provide a non-empty substring to locate.`;
@ -16,5 +16,15 @@ export const formatMarkdownPatchError = (error: MarkdownPatchErrorDetail): strin
const n = error.occurrences ?? 0;
return `Hunk #${idx} search matches ${n} locations. Add surrounding context to uniquify, or set replaceAll=true to replace every occurrence.`;
}
case 'INVALID_LINE_RANGE': {
return `Hunk #${idx} has endLine < startLine. Use inclusive 1-based line numbers where endLine >= startLine.`;
}
case 'LINE_OUT_OF_RANGE': {
const total = error.totalLines ?? 0;
return `Hunk #${idx} references a line outside [1, ${total}] (insertAt may also target line ${total + 1} to append). Re-check the injected document's line numbers.`;
}
case 'LINE_OVERLAP': {
return `Hunk #${idx} overlaps another line-based hunk in the same call. Split them across multiple updateDocument calls or merge them into one hunk.`;
}
}
};

View file

@ -1,9 +1,44 @@
export interface MarkdownPatchHunk {
/**
 * All hunk modes. `replace` and `delete` locate their target by matching text;
 * `deleteLines`, `insertAt` and `replaceLines` locate it by 1-based line number.
 */
export type MarkdownPatchMode = 'replace' | 'delete' | 'deleteLines' | 'insertAt' | 'replaceLines';
/**
 * Content-based hunk: replace an occurrence of `search` with `replace`.
 * `mode` is optional so the legacy `{ search, replace }` shape stays valid.
 */
export interface MarkdownPatchReplaceHunk {
// Optional discriminant; an omitted mode is treated as 'replace'.
mode?: 'replace';
// Text that takes the place of the matched region.
replace: string;
// When true, every occurrence of `search` is replaced; otherwise multiple
// matches are rejected as ambiguous.
replaceAll?: boolean;
// Substring to locate; must be non-empty.
search: string;
}
/**
 * Content-based hunk: remove an occurrence of `search` from the document.
 */
export interface MarkdownPatchDeleteHunk {
mode: 'delete';
// When true, every occurrence of `search` is removed; otherwise multiple
// matches are rejected as ambiguous.
replaceAll?: boolean;
// Substring to remove; must be non-empty.
search: string;
}
/**
 * Line-based hunk: remove the lines `[startLine, endLine]` (1-based, inclusive).
 */
export interface MarkdownPatchDeleteLinesHunk {
// Last line to remove (1-based, inclusive); must be >= startLine.
endLine: number;
mode: 'deleteLines';
// First line to remove (1-based, inclusive).
startLine: number;
}
/**
 * Line-based hunk: insert `content` before the 1-based `line`.
 */
export interface MarkdownPatchInsertAtHunk {
// Text to insert; `\n` separates multiple inserted lines.
content: string;
// 1-based line to insert before; `totalLines + 1` appends to the end.
line: number;
mode: 'insertAt';
}
/**
 * Line-based hunk: replace the lines `[startLine, endLine]` (1-based,
 * inclusive) with `content`.
 */
export interface MarkdownPatchReplaceLinesHunk {
// Replacement text; `\n` separates lines, and an empty string removes the
// range without inserting anything.
content: string;
// Last line to replace (1-based, inclusive); must be >= startLine.
endLine: number;
mode: 'replaceLines';
// First line to replace (1-based, inclusive).
startLine: number;
}
/**
 * Any hunk accepted by applyMarkdownPatch, discriminated by `mode`
 * (a missing `mode` means the replace variant).
 */
export type MarkdownPatchHunk =
| MarkdownPatchReplaceHunk
| MarkdownPatchDeleteHunk
| MarkdownPatchDeleteLinesHunk
| MarkdownPatchInsertAtHunk
| MarkdownPatchReplaceLinesHunk;
export interface MarkdownPatchSuccess {
applied: number;
content: string;
@ -14,13 +49,18 @@ export type MarkdownPatchErrorCode =
| 'EMPTY_HUNKS'
| 'EMPTY_SEARCH'
| 'HUNK_AMBIGUOUS'
| 'HUNK_NOT_FOUND';
| 'HUNK_NOT_FOUND'
| 'INVALID_LINE_RANGE'
| 'LINE_OUT_OF_RANGE'
| 'LINE_OVERLAP';
/**
 * Structured description of why a patch was rejected. Which optional fields
 * are present depends on `code`.
 */
export interface MarkdownPatchErrorDetail {
// Machine-readable failure reason.
code: MarkdownPatchErrorCode;
// Index of the offending hunk in the submitted list (the EMPTY_HUNKS test
// fixture uses -1, since no hunk is involved).
hunkIndex: number;
// Offending `line` of an insertAt hunk (set for LINE_OUT_OF_RANGE on insertAt).
line?: number;
// Number of matches found (set for HUNK_AMBIGUOUS).
occurrences?: number;
// The search string that was not found (set for HUNK_NOT_FOUND).
search?: string;
// Line count of the document the hunk was validated against (set for
// LINE_OUT_OF_RANGE).
totalLines?: number;
}
export interface MarkdownPatchFailure {

View file

@ -314,11 +314,35 @@ export const userRouter = router({
z.object({
hunks: z
.array(
z.object({
replace: z.string(),
replaceAll: z.boolean().optional(),
search: z.string(),
}),
z.union([
z.object({
mode: z.literal('replace').optional(),
replace: z.string(),
replaceAll: z.boolean().optional(),
search: z.string(),
}),
z.object({
mode: z.literal('delete'),
replaceAll: z.boolean().optional(),
search: z.string(),
}),
z.object({
endLine: z.number().int(),
mode: z.literal('deleteLines'),
startLine: z.number().int(),
}),
z.object({
content: z.string(),
line: z.number().int(),
mode: z.literal('insertAt'),
}),
z.object({
content: z.string(),
endLine: z.number().int(),
mode: z.literal('replaceLines'),
startLine: z.number().int(),
}),
]),
)
.min(1),
type: z.enum(['soul', 'persona']),