diff --git a/docs/reference/memport.md b/docs/reference/memport.md index a8c2da5a2d..882fd3dfe8 100644 --- a/docs/reference/memport.md +++ b/docs/reference/memport.md @@ -120,6 +120,12 @@ code spans, ensuring that `@` imports inside these regions are properly ignored. This provides robust handling of nested code blocks and complex Markdown structures. +## HTML comment handling + +The import processor removes HTML comments from memory content before resolving +imports. Any `@` imports inside HTML comments are ignored, and the comments do +not appear in the final prompt. + ## Import tree structure The processor returns an import tree that shows the hierarchy of imported files, diff --git a/packages/core/src/utils/memoryImportProcessor.test.ts b/packages/core/src/utils/memoryImportProcessor.test.ts index 3c9a74b604..57cf2feb7a 100644 --- a/packages/core/src/utils/memoryImportProcessor.test.ts +++ b/packages/core/src/utils/memoryImportProcessor.test.ts @@ -378,6 +378,32 @@ describe('memoryImportProcessor', () => { ); }); + it('should strip HTML comments and ignore imports inside them', async () => { + const content = [ + 'Header', + '<!-- user comment that should be removed @./fake.md -->', + 'Real import @./real.md', + 'Inline comment after text', + ].join('\n'); + const basePath = testPath('test', 'path'); + const importedContent = 'Real imported content'; + + mockedFs.access.mockResolvedValue(undefined); + mockedFs.readFile.mockResolvedValue(importedContent); + + const result = await processImports(content, basePath, true); + + expect(result.content).toContain(importedContent); + expect(result.content).not.toContain('user comment that should be removed'); + expect(result.content).not.toContain('fake.md'); + + expect(mockedFs.readFile).toHaveBeenCalledTimes(1); + expect(mockedFs.readFile).toHaveBeenCalledWith( + path.resolve(basePath, './real.md'), + 'utf-8', + ); + }); + it('should handle nested tokens and non-unique content correctly', async () => { // This test verifies the robust findCodeRegions implementation // 
// that recursively walks the token tree and handles non-unique content
//
// diff --git a/packages/core/src/utils/memoryImportProcessor.ts b/packages/core/src/utils/memoryImportProcessor.ts
// index dc4b0b8537..aa64e18c4d 100644
// --- a/packages/core/src/utils/memoryImportProcessor.ts
// +++ b/packages/core/src/utils/memoryImportProcessor.ts
// @@ -176,6 +176,31 @@ function findCodeRegions(content: string): Array<[number, number]> {
//    return regions;
//  }
//
// Function added by this hunk (every line below is a `+` line in the patch):

/**
 * Removes every HTML comment (`<!-- ... -->`) from `content`.
 *
 * Text outside comments is returned unchanged and in its original order.
 * A comment that is opened but never closed swallows the rest of the input:
 * everything from the dangling `<!--` to the end of the string is dropped.
 *
 * NOTE(review): the scan works on raw text and does not consult code regions,
 * so a `<!--` inside a fenced code block or inline code span is also treated
 * as a comment start — confirm that this is intended.
 *
 * @param content - Text to sanitize.
 * @returns `content` with all HTML comments removed; the very same string
 *   when it contains no comment start token.
 */
function stripHtmlComments(content: string): string {
  const startToken = '<!--';
  const endToken = '-->';

  let start = content.indexOf(startToken);
  // Fast path: nothing to strip, so hand the input back untouched.
  if (start === -1) return content;

  const parts: string[] = [];
  let cursor = 0;

  while (start !== -1) {
    // Keep the text between the previous comment (or the start) and this one.
    parts.push(content.slice(cursor, start));

    // Search past the opening token so that `<!-->` is not read as closed.
    const end = content.indexOf(endToken, start + startToken.length);
    if (end === -1) {
      // Unclosed comment: discard the remainder of the input.
      return parts.join('');
    }

    cursor = end + endToken.length;
    start = content.indexOf(startToken, cursor);
  }

  // Trailing text after the last closed comment.
  parts.push(content.slice(cursor));
  return parts.join('');
}

// Remaining hunks of this patch. They touch `processImports`, whose full
// definition is not visible here, so they are kept verbatim as patch text:
//
//  /**
//   * Processes import statements in GEMINI.md content
//   * Supports @path/to/file syntax for importing content from other files
// @@ -200,6 +225,8 @@ export async function processImports(
//    importFormat: 'flat' | 'tree' = 'tree',
//    boundaryMarkers: readonly string[] = ['.git'],
//  ): Promise {
// +  content = stripHtmlComments(content);
// +
//    if (!projectRoot) {
//      projectRoot = await findProjectRoot(basePath, boundaryMarkers);
//    }
// @@ -230,6 +257,7 @@ export async function processImports(
//      filePath: string,
//      depth: number,
//    ) {
// +    const sanitizedContent = stripHtmlComments(fileContent);
//      // Normalize the file path to ensure consistent comparison
//      const normalizedPath = path.normalize(filePath);
// @@ -240,11 +268,11 @@ export async function processImports(
//      processedFiles.add(normalizedPath);
//      // Add this file to the flat list
// -    flatFiles.push({ path: normalizedPath, content: fileContent });
// +    flatFiles.push({ path: normalizedPath, content: sanitizedContent });
//      // Find imports in this file
// -    const codeRegions = 
findCodeRegions(fileContent); - const imports = findImports(fileContent); + const codeRegions = findCodeRegions(sanitizedContent); + const imports = findImports(sanitizedContent); // Process imports in reverse order to handle indices correctly for (let i = imports.length - 1; i >= 0; i--) {