This commit is contained in:
Jagjeevan Kashid 2026-04-21 04:29:12 +00:00 committed by GitHub
commit 0e55c63c71
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 63 additions and 3 deletions

View file

@ -120,6 +120,12 @@ code spans, ensuring that `@` imports inside these regions are properly ignored.
This provides robust handling of nested code blocks and complex Markdown
structures.
## HTML comment handling
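The import processor removes HTML comments (`<!-- ... -->`) from memory content
before resolving imports. Any `@` imports inside HTML comments are ignored, and
the comments do not appear in the final prompt. A `<!--` with no matching `-->`
is treated as a comment that runs to the end of the file, so everything after it
is removed.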
## Import tree structure
The processor returns an import tree that shows the hierarchy of imported files,

View file

@ -378,6 +378,32 @@ describe('memoryImportProcessor', () => {
);
});
it('should strip HTML comments and ignore imports inside them', async () => {
  const basePath = testPath('test', 'path');
  const importedContent = 'Real imported content';

  // Fixture: one standalone comment line, one genuine import, and one import
  // reference that only exists inside an inline comment.
  const content =
    'Header\n' +
    '<!-- user comment that should be removed -->\n' +
    'Real import @./real.md\n' +
    'Inline comment <!-- @./fake.md --> after text';

  // Every access check passes and every read yields the same stub content.
  mockedFs.access.mockResolvedValue(undefined);
  mockedFs.readFile.mockResolvedValue(importedContent);

  const { content: processed } = await processImports(content, basePath, true);

  // The genuine import is expanded...
  expect(processed).toContain(importedContent);
  // ...while neither the comment text nor the commented-out import survive.
  expect(processed).not.toContain('user comment that should be removed');
  expect(processed).not.toContain('fake.md');

  // Exactly one file is read: the import that sits outside any comment.
  expect(mockedFs.readFile).toHaveBeenCalledTimes(1);
  expect(mockedFs.readFile).toHaveBeenCalledWith(
    path.resolve(basePath, './real.md'),
    'utf-8',
  );
});
it('should handle nested tokens and non-unique content correctly', async () => {
// This test verifies the robust findCodeRegions implementation
// that recursively walks the token tree and handles non-unique content

View file

@ -176,6 +176,31 @@ function findCodeRegions(content: string): Array<[number, number]> {
return regions;
}
/**
 * Removes every HTML comment (`<!-- ... -->`) from the given text.
 *
 * Matching is purely textual: a comment starts at the first `<!--` found and
 * ends at the first `-->` that follows the opener. Comments do not nest.
 *
 * @param content - Text to remove comments from.
 * @returns The text with all comment spans removed. If a `<!--` has no closing
 *   `-->`, everything from that opener to the end of the text is dropped. Input
 *   without any `<!--` is returned unchanged.
 */
function stripHtmlComments(content: string): string {
  const opener = '<!--';
  const closer = '-->';

  let kept = '';
  let pos = 0;

  for (;;) {
    const openAt = content.indexOf(opener, pos);
    if (openAt === -1) {
      // No further comments: keep whatever remains.
      return kept + content.slice(pos);
    }

    // Keep the text between the previous comment (or the start) and this one.
    kept += content.slice(pos, openAt);

    // Search for the closer only after the opener so that "<!-->" is not
    // mistaken for a complete comment.
    const closeAt = content.indexOf(closer, openAt + opener.length);
    if (closeAt === -1) {
      // Unterminated comment: discard the rest of the text.
      return kept;
    }

    pos = closeAt + closer.length;
  }
}
/**
* Processes import statements in GEMINI.md content
* Supports @path/to/file syntax for importing content from other files
@ -200,6 +225,8 @@ export async function processImports(
importFormat: 'flat' | 'tree' = 'tree',
boundaryMarkers: readonly string[] = ['.git'],
): Promise<ProcessImportsResult> {
content = stripHtmlComments(content);
if (!projectRoot) {
projectRoot = await findProjectRoot(basePath, boundaryMarkers);
}
@ -230,6 +257,7 @@ export async function processImports(
filePath: string,
depth: number,
) {
const sanitizedContent = stripHtmlComments(fileContent);
// Normalize the file path to ensure consistent comparison
const normalizedPath = path.normalize(filePath);
@ -240,11 +268,11 @@ export async function processImports(
processedFiles.add(normalizedPath);
// Add this file to the flat list
flatFiles.push({ path: normalizedPath, content: fileContent });
flatFiles.push({ path: normalizedPath, content: sanitizedContent });
// Find imports in this file
const codeRegions = findCodeRegions(fileContent);
const imports = findImports(fileContent);
const codeRegions = findCodeRegions(sanitizedContent);
const imports = findImports(sanitizedContent);
// Process imports in reverse order to handle indices correctly
for (let i = imports.length - 1; i >= 0; i--) {