mirror of
https://github.com/google-gemini/gemini-cli
synced 2026-04-21 13:37:17 +00:00
Merge 3ab76acdae into a38e2f0048
This commit is contained in:
commit
4e21b8e5f8
2 changed files with 289 additions and 2 deletions
|
|
@ -6,7 +6,10 @@
|
|||
|
||||
import { describe, it, expect, beforeAll, vi } from 'vitest';
|
||||
import chalk from 'chalk';
|
||||
import { parseMarkdownToANSI } from './markdownParsingUtils.js';
|
||||
import {
|
||||
parseMarkdownToANSI,
|
||||
stripTrailingPunctuation,
|
||||
} from './markdownParsingUtils.js';
|
||||
|
||||
// Mock the theme to use explicit colors instead of empty strings from the default theme.
|
||||
// This ensures that ansiColorize actually applies ANSI codes that we can verify.
|
||||
|
|
@ -222,5 +225,217 @@ describe('parsingUtils', () => {
|
|||
),
|
||||
);
|
||||
});
|
||||
|
||||
// Bare-URL rendering: punctuation that trails a URL is expected to be emitted
// as its own segment after the link segment. Each expectation below is the
// concatenation of the individually styled segments, in order.

it('should strip trailing period from bare URL', () => {
  expect(parseMarkdownToANSI('Visit https://example.com.')).toBe(
    [primary('Visit '), link('https://example.com'), primary('.')].join(''),
  );
});

it('should strip trailing comma from bare URL', () => {
  expect(parseMarkdownToANSI('See https://example.com, then continue')).toBe(
    [
      primary('See '),
      link('https://example.com'),
      primary(','),
      primary(' then continue'),
    ].join(''),
  );
});

it('should strip multiple trailing punctuation from bare URL', () => {
  expect(parseMarkdownToANSI('Is it https://example.com?!')).toBe(
    [primary('Is it '), link('https://example.com'), primary('?!')].join(''),
  );
});

it('should preserve balanced parentheses in bare URL (Wikipedia)', () => {
  // The closing paren pairs with the one inside the path, so it stays in the link.
  expect(
    parseMarkdownToANSI(
      'See https://en.wikipedia.org/wiki/Foo_(bar) for details',
    ),
  ).toBe(
    [
      primary('See '),
      link('https://en.wikipedia.org/wiki/Foo_(bar)'),
      primary(' for details'),
    ].join(''),
  );
});

it('should strip trailing period after balanced parens in bare URL', () => {
  expect(
    parseMarkdownToANSI('See https://en.wikipedia.org/wiki/Foo_(bar).'),
  ).toBe(
    [
      primary('See '),
      link('https://en.wikipedia.org/wiki/Foo_(bar)'),
      primary('.'),
    ].join(''),
  );
});

it('should not modify bare URL without trailing punctuation', () => {
  expect(parseMarkdownToANSI('Visit https://example.com/path now')).toBe(
    [
      primary('Visit '),
      link('https://example.com/path'),
      primary(' now'),
    ].join(''),
  );
});
|
||||
});
|
||||
|
||||
describe('stripTrailingPunctuation', () => {
  // Table of cases. Each row is:
  //   [test title, input URL, expected cleanUrl, expected trailing]
  // One test is registered per row, in the order listed.
  const cases: ReadonlyArray<readonly [string, string, string, string]> = [
    // ASCII sentence punctuation
    [
      'should strip a trailing period',
      'https://example.com.',
      'https://example.com',
      '.',
    ],
    [
      'should strip a trailing comma',
      'https://example.com,',
      'https://example.com',
      ',',
    ],
    [
      'should strip trailing semicolon',
      'https://example.com;',
      'https://example.com',
      ';',
    ],
    [
      'should strip trailing colon',
      'https://example.com/path:',
      'https://example.com/path',
      ':',
    ],
    [
      'should strip trailing exclamation mark',
      'https://example.com!',
      'https://example.com',
      '!',
    ],
    [
      'should strip trailing question mark',
      'https://example.com?',
      'https://example.com',
      '?',
    ],
    [
      'should strip multiple trailing punctuation chars',
      'https://example.com?!',
      'https://example.com',
      '?!',
    ],

    // Quotes
    [
      'should strip trailing quotes',
      'https://example.com"',
      'https://example.com',
      '"',
    ],
    [
      'should strip trailing single quote',
      "https://example.com'",
      'https://example.com',
      "'",
    ],

    // Parentheses: balanced ones belong to the URL, unbalanced ones do not
    [
      'should preserve balanced parentheses',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      '',
    ],
    [
      'should strip unbalanced trailing paren',
      'https://example.com)',
      'https://example.com',
      ')',
    ],
    [
      'should strip period after balanced parens',
      'https://en.wikipedia.org/wiki/Foo_(bar).',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      '.',
    ],
    [
      'should handle nested balanced parentheses',
      'https://example.com/a_(b_(c))',
      'https://example.com/a_(b_(c))',
      '',
    ],

    // Other closing brackets
    [
      'should strip trailing bracket',
      'https://example.com]',
      'https://example.com',
      ']',
    ],
    [
      'should strip trailing angle bracket',
      'https://example.com>',
      'https://example.com',
      '>',
    ],
    [
      'should strip trailing curly brace',
      'https://example.com}',
      'https://example.com',
      '}',
    ],

    // Nothing to strip / punctuation after a query string
    [
      'should return unchanged URL with no trailing punctuation',
      'https://example.com/path',
      'https://example.com/path',
      '',
    ],
    [
      'should handle URL with query params and trailing period',
      'https://example.com/search?q=test.',
      'https://example.com/search?q=test',
      '.',
    ],

    // CJK punctuation
    [
      'should strip CJK fullwidth period',
      'https://example.com\u3002',
      'https://example.com',
      '\u3002',
    ],
    [
      'should strip CJK fullwidth comma',
      'https://example.com\uFF0C',
      'https://example.com',
      '\uFF0C',
    ],

    // Edge cases
    ['should handle empty string', '', '', ''],
    [
      'should not strip periods that are part of the domain',
      'https://www.example.com/path',
      'https://www.example.com/path',
      '',
    ],
  ];

  for (const [title, url, cleanUrl, trailing] of cases) {
    it(title, () => {
      expect(stripTrailingPunctuation(url)).toEqual({ cleanUrl, trailing });
    });
  }
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -13,6 +13,74 @@ import {
|
|||
import { theme } from '../semantic-colors.js';
|
||||
import { debugLogger } from '@google/gemini-cli-core';
|
||||
|
||||
// Characters that should be stripped from the end of bare URLs.
// Includes common punctuation, closing brackets/quotes, and their CJK
// fullwidth equivalents. Note that no *opening* bracket is in this set, which
// stripTrailingPunctuation relies on (see the comment there).
const TRAILING_PUNCT: ReadonlySet<string> = new Set([
  '.',
  ',',
  ';',
  ':',
  '!',
  '?',
  "'",
  '"',
  ')',
  ']',
  '>',
  '}',
  // CJK fullwidth equivalents
  '\u3002', // Ideographic full stop
  '\uFF0C', // Fullwidth comma
  '\uFF1B', // Fullwidth semicolon
  '\uFF1A', // Fullwidth colon
  '\uFF01', // Fullwidth exclamation
  '\uFF1F', // Fullwidth question mark
  '\u300D', // Right corner bracket
  '\u300F', // Right white corner bracket
  '\uFF09', // Fullwidth right parenthesis
  '\u3011', // Right black lenticular bracket
  '\uFF3D', // Fullwidth right square bracket
  '\uFF1E', // Fullwidth greater-than
  '\uFF5D', // Fullwidth right curly bracket
]);

/**
 * Strips trailing punctuation from a URL while preserving balanced parentheses.
 * This handles Wikipedia-style URLs like https://en.wikipedia.org/wiki/Foo_(bar)
 * where the closing paren is part of the URL, not trailing punctuation.
 *
 * Characters are removed from the end for as long as they are in
 * TRAILING_PUNCT. A closing parenthesis (ASCII `)` or fullwidth U+FF09) stops
 * the stripping, and is kept, when the URL up to and including it does not
 * contain more closing than opening parentheses of the same kind.
 *
 * Runs in time linear in the length of `url`.
 *
 * @param url - The raw matched URL text, possibly followed by punctuation.
 * @returns `cleanUrl`, the URL without the stripped suffix, and `trailing`,
 *   the stripped suffix (empty when nothing was stripped). The two always
 *   concatenate back to `url`.
 */
export const stripTrailingPunctuation = (
  url: string,
): { cleanUrl: string; trailing: string } => {
  // Count each kind of parenthesis once, up front. All of these characters are
  // in the BMP, so scanning by UTF-16 code unit is exact.
  let asciiOpen = 0;
  let asciiClose = 0;
  let wideOpen = 0;
  let wideClose = 0;
  for (let i = 0; i < url.length; i++) {
    switch (url.charAt(i)) {
      case '(':
        asciiOpen++;
        break;
      case ')':
        asciiClose++;
        break;
      case '\uFF08':
        wideOpen++;
        break;
      case '\uFF09':
        wideClose++;
        break;
      default:
        break;
    }
  }

  let end = url.length;

  while (end > 0) {
    const ch = url.charAt(end - 1);
    if (!TRAILING_PUNCT.has(ch)) break;

    // Preserve balanced parentheses (for Wikipedia URLs etc.).
    //
    // Only TRAILING_PUNCT characters are ever stripped and none of them is an
    // opening paren, so the opener counts for the remaining prefix never
    // change. Only the closer counts need adjusting as closers are stripped,
    // which avoids re-scanning the prefix for every trailing paren.
    if (ch === ')') {
      // At least as many openers as closers: this closer belongs to the URL.
      if (asciiOpen >= asciiClose) break;
      asciiClose--;
    } else if (ch === '\uFF09') {
      if (wideOpen >= wideClose) break;
      wideClose--;
    }

    end--;
  }

  return {
    cleanUrl: url.slice(0, end),
    trailing: url.slice(end),
  };
};
|
||||
|
||||
// Constants for Markdown parsing
|
||||
const BOLD_MARKER_LENGTH = 2; // For "**"
|
||||
const ITALIC_MARKER_LENGTH = 1; // For "*" or "_"
|
||||
|
|
@ -197,7 +265,11 @@ export const parseMarkdownToANSI = (
|
|||
),
|
||||
);
|
||||
} else if (fullMatch.match(/^https?:\/\//)) {
|
||||
styledPart = ansiColorize(fullMatch, theme.text.link);
|
||||
const { cleanUrl, trailing } = stripTrailingPunctuation(fullMatch);
|
||||
styledPart = ansiColorize(cleanUrl, theme.text.link);
|
||||
if (trailing) {
|
||||
styledPart += ansiColorize(trailing, baseColor);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
debugLogger.warn('Error parsing inline markdown part:', fullMatch, e);
|
||||
|
|
|
|||
Loading…
Reference in a new issue