This commit is contained in:
Yuvraj Angad Singh 2026-04-21 10:03:48 +07:00 committed by GitHub
commit 4e21b8e5f8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 289 additions and 2 deletions

View file

@ -6,7 +6,10 @@
import { describe, it, expect, beforeAll, vi } from 'vitest';
import chalk from 'chalk';
import { parseMarkdownToANSI } from './markdownParsingUtils.js';
import {
parseMarkdownToANSI,
stripTrailingPunctuation,
} from './markdownParsingUtils.js';
// Mock the theme to use explicit colors instead of empty strings from the default theme.
// This ensures that ansiColorize actually applies ANSI codes that we can verify.
@ -222,5 +225,217 @@ describe('parsingUtils', () => {
),
);
});
it('should strip trailing period from bare URL', () => {
  // The final '.' is expected in its own `primary` span, after the `link` span.
  expect(parseMarkdownToANSI('Visit https://example.com.')).toBe(
    [primary('Visit '), link('https://example.com'), primary('.')].join(''),
  );
});
it('should strip trailing comma from bare URL', () => {
  // The comma and the remaining sentence are expected as two separate
  // `primary` spans following the `link` span.
  expect(parseMarkdownToANSI('See https://example.com, then continue')).toBe(
    [
      primary('See '),
      link('https://example.com'),
      primary(','),
      primary(' then continue'),
    ].join(''),
  );
});
it('should strip multiple trailing punctuation from bare URL', () => {
  // Both '?' and '!' are expected together in one `primary` span after the link.
  expect(parseMarkdownToANSI('Is it https://example.com?!')).toBe(
    [primary('Is it '), link('https://example.com'), primary('?!')].join(''),
  );
});
it('should preserve balanced parentheses in bare URL (Wikipedia)', () => {
  // The closing ')' pairs with the '(' inside the URL, so the whole
  // "Foo_(bar)" path is expected inside the `link` span.
  expect(
    parseMarkdownToANSI(
      'See https://en.wikipedia.org/wiki/Foo_(bar) for details',
    ),
  ).toBe(
    [
      primary('See '),
      link('https://en.wikipedia.org/wiki/Foo_(bar)'),
      primary(' for details'),
    ].join(''),
  );
});
it('should strip trailing period after balanced parens in bare URL', () => {
  // Only the '.' is expected outside the link; the balanced ')' stays inside.
  expect(
    parseMarkdownToANSI('See https://en.wikipedia.org/wiki/Foo_(bar).'),
  ).toBe(
    [
      primary('See '),
      link('https://en.wikipedia.org/wiki/Foo_(bar)'),
      primary('.'),
    ].join(''),
  );
});
it('should not modify bare URL without trailing punctuation', () => {
  // The URL ends in a path character, so the full URL is expected in the
  // `link` span and the following text in a `primary` span.
  expect(parseMarkdownToANSI('Visit https://example.com/path now')).toBe(
    [
      primary('Visit '),
      link('https://example.com/path'),
      primary(' now'),
    ].join(''),
  );
});
});
describe('stripTrailingPunctuation', () => {
  // Each row is: [test title, raw URL, expected cleanUrl, expected trailing].
  // Rows are registered in order, one `it` per row.
  const cases: ReadonlyArray<readonly [string, string, string, string]> = [
    [
      'should strip a trailing period',
      'https://example.com.',
      'https://example.com',
      '.',
    ],
    [
      'should strip a trailing comma',
      'https://example.com,',
      'https://example.com',
      ',',
    ],
    [
      'should strip trailing semicolon',
      'https://example.com;',
      'https://example.com',
      ';',
    ],
    [
      'should strip trailing colon',
      'https://example.com/path:',
      'https://example.com/path',
      ':',
    ],
    [
      'should strip trailing exclamation mark',
      'https://example.com!',
      'https://example.com',
      '!',
    ],
    [
      'should strip trailing question mark',
      'https://example.com?',
      'https://example.com',
      '?',
    ],
    [
      'should strip multiple trailing punctuation chars',
      'https://example.com?!',
      'https://example.com',
      '?!',
    ],
    [
      'should strip trailing quotes',
      'https://example.com"',
      'https://example.com',
      '"',
    ],
    [
      'should strip trailing single quote',
      "https://example.com'",
      'https://example.com',
      "'",
    ],
    [
      'should preserve balanced parentheses',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      '',
    ],
    [
      'should strip unbalanced trailing paren',
      'https://example.com)',
      'https://example.com',
      ')',
    ],
    [
      'should strip period after balanced parens',
      'https://en.wikipedia.org/wiki/Foo_(bar).',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      '.',
    ],
    [
      'should handle nested balanced parentheses',
      'https://example.com/a_(b_(c))',
      'https://example.com/a_(b_(c))',
      '',
    ],
    [
      'should strip trailing bracket',
      'https://example.com]',
      'https://example.com',
      ']',
    ],
    [
      'should strip trailing angle bracket',
      'https://example.com>',
      'https://example.com',
      '>',
    ],
    [
      'should strip trailing curly brace',
      'https://example.com}',
      'https://example.com',
      '}',
    ],
    [
      'should return unchanged URL with no trailing punctuation',
      'https://example.com/path',
      'https://example.com/path',
      '',
    ],
    [
      'should handle URL with query params and trailing period',
      'https://example.com/search?q=test.',
      'https://example.com/search?q=test',
      '.',
    ],
    [
      'should strip CJK fullwidth period',
      'https://example.com\u3002',
      'https://example.com',
      '\u3002',
    ],
    [
      'should strip CJK fullwidth comma',
      'https://example.com\uFF0C',
      'https://example.com',
      '\uFF0C',
    ],
    ['should handle empty string', '', '', ''],
    [
      'should not strip periods that are part of the domain',
      'https://www.example.com/path',
      'https://www.example.com/path',
      '',
    ],
  ];

  for (const [title, rawUrl, cleanUrl, trailing] of cases) {
    it(title, () => {
      expect(stripTrailingPunctuation(rawUrl)).toEqual({ cleanUrl, trailing });
    });
  }
});
});

View file

@ -13,6 +13,74 @@ import {
import { theme } from '../semantic-colors.js';
import { debugLogger } from '@google/gemini-cli-core';
// Characters that should be stripped from the end of bare URLs.
// Includes common punctuation and CJK fullwidth equivalents.
const TRAILING_PUNCT = new Set([
  '.',
  ',',
  ';',
  ':',
  '!',
  '?',
  "'",
  '"',
  ')',
  ']',
  '>',
  '}',
  // CJK fullwidth equivalents
  '\u3002', // Ideographic full stop
  '\uFF0C', // Fullwidth comma
  '\uFF1B', // Fullwidth semicolon
  '\uFF1A', // Fullwidth colon
  '\uFF01', // Fullwidth exclamation
  '\uFF1F', // Fullwidth question mark
  '\u300D', // Right corner bracket
  '\u300F', // Right white corner bracket
  '\uFF09', // Fullwidth right parenthesis
  '\u3011', // Right black lenticular bracket
  '\uFF3D', // Fullwidth right square bracket
  '\uFF1E', // Fullwidth greater-than
  '\uFF5D', // Fullwidth right curly bracket
]);

// Closing brackets that are kept when they pair with an opener earlier in the
// URL, mapped to that opener. Closers that are in TRAILING_PUNCT but not listed
// here (for example '>') are always treated as trailing punctuation.
const BALANCED_CLOSERS = new Map<string, string>([
  [')', '('],
  [']', '['], // e.g. IPv6 literal hosts such as http://[::1]
  ['}', '{'],
  ['\uFF09', '\uFF08'], // Fullwidth parentheses
]);

// The opener side of BALANCED_CLOSERS, for O(1) membership checks.
const BALANCED_OPENERS = new Set(BALANCED_CLOSERS.values());

/**
 * Strips trailing punctuation from a URL while preserving balanced brackets.
 * This handles Wikipedia-style URLs like https://en.wikipedia.org/wiki/Foo_(bar)
 * and IPv6 literal hosts like http://[::1], where the closing bracket is part
 * of the URL rather than punctuation from the surrounding sentence.
 *
 * A closer counts as balanced only when an opener of the same kind appears
 * earlier in the URL and has not already been paired with another closer.
 * Stripping stops at the first character (from the end) that is either not
 * trailing punctuation or is a balanced closer.
 *
 * @param url - The raw matched URL text, possibly followed by punctuation.
 * @returns The cleaned URL and the stripped suffix; `cleanUrl + trailing`
 *   always equals `url`.
 */
export const stripTrailingPunctuation = (
  url: string,
): { cleanUrl: string; trailing: string } => {
  // Fast path: nothing to strip, so skip the bracket scan entirely.
  // (charAt(-1) on an empty string yields '', which is not in the set.)
  if (!TRAILING_PUNCT.has(url.charAt(url.length - 1))) {
    return { cleanUrl: url, trailing: '' };
  }

  // One left-to-right pass pairing each closer with an earlier unmatched
  // opener of the same kind. Whether a closer is paired depends only on the
  // text before it, so the result stays valid as characters are stripped from
  // the end and the URL never needs rescanning.
  const openCounts = new Map<string, number>();
  const pairedCloserIndexes = new Set<number>();
  for (let i = 0; i < url.length; i++) {
    const ch = url.charAt(i);
    const opener = BALANCED_CLOSERS.get(ch);
    if (opener !== undefined) {
      const unmatched = openCounts.get(opener) ?? 0;
      if (unmatched > 0) {
        openCounts.set(opener, unmatched - 1);
        pairedCloserIndexes.add(i);
      }
    } else if (BALANCED_OPENERS.has(ch)) {
      openCounts.set(ch, (openCounts.get(ch) ?? 0) + 1);
    }
  }

  let end = url.length;
  while (
    end > 0 &&
    TRAILING_PUNCT.has(url.charAt(end - 1)) &&
    !pairedCloserIndexes.has(end - 1)
  ) {
    end--;
  }

  return {
    cleanUrl: url.slice(0, end),
    trailing: url.slice(end),
  };
};
// Constants for Markdown parsing: character counts of the inline markers.
const BOLD_MARKER_LENGTH = 2; // For "**"
const ITALIC_MARKER_LENGTH = 1; // For "*" or "_"
@ -197,7 +265,11 @@ export const parseMarkdownToANSI = (
),
);
} else if (fullMatch.match(/^https?:\/\//)) {
styledPart = ansiColorize(fullMatch, theme.text.link);
const { cleanUrl, trailing } = stripTrailingPunctuation(fullMatch);
styledPart = ansiColorize(cleanUrl, theme.text.link);
if (trailing) {
styledPart += ansiColorize(trailing, baseColor);
}
}
} catch (e) {
debugLogger.warn('Error parsing inline markdown part:', fullMatch, e);