diff --git a/packages/cli/src/ui/utils/markdownParsingUtils.test.ts b/packages/cli/src/ui/utils/markdownParsingUtils.test.ts index c32bda58fa..7bc32e763a 100644 --- a/packages/cli/src/ui/utils/markdownParsingUtils.test.ts +++ b/packages/cli/src/ui/utils/markdownParsingUtils.test.ts @@ -6,7 +6,10 @@ import { describe, it, expect, beforeAll, vi } from 'vitest'; import chalk from 'chalk'; -import { parseMarkdownToANSI } from './markdownParsingUtils.js'; +import { + parseMarkdownToANSI, + stripTrailingPunctuation, +} from './markdownParsingUtils.js'; // Mock the theme to use explicit colors instead of empty strings from the default theme. // This ensures that ansiColorize actually applies ANSI codes that we can verify. @@ -222,5 +225,217 @@ describe('parsingUtils', () => { ), ); }); + + it('should strip trailing period from bare URL', () => { + const input = 'Visit https://example.com.'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('Visit ')}${link('https://example.com')}${primary('.')}`, + ); + }); + + it('should strip trailing comma from bare URL', () => { + const input = 'See https://example.com, then continue'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('See ')}${link('https://example.com')}${primary(',')}${primary(' then continue')}`, + ); + }); + + it('should strip multiple trailing punctuation from bare URL', () => { + const input = 'Is it https://example.com?!'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('Is it ')}${link('https://example.com')}${primary('?!')}`, + ); + }); + + it('should preserve balanced parentheses in bare URL (Wikipedia)', () => { + const input = 'See https://en.wikipedia.org/wiki/Foo_(bar) for details'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('See ')}${link('https://en.wikipedia.org/wiki/Foo_(bar)')}${primary(' for details')}`, + ); + }); + + it('should strip trailing period after balanced 
parens in bare URL', () => { + const input = 'See https://en.wikipedia.org/wiki/Foo_(bar).'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('See ')}${link('https://en.wikipedia.org/wiki/Foo_(bar)')}${primary('.')}`, + ); + }); + + it('should not modify bare URL without trailing punctuation', () => { + const input = 'Visit https://example.com/path now'; + const output = parseMarkdownToANSI(input); + expect(output).toBe( + `${primary('Visit ')}${link('https://example.com/path')}${primary(' now')}`, + ); + }); + }); + + describe('stripTrailingPunctuation', () => { + it('should strip a trailing period', () => { + expect(stripTrailingPunctuation('https://example.com.')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '.', + }); + }); + + it('should strip a trailing comma', () => { + expect(stripTrailingPunctuation('https://example.com,')).toEqual({ + cleanUrl: 'https://example.com', + trailing: ',', + }); + }); + + it('should strip trailing semicolon', () => { + expect(stripTrailingPunctuation('https://example.com;')).toEqual({ + cleanUrl: 'https://example.com', + trailing: ';', + }); + }); + + it('should strip trailing colon', () => { + expect(stripTrailingPunctuation('https://example.com/path:')).toEqual({ + cleanUrl: 'https://example.com/path', + trailing: ':', + }); + }); + + it('should strip trailing exclamation mark', () => { + expect(stripTrailingPunctuation('https://example.com!')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '!', + }); + }); + + it('should strip trailing question mark', () => { + expect(stripTrailingPunctuation('https://example.com?')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '?', + }); + }); + + it('should strip multiple trailing punctuation chars', () => { + expect(stripTrailingPunctuation('https://example.com?!')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '?!', + }); + }); + + it('should strip trailing quotes', () => { + 
expect(stripTrailingPunctuation('https://example.com"')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '"', + }); + }); + + it('should strip trailing single quote', () => { + expect(stripTrailingPunctuation("https://example.com'")).toEqual({ + cleanUrl: 'https://example.com', + trailing: "'", + }); + }); + + it('should preserve balanced parentheses', () => { + expect( + stripTrailingPunctuation('https://en.wikipedia.org/wiki/Foo_(bar)'), + ).toEqual({ + cleanUrl: 'https://en.wikipedia.org/wiki/Foo_(bar)', + trailing: '', + }); + }); + + it('should strip unbalanced trailing paren', () => { + expect(stripTrailingPunctuation('https://example.com)')).toEqual({ + cleanUrl: 'https://example.com', + trailing: ')', + }); + }); + + it('should strip period after balanced parens', () => { + expect( + stripTrailingPunctuation('https://en.wikipedia.org/wiki/Foo_(bar).'), + ).toEqual({ + cleanUrl: 'https://en.wikipedia.org/wiki/Foo_(bar)', + trailing: '.', + }); + }); + + it('should handle nested balanced parentheses', () => { + expect(stripTrailingPunctuation('https://example.com/a_(b_(c))')).toEqual( + { + cleanUrl: 'https://example.com/a_(b_(c))', + trailing: '', + }, + ); + }); + + it('should strip trailing bracket', () => { + expect(stripTrailingPunctuation('https://example.com]')).toEqual({ + cleanUrl: 'https://example.com', + trailing: ']', + }); + }); + + it('should strip trailing angle bracket', () => { + expect(stripTrailingPunctuation('https://example.com>')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '>', + }); + }); + + it('should strip trailing curly brace', () => { + expect(stripTrailingPunctuation('https://example.com}')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '}', + }); + }); + + it('should return unchanged URL with no trailing punctuation', () => { + expect(stripTrailingPunctuation('https://example.com/path')).toEqual({ + cleanUrl: 'https://example.com/path', + trailing: '', + }); + }); + + it('should handle URL with 
query params and trailing period', () => { + expect( + stripTrailingPunctuation('https://example.com/search?q=test.'), + ).toEqual({ + cleanUrl: 'https://example.com/search?q=test', + trailing: '.', + }); + }); + + it('should strip CJK fullwidth period', () => { + expect(stripTrailingPunctuation('https://example.com\u3002')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '\u3002', + }); + }); + + it('should strip CJK fullwidth comma', () => { + expect(stripTrailingPunctuation('https://example.com\uFF0C')).toEqual({ + cleanUrl: 'https://example.com', + trailing: '\uFF0C', + }); + }); + + it('should handle empty string', () => { + expect(stripTrailingPunctuation('')).toEqual({ + cleanUrl: '', + trailing: '', + }); + }); + + it('should not strip periods that are part of the domain', () => { + expect(stripTrailingPunctuation('https://www.example.com/path')).toEqual({ + cleanUrl: 'https://www.example.com/path', + trailing: '', + }); + }); }); }); diff --git a/packages/cli/src/ui/utils/markdownParsingUtils.ts b/packages/cli/src/ui/utils/markdownParsingUtils.ts index 10f7cb7a40..f5f117d5cf 100644 --- a/packages/cli/src/ui/utils/markdownParsingUtils.ts +++ b/packages/cli/src/ui/utils/markdownParsingUtils.ts @@ -13,6 +13,74 @@ import { import { theme } from '../semantic-colors.js'; import { debugLogger } from '@google/gemini-cli-core'; +// Characters that should be stripped from the end of bare URLs. +// Includes common punctuation and CJK fullwidth equivalents. 
/**
 * Trailing characters that are treated as sentence punctuation rather than
 * part of a bare URL. Only closing/terminal characters are listed; opening
 * brackets are never stripped, which `stripTrailingPunctuation` relies on.
 */
const TRAILING_PUNCT: ReadonlySet<string> = new Set([
  '.',
  ',',
  ';',
  ':',
  '!',
  '?',
  "'",
  '"',
  ')',
  ']',
  '>',
  '}',
  // CJK fullwidth equivalents
  '\u3002', // Ideographic full stop
  '\uFF0C', // Fullwidth comma
  '\uFF1B', // Fullwidth semicolon
  '\uFF1A', // Fullwidth colon
  '\uFF01', // Fullwidth exclamation
  '\uFF1F', // Fullwidth question mark
  '\u300D', // Right corner bracket
  '\u300F', // Right white corner bracket
  '\uFF09', // Fullwidth right parenthesis
  '\u3011', // Right black lenticular bracket
  '\uFF3D', // Fullwidth right square bracket
  '\uFF1E', // Fullwidth greater-than
  '\uFF5D', // Fullwidth right curly bracket
]);

/**
 * Closing brackets that can legitimately end a URL, mapped to their opener.
 * A closer from this table is kept when the URL contains a matching opener:
 *  - `(` / `)` and the fullwidth pair: Wikipedia-style paths such as `Foo_(bar)`
 *  - `[` / `]`: IPv6 literal hosts such as `http://[::1]`
 */
const BALANCED_CLOSERS: ReadonlyMap<string, string> = new Map([
  [')', '('],
  ['\uFF09', '\uFF08'],
  [']', '['],
]);

/** Counts `opener` occurrences minus `closer` occurrences in `text`. */
const netBracketDepth = (
  text: string,
  opener: string,
  closer: string,
): number => {
  let depth = 0;
  for (const c of text) {
    if (c === opener) depth++;
    else if (c === closer) depth--;
  }
  return depth;
};

/**
 * Splits a bare URL into the URL proper and any trailing punctuation.
 *
 * Characters are removed from the end for as long as they are members of
 * `TRAILING_PUNCT`. A closing bracket listed in `BALANCED_CLOSERS` is only
 * removed when the text up to and including it holds more closers than
 * openers of that kind; otherwise it is considered part of the URL and
 * stripping stops there.
 *
 * @param url - The matched bare URL, possibly followed by punctuation.
 * @returns `cleanUrl` (the URL without trailing punctuation) and `trailing`
 *   (the removed suffix, or `''`). `cleanUrl + trailing` always equals `url`.
 */
export const stripTrailingPunctuation = (
  url: string,
): { cleanUrl: string; trailing: string } => {
  let end = url.length;
  // Net depth per closer for url.slice(0, end), computed on first use and then
  // maintained incrementally so that long runs of closers stay linear-time.
  const depthByCloser = new Map<string, number>();

  while (end > 0) {
    // charAt always yields a string, unlike indexed access under
    // noUncheckedIndexedAccess.
    const ch = url.charAt(end - 1);
    if (!TRAILING_PUNCT.has(ch)) break;

    const opener = BALANCED_CLOSERS.get(ch);
    if (opener !== undefined) {
      const depth =
        depthByCloser.get(ch) ?? netBracketDepth(url.slice(0, end), opener, ch);
      // depth < 0 means more closers than openers, so this one is trailing.
      if (depth >= 0) break;
      // Removing one closer raises the net depth by one. Openers are never
      // stripped, so no other adjustment is needed for the cached value.
      depthByCloser.set(ch, depth + 1);
    }

    end--;
  }

  return {
    cleanUrl: url.slice(0, end),
    trailing: url.slice(end),
  };
};

// Constants for Markdown parsing
const BOLD_MARKER_LENGTH = 2; // For "**"
const ITALIC_MARKER_LENGTH = 1; // For "*" or "_"

// Call site in parseMarkdownToANSI, for matches satisfying /^https?:\/\//:
//   const { cleanUrl, trailing } = stripTrailingPunctuation(fullMatch);
//   styledPart = ansiColorize(cleanUrl, theme.text.link);
//   if (trailing) {
//     styledPart += ansiColorize(trailing, baseColor);
//   }
// This replaces the previous `styledPart = ansiColorize(fullMatch, theme.text.link);`.