fix(compiler): correctly compile long numeric HTML entities (#64297)

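Fixes an issue where long numeric HTML entities (e.g. `&#x1F6C8;`, which renders as 🛈) were incorrectly compiled due to the use of `String.fromCharCode`, which only handles 4-digit hex (16-bit) code units. The tokenizer now uses `String.fromCodePoint` instead.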

PR Close #64297
This commit is contained in:
SkyZeroZx 2025-10-08 12:41:16 -05:00 committed by Jessica Janiuk
parent 062a696673
commit 9a7529dd66
3 changed files with 72 additions and 1 deletions

View file

@@ -692,7 +692,7 @@ class _Tokenizer {
this._cursor.advance();
try {
const charCode = parseInt(strNum, isHex ? 16 : 10);
- this._endToken([String.fromCharCode(charCode), this._cursor.getChars(start)]);
+ this._endToken([String.fromCodePoint(charCode), this._cursor.getChars(start)]);
} catch {
throw this._createError(
_unknownEntityErrorMsg(this._cursor.getChars(start)),

View file

@ -52,6 +52,22 @@ describe('HtmlParser', () => {
]);
});
it('should parse text nodes with HTML entities (5+ hex digits)', () => {
  // U+1F6C8 (Circled Information Source, 🛈) needs five hex digits, so the
  // text node must carry the full code point rather than a truncated one.
  const nodes = humanizeDom(parser.parse('<div>&#x1F6C8;</div>', 'TestComp'));
  const expectedText = [html.Text, '\u{1F6C8}', 1, [''], ['\u{1F6C8}', '&#x1F6C8;'], ['']];

  expect(nodes).toEqual([[html.Element, 'div', 0], expectedText]);
});
it('should parse text nodes with decimal HTML entities (5+ digits)', () => {
  // 128712 is the decimal spelling of U+1F6C8 (Circled Information Source, 🛈).
  const nodes = humanizeDom(parser.parse('<div>&#128712;</div>', 'TestComp'));
  const expectedText = [html.Text, '\u{1F6C8}', 1, [''], ['\u{1F6C8}', '&#128712;'], ['']];

  expect(nodes).toEqual([[html.Element, 'div', 0], expectedText]);
});
it('should normalize line endings within CDATA', () => {
const parsed = parser.parse('<![CDATA[ line 1 \r\n line 2 ]]>', 'TestComp');
expect(humanizeDom(parsed)).toEqual([
@ -326,6 +342,22 @@ describe('HtmlParser', () => {
]);
});
it('should parse attributes containing encoded entities (5+ hex digits)', () => {
  // The attribute value is a single five-hex-digit reference to U+1F6C8 (🛈).
  const nodes = humanizeDom(parser.parse('<div foo="&#x1F6C8;"></div>', 'TestComp'));
  const expectedAttr = [html.Attribute, 'foo', '\u{1F6C8}', [''], ['\u{1F6C8}', '&#x1F6C8;'], ['']];

  expect(nodes).toEqual([[html.Element, 'div', 0], expectedAttr]);
});
it('should parse attributes containing encoded decimal entities (5+ digits)', () => {
  // Same code point as the hex variant (U+1F6C8, 🛈), written as decimal 128712.
  const nodes = humanizeDom(parser.parse('<div foo="&#128712;"></div>', 'TestComp'));
  const expectedAttr = [html.Attribute, 'foo', '\u{1F6C8}', [''], ['\u{1F6C8}', '&#128712;'], ['']];

  expect(nodes).toEqual([[html.Element, 'div', 0], expectedAttr]);
});
it('should parse attributes containing unquoted interpolation', () => {
expect(humanizeDom(parser.parse('<div foo={{message}}></div>', 'TestComp'))).toEqual([
[html.Element, 'div', 0],
@ -1632,6 +1664,25 @@ describe('HtmlParser', () => {
]);
});
it('should decode HTML entities with 5+ hex digits in interpolations', () => {
  // Two adjacent interpolations reference U+1F6C8 (🛈): the first in hex form,
  // the second in decimal form. The input is also the expected source span.
  const source = '{{&#x1F6C8;}}' + '{{&#128712;}}';
  const spans = humanizeDomSourceSpans(parser.parse(source, 'TestComp'));
  const expectedText = [
    html.Text,
    '{{\u{1F6C8}}}' + '{{\u{1F6C8}}}',
    0,
    [''],
    ['{{', '&#x1F6C8;', '}}'],
    [''],
    ['{{', '&#128712;', '}}'],
    [''],
    source,
  ];

  expect(spans).toEqual([expectedText]);
});
it('should support interpolations in text', () => {
expect(
humanizeDomSourceSpans(parser.parse('<div> pre {{ value }} post </div>', 'TestComp')),

View file

@ -2136,6 +2136,26 @@ describe('HtmlLexer', () => {
]);
});
it('should parse entities with more than 4 hex digits', () => {
  // &#x1F6C8; is a five-hex-digit reference (🛈 - Circled Information Source).
  const tokens = tokenizeAndHumanizeParts('&#x1F6C8;');
  const expectedTokens = [
    [TokenType.TEXT, ''],
    [TokenType.ENCODED_ENTITY, '\u{1F6C8}', '&#x1F6C8;'],
    [TokenType.TEXT, ''],
    [TokenType.EOF],
  ];

  expect(tokens).toEqual(expectedTokens);
});
it('should parse entities with more than 4 decimal digits', () => {
  // &#128712; is the decimal reference to the same character (🛈 - Circled Information Source).
  const tokens = tokenizeAndHumanizeParts('&#128712;');
  const expectedTokens = [
    [TokenType.TEXT, ''],
    [TokenType.ENCODED_ENTITY, '\u{1F6C8}', '&#128712;'],
    [TokenType.TEXT, ''],
    [TokenType.EOF],
  ];

  expect(tokens).toEqual(expectedTokens);
});
it('should store the locations', () => {
expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
[TokenType.TEXT, 'a'],