angular/packages/compiler/src/ml_parser/html_whitespaces.ts
Joey Perrott 9dbe6fc18b refactor: update license text to point to angular.dev (#57901)
Update license text to point to angular.dev instead of angular.io

PR Close #57901
2024-09-24 15:33:00 +02:00

301 lines
11 KiB
TypeScript

/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.dev/license
*/
import * as html from './ast';
import {NGSP_UNICODE} from './entities';
import {ParseTreeResult} from './parser';
import {InterpolatedTextToken, TextToken, TokenType} from './tokens';
/**
 * Name of the attribute that marks an element whose whitespace must be left untouched.
 * `WhitespaceVisitor` does not descend into such elements and drops the attribute itself.
 */
export const PRESERVE_WS_ATTR_NAME = 'ngPreserveWhitespaces';
// Tag names for which `WhitespaceVisitor.visitElement` does not descend into the children, so
// their text content is kept as-is.
const SKIP_WS_TRIM_TAGS = new Set(['pre', 'template', 'textarea', 'script', 'style']);
// Equivalent to \s with \u00a0 (non-breaking space) excluded.
// Based on https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
// The string is spliced into a character class by the two regular expressions below, so the
// `\u2000-\u200a` part acts as a range there.
const WS_CHARS = ' \f\n\r\t\v\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff';
// Matches a single character that is not listed in WS_CHARS; used to tell blank text nodes apart
// from text nodes that carry content.
const NO_WS_REGEXP = new RegExp(`[^${WS_CHARS}]`);
// Matches every run of two or more consecutive WS_CHARS characters (global flag), so that each
// run can be replaced by a single space.
const WS_REPLACE_REGEXP = new RegExp(`[${WS_CHARS}]{2,}`, 'g');
/**
 * Tells whether the given attribute list contains the `ngPreserveWhitespaces` marker.
 *
 * @param attrs Attributes of the element being inspected.
 * @returns `true` when at least one attribute is named `PRESERVE_WS_ATTR_NAME`.
 */
function hasPreserveWhitespacesAttr(attrs: html.Attribute[]): boolean {
  for (const candidate of attrs) {
    if (candidate.name === PRESERVE_WS_ATTR_NAME) {
      return true;
    }
  }
  return false;
}
/**
 * Replaces every &ngsp; placeholder in `value` with a regular space.
 *
 * &ngsp; stands for a non-removable space. The lexer converts the pseudo-entity to the 0xE500 PUA
 * (Private Use Areas) unicode character (`NGSP_UNICODE`), which is turned into a space here.
 *
 * @param value Text that may contain `NGSP_UNICODE` characters.
 * @returns The text with each `NGSP_UNICODE` occurrence replaced by ' '.
 */
export function replaceNgsp(value: string): string {
  // Global flag so that all occurrences are replaced, not just the first one.
  const ngspPattern = new RegExp(NGSP_UNICODE, 'g');
  return value.replace(ngspPattern, ' ');
}
/**
 * Visitor that walks an HTML parse tree and removes or condenses whitespace in text nodes using
 * the following rules:
 * - whitespace characters are the ones listed in `WS_CHARS` (spaces, tabs, new lines, ...);
 * - text nodes consisting of whitespace characters only are dropped, unless they sit directly
 *   next to an ICU expansion;
 * - in all other text nodes each run of consecutive whitespace characters becomes one space;
 * - the &ngsp; pseudo-entity is converted to a single space;
 * - children of elements listed in `SKIP_WS_TRIM_TAGS`, or carrying the `ngPreserveWhitespaces`
 *   attribute, are left untouched (the marker attribute itself is removed).
 *
 * Removal and trimming of whitespaces have positive performance impact (less code to generate
 * while compiling templates, faster view creation). At the same time it can be "destructive"
 * in some cases (whitespaces can influence layout). Because of the potential of breaking layout
 * this visitor is not activated by default in Angular 5 and people need to explicitly opt-in for
 * whitespace removal. The default option for whitespace removal will be revisited in Angular 6
 * and might be changed to "on" by default.
 *
 * If `originalNodeMap` is provided, the transformed nodes will be mapped back to their original
 * inputs. Any output nodes not in the map were not transformed. This supports correlating and
 * porting information between the trimmed nodes and original nodes (such as `i18n` properties)
 * such that trimming whitespace does not drop required information from the node.
 */
export class WhitespaceVisitor implements html.Visitor {
  // Number of ICU expansions currently being visited. ICUs can be nested, so this is the current
  // nesting depth; any value above 0 means the visitor is inside an ICU expansion.
  private icuExpansionDepth = 0;

  constructor(
    private readonly preserveSignificantWhitespace: boolean,
    private readonly originalNodeMap?: Map<html.Node, html.Node>,
    private readonly requireContext = true,
  ) {}

  /** Rebuilds the element, trimming whitespace in its children unless it must be preserved. */
  visitElement(element: html.Element, context: any): any {
    const keepWhitespace =
      SKIP_WS_TRIM_TAGS.has(element.name) || hasPreserveWhitespacesAttr(element.attrs);

    // When whitespace has to be preserved we don't descend into the children, but we still visit
    // the attributes to eliminate the one used as a marker to preserve whitespace.
    const attrs = keepWhitespace ? visitAllWithSiblings(this, element.attrs) : element.attrs;
    const children = keepWhitespace
      ? element.children
      : visitAllWithSiblings(this, element.children);

    const replacement = new html.Element(
      element.name,
      attrs,
      children,
      element.sourceSpan,
      element.startSourceSpan,
      element.endSourceSpan,
      element.i18n,
    );
    this.originalNodeMap?.set(replacement, element);
    return replacement;
  }

  /** Drops the `ngPreserveWhitespaces` marker attribute and keeps every other attribute. */
  visitAttribute(attribute: html.Attribute, context: any): any {
    if (attribute.name === PRESERVE_WS_ATTR_NAME) {
      return null;
    }
    return attribute;
  }

  /**
   * Returns a whitespace-processed copy of the text node, `null` when the node is blank and can
   * be dropped, or the node itself when it must not be touched.
   */
  visitText(text: html.Text, context: SiblingVisitorContext | null): any {
    const hasContent = NO_WS_REGEXP.test(text.value);
    const besideExpansion =
      context?.prev instanceof html.Expansion || context?.next instanceof html.Expansion;

    // Do not trim whitespace within ICU expansions when preserving significant whitespace.
    // Historically, ICU whitespace was never trimmed and this is really a bug. However fixing it
    // would change message IDs which we can't easily do. Instead we only trim ICU whitespace
    // within ICU expansions when not preserving significant whitespace, which is the new behavior
    // where it most matters.
    if (this.icuExpansionDepth > 0 && this.preserveSignificantWhitespace) {
      return text;
    }

    // Blank text that is not adjacent to an ICU expansion is removed entirely.
    if (!hasContent && !besideExpansion) {
      return null;
    }

    // Process the whitespace in the tokens of this Text node.
    const tokens = text.tokens.map((token) => {
      if (token.type === TokenType.TEXT) {
        return createWhitespaceProcessedTextToken(token);
      }
      return token;
    });

    // Fully trim the message when significant whitespace is not preserved. Only the first token
    // gets its start trimmed and only the last token gets its end trimmed; with a single token
    // both apply to the same entry.
    if (!this.preserveSignificantWhitespace && tokens.length > 0) {
      tokens[0] = trimLeadingWhitespace(tokens[0], context);
      const lastIndex = tokens.length - 1;
      tokens[lastIndex] = trimTrailingWhitespace(tokens[lastIndex], context);
    }

    // Process the whitespace of the value of this Text node. Also trim the leading/trailing
    // whitespace when we don't need to preserve significant whitespace.
    let value = processWhitespace(text.value);
    if (!this.preserveSignificantWhitespace) {
      value = trimLeadingAndTrailingWhitespace(value, context);
    }

    const replacement = new html.Text(value, text.sourceSpan, tokens, text.i18n);
    this.originalNodeMap?.set(replacement, text);
    return replacement;
  }

  /** Comments are returned unchanged. */
  visitComment(comment: html.Comment, context: any): any {
    return comment;
  }

  /** Rebuilds the ICU expansion from its visited cases while tracking the ICU nesting depth. */
  visitExpansion(expansion: html.Expansion, context: any): any {
    let visitedCases: html.ExpansionCase[];
    this.icuExpansionDepth++;
    try {
      visitedCases = visitAllWithSiblings(this, expansion.cases);
    } finally {
      // Always restore the depth, even when visiting a case throws.
      this.icuExpansionDepth--;
    }

    const replacement = new html.Expansion(
      expansion.switchValue,
      expansion.type,
      visitedCases,
      expansion.sourceSpan,
      expansion.switchValueSourceSpan,
      expansion.i18n,
    );
    this.originalNodeMap?.set(replacement, expansion);
    return replacement;
  }

  /** Rebuilds the ICU case from its visited expression nodes. */
  visitExpansionCase(expansionCase: html.ExpansionCase, context: any): any {
    const visitedExpression = visitAllWithSiblings(this, expansionCase.expression);
    const replacement = new html.ExpansionCase(
      expansionCase.value,
      visitedExpression,
      expansionCase.sourceSpan,
      expansionCase.valueSourceSpan,
      expansionCase.expSourceSpan,
    );
    this.originalNodeMap?.set(replacement, expansionCase);
    return replacement;
  }

  /** Rebuilds the block from its visited children; the parameters are reused as they are. */
  visitBlock(block: html.Block, context: any): any {
    const visitedChildren = visitAllWithSiblings(this, block.children);
    const replacement = new html.Block(
      block.name,
      block.parameters,
      visitedChildren,
      block.sourceSpan,
      block.nameSpan,
      block.startSourceSpan,
      block.endSourceSpan,
    );
    this.originalNodeMap?.set(replacement, block);
    return replacement;
  }

  /** Block parameters are returned unchanged. */
  visitBlockParameter(parameter: html.BlockParameter, context: any) {
    return parameter;
  }

  /** `@let` declarations are returned unchanged. */
  visitLetDeclaration(decl: html.LetDeclaration, context: any) {
    return decl;
  }

  /**
   * Guard hook: always returns `false`, but throws when a context is required and none was
   * supplied.
   */
  visit(_node: html.Node, context: any) {
    // `visitAllWithSiblings` provides context necessary for ICU messages to be handled correctly.
    // Prefer that over calling `html.visitAll` directly on this visitor.
    const contextMissing = this.requireContext && !context;
    if (!contextMissing) {
      return false;
    }
    throw new Error(
      `WhitespaceVisitor requires context. Visit via \`visitAllWithSiblings\` to get this context.`,
    );
  }
}
/**
 * Trims the start of a text token, but only when the owning text node has no previous sibling.
 *
 * @param token Token to process; tokens of any type other than `TokenType.TEXT` are returned as is.
 * @param context Sibling information of the text node that owns the token, if any.
 * @returns A new token with leading whitespace removed, or the original token when nothing applies.
 */
function trimLeadingWhitespace(
  token: InterpolatedTextToken,
  context: SiblingVisitorContext | null,
): InterpolatedTextToken {
  // No previous sibling (or no context at all) means the token starts its parent's content.
  if (token.type === TokenType.TEXT && !context?.prev) {
    return transformTextToken(token, (text) => text.trimStart());
  }
  return token;
}
/**
 * Trims the end of a text token, but only when the owning text node has no next sibling.
 *
 * @param token Token to process; tokens of any type other than `TokenType.TEXT` are returned as is.
 * @param context Sibling information of the text node that owns the token, if any.
 * @returns A new token with trailing whitespace removed, or the original token when nothing applies.
 */
function trimTrailingWhitespace(
  token: InterpolatedTextToken,
  context: SiblingVisitorContext | null,
): InterpolatedTextToken {
  // No next sibling (or no context at all) means the token ends its parent's content.
  if (token.type === TokenType.TEXT && !context?.next) {
    return transformTextToken(token, (text) => text.trimEnd());
  }
  return token;
}
/**
 * Trims `text` on the sides where the owning text node has no sibling.
 *
 * @param text Text value to trim.
 * @param context Sibling information of the text node; with no context both sides are trimmed.
 * @returns The text with its start trimmed when there is no previous sibling and its end trimmed
 *     when there is no next sibling.
 */
function trimLeadingAndTrailingWhitespace(
  text: string,
  context: SiblingVisitorContext | null,
): string {
  let trimmed = text;
  if (!context?.prev) {
    trimmed = trimmed.trimStart();
  }
  if (!context?.next) {
    trimmed = trimmed.trimEnd();
  }
  return trimmed;
}
/**
 * Builds a copy of a text token whose single part has been run through `processWhitespace`.
 *
 * @param token Text token to copy; the token itself is not modified.
 * @returns A new token with the same type and source span and the processed text.
 */
function createWhitespaceProcessedTextToken(token: TextToken): TextToken {
  const processedText = processWhitespace(token.parts[0]);
  return {type: token.type, parts: [processedText], sourceSpan: token.sourceSpan};
}
/**
 * Builds a copy of a text token with `transform` applied to its text.
 *
 * @param token Text token to copy; the token itself is not modified.
 * @param transform Function producing the new text from the current one.
 * @returns A new token with the same type and source span and the transformed text.
 */
function transformTextToken(token: TextToken, transform: (parts: string) => string): TextToken {
  // `TextToken` only ever has one part as defined in its type, so only the first element is
  // transformed.
  const transformedText = transform(token.parts[0]);
  return {type: token.type, parts: [transformedText], sourceSpan: token.sourceSpan};
}
/**
 * Converts &ngsp; placeholders to spaces and then collapses every run of two or more whitespace
 * characters into a single space.
 *
 * @param text Raw text value.
 * @returns The whitespace-processed text.
 */
function processWhitespace(text: string): string {
  const withPlainSpaces = replaceNgsp(text);
  return withPlainSpaces.replace(WS_REPLACE_REGEXP, ' ');
}
/**
 * Runs a `WhitespaceVisitor` over the root nodes of a parse result.
 *
 * @param htmlAstWithErrors Parse result whose root nodes should be processed.
 * @param preserveSignificantWhitespace Forwarded to the `WhitespaceVisitor` constructor.
 * @returns A new `ParseTreeResult` holding the processed root nodes and the original errors.
 */
export function removeWhitespaces(
  htmlAstWithErrors: ParseTreeResult,
  preserveSignificantWhitespace: boolean,
): ParseTreeResult {
  const visitor = new WhitespaceVisitor(preserveSignificantWhitespace);
  const processedRootNodes = visitAllWithSiblings(visitor, htmlAstWithErrors.rootNodes);
  return new ParseTreeResult(processedRootNodes, htmlAstWithErrors.errors);
}
/**
 * Context handed to each node visited through `visitAllWithSiblings`: the nodes located
 * immediately before and after it in the visited list.
 */
interface SiblingVisitorContext {
// Preceding node in the visited list, or `undefined` for the first node.
prev: html.Node | undefined;
// Following node in the visited list, or `undefined` for the last node.
next: html.Node | undefined;
}
/**
 * Visits every node in `nodes`, giving each one a context that exposes its previous and next
 * siblings, and collects the truthy results.
 *
 * @param visitor Visitor passed to each node's `visit` method.
 * @param nodes Nodes to visit, in order.
 * @returns The truthy visit results in visiting order; falsy results (e.g. `null` for removed
 *     nodes) are left out.
 */
export function visitAllWithSiblings(visitor: WhitespaceVisitor, nodes: html.Node[]): any[] {
  const visited = nodes.map((ast, index) => {
    // Out-of-range indexes yield `undefined`, which marks the first/last node of the list.
    const siblings: SiblingVisitorContext = {prev: nodes[index - 1], next: nodes[index + 1]};
    return ast.visit(visitor, siblings);
  });
  return visited.filter((outcome) => Boolean(outcome));
}