/**
 * @license
 * Copyright Google LLC All Rights Reserved.
 *
 * Use of this source code is governed by an MIT-style license that can be
 * found in the LICENSE file at https://angular.dev/license
 */

import * as html from './ast';
import {NGSP_UNICODE} from './entities';
import {ParseTreeResult} from './parser';
import {InterpolatedTextToken, TextToken, TokenType} from './tokens';

/** Name of the marker attribute that opts an element out of whitespace removal. */
export const PRESERVE_WS_ATTR_NAME = 'ngPreserveWhitespaces';

/** Tags whose children are never whitespace-trimmed. */
const SKIP_WS_TRIM_TAGS = new Set(['pre', 'template', 'textarea', 'script', 'style']);

// Equivalent to \s with \u00a0 (non-breaking space) excluded.
// Based on https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
const WS_CHARS = ' \f\n\r\t\v\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff';
// Matches any single character that is not whitespace (i.e. the text is not blank).
const NO_WS_REGEXP = new RegExp(`[^${WS_CHARS}]`);
// Matches runs of two or more whitespace characters, which get collapsed to one space.
const WS_REPLACE_REGEXP = new RegExp(`[${WS_CHARS}]{2,}`, 'g');
// Matches every occurrence of the `&ngsp;` placeholder character. Built once at module load
// instead of on every `replaceNgsp` call; `String.prototype.replace` resets `lastIndex` for
// global patterns, so sharing the instance is safe.
const NGSP_REGEXP = new RegExp(NGSP_UNICODE, 'g');

/** Returns whether any of the given attributes is the `ngPreserveWhitespaces` marker. */
function hasPreserveWhitespacesAttr(attrs: html.Attribute[]): boolean {
  return attrs.some((attr: html.Attribute) => attr.name === PRESERVE_WS_ATTR_NAME);
}

/**
 * &ngsp; is a placeholder for non-removable space
 * &ngsp; is converted to the 0xE500 PUA (Private Use Areas) unicode character
 * and later on replaced by a space.
 *
 * @param value Text that may contain `NGSP_UNICODE` placeholder characters.
 * @returns `value` with every placeholder replaced by a regular space.
 */
export function replaceNgsp(value: string): string {
  // lexer is replacing the &ngsp; pseudo-entity with NGSP_UNICODE
  return value.replace(NGSP_REGEXP, ' ');
}

/**
 * This visitor can walk HTML parse tree and remove / trim text nodes using the following rules:
 * - consider spaces, tabs and new lines as whitespace characters;
 * - drop text nodes consisting of whitespace characters only;
 * - for all other text nodes replace consecutive whitespace characters with one space;
 * - convert &ngsp; pseudo-entity to a single space;
 *
 * Removal and trimming of whitespaces have positive performance impact (less code to generate
 * while compiling templates, faster view creation). At the same time it can be "destructive"
 * in some cases (whitespaces can influence layout). Because of the potential of breaking layout
 * this visitor is not activated by default in Angular 5 and people need to explicitly opt-in for
 * whitespace removal. The default option for whitespace removal will be revisited in Angular 6
 * and might be changed to "on" by default.
 *
 * If `originalNodeMap` is provided, the transformed nodes will be mapped back to their original
 * inputs. Any output nodes not in the map were not transformed. This supports correlating and
 * porting information between the trimmed nodes and original nodes (such as `i18n` properties)
 * such that trimming whitespace does not drop required information from the node.
 */
export class WhitespaceVisitor implements html.Visitor {
  // How many ICU expansions which are currently being visited. ICUs can be nested, so this
  // tracks the current depth of nesting. If this depth is greater than 0, then this visitor is
  // currently processing content inside an ICU expansion.
  private icuExpansionDepth = 0;

  /**
   * @param preserveSignificantWhitespace When `true`, whitespace is only collapsed (never fully
   *     trimmed at the start/end of a text node) and text inside ICU expansions is left as-is.
   * @param originalNodeMap Optional map populated with `transformed node -> original node`
   *     entries for every node this visitor re-creates.
   * @param requireContext When `true` (the default), `visit` throws if no sibling context is
   *     supplied.
   */
  constructor(
    private readonly preserveSignificantWhitespace: boolean,
    private readonly originalNodeMap?: Map<html.Node, html.Node>,
    private readonly requireContext = true,
  ) {}

  /**
   * Re-creates the element with either its children whitespace-processed or, for elements that
   * must preserve whitespace, with only the marker attribute stripped.
   */
  visitElement(element: html.Element, context: any): any {
    if (SKIP_WS_TRIM_TAGS.has(element.name) || hasPreserveWhitespacesAttr(element.attrs)) {
      // don't descend into elements where we need to preserve whitespaces
      // but still visit all attributes to eliminate one used as a marker to preserve WS
      const newElement = new html.Element(
        element.name,
        visitAllWithSiblings(this, element.attrs),
        element.children,
        element.sourceSpan,
        element.startSourceSpan,
        element.endSourceSpan,
        element.i18n,
      );
      this.originalNodeMap?.set(newElement, element);
      return newElement;
    }

    const newElement = new html.Element(
      element.name,
      element.attrs,
      visitAllWithSiblings(this, element.children),
      element.sourceSpan,
      element.startSourceSpan,
      element.endSourceSpan,
      element.i18n,
    );
    this.originalNodeMap?.set(newElement, element);
    return newElement;
  }

  /**
   * Returns the attribute unchanged, or `null` for the `ngPreserveWhitespaces` marker so that
   * `visitAllWithSiblings` drops it from the result.
   */
  visitAttribute(attribute: html.Attribute, context: any): any {
    return attribute.name !== PRESERVE_WS_ATTR_NAME ? attribute : null;
  }

  /**
   * Collapses (and optionally trims) the whitespace of a text node.
   *
   * @returns The original node when inside an ICU expansion and significant whitespace is
   *     preserved; a new processed `html.Text` when the node is non-blank or neighbours an
   *     expansion; otherwise `null`, which removes the blank node from the output.
   */
  visitText(text: html.Text, context: SiblingVisitorContext | null): any {
    const isNotBlank = text.value.match(NO_WS_REGEXP);
    const hasExpansionSibling =
      context &&
      (context.prev instanceof html.Expansion || context.next instanceof html.Expansion);

    // Do not trim whitespace within ICU expansions when preserving significant whitespace.
    // Historically, ICU whitespace was never trimmed and this is really a bug. However fixing it
    // would change message IDs which we can't easily do. Instead we only trim ICU whitespace within
    // ICU expansions when not preserving significant whitespace, which is the new behavior where it
    // most matters.
    const inIcuExpansion = this.icuExpansionDepth > 0;
    if (inIcuExpansion && this.preserveSignificantWhitespace) return text;

    if (isNotBlank || hasExpansionSibling) {
      // Process the whitespace in the tokens of this Text node
      const tokens = text.tokens.map((token) =>
        token.type === TokenType.TEXT ? createWhitespaceProcessedTextToken(token) : token,
      );

      // Fully trim message when significant whitespace is not preserved.
      if (!this.preserveSignificantWhitespace && tokens.length > 0) {
        // The first token should only call `.trimStart()` and the last token
        // should only call `.trimEnd()`, but there might be only one token which
        // needs to call both.
        const firstToken = tokens[0]!;
        tokens.splice(0, 1, trimLeadingWhitespace(firstToken, context));

        const lastToken = tokens[tokens.length - 1]; // Could be the same as the first token.
        tokens.splice(tokens.length - 1, 1, trimTrailingWhitespace(lastToken, context));
      }

      // Process the whitespace of the value of this Text node. Also trim the leading/trailing
      // whitespace when we don't need to preserve significant whitespace.
      const processed = processWhitespace(text.value);
      const value = this.preserveSignificantWhitespace
        ? processed
        : trimLeadingAndTrailingWhitespace(processed, context);
      const result = new html.Text(value, text.sourceSpan, tokens, text.i18n);
      this.originalNodeMap?.set(result, text);
      return result;
    }

    return null;
  }

  /** Comments are returned unchanged. */
  visitComment(comment: html.Comment, context: any): any {
    return comment;
  }

  /**
   * Re-creates the ICU expansion with its cases visited. The nesting depth is incremented for
   * the duration of the visit and restored in `finally`, so it stays balanced even if visiting
   * a case throws.
   */
  visitExpansion(expansion: html.Expansion, context: any): any {
    this.icuExpansionDepth++;
    let newExpansion: html.Expansion;
    try {
      newExpansion = new html.Expansion(
        expansion.switchValue,
        expansion.type,
        visitAllWithSiblings(this, expansion.cases),
        expansion.sourceSpan,
        expansion.switchValueSourceSpan,
        expansion.i18n,
      );
    } finally {
      this.icuExpansionDepth--;
    }

    this.originalNodeMap?.set(newExpansion, expansion);
    return newExpansion;
  }

  /** Re-creates the expansion case with its expression nodes visited. */
  visitExpansionCase(expansionCase: html.ExpansionCase, context: any): any {
    const newExpansionCase = new html.ExpansionCase(
      expansionCase.value,
      visitAllWithSiblings(this, expansionCase.expression),
      expansionCase.sourceSpan,
      expansionCase.valueSourceSpan,
      expansionCase.expSourceSpan,
    );
    this.originalNodeMap?.set(newExpansionCase, expansionCase);
    return newExpansionCase;
  }

  /** Re-creates the block with its children visited; parameters are passed through as-is. */
  visitBlock(block: html.Block, context: any): any {
    const newBlock = new html.Block(
      block.name,
      block.parameters,
      visitAllWithSiblings(this, block.children),
      block.sourceSpan,
      block.nameSpan,
      block.startSourceSpan,
      block.endSourceSpan,
    );
    this.originalNodeMap?.set(newBlock, block);
    return newBlock;
  }

  /** Block parameters are returned unchanged. */
  visitBlockParameter(parameter: html.BlockParameter, context: any) {
    return parameter;
  }

  /** `@let` declarations are returned unchanged. */
  visitLetDeclaration(decl: html.LetDeclaration, context: any) {
    return decl;
  }

  /**
   * Generic pre-visit hook that enforces the presence of sibling context.
   *
   * @returns Always `false`. NOTE(review): presumably this tells the node to continue with its
   *     type-specific `visit*` method - confirm against the `html.Visitor` contract.
   * @throws Error when `requireContext` is set and `context` is falsy.
   */
  visit(_node: html.Node, context: any) {
    // `visitAllWithSiblings` provides context necessary for ICU messages to be handled correctly.
    // Prefer that over calling `html.visitAll` directly on this visitor.
    if (this.requireContext && !context) {
      throw new Error(
        `WhitespaceVisitor requires context. Visit via \`visitAllWithSiblings\` to get this context.`,
      );
    }

    return false;
  }
}

/**
 * Trims leading whitespace from a text token, but only when the owning node has no previous
 * sibling. Non-text tokens are returned unchanged.
 */
function trimLeadingWhitespace(
  token: InterpolatedTextToken,
  context: SiblingVisitorContext | null,
): InterpolatedTextToken {
  if (token.type !== TokenType.TEXT) return token;

  const isFirstTokenInTag = !context?.prev;
  if (!isFirstTokenInTag) return token;

  return transformTextToken(token, (text) => text.trimStart());
}

/**
 * Trims trailing whitespace from a text token, but only when the owning node has no next
 * sibling. Non-text tokens are returned unchanged.
 */
function trimTrailingWhitespace(
  token: InterpolatedTextToken,
  context: SiblingVisitorContext | null,
): InterpolatedTextToken {
  if (token.type !== TokenType.TEXT) return token;

  const isLastTokenInTag = !context?.next;
  if (!isLastTokenInTag) return token;

  return transformTextToken(token, (text) => text.trimEnd());
}

/**
 * Trims the start of `text` when there is no previous sibling and the end of `text` when there
 * is no next sibling. A missing context counts as having neither sibling.
 */
function trimLeadingAndTrailingWhitespace(
  text: string,
  context: SiblingVisitorContext | null,
): string {
  const isFirstTokenInTag = !context?.prev;
  const isLastTokenInTag = !context?.next;

  const maybeTrimmedStart = isFirstTokenInTag ? text.trimStart() : text;
  const maybeTrimmed = isLastTokenInTag ? maybeTrimmedStart.trimEnd() : maybeTrimmedStart;
  return maybeTrimmed;
}

/** Returns a copy of the text token whose single part has had its whitespace processed. */
function createWhitespaceProcessedTextToken({type, parts, sourceSpan}: TextToken): TextToken {
  return {type, parts: [processWhitespace(parts[0])], sourceSpan};
}

/** Returns a copy of the text token with `transform` applied to its text. */
function transformTextToken(
  {type, parts, sourceSpan}: TextToken,
  transform: (parts: string) => string,
): TextToken {
  // `TextToken` only ever has one part as defined in its type, so we just transform the first element.
  return {type, parts: [transform(parts[0])], sourceSpan};
}

/**
 * Replaces `&ngsp;` placeholders with spaces, then collapses every run of two or more
 * whitespace characters into a single space.
 */
function processWhitespace(text: string): string {
  return replaceNgsp(text).replace(WS_REPLACE_REGEXP, ' ');
}

/**
 * Runs a `WhitespaceVisitor` over the root nodes of a parse result.
 *
 * @param htmlAstWithErrors Parse result whose root nodes should be processed.
 * @param preserveSignificantWhitespace Forwarded to the `WhitespaceVisitor` constructor.
 * @returns A new `ParseTreeResult` with the processed nodes and the original errors.
 */
export function removeWhitespaces(
  htmlAstWithErrors: ParseTreeResult,
  preserveSignificantWhitespace: boolean,
): ParseTreeResult {
  return new ParseTreeResult(
    visitAllWithSiblings(
      new WhitespaceVisitor(preserveSignificantWhitespace),
      htmlAstWithErrors.rootNodes,
    ),
    htmlAstWithErrors.errors,
  );
}

/** Context handed to each visit call: the node's immediate siblings in its parent's list. */
interface SiblingVisitorContext {
  prev: html.Node | undefined;
  next: html.Node | undefined;
}

/**
 * Visits every node with a context describing its previous and next sibling, collecting the
 * truthy results. Nodes for which the visitor returns a falsy value (e.g. `null`) are dropped.
 */
export function visitAllWithSiblings(visitor: WhitespaceVisitor, nodes: html.Node[]): any[] {
  const result: any[] = [];

  nodes.forEach((ast, i) => {
    // Out-of-range indices yield `undefined`, which marks the first/last node in the list.
    const context: SiblingVisitorContext = {prev: nodes[i - 1], next: nodes[i + 1]};
    const astResult = ast.visit(visitor, context);
    if (astResult) {
      result.push(astResult);
    }
  });
  return result;
}