mirror of
https://github.com/zammad/zammad
synced 2026-05-24 09:48:36 +00:00
Co-authored-by: Benjamin Scharf <bs@zammad.com> Co-authored-by: Dusan Vuckovic <dv@zammad.com> Co-authored-by: Mantas Masalskis <mm@zammad.com>
106 lines
3.7 KiB
TypeScript
106 lines
3.7 KiB
TypeScript
// Copyright (C) 2012-2026 Zammad Foundation, https://zammad-foundation.org/
|
|
|
|
import { wordFilter } from './wordFilter.ts'
|
|
|
|
// Unwraps every descendant of `parent` that matches `selector`: the matched
// element is dropped from the tree and its child nodes take its place.
// An element without children is simply removed.
const replaceWithContent = (parent: Element, selector: string) => {
  const wrappers = Array.from(parent.querySelectorAll(selector))

  for (const wrapper of wrappers) {
    const contents = Array.from(wrapper.childNodes)
    wrapper.replaceWith(...contents)
  }
}
|
|
|
|
// Deletes every descendant of `parent` that matches `selector`,
// together with everything nested inside it.
const removeElements = (parent: Element, selector: string) => {
  const matches = Array.from(parent.querySelectorAll(selector))

  for (const match of matches) match.remove()
}
|
|
|
|
// Recursively strips all comment nodes from the subtree rooted at `parent`.
const removeComments = (parent: Node) => {
  // `childNodes` is a live list: removing a node while iterating it directly
  // shifts the remaining entries and makes the iteration skip the next sibling
  // (e.g. the second of two adjacent comments). Iterate over a snapshot instead.
  for (const child of Array.from(parent.childNodes)) {
    if (child.nodeType === Node.COMMENT_NODE) {
      child.remove()
      // a comment has no children, nothing to descend into
      continue
    }

    removeComments(child)
  }
}
|
|
|
|
// The editor always renders one additional line break on its own, because ProseMirror requires it.
// A <br> that already sits at the end of its container would therefore show up as two line breaks.
// This strips such trailing <br> elements so the editor can safely add its "visual" one.
const removeTrailingLineBreaks = (parent: Element) => {
  // Decides whether nothing relevant follows the given <br>, so that it can be dropped.
  const isRedundant = (lineBreak: Element): boolean => {
    const following = lineBreak.nextSibling

    // <br> is the last node: the editor will add a line break anyway
    if (!following) return true

    // a non-text node follows: drop the <br> unless that node is another <br>
    if (following.nodeType !== Node.TEXT_NODE) {
      return (following as Element).tagName !== 'BR'
    }

    // a text node follows: drop the <br> only if that text is blank and is itself the last node
    return !following.nextSibling && following.textContent?.trim().length === 0
  }

  for (const lineBreak of Array.from(parent.querySelectorAll('br'))) {
    const container = lineBreak.parentElement

    // a container holding nothing but this <br> is kept, but turned into an empty <p>
    // that carries over the container's attributes
    if (container?.childNodes.length === 1) {
      const paragraph = document.createElement('p')
      for (const { name, value } of Array.from(container.attributes)) {
        paragraph.setAttribute(name, value)
      }
      container.replaceWith(paragraph)
      continue
    }

    if (isRedundant(lineBreak)) lineBreak.remove()
  }
}
|
|
|
|
// Hands `parent` to `wordFilter` when its markup contains an opening or closing tag
// in the `w:` or `o:` namespace; otherwise returns `parent` as is.
const removeWordMarkup = (parent: Element) => {
  const markup = parent.outerHTML

  const hasWordTags = /<(\/w|w):[A-Za-z]/.test(markup) || /<(\/o|o):[A-Za-z]/.test(markup)

  return hasWordTags ? wordFilter(parent) : parent
}
|
|
|
|
// Fills every empty (or whitespace-only) <td>/<th> inside `parent` with a non-breaking space.
const replaceEmptyTableCells = (parent: Element) => {
  parent.querySelectorAll('td, th').forEach((cell) => {
    if (cell.innerHTML.trim() !== '') return

    // TODO: TipTap has parsing issues with completely empty table cells, so we add a non-breaking space.
    //   Consider dropping this workaround if the upstream issue gets fixed:
    //   https://github.com/ueberdosis/tiptap/issues/6237
    // The entity is spelled out on purpose: a raw whitespace character inside the literal
    // cannot be told apart from an ordinary space, which would leave the cell empty again.
    cell.innerHTML = '&nbsp;'
  })
}
|
|
|
|
// Parses `html` into a detached <div> and runs the cleanup passes on it, in this order:
// comments, Word markup, unwrapping of wrapper tags (optionally images as well),
// removal of unwanted elements, trailing line breaks, empty table cells.
//
// - `removeImages`: also pass <img> elements to `replaceWithContent`
// - `returnElement`: return the cleaned container element instead of its serialized HTML
export const htmlCleanup = (
  html: string,
  removeImages = false,
  returnElement = false,
): string | Element => {
  const unwrapSelector = 'small, time, form, label'
  const dropSelector =
    'svg, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe, meta, link, title, head, fieldset'

  const container: Element = document.createElement('div')
  container.innerHTML = html

  removeComments(container)
  removeWordMarkup(container)
  replaceWithContent(container, unwrapSelector)
  if (removeImages) replaceWithContent(container, 'img')
  removeElements(container, dropSelector)
  removeTrailingLineBreaks(container)
  replaceEmptyTableCells(container)

  // attributes are left alone, because the editor doesn't put unknown attributes on html elements

  if (returnElement) return container

  // strip new lines around tags: the editor considers them actual new lines,
  // which affects lists, where a new line is a new list item
  const serialized = container.innerHTML
  return serialized.replace(/\n\s*</g, '<').replace(/>\n/g, '>')
}
|