latex works with some hacks

This commit is contained in:
Mathew Pareles 2025-04-18 05:40:55 -07:00
parent 7a739e567c
commit 021acf541a
3 changed files with 205 additions and 1395 deletions

1427
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -79,6 +79,7 @@
"@mistralai/mistralai": "^1.5.0",
"@modelcontextprotocol/sdk": "^1.9.0",
"@parcel/watcher": "2.5.1",
"@types/katex": "^0.16.7",
"@types/semver": "^7.5.8",
"@vscode/deviceid": "^0.1.1",
"@vscode/iconv-lite-umd": "0.7.0",
@ -111,6 +112,7 @@
"http-proxy-agent": "^7.0.0",
"https-proxy-agent": "^7.0.2",
"jschardet": "3.1.4",
"katex": "^0.16.22",
"kerberos": "2.1.1",
"lucide-react": "^0.477.0",
"marked": "^15.0.7",

View file

@ -5,6 +5,9 @@
import React, { JSX, useMemo, useState } from 'react'
import { marked, MarkedToken, Token } from 'marked'
import katex from 'katex'
import 'katex/dist/katex.min.css'
import dompurify from '../../../../../../../base/browser/dompurify/dompurify.js'
import { convertToVscodeLang, detectLanguage } from '../../../../common/helpers/languageHelpers.js'
import { BlockCodeApplyWrapper } from './ApplyBlockHoverButtons.js'
@ -31,6 +34,63 @@ function isValidUri(s: string): boolean {
return s.length > 5 && isAbsolute(s) && !s.includes('//') && !s.includes('/*') // common case that is a false positive is comments like //
}
// renders contiguous string of latex eg $e^{i\pi}$
const LatexRender = ({ latex }: { latex: string }) => {
try {
let formula = latex;
let displayMode = false;
// Extract the formula from delimiters
if (latex.startsWith('$') && latex.endsWith('$')) {
// Check if it's display math $$...$$
if (latex.startsWith('$$') && latex.endsWith('$$')) {
formula = latex.slice(2, -2);
displayMode = true;
} else {
formula = latex.slice(1, -1);
}
} else if (latex.startsWith('\\(') && latex.endsWith('\\)')) {
formula = latex.slice(2, -2);
} else if (latex.startsWith('\\[') && latex.endsWith('\\]')) {
formula = latex.slice(2, -2);
displayMode = true;
}
// Render LaTeX
const html = katex.renderToString(formula, {
displayMode: displayMode,
throwOnError: false,
output: 'html'
});
// Sanitize the HTML output with DOMPurify
const sanitizedHtml = dompurify.sanitize(html, {
RETURN_TRUSTED_TYPE: true,
USE_PROFILES: { html: true, svg: true, mathMl: true }
});
// Add proper styling based on mode
const className = displayMode
? 'katex-block my-2 text-center'
: 'katex-inline';
// Use the ref approach to avoid dangerouslySetInnerHTML
const mathRef = React.useRef<HTMLSpanElement>(null);
React.useEffect(() => {
if (mathRef.current) {
mathRef.current.innerHTML = sanitizedHtml as unknown as string;
}
}, [sanitizedHtml]);
return <span ref={mathRef} className={className}></span>;
} catch (error) {
console.error('KaTeX rendering error:', error);
return <span className="katex-error text-red-500">{latex}</span>;
}
}
const Codespan = ({ text, className, onClick }: { text: string, className?: string, onClick?: () => void }) => {
// TODO compute this once for efficiency. we should use `labels.ts/shorten` to display duplicates properly
@ -108,6 +168,105 @@ const CodespanWithLink = ({ text, rawText, chatMessageLocation }: { text: string
}
const paragraphToLatexSegments = (paragraphText: string) => {
const segments: React.ReactNode[] = [];
if (paragraphText
&& !(paragraphText.includes('#') || paragraphText.includes('`')) // don't process latex if a codespan or header tag
&& !/^[\w\s.()[\]{}]+$/.test(paragraphText) // don't process latex if string only contains alphanumeric chars, whitespace, periods, and brackets
) {
const rawText = paragraphText;
// Regular expressions to match LaTeX delimiters
const displayMathRegex = /\$\$(.*?)\$\$/g; // Display math: $$...$$
const inlineMathRegex = /\$((?!\$).*?)\$/g; // Inline math: $...$ (but not $$)
// Check if the paragraph contains any LaTeX expressions
if (displayMathRegex.test(rawText) || inlineMathRegex.test(rawText)) {
// Reset the regex state (since we used .test earlier)
displayMathRegex.lastIndex = 0;
inlineMathRegex.lastIndex = 0;
// Parse the text into segments of regular text and LaTeX
let lastIndex = 0;
let segmentId = 0;
// First replace display math ($$...$$)
let match;
while ((match = displayMathRegex.exec(rawText)) !== null) {
const [fullMatch, formula] = match;
const matchIndex = match.index;
// Add text before the LaTeX expression
if (matchIndex > lastIndex) {
const textBefore = rawText.substring(lastIndex, matchIndex);
segments.push(
<span key={`text-${segmentId++}`}>
{textBefore}
</span>
);
}
// Add the LaTeX expression
segments.push(
<LatexRender key={`latex-${segmentId++}`} latex={fullMatch} />
);
lastIndex = matchIndex + fullMatch.length;
}
// Add any remaining text (which might contain inline math)
if (lastIndex < rawText.length) {
const remainingText = rawText.substring(lastIndex);
// Process inline math in the remaining text
lastIndex = 0;
inlineMathRegex.lastIndex = 0;
const inlineSegments: React.ReactNode[] = [];
while ((match = inlineMathRegex.exec(remainingText)) !== null) {
const [fullMatch] = match;
const matchIndex = match.index;
// Add text before the inline LaTeX
if (matchIndex > lastIndex) {
const textBefore = remainingText.substring(lastIndex, matchIndex);
inlineSegments.push(
<span key={`inline-text-${segmentId++}`}>
{textBefore}
</span>
);
}
// Add the inline LaTeX
inlineSegments.push(
<LatexRender key={`inline-latex-${segmentId++}`} latex={fullMatch} />
);
lastIndex = matchIndex + fullMatch.length;
}
// Add any remaining text after all inline math
if (lastIndex < remainingText.length) {
inlineSegments.push(
<span key={`inline-final-${segmentId++}`}>
{remainingText.substring(lastIndex)}
</span>
);
}
segments.push(...inlineSegments);
}
}
}
return segments
}
export type RenderTokenOptions = { isApplyEnabled?: boolean, isLinkDetectionEnabled?: boolean }
const RenderToken = ({ token, inPTag, codeURI, chatMessageLocation, tokenIdx, ...options }: { token: Token | string, inPTag?: boolean, codeURI?: URI, chatMessageLocation?: ChatMessageLocation, tokenIdx: string, } & RenderTokenOptions): React.ReactNode => {
const accessor = useAccessor()
@ -289,6 +448,17 @@ const RenderToken = ({ token, inPTag, codeURI, chatMessageLocation, tokenIdx, ..
}
if (t.type === 'paragraph') {
// check for latex
const latexSegments = paragraphToLatexSegments(t.raw)
if (latexSegments.length !== 0) {
if (inPTag) {
return <span className='block'>{latexSegments}</span>;
}
return <p>{latexSegments}</p>;
}
// if no latex, default behavior
const contents = <>
{t.tokens.map((token, index) => (
<RenderToken key={index}
@ -385,4 +555,3 @@ export const ChatMarkdownRender = ({ string, inPTag = false, chatMessageLocation
</>
)
}