diff --git a/frontend/package.json b/frontend/package.json index b06341489..dec636b4e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -25,7 +25,7 @@ "i18next": "^23.11.3", "i18next-browser-languagedetector": "^7.2.1", "js-levenshtein": "^1.1.6", - "katex": "^0.6.0", + "katex": "0.16.11", "lodash.debounce": "^4.0.8", "markdown-it": "^13.0.1", "moment": "^2.30.1", diff --git a/frontend/src/utils/chat/plugins/markdown-katex.js b/frontend/src/utils/chat/plugins/markdown-katex.js index 288d34567..54934852e 100644 --- a/frontend/src/utils/chat/plugins/markdown-katex.js +++ b/frontend/src/utils/chat/plugins/markdown-katex.js @@ -1,278 +1,245 @@ import katex from "katex"; import "katex/dist/katex.min.css"; -// Test if potential opening or closing delimieter -// Assumes that there is a "$" at state.src[pos] -function isValidDelim(state, pos) { - var prevChar, - nextChar, - max = state.posMax, - can_open = true, - can_close = true; +// Validate whether a $ at `pos` can open/close inline math. +// This preserves common markdown behavior and avoids parsing currency badly. +function isValidDollarDelim(state, pos) { + const max = state.posMax; + const prevChar = pos > 0 ? state.src.charCodeAt(pos - 1) : -1; + const nextChar = pos + 1 <= max ? state.src.charCodeAt(pos + 1) : -1; - prevChar = pos > 0 ? state.src.charCodeAt(pos - 1) : -1; - nextChar = pos + 1 <= max ? state.src.charCodeAt(pos + 1) : -1; + let canOpen = true; + let canClose = true; - // Only apply whitespace rules if we're dealing with $ delimiter - if (state.src[pos] === "$") { - if ( - prevChar === 0x20 /* " " */ || - prevChar === 0x09 /* \t */ || - (nextChar >= 0x30 /* "0" */ && nextChar <= 0x39) /* "9" */ - ) { - can_close = false; - } - if (nextChar === 0x20 /* " " */ || nextChar === 0x09 /* \t */) { - can_open = false; - } + // Same basic heuristics as many markdown-it math plugins: + // - "$ " should not open + // - " $" should not close + // - "$5" should usually not be treated as opening math + if ( + prevChar === 0x20 || // space + prevChar === 0x09 || // tab + (nextChar >= 0x30 && nextChar <= 0x39) // digit + ) { + canClose = false; } - return { - can_open: can_open, - can_close: can_close, - }; + if ( + nextChar === 0x20 || // space + nextChar === 0x09 // tab + ) { + canOpen = false; + } + + return { canOpen, canClose }; +} + +function findClosingDollar(src, start) { + let match = start; + + while ((match = src.indexOf("$", match)) !== -1) { + let pos = match - 1; + while (pos >= 0 && src[pos] === "\\") pos--; + + // odd number of backslashes before $ => unescaped delimiter + if ((match - pos) % 2 === 1) { + return match; + } + + match += 1; + } + + return -1; +} + +function findClosingParen(src, start) { + let match = start; + + while ((match = src.indexOf("\\)", match)) !== -1) { + let pos = match - 1; + while (pos >= 0 && src[pos] === "\\") pos--; + + // odd number of backslashes before the "\" in "\)" => real closing delimiter + if ((match - pos) % 2 === 1) { + return match; + } + + match += 2; + } + + return -1; } function math_inline(state, silent) { - var start, match, token, res, pos; + const src = state.src; + const pos = state.pos; - // Only process $ and \( delimiters for inline math - if ( - state.src[state.pos] !== "$" && - (state.src[state.pos] !== "\\" || state.src[state.pos + 1] !== "(") - ) { - return false; - } - - // Handle \( ... \) case separately - if (state.src[state.pos] === "\\" && state.src[state.pos + 1] === "(") { - start = state.pos + 2; - match = start; - while ((match = state.src.indexOf("\\)", match)) !== -1) { - pos = match - 1; - while (state.src[pos] === "\\") { - pos -= 1; - } - if ((match - pos) % 2 == 1) { - break; - } - match += 1; - } + // Case 1: \( ... \) + if (src[pos] === "\\" && src[pos + 1] === "(") { + const start = pos + 2; + const match = findClosingParen(src, start); if (match === -1) { - if (!silent) { - state.pending += "\\("; - } - state.pos = start; - return true; + return false; // let markdown handle it normally } if (!silent) { - token = state.push("math_inline", "math", 0); + const token = state.push("math_inline", "math", 0); token.markup = "\\("; - token.content = state.src.slice(start, match); + token.content = src.slice(start, match); } state.pos = match + 2; return true; } - res = isValidDelim(state, state.pos); - if (!res.can_open) { - if (!silent) { - state.pending += "$"; - } - state.pos += 1; - return true; + // Case 2: $ ... $ + if (src[pos] !== "$") { + return false; } - // First check for and bypass all properly escaped delimieters - // This loop will assume that the first leading backtick can not - // be the first character in state.src, which is known since - // we have found an opening delimieter already. - start = state.pos + 1; - match = start; - while ((match = state.src.indexOf("$", match)) !== -1) { - // Found potential $, look for escapes, pos will point to - // first non escape when complete - pos = match - 1; - while (state.src[pos] === "\\") { - pos -= 1; - } - - // Even number of escapes, potential closing delimiter found - if ((match - pos) % 2 == 1) { - break; - } - match += 1; + const open = isValidDollarDelim(state, pos); + if (!open.canOpen) { + return false; } - // No closing delimiter found. Consume $ and continue. + // Don't treat $$ as inline math here; leave it for block parsing. + if (src[pos + 1] === "$") { + return false; + } + + const start = pos + 1; + const match = findClosingDollar(src, start); + if (match === -1) { - if (!silent) { - state.pending += "$"; - } - state.pos = start; - return true; + return false; // let literal $ pass through as markdown text } - // Check if we have empty content, ie: $$. Do not parse. - if (match - start === 0) { - if (!silent) { - state.pending += "$$"; - } - state.pos = start + 1; - return true; + if (match === start) { + return false; // empty content: "$$" or "$$..." edge case, not inline math } - // Check for valid closing delimiter - res = isValidDelim(state, match); - if (!res.can_close) { - if (!silent) { - state.pending += "$"; - } - state.pos = start; - return true; + const close = isValidDollarDelim(state, match); + if (!close.canClose) { + return false; } if (!silent) { - token = state.push("math_inline", "math", 0); + const token = state.push("math_inline", "math", 0); token.markup = "$"; - token.content = state.src.slice(start, match); + token.content = src.slice(start, match); } state.pos = match + 1; return true; } -function math_block(state, start, end, silent) { - var firstLine, - lastLine, - next, - lastPos, - found = false, - token, - pos = state.bMarks[start] + state.tShift[start], - max = state.eMarks[start]; +function math_block(state, startLine, endLine, silent) { + let pos = state.bMarks[startLine] + state.tShift[startLine]; + let max = state.eMarks[startLine]; - // Check for $$, \[, or standalone [ as opening delimiters - if (pos + 1 > max) { + if (pos + 1 >= max) return false; + + const firstTwo = state.src.slice(pos, pos + 2); + + let openDelim = null; + let closeDelim = null; + + if (firstTwo === "$$") { + openDelim = "$$"; + closeDelim = "$$"; + } else if (firstTwo === "\\[") { + openDelim = "\\["; + closeDelim = "\\]"; + } else { return false; } - let openDelim = state.src.slice(pos, pos + 2); - let isDoubleDollar = openDelim === "$$"; - let isLatexBracket = openDelim === "\\["; + if (silent) return true; - if (!isDoubleDollar && !isLatexBracket) { - return false; - } + let firstLine = state.src.slice(pos + 2, max); + let nextLine = startLine; + let found = false; + let lastLine = ""; - // Determine the closing delimiter and position adjustment - let delimiter, posAdjust; - if (isDoubleDollar) { - delimiter = "$$"; - posAdjust = 2; - } else if (isLatexBracket) { - delimiter = "\\]"; - posAdjust = 2; - } - - pos += posAdjust; - firstLine = state.src.slice(pos, max); - - if (silent) { - return true; - } - if (firstLine.trim().slice(-delimiter.length) === delimiter) { - // Single line expression - firstLine = firstLine.trim().slice(0, -delimiter.length); + // Single-line block + const trimmedFirst = firstLine.trim(); + if (trimmedFirst.endsWith(closeDelim)) { + firstLine = trimmedFirst.slice(0, -closeDelim.length); found = true; } - for (next = start; !found; ) { - next++; + while (!found) { + nextLine++; + if (nextLine >= endLine) break; - if (next >= end) { + pos = state.bMarks[nextLine] + state.tShift[nextLine]; + max = state.eMarks[nextLine]; + + // stop on negative indentation like markdown-it block rules typically do + if (pos < max && state.tShift[nextLine] < state.blkIndent) { break; } - pos = state.bMarks[next] + state.tShift[next]; - max = state.eMarks[next]; + const lineText = state.src.slice(pos, max); + const trimmed = lineText.trim(); - if (pos < max && state.tShift[next] < state.blkIndent) { - // non-empty line with negative indent should stop the list: - break; - } - - if ( - state.src.slice(pos, max).trim().slice(-delimiter.length) === delimiter - ) { - lastPos = state.src.slice(0, max).lastIndexOf(delimiter); - lastLine = state.src.slice(pos, lastPos); + if (trimmed.endsWith(closeDelim)) { + const endIndex = lineText.lastIndexOf(closeDelim); + lastLine = lineText.slice(0, endIndex); found = true; } } - state.line = next + 1; + if (!found) { + return false; + } - token = state.push("math_block", "math", 0); + state.line = nextLine + 1; + + const token = state.push("math_block", "math", 0); token.block = true; + token.markup = openDelim; + token.map = [startLine, state.line]; token.content = - (firstLine && firstLine.trim() ? firstLine + "\n" : "") + - state.getLines(start + 1, next, state.tShift[start], true) + - (lastLine && lastLine.trim() ? lastLine : ""); - token.map = [start, state.line]; - token.markup = delimiter; + (firstLine ? firstLine : "") + + (nextLine > startLine ? "\n" : "") + + state.getLines(startLine + 1, nextLine, state.tShift[startLine], true) + + (lastLine ? lastLine : ""); + return true; } -export default function math_plugin(md, options) { - // Default options - options = options || {}; - - var katexInline = function (latex) { - options.displayMode = false; +export default function math_plugin(md, options = {}) { + function renderMath(latex, displayMode) { try { - latex = latex - .replace(/^\[(.*)\]$/, "$1") - .replace(/^\\\((.*)\\\)$/, "$1") - .replace(/^\\\[(.*)\\\]$/, "$1"); - return katex.renderToString(latex, options); + return katex.renderToString(latex, { + ...options, + displayMode, + }); } catch (error) { if (options.throwOnError) { - console.log(error); + console.error(error); } - return latex; + + // Escape minimally so raw latex shows safely if rendering fails + const escaped = md.utils.escapeHtml(latex); + return displayMode + ? `
${escaped}
` + : `${escaped}`; } - }; + } - var inlineRenderer = function (tokens, idx) { - return katexInline(tokens[idx].content); - }; + md.inline.ruler.before("escape", "math_inline", math_inline); - var katexBlock = function (latex) { - options.displayMode = true; - try { - // Remove surrounding delimiters if present - latex = latex.replace(/^\[(.*)\]$/, "$1").replace(/^\\\[(.*)\\\]$/, "$1"); - return "

" + katex.renderToString(latex, options) + "

"; - } catch (error) { - if (options.throwOnError) { - console.log(error); - } - return latex; - } - }; - - var blockRenderer = function (tokens, idx) { - return katexBlock(tokens[idx].content) + "\n"; - }; - - md.inline.ruler.after("escape", "math_inline", math_inline); md.block.ruler.after("blockquote", "math_block", math_block, { alt: ["paragraph", "reference", "blockquote", "list"], }); - md.renderer.rules.math_inline = inlineRenderer; - md.renderer.rules.math_block = blockRenderer; + + md.renderer.rules.math_inline = function (tokens, idx) { + return renderMath(tokens[idx].content, false); + }; + + md.renderer.rules.math_block = function (tokens, idx) { + return renderMath(tokens[idx].content, true) + "\n"; + }; } diff --git a/frontend/yarn.lock b/frontend/yarn.lock index d6738b43f..2ee132b9d 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -1685,6 +1685,11 @@ commander@^4.0.0: resolved "https://registry.yarnpkg.com/commander/-/commander-4.1.1.tgz#9fd602bd936294e9e9ef46a3f4d6964044b18068" integrity sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA== +commander@^8.3.0: + version "8.3.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-8.3.0.tgz#4837ea1b2da67b9c616a67afbb0fafee567bca66" + integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww== + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -3504,12 +3509,12 @@ jss@10.10.0, jss@^10.10.0: object.assign "^4.1.4" object.values "^1.1.6" -katex@^0.6.0: - version "0.6.0" - resolved "https://registry.yarnpkg.com/katex/-/katex-0.6.0.tgz#12418e09121c05c92041b6b3b9fb6bab213cb6f3" - integrity sha512-rS4mY3SvHYg5LtQV6RBcK0if7ur6plyEukAOV+jGGPqFImuzu8fHL6M752iBmRGoUyF0bhZbAPoezehn7xYksA== +katex@0.16.11: + version "0.16.11" + resolved "https://registry.yarnpkg.com/katex/-/katex-0.16.11.tgz#4bc84d5584f996abece5f01c6ad11304276a33f5" + integrity sha512-RQrI8rlHY92OLf3rho/Ts8i/XvjgguEjOkO1BEXcU3N8BqPpSzBNwV/G0Ukr+P/l3ivvJUE/Fa/CwbS6HesGNQ== dependencies: - match-at "^0.1.0" + commander "^8.3.0" keyv@^4.5.4: version "4.5.4" @@ -3610,11 +3615,6 @@ markdown-it@^13.0.1: mdurl "^1.0.1" uc.micro "^1.0.5" -match-at@^0.1.0: - version "0.1.1" - resolved "https://registry.yarnpkg.com/match-at/-/match-at-0.1.1.tgz#25d040d291777704d5e6556bbb79230ec2de0540" - integrity sha512-h4Yd392z9mST+dzc+yjuybOGFNOZjmXIPKWjxBd1Bb23r4SmDOsk2NYCU2BMUBGbSpZqwVsZYNq26QS3xfaT3Q== - math-intrinsics@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz#a0dd74be81e2aa5c2f27e65ce283605ee4e2b7f9"