Skip to content

Commit

Permalink
Allow $ to literally denote quantities of USD in chat (#95)
Browse files Browse the repository at this point in the history
Co-authored-by: david qiu <44106031+dlqqq@users.noreply.github.com>
  • Loading branch information
brichet and dlqqq authored Nov 7, 2024
1 parent 2819cee commit 323ca04
Showing 1 changed file with 72 additions and 5 deletions.
77 changes: 72 additions & 5 deletions packages/jupyter-chat/src/components/rendermime-markdown.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,74 @@ type RendermimeMarkdownProps = {
};

/**
 * Escapes the backslash of every LaTeX delimiter (`\(`, `\)`, `\[`, `\]`) so
 * that the delimiter still appears in the DOM after the initial Markdown
 * render. For example, this function takes `\(` and returns `\\(`.
 *
 * Required for proper rendering of Markdown + LaTeX markup in the chat by
 * `ILatexTypesetter`.
 *
 * @param text - Markdown source that may contain LaTeX delimiters.
 * @returns `text` with one extra backslash inserted before each delimiter.
 */
function escapeLatexDelimiters(text: string): string {
  // A global regex is required here: `String.prototype.replace` with a string
  // pattern only rewrites the first occurrence, which would leave every later
  // delimiter in the message unescaped.
  // `'\\\\$1'` is two literal backslashes followed by the captured bracket.
  return text.replace(/\\([()[\]])/g, '\\\\$1');
}

/**
 * Type guard: reports whether `node` is a DOM Text node, narrowing its type
 * to `Text` when it is. A `null` argument yields `false`.
 */
function isTextNode(node: Node | null): node is Text {
  const kind = node?.nodeType;
  return kind === Node.TEXT_NODE;
}

/**
 * Backslash-escapes lone `$` symbols in the text of an HTML element, leaving
 * alone any text that lives inside `pre`, `code`, `samp`, or `kbd` elements.
 *
 * Escaping stops a single `$` from acting as an inline math delimiter, so `$`
 * can be written literally to denote quantities of USD. Elements that show
 * their contents verbatim (such as code elements) are not touched. This
 * overrides JupyterLab's default rendering of Markdown with LaTeX.
 *
 * The Jupyter AI system prompt should explicitly request that the LLM not use
 * `$` as an inline math delimiter. This is the default behavior.
 *
 * @param el - Root element whose descendant text nodes are rewritten in place.
 */
function escapeDollarSymbols(el: HTMLElement) {
  // Walk text nodes only, passing over those nested in a verbatim element.
  const textWalker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, {
    acceptNode: node => {
      const verbatimAncestor = node.parentElement?.closest(
        'pre, code, samp, kbd'
      );
      if (verbatimAncestor) {
        return NodeFilter.FILTER_SKIP;
      }
      return NodeFilter.FILTER_ACCEPT;
    }
  });

  // First pass: gather every accepted text node.
  const pending: Text[] = [];
  for (
    let visited = textWalker.nextNode();
    visited;
    visited = textWalker.nextNode()
  ) {
    if (isTextNode(visited)) {
      pending.push(visited);
    }
  }

  // Second pass: turn `$` into `\$` unless it touches another `$` or is
  // already preceded by a backslash. Examples:
  // - `$10 - $5`      => `\$10 - \$5` (escaped)
  // - `$$ \infty $$`  => `$$ \infty $$` (unchanged)
  // - `\$10`          => `\$10` (unchanged, already escaped)
  for (const textNode of pending) {
    const content = textNode.textContent;
    if (!content) {
      continue;
    }
    textNode.textContent = content.replace(/(?<![$\\])\$(?!\$)/g, '\\$');
  }
}

function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {
Expand All @@ -47,12 +107,15 @@ function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {

useEffect(() => {
const renderContent = async () => {
// initialize mime model
const mdStr = escapeLatexDelimiters(props.markdownStr);
const model = props.rmRegistry.createModel({
data: { [MD_MIME_TYPE]: mdStr }
});

const renderer = props.rmRegistry.createRenderer(MD_MIME_TYPE);

// step 1: render markdown
await renderer.renderModel(model);
props.rmRegistry.latexTypesetter?.typeset(renderer.node);
if (!renderer.node) {
Expand All @@ -61,6 +124,10 @@ function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {
);
}

// step 2: render LaTeX via MathJax, while escaping single dollar symbols.
escapeDollarSymbols(renderer.node);
props.rmRegistry.latexTypesetter?.typeset(renderer.node);

const newCodeToolbarDefns: [HTMLDivElement, CodeToolbarProps][] = [];

// Attach CodeToolbar root element to each <pre> block
Expand Down

0 comments on commit 323ca04

Please sign in to comment.