diff --git a/packages/lexical-markdown/src/MarkdownExport.ts b/packages/lexical-markdown/src/MarkdownExport.ts index 81f636d73f9..e6bfb80e60c 100644 --- a/packages/lexical-markdown/src/MarkdownExport.ts +++ b/packages/lexical-markdown/src/MarkdownExport.ts @@ -27,6 +27,7 @@ import { import { ElementTransformer, + getCodeSpanDelimiter, hardLineBreakState, MultilineElementTransformer, TextFormatTransformer, @@ -493,21 +494,38 @@ function exportTextFormat( // Otherwise, we escape leading and trailing whitespaces to their corresponding code points, // ensuring the returned string maintains its original formatting, e.g., "** foo **". + const isCode = node.hasFormat('code'); + let output = textContent; - if (!node.hasFormat('code')) { + if (!isCode) { // Preserve literal backslashes when preserving source newlines. output = shouldPreserveNewLines ? output.replace(/([*_`~])/g, '\\$1') : output.replace(/([*_`~\\])/g, '\\$1'); } - // Extract leading and trailing whitespaces. - // CommonMark flanking rules require formatting tags to be adjacent to non-whitespace characters. - const match = output.match(/^(\s*)(.*?)(\s*)$/s) || ['', '', output, '']; - const leadingSpace = match[1]; - const trimmedOutput = match[2]; - const trailingSpace = match[3]; - const isWhitespaceOnly = trimmedOutput === ''; + let leadingSpace: string; + let trimmedOutput: string; + let trailingSpace: string; + let isWhitespaceOnly: boolean; + + if (isCode) { + // Inline code is an atomic literal span with a content-derived fence, so + // its whitespace stays inside the fence and other formats wrap around it. + const {fence, padded} = getCodeSpanDelimiter(textContent); + leadingSpace = ''; + trailingSpace = ''; + trimmedOutput = fence + padded + fence; + isWhitespaceOnly = false; + } else { + // Extract leading and trailing whitespaces. + // CommonMark flanking rules require formatting tags to be adjacent to non-whitespace characters. + const match = output.match(/^(\s*)(.*?)(\s*)$/s) || ['', '', output, '']; + leadingSpace = match[1]; + trimmedOutput = match[2]; + trailingSpace = match[3]; + isWhitespaceOnly = trimmedOutput === ''; + } // the opening tags to be added to the result let openingTags = ''; @@ -524,6 +542,11 @@ function exportTextFormat( const format = transformer.format[0]; const tag = transformer.tag; + // Inline code uses a content-derived fence handled above, not a static tag. + if (format === 'code') { + continue; + } + // dedup applied formats if (checkHasFormat(node, format) && !applied.has(format)) { applied.add(format); diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts index 2a500642dc4..f0be87f0520 100644 --- a/packages/lexical-markdown/src/MarkdownTransformers.ts +++ b/packages/lexical-markdown/src/MarkdownTransformers.ts @@ -794,6 +794,25 @@ export const INLINE_CODE: TextFormatTransformer = { type: 'text-format', }; +// Computes a CommonMark-compliant fence and padded content for an inline code +// span: https://spec.commonmark.org/#code-spans +export function getCodeSpanDelimiter(content: string): { + fence: string; + padded: string; +} { + const backtickRuns = content.match(/`+/g); + const longestRun = backtickRuns + ? Math.max(...backtickRuns.map(run => run.length)) + : 0; + const fence = '`'.repeat(longestRun + 1); + const needsPadding = + content.length === 0 || + content.includes('`') || + (/^\s/.test(content) && /\s$/.test(content)); + const padded = needsPadding ? ` ${content} ` : content; + return {fence, padded}; +} + export const HIGHLIGHT: TextFormatTransformer = { format: ['highlight'], tag: '==', diff --git a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts index 612c1d0d834..7d168d6cf58 100644 --- a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts +++ b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts @@ -424,6 +424,27 @@ describe('Markdown', () => { html: '

$a $b

', md: '`$a` `$b`', }, + { + // Inline code containing a backtick must use a longer fence (CommonMark + // code spans) and pad with spaces so it round-trips losslessly. + html: '

Here: a`b

', + md: 'Here: `` a`b ``', + }, + { + // Two consecutive backticks in the content bump the fence to three. + html: '

Code: a``b

', + md: 'Code: ``` a``b ```', + }, + { + // Content beginning with a backtick is padded so the fence stays distinct. + html: '

`x

', + md: '`` `x ``', + }, + { + // The code fence must remain the innermost wrapping, inside bold. + html: '

a`b

', + md: '**`` a`b ``**', + }, { html: '

Hello world

', md: '[Hello](https://lexical.dev) world', @@ -2280,6 +2301,68 @@ describe('markdown Safari compatibility (issue #8012)', () => { }); }); +describe('inline code with backticks (CommonMark code spans)', () => { + function createTestEditor() { + return createHeadlessEditor({ + nodes: [ + HeadingNode, + ListNode, + ListItemNode, + QuoteNode, + CodeNode, + LinkNode, + ], + }); + } + + function roundtrip(md: string): string { + const editor = createTestEditor(); + editor.update(() => $convertFromMarkdownString(md, TRANSFORMERS), { + discrete: true, + }); + return editor.read('latest', () => $convertToMarkdownString(TRANSFORMERS)); + } + + function exportCodeSpan(content: string): string { + const editor = createTestEditor(); + editor.update( + () => { + const paragraph = $createParagraphNode(); + const text = $createTextNode(content); + text.toggleFormat('code'); + paragraph.append(text); + $getRoot().append(paragraph); + }, + {discrete: true}, + ); + return editor.read('latest', () => $convertToMarkdownString(TRANSFORMERS)); + } + + it('round-trips code spans whose content contains backticks', () => { + expect(roundtrip('Here: `` a`b ``')).toBe('Here: `` a`b ``'); + expect(roundtrip('Code: ``` a``b ```')).toBe('Code: ``` a``b ```'); + expect(roundtrip('`` `x ``')).toBe('`` `x ``'); + }); + + it('exports a content-derived fence longer than any backtick run', () => { + expect(exportCodeSpan('block code')).toBe('`block code`'); + expect(exportCodeSpan('a`b')).toBe('`` a`b ``'); + expect(exportCodeSpan('a``b')).toBe('``` a``b ```'); + expect(exportCodeSpan('`x')).toBe('`` `x ``'); + }); + + it('normalizes a redundant inline fence to the minimal valid fence', () => { + // Both a single- and triple-backtick inline fence with backtick-free + // content normalize to a single backtick on export. + expect(roundtrip('a `block code` b')).toBe('a `block code` b'); + expect(roundtrip('a ```block code``` b')).toBe('a `block code` b'); + }); + + it('keeps the code fence innermost when combined with bold', () => { + expect(roundtrip('**`` a`b ``**')).toBe('**`` a`b ``**'); + }); +}); + describe('$convertSelectionToMarkdownString', () => { function createTestEditor() { return createHeadlessEditor({ diff --git a/packages/lexical-markdown/src/importTextFormatTransformer.ts b/packages/lexical-markdown/src/importTextFormatTransformer.ts index b4fac76dd6b..2c4615bc59d 100644 --- a/packages/lexical-markdown/src/importTextFormatTransformer.ts +++ b/packages/lexical-markdown/src/importTextFormatTransformer.ts @@ -37,37 +37,29 @@ export function findOutermostTextFormatTransformer( } | null { const textContent = textNode.getTextContent(); - // Find code span first. Emphasis delimiters inside inline elements (e.g., code spans) + // Find code spans first. Emphasis delimiters inside inline elements (e.g., code spans) // should not be processed. Currently only code spans are handled; other inline elements // (e.g., links, raw HTML) may need similar treatment in the future. - const codeRegex = textFormatTransformersIndex.fullMatchRegExpByTag['`']; const codeTransformer = textFormatTransformersIndex.transformersByTag['`']; const excludeRanges: {start: number; end: number}[] = []; let codeMatch = null; - if (codeRegex && codeTransformer) { - const globalRegex = new RegExp(codeRegex.source, 'g'); - const matches = Array.from(textContent.matchAll(globalRegex)); - - for (const match of matches) { - // Group 1 captures the character preceding the opening backtick (or an - // empty string when the span starts at position 0). Offset past it so - // startIndex points to the backtick itself. - const startIndex = match.index! + match[1].length; - const endIndex = match.index! + match[0].length; + if (codeTransformer) { + const codeSpans = scanCodeSpans(textContent); + for (const span of codeSpans) { if (!codeMatch) { codeMatch = { - content: match[3], - endIndex, - startIndex, + content: span.content, + endIndex: span.endIndex, + startIndex: span.startIndex, tag: '`', }; } excludeRanges.push({ - end: endIndex, - start: startIndex, + end: span.endIndex, + start: span.startIndex, }); } } @@ -129,6 +121,86 @@ export function findOutermostTextFormatTransformer( }; } +// Finds all inline code spans, left to right and non-overlapping, per CommonMark +// rules: https://spec.commonmark.org/#code-spans. A run opens a span and the +// next run of equal length closes it. An escaped backtick (`\``) cannot open a +// span, but backslashes are otherwise literal and don't prevent closing. +function scanCodeSpans(text: string): { + startIndex: number; + endIndex: number; + content: string; +}[] { + const isEscaped = (index: number): boolean => { + let count = 0; + for (let i = index - 1; i >= 0 && text[i] === '\\'; i--) { + count++; + } + return count % 2 === 1; + }; + + // Collect maximal backtick runs. + const runs: {index: number; length: number}[] = []; + let i = 0; + while (i < text.length) { + if (text[i] === '`') { + let length = 1; + while (i + length < text.length && text[i + length] === '`') { + length++; + } + runs.push({index: i, length}); + i += length; + } else { + i++; + } + } + + const spans: {content: string; endIndex: number; startIndex: number}[] = []; + let openIdx = 0; + while (openIdx < runs.length) { + const opener = runs[openIdx]; + + // An escaped backtick run is a literal backtick and cannot open a span. + if (isEscaped(opener.index)) { + openIdx++; + continue; + } + + let closeIdx = -1; + for (let c = openIdx + 1; c < runs.length; c++) { + if (runs[c].length === opener.length) { + closeIdx = c; + break; + } + } + + if (closeIdx === -1) { + // No matching closer; treat this run as literal and try the next one. + openIdx++; + continue; + } + + const closer = runs[closeIdx]; + let content = text.slice(opener.index + opener.length, closer.index); + if ( + content.length >= 2 && + content.startsWith(' ') && + content.endsWith(' ') && + /[^ ]/.test(content) + ) { + content = content.slice(1, -1); + } + + spans.push({ + content, + endIndex: closer.index + closer.length, + startIndex: opener.index, + }); + openIdx = closeIdx + 1; + } + + return spans; +} + function scanDelimiters( text: string, transformersIndex: TextFormatTransformersIndex,