Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 31 additions & 8 deletions packages/lexical-markdown/src/MarkdownExport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {

import {
ElementTransformer,
getCodeSpanDelimiter,
hardLineBreakState,
MultilineElementTransformer,
TextFormatTransformer,
Expand Down Expand Up @@ -493,21 +494,38 @@ function exportTextFormat(
// Otherwise, we escape leading and trailing whitespaces to their corresponding code points,
// ensuring the returned string maintains its original formatting, e.g., "**   foo   **".

const isCode = node.hasFormat('code');

let output = textContent;
if (!node.hasFormat('code')) {
if (!isCode) {
// Preserve literal backslashes when preserving source newlines.
output = shouldPreserveNewLines
? output.replace(/([*_`~])/g, '\\$1')
: output.replace(/([*_`~\\])/g, '\\$1');
}

// Extract leading and trailing whitespaces.
// CommonMark flanking rules require formatting tags to be adjacent to non-whitespace characters.
const match = output.match(/^(\s*)(.*?)(\s*)$/s) || ['', '', output, ''];
const leadingSpace = match[1];
const trimmedOutput = match[2];
const trailingSpace = match[3];
const isWhitespaceOnly = trimmedOutput === '';
let leadingSpace: string;
let trimmedOutput: string;
let trailingSpace: string;
let isWhitespaceOnly: boolean;

if (isCode) {
// Inline code is an atomic literal span with a content-derived fence, so
// its whitespace stays inside the fence and other formats wrap around it.
const {fence, padded} = getCodeSpanDelimiter(textContent);
leadingSpace = '';
trailingSpace = '';
trimmedOutput = fence + padded + fence;
isWhitespaceOnly = false;
} else {
// Extract leading and trailing whitespaces.
// CommonMark flanking rules require formatting tags to be adjacent to non-whitespace characters.
const match = output.match(/^(\s*)(.*?)(\s*)$/s) || ['', '', output, ''];
leadingSpace = match[1];
trimmedOutput = match[2];
trailingSpace = match[3];
isWhitespaceOnly = trimmedOutput === '';
}

// the opening tags to be added to the result
let openingTags = '';
Expand All @@ -524,6 +542,11 @@ function exportTextFormat(
const format = transformer.format[0];
const tag = transformer.tag;

// Inline code uses a content-derived fence handled above, not a static tag.
if (format === 'code') {
continue;
}

// dedup applied formats
if (checkHasFormat(node, format) && !applied.has(format)) {
applied.add(format);
Expand Down
19 changes: 19 additions & 0 deletions packages/lexical-markdown/src/MarkdownTransformers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,25 @@ export const INLINE_CODE: TextFormatTransformer = {
type: 'text-format',
};

/**
 * Computes a CommonMark-compliant fence and padded content for an inline code
 * span: https://spec.commonmark.org/#code-spans
 *
 * @param content - The literal text of the code span, without any delimiters.
 * @returns `fence`, a backtick string one character longer than the longest
 * backtick run in `content`, and `padded`, the content to place between two
 * fences (wrapped in a single space on each side when padding is required).
 */
export function getCodeSpanDelimiter(content: string): {
  fence: string;
  padded: string;
} {
  // The fence has to be longer than every backtick run inside the content,
  // otherwise an inner run would be read as the closing delimiter.
  let longestRun = 0;
  for (const run of content.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = '`'.repeat(longestRun + 1);

  // CommonMark strips one leading and one trailing space only when the span
  // is not made up entirely of spaces. Padding space-only content would
  // therefore add two spaces that are never stripped again, so such content
  // is emitted verbatim.
  const hasNonSpace = /[^ ]/.test(content);
  const needsPadding =
    // Empty content still gets a body between the fences; without it the two
    // fences would merge into a single longer backtick run.
    content.length === 0 ||
    // Keeps a leading/trailing backtick from fusing with the fence.
    content.includes('`') ||
    // Protects existing edge whitespace from the strip-one-space rule.
    (hasNonSpace && /^\s/.test(content) && /\s$/.test(content));
  const padded = needsPadding ? ` ${content} ` : content;
  return {fence, padded};
}

export const HIGHLIGHT: TextFormatTransformer = {
format: ['highlight'],
tag: '==',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,27 @@ describe('Markdown', () => {
html: '<p><code spellcheck="false" style="white-space: pre-wrap;"><span>$a</span></code><span style="white-space: pre-wrap;"> </span><code spellcheck="false" style="white-space: pre-wrap;"><span>$b</span></code></p>',
md: '`$a` `$b`',
},
{
// Inline code containing a backtick must use a longer fence (CommonMark
// code spans) and pad with spaces so it round-trips losslessly.
html: '<p><span style="white-space: pre-wrap;">Here: </span><code spellcheck="false" style="white-space: pre-wrap;"><span>a`b</span></code></p>',
md: 'Here: `` a`b ``',
},
{
// Two consecutive backticks in the content bump the fence to three.
html: '<p><span style="white-space: pre-wrap;">Code: </span><code spellcheck="false" style="white-space: pre-wrap;"><span>a``b</span></code></p>',
md: 'Code: ``` a``b ```',
},
{
// Content beginning with a backtick is padded so the fence stays distinct.
html: '<p><code spellcheck="false" style="white-space: pre-wrap;"><span>`x</span></code></p>',
md: '`` `x ``',
},
{
// The code fence must remain the innermost wrapping, inside bold.
html: '<p><b><code spellcheck="false" style="white-space: pre-wrap;"><strong>a`b</strong></code></b></p>',
md: '**`` a`b ``**',
},
{
html: '<p><a href="https://lexical.dev"><span style="white-space: pre-wrap;">Hello</span></a><span style="white-space: pre-wrap;"> world</span></p>',
md: '[Hello](https://lexical.dev) world',
Expand Down Expand Up @@ -2280,6 +2301,68 @@ describe('markdown Safari compatibility (issue #8012)', () => {
});
});

describe('inline code with backticks (CommonMark code spans)', () => {
  // Builds a fresh headless editor with the node set these specs rely on.
  const createTestEditor = () =>
    createHeadlessEditor({
      nodes: [
        HeadingNode,
        ListNode,
        ListItemNode,
        QuoteNode,
        CodeNode,
        LinkNode,
      ],
    });

  // Imports `md` into a new editor and returns what is exported back out.
  const roundtrip = (md: string): string => {
    const testEditor = createTestEditor();
    testEditor.update(
      () => {
        $convertFromMarkdownString(md, TRANSFORMERS);
      },
      {discrete: true},
    );
    return testEditor.read('latest', () =>
      $convertToMarkdownString(TRANSFORMERS),
    );
  };

  // Exports a paragraph holding one code-formatted text node with `content`.
  const exportCodeSpan = (content: string): string => {
    const testEditor = createTestEditor();
    const populate = () => {
      const codeText = $createTextNode(content);
      codeText.toggleFormat('code');
      const block = $createParagraphNode();
      block.append(codeText);
      $getRoot().append(block);
    };
    testEditor.update(populate, {discrete: true});
    return testEditor.read('latest', () =>
      $convertToMarkdownString(TRANSFORMERS),
    );
  };

  it('round-trips code spans whose content contains backticks', () => {
    const stableInputs = ['Here: `` a`b ``', 'Code: ``` a``b ```', '`` `x ``'];
    for (const markdown of stableInputs) {
      expect(roundtrip(markdown)).toBe(markdown);
    }
  });

  it('exports a content-derived fence longer than any backtick run', () => {
    const expectations: [string, string][] = [
      ['block code', '`block code`'],
      ['a`b', '`` a`b ``'],
      ['a``b', '``` a``b ```'],
      ['`x', '`` `x ``'],
    ];
    for (const [content, exported] of expectations) {
      expect(exportCodeSpan(content)).toBe(exported);
    }
  });

  it('normalizes a redundant inline fence to the minimal valid fence', () => {
    // Backtick-free content comes back with a single-backtick fence whether
    // the source used one backtick or three.
    const normalized = 'a `block code` b';
    expect(roundtrip('a `block code` b')).toBe(normalized);
    expect(roundtrip('a ```block code``` b')).toBe(normalized);
  });

  it('keeps the code fence innermost when combined with bold', () => {
    const boldCode = '**`` a`b ``**';
    expect(roundtrip(boldCode)).toBe(boldCode);
  });
});

describe('$convertSelectionToMarkdownString', () => {
function createTestEditor() {
return createHeadlessEditor({
Expand Down
106 changes: 89 additions & 17 deletions packages/lexical-markdown/src/importTextFormatTransformer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,37 +37,29 @@ export function findOutermostTextFormatTransformer(
} | null {
const textContent = textNode.getTextContent();

// Find code span first. Emphasis delimiters inside inline elements (e.g., code spans)
// Find code spans first. Emphasis delimiters inside inline elements (e.g., code spans)
// should not be processed. Currently only code spans are handled; other inline elements
// (e.g., links, raw HTML) may need similar treatment in the future.
const codeRegex = textFormatTransformersIndex.fullMatchRegExpByTag['`'];
const codeTransformer = textFormatTransformersIndex.transformersByTag['`'];

const excludeRanges: {start: number; end: number}[] = [];
let codeMatch = null;
if (codeRegex && codeTransformer) {
const globalRegex = new RegExp(codeRegex.source, 'g');
const matches = Array.from(textContent.matchAll(globalRegex));

for (const match of matches) {
// Group 1 captures the character preceding the opening backtick (or an
// empty string when the span starts at position 0). Offset past it so
// startIndex points to the backtick itself.
const startIndex = match.index! + match[1].length;
const endIndex = match.index! + match[0].length;
if (codeTransformer) {
const codeSpans = scanCodeSpans(textContent);

for (const span of codeSpans) {
if (!codeMatch) {
codeMatch = {
content: match[3],
endIndex,
startIndex,
content: span.content,
endIndex: span.endIndex,
startIndex: span.startIndex,
tag: '`',
};
}

excludeRanges.push({
end: endIndex,
start: startIndex,
end: span.endIndex,
start: span.startIndex,
});
}
}
Expand Down Expand Up @@ -129,6 +121,86 @@ export function findOutermostTextFormatTransformer(
};
}

// Scans `text` for inline code spans, left to right and without overlap,
// following the CommonMark code-span rules
// (https://spec.commonmark.org/#code-spans): a backtick run opens a span and
// the next run of the same length closes it. A run directly preceded by an odd
// number of backslashes cannot open a span, but it can still close one, since
// backslashes are literal inside code spans. Each result carries the span's
// content (one space stripped from each side when the content starts and ends
// with a space and is not all spaces), the index of the opening fence, and the
// index just past the closing fence.
function scanCodeSpans(text: string): {
  startIndex: number;
  endIndex: number;
  content: string;
}[] {
  // Gather every maximal run of consecutive backticks with its position.
  const backtickRuns: {length: number; start: number}[] = [];
  const runPattern = /`+/g;
  for (
    let found = runPattern.exec(text);
    found !== null;
    found = runPattern.exec(text)
  ) {
    backtickRuns.push({length: found[0].length, start: found.index});
  }

  // True when an odd number of backslashes sits directly before `position`.
  const followsOddBackslashes = (position: number): boolean => {
    let cursor = position;
    while (cursor > 0 && text[cursor - 1] === '\\') {
      cursor--;
    }
    return (position - cursor) % 2 === 1;
  };

  const result: {content: string; endIndex: number; startIndex: number}[] = [];
  let at = 0;
  while (at < backtickRuns.length) {
    const open = backtickRuns[at];

    // Look for the first later run of identical length, unless the candidate
    // opener is escaped and therefore just a literal backtick.
    let closeAt = -1;
    if (!followsOddBackslashes(open.start)) {
      for (let k = at + 1; k < backtickRuns.length && closeAt === -1; k++) {
        if (backtickRuns[k].length === open.length) {
          closeAt = k;
        }
      }
    }

    if (closeAt === -1) {
      // Nothing closes this run, so it stays literal; move on to the next run.
      at++;
      continue;
    }

    const close = backtickRuns[closeAt];
    const raw = text.slice(open.start + open.length, close.start);
    const strippable =
      raw.length >= 2 &&
      raw.startsWith(' ') &&
      raw.endsWith(' ') &&
      /[^ ]/.test(raw);

    result.push({
      content: strippable ? raw.slice(1, -1) : raw,
      endIndex: close.start + close.length,
      startIndex: open.start,
    });

    // Resume after the closing run so spans never overlap.
    at = closeAt + 1;
  }

  return result;
}

function scanDelimiters(
text: string,
transformersIndex: TextFormatTransformersIndex,
Expand Down
Loading