Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@ private static String postProcessPandocOutput(String output) {
// Remove empty lines at the start and end
output = output.trim();

// Remove unnecessary backslash escapes that pandoc adds for markdown
// These characters don't need escaping in Python docstrings
output = output.replaceAll("\\\\([\\[\\]'{}()<>`@_*|!~$#^])", "$1");
// Fix up the backslash escapes pandoc adds for Markdown so the result is
// a valid Python string literal (see normalizeBackslashEscapes).
output = normalizeBackslashEscapes(output);

// Replace <note> and <important> tags with admonitions for mkdocstrings
output = replaceAdmonitionTags(output, "note", "Note");
Expand All @@ -180,6 +180,45 @@ private static String postProcessPandocOutput(String output) {
return output.replace("$", "$$");
}

// A lone backslash before one of these is dropped: pandoc adds it for Markdown
// but it needs no escaping in a Python docstring.
private static final String MARKDOWN_ONLY_CHARS = "[](){}<>`@_*|!~$#^'.";
// A lone backslash before one of these may be kept because it can start a valid
// Python escape. (' is intentionally left out; it lives in MARKDOWN_ONLY_CHARS.)
// x, u, U and N are only valid with a well-formed operand after them, which
// isCompletePythonEscape verifies before the backslash is kept.
private static final String VALID_ESCAPE_CHARS = "\\\"abfnrtv01234567xNuU\r\n";
// A maximal run of backslashes plus the single character that follows it (or
// nothing at the end of the input). DOTALL lets that character be a line break.
private static final Pattern BACKSLASH_RUN = Pattern.compile("(\\\\+)([^\\\\]|$)", Pattern.DOTALL);

/**
 * Fixes pandoc's backslash escaping so the docstring is a valid Python literal.
 *
 * <p>Backslashes must come in pairs, but pandoc can leave an odd-length run
 * (it escapes literal backslashes and Markdown characters separately, and
 * adjacent escapes pile up). For each run we keep the pairs; a leftover
 * backslash is then kept if it forms a complete, valid escape, dropped if it
 * is only there for Markdown, or doubled otherwise (including when the run
 * ends the input).
 *
 * <p>A leftover backslash before {@code x}, {@code u}, {@code U} or {@code N}
 * is kept only when the operand that follows is well formed; otherwise
 * (e.g. {@code C:\Users}) it is doubled, because Python rejects a truncated
 * escape.
 *
 * @param output text whose backslash runs should be normalized
 * @return the text with every backslash run normalized as described above
 */
private static String normalizeBackslashEscapes(String output) {
    Matcher m = BACKSLASH_RUN.matcher(output);
    StringBuilder sb = new StringBuilder();
    while (m.find()) {
        int runLength = m.group(1).length();
        String next = m.group(2);
        // Complete pairs already denote literal backslashes; keep them as-is.
        int backslashes = (runLength / 2) * 2;
        if (runLength % 2 != 0) {
            if (next.isEmpty()) {
                // Leftover backslash at the very end of the input: double it.
                backslashes += 2;
            } else {
                char c = next.charAt(0);
                if (VALID_ESCAPE_CHARS.indexOf(c) >= 0
                        && isCompletePythonEscape(output, m.start(2))) {
                    backslashes += 1;
                } else if (MARKDOWN_ONLY_CHARS.indexOf(c) < 0) {
                    backslashes += 2;
                }
                // else: Markdown-only char, drop the spurious backslash
            }
        }
        m.appendReplacement(sb, Matcher.quoteReplacement("\\".repeat(backslashes) + next));
    }
    m.appendTail(sb);
    return sb.toString();
}

/**
 * Reports whether the escape whose letter sits at {@code index} has the operand
 * Python requires: two hex digits after {@code x}, four after {@code u}, eight
 * (with a value of at most 0x10FFFF) after {@code U}, and a braced, non-empty
 * name after {@code N}. Every other character needs no operand.
 *
 * <p>For {@code N} only the shape of the name is checked, not whether it is a
 * known Unicode character name.
 */
private static boolean isCompletePythonEscape(String text, int index) {
    switch (text.charAt(index)) {
        case 'x':
            return hasAsciiHexDigits(text, index + 1, 2);
        case 'u':
            return hasAsciiHexDigits(text, index + 1, 4);
        case 'U':
            return hasAsciiHexDigits(text, index + 1, 8)
                    && Long.parseLong(text.substring(index + 1, index + 9), 16) <= 0x10FFFF;
        case 'N': {
            int open = index + 1;
            if (open >= text.length() || text.charAt(open) != '{') {
                return false;
            }
            int i = open + 1;
            while (i < text.length() && isUnicodeNameChar(text.charAt(i))) {
                i++;
            }
            // Need at least one name character and a closing brace right after it.
            return i > open + 1 && i < text.length() && text.charAt(i) == '}';
        }
        default:
            return true;
    }
}

/** Reports whether {@code count} ASCII hex digits start at {@code from} in {@code text}. */
private static boolean hasAsciiHexDigits(String text, int from, int count) {
    if (from + count > text.length()) {
        return false;
    }
    for (int i = from; i < from + count; i++) {
        char ch = text.charAt(i);
        boolean hex = (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
        if (!hex) {
            return false;
        }
    }
    return true;
}

/** Reports whether {@code ch} is an ASCII letter, digit, space or hyphen. */
private static boolean isUnicodeNameChar(char ch) {
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
            || (ch >= '0' && ch <= '9') || ch == ' ' || ch == '-';
}

/**
* Replaces admonition tags (e.g. note, important) with Google-style format.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,54 @@ public void testConvertRemovesUnnecessaryBackslashEscapes() {
assertEquals("Text with [brackets] and {braces} and (parens)", result.trim());
}

@Test
public void testConvertPreservesLiteralDoubleBackslash() {
    // The code span contains two consecutive backslashes (as in a password
    // charset such as "[\\]"). A doubled backslash is a valid Python escape,
    // so the converted text is expected to carry both of them unchanged.
    String input = "<code>@[\\\\]^</code>";
    String expected = "`@[\\\\]^`";
    assertEquals(expected, MarkdownConverter.convert(input, createMockContext(true)).trim());
}

@Test
public void testConvertEscapesLoneBackslash() {
    // Backslash-space is not a recognized Python escape, so the single
    // backslash in the input is expected to come out doubled, keeping the
    // docstring a valid Python string.
    String input = "<code>[ \\ ]</code>";
    String expected = "`[ \\\\ ]`";
    assertEquals(expected, MarkdownConverter.convert(input, createMockContext(true)).trim());
}

@Test
public void testConvertPreservesValidPythonEscapes() {
    // Backslash-quote (\") is already a valid Python escape; the conversion
    // is expected to leave it alone instead of doubling the backslash.
    String input = "<code>!\\\"#</code>";
    String expected = "`!\\\"#`";
    assertEquals(expected, MarkdownConverter.convert(input, createMockContext(true)).trim());
}

@Test
public void testConvertHandlesBackslashBeforeMarkdownChar() {
    // Each "*" in the input is preceded by a backslash. Before a
    // Markdown-significant character pandoc emits an odd-length backslash
    // run; that spurious escape is expected to be dropped so the result is
    // valid Python.
    String input = "<code>arn:aws:iam::\\*:user/\\*</code>";
    String expected = "`arn:aws:iam::*:user/*`";
    assertEquals(expected, MarkdownConverter.convert(input, createMockContext(true)).trim());
}

@Test
public void testConvertHandlesBackslashStarNextToQuote() {
    // The API Gateway model documents the comment marker "\*/" inside double
    // quotes. Next to a quote pandoc emits an odd-length backslash run; the
    // expected output keeps the escaped quotes and an even backslash run
    // before "*", so it stays a valid Python literal.
    String input = "<p>Do not include \"\\*/\" characters</p>";
    String expected = "Do not include \\\"\\\\*/\\\" characters";
    assertEquals(expected, MarkdownConverter.convert(input, createMockContext(true)).trim());
}

@Test
public void testConvertMixedElements() {
String html = "<h1>Title</h1><p>Paragraph</p><ul><li>Item 1</li><li>Item 2</li></ul>";
Expand Down
Loading