Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ documentation:
- changed-files:
- any-glob-to-any-file: 'apps/docs/**/*'

# Add 'self-hosted' to any change in the self-hosting docs
self-hosted:
- changed-files:
- any-glob-to-any-file: 'apps/docs/content/guides/self-hosting/**/*'

# Add 'api-deploy-required' to any change in packages/api-types/types
api-deploy-required:
- changed-files:
Expand Down
323 changes: 134 additions & 189 deletions apps/docs/internals/generate-guides-markdown.ts
Original file line number Diff line number Diff line change
@@ -1,198 +1,160 @@
import fs from 'node:fs'
import fs from 'node:fs/promises'
import path from 'node:path'
import { globby } from 'globby'
import matter from 'gray-matter'

import { getInternalLinkBaseUrl, prefixInternalLinks, withDocsBasePath } from './internal-links'
import type { Content, Parent, Root } from 'mdast'
import { fromMarkdown } from 'mdast-util-from-markdown'
import { gfmFromMarkdown, gfmToMarkdown } from 'mdast-util-gfm'
import { mdxFromMarkdown, mdxToMarkdown } from 'mdast-util-mdx'
import type { MdxJsxFlowElement, MdxJsxTextElement } from 'mdast-util-mdx-jsx'
import { toMarkdown } from 'mdast-util-to-markdown'
import { gfm } from 'micromark-extension-gfm'
import { mdxjs } from 'micromark-extension-mdxjs'

import { getInternalLinkBaseUrl, prefixInternalLinks } from './internal-links'
import { Link } from './markdown-schema/Link'
import { Panel } from './markdown-schema/Panel'
import { StepHike } from './markdown-schema/StepHike'
import { TabPanel } from './markdown-schema/TabPanel'
import { Admonition } from './markdown-schema/Admonition'

const PARTIALS_DIR = path.join(process.cwd(), 'content', '_partials')

type JsxNode = MdxJsxFlowElement | MdxJsxTextElement
type Props = Record<string, unknown>

/**
* Reads <$Partial path="..." /> tags and replaces them with the file contents.
* Recurses to handle nested partials.
* A handler converts a single MDX component into a markdown string. It receives
* the component's props, the already-serialized markdown of its children, and
* the raw AST node (escape hatch for handlers that need to inspect structure).
*
* Any component not in the schema is treated as `({ children }) => children`,
* i.e. the wrapper is dropped and its children are kept as-is.
*/
async function inlinePartials(content: string): Promise<string> {
const partialRegex = /<\$Partial\s+path="([^"]+)"[^/]*\/>/g
const matches = [...content.matchAll(partialRegex)]
for (const [fullMatch, partialPath] of matches) {
try {
const raw = await fs.promises.readFile(path.join(PARTIALS_DIR, partialPath), 'utf8')
const { content: partialBody } = matter(raw)
const inlined = await inlinePartials(partialBody)
content = content.replace(fullMatch, inlined)
} catch {
content = content.replace(fullMatch, '')
}
}
return content
type ComponentHandler = (ctx: { props: Props; children: string; node: JsxNode }) => string
type ComponentSchema = Record<string, ComponentHandler>

const PARSE_OPTIONS = {
extensions: [mdxjs(), gfm()],
mdastExtensions: [mdxFromMarkdown(), gfmFromMarkdown()],
}
const SERIALIZE_OPTIONS = {
extensions: [mdxToMarkdown(), gfmToMarkdown()],
bullet: '-' as const,
listItemIndent: 'one' as const,
}

/** Remove the minimum common leading whitespace from all non-empty lines. */
function dedentBlock(text: string): string {
const lines = text.split('\n')
const nonEmpty = lines.filter((l) => /\S/.test(l))
if (!nonEmpty.length) return text
const minIndent = Math.min(...nonEmpty.map((l) => (l.match(/^([ \t]*)/) ?? ['', ''])[1].length))
if (!minIndent) return text
return lines.map((l) => l.slice(minIndent)).join('\n')
const parseMdx = (source: string): Root => fromMarkdown(source, PARSE_OPTIONS)
const serializeMdx = (tree: Parent): string => toMarkdown(tree as Root, SERIALIZE_OPTIONS)

const defaultHandler: ComponentHandler = ({ children }) => children

const isJsx = (n: Content): n is JsxNode =>
n.type === 'mdxJsxFlowElement' || n.type === 'mdxJsxTextElement'

function propsFrom(node: JsxNode): Props {
const props: Props = {}
for (const attr of node.attributes) {
if (attr.type !== 'mdxJsxAttribute') continue
if (attr.value == null) props[attr.name] = true
else if (typeof attr.value === 'string') props[attr.name] = attr.value
else props[attr.name] = attr.value.value
}
return props
}

/**
* Converts StepHikeCompact components to markdown ordered lists.
* Each step becomes a numbered item: the title (from Details) is bolded on the item
* line, and the full step body (Details + Code) is dedented and appended below.
* Remaining JSX tags inside the body are later stripped by stripJsxTags.
* Replaces every `<$Partial path="..." />` in the tree with the parsed AST of
* the referenced file. Recurses so partials may include other partials.
*/
function convertStepHike(content: string): string {
return content.replace(/<StepHikeCompact>([\s\S]*?)<\/StepHikeCompact>/g, (_, body) => {
const items: string[] = []
const stepRe = /<StepHikeCompact\.Step[^>]*>([\s\S]*?)<\/StepHikeCompact\.Step>/g
let stepNum = 1
let m: RegExpExecArray | null
while ((m = stepRe.exec(body)) !== null) {
const stepBody = m[1]
const titleMatch = stepBody.match(/<StepHikeCompact\.Details[^>]+title="([^"]*)"/)
const title = titleMatch ? titleMatch[1] : ''
// Dedent the entire step body so nested JSX indentation is removed.
// Remaining component tags (Details, Code, Admonition…) are stripped later.
const inner = dedentBlock(stepBody).trim()
const item = title ? `${stepNum}. **${title}**\n\n${inner}` : `${stepNum}. ${inner}`
items.push(item)
stepNum++
async function inlinePartials(parent: Parent): Promise<void> {
const next: Content[] = []
for (const child of parent.children as Content[]) {
if (isJsx(child) && child.name === '$Partial') {
const partialPath = String(propsFrom(child).path ?? '')
try {
const raw = await fs.readFile(path.join(PARTIALS_DIR, partialPath), 'utf8')
const subtree = parseMdx(matter(raw).content)
await inlinePartials(subtree)
next.push(...(subtree.children as Content[]))
} catch {
// missing or broken partials are silently dropped
}
continue
}
return items.join('\n\n')
})
if ('children' in child) await inlinePartials(child as Parent)
next.push(child)
}
parent.children = next as Parent['children']
}

/**
* Extracts a `title` prop value from a JSX opening-tag's attribute text.
* Supports `title="..."`, `title='...'`, and `title={<jsx>...</jsx>}` forms.
* For JSX titles, strips inline tags and collapses whitespace so the text
* content remains.
* Walks the tree bottom-up. For each JSX element, runs its schema handler (or
* the default) and replaces the node with an `html` node holding the result.
* The `html` type passes through `mdast-util-to-markdown` verbatim, so whatever
* markdown the handler returns lands in the final output unchanged.
*/
function extractTitleAttr(attrs: string): string | null {
const strMatch = attrs.match(/\btitle=(?:"([^"]+)"|'([^']+)')/)
if (strMatch) return strMatch[1] ?? strMatch[2]

const exprIdx = attrs.indexOf('title={')
if (exprIdx === -1) return null

let i = exprIdx + 'title={'.length
const inner: string[] = []
let depth = 1
while (i < attrs.length && depth > 0) {
const ch = attrs[i]
if (ch === '{') depth++
else if (ch === '}') {
depth--
if (depth === 0) break
function applySchema(parent: Parent, schema: ComponentSchema): void {
for (const child of parent.children as Content[]) {
if ('children' in child) applySchema(child as Parent, schema)
}
const next: Content[] = []
for (const child of parent.children as Content[]) {
if (
child.type === 'mdxFlowExpression' ||
child.type === 'mdxTextExpression' ||
child.type === 'mdxjsEsm'
) {
continue
}
inner.push(ch)
i++
if (isJsx(child)) {
const handler = schema[child.name ?? ''] ?? defaultHandler
const children = serializeMdx({
type: 'root',
children: child.children as Root['children'],
}).trim()
const value = handler({ props: propsFrom(child), children, node: child })
next.push({ type: 'html', value } as Content)
continue
}
next.push(child)
}
return inner
.join('')
.replace(/<[^>]+>/g, ' ')
.replace(/\s+/g, ' ')
.trim()
parent.children = next as Parent['children']
}

/**
* Converts `<Link href="..."><GlassPanel|IconPanel title="...">desc</...></Link>`
* blocks into markdown bullet lines: `- [title](href). description`. Without
* this, the rendered output keeps prop syntax (className, passHref, etc.) since
* stripping JSX tags discards inner text from props but leaves panel children
* floating without context. Applied to all guides so resource-card grids render
* consistently in the markdown view.
* Per-component overrides. Each handler receives `{ props, children, node }`
* and returns the markdown string that should replace the JSX element. Any
* component not listed is unwrapped (children are kept, wrapper is dropped).
*/
function convertLinkPanels(content: string): string {
return content.replace(/<Link\b([\s\S]*?)>([\s\S]*?)<\/Link>/g, (full, linkAttrs, body) => {
const hrefMatch = linkAttrs.match(/\bhref="([^"]+)"/)
if (!hrefMatch) return full
const href = withDocsBasePath(hrefMatch[1])

const panelOpen = body.match(/<(GlassPanel|IconPanel)\b/)
if (!panelOpen) return full
const panelName = panelOpen[1]
const panelStart = panelOpen.index ?? 0

// Walk past the opening tag, respecting JSX expression braces so attribute
// values like `title={<span>...</span>}` don't terminate parsing early.
let i = panelStart + panelOpen[0].length
let depth = 0
while (i < body.length) {
const ch = body[i]
if (ch === '{') depth++
else if (ch === '}') depth--
else if (ch === '>' && depth === 0) break
i++
}
if (i >= body.length) return full
const SCHEMA: ComponentSchema = {
Admonition,
Link,
GlassPanel: Panel,
IconPanel: Panel,
...StepHike,
TabPanel,
}

const panelAttrs = body.slice(panelStart + panelOpen[0].length, i)
const closingTag = `</${panelName}>`
const closeIdx = body.indexOf(closingTag, i)
if (closeIdx === -1) return full
async function generateOne(filePath: string, linkBaseUrl: string): Promise<string> {
const raw = await fs.readFile(filePath, 'utf8')
const { content, data } = matter(raw)

const title = extractTitleAttr(panelAttrs)
if (!title) return full
const tree = parseMdx(content)
await inlinePartials(tree)
applySchema(tree, SCHEMA)
const body = prefixInternalLinks(serializeMdx(tree), linkBaseUrl)

const description = body
.slice(i + 1, closeIdx)
.replace(/<[^>]+>/g, ' ')
.replace(/\s+/g, ' ')
.trim()
const headerParts: string[] = []
if (data.title) headerParts.push(`# ${data.title}`)
if (data.subtitle) headerParts.push(String(data.subtitle))
// Add description only when differs from subtitle.
if (data.description && String(data.description) !== String(data.subtitle))
headerParts.push(String(data.description))

return `- [${title}](${href})${description ? `. ${description}` : ''}`
})
}
const header = headerParts.join('\n\n')

/**
* Strips JSX component tags (capitalized names, dot-notation, or $-prefixed)
* while keeping their inner content. Also strips wrapper div and a elements.
* Removes MDX JSX comment blocks. Strips unnecessary leading indentation from
* non-code-block lines.
*/
function stripJsxTags(content: string): string {
// Remove MDX/JSX comments {/* ... */}
content = content.replace(/\{\/\*[\s\S]*?\*\/\}/g, '')

// Remove self-closing JSX components: <Component ... /> or <$Directive ... />
content = content.replace(/<[\$A-Z][\w.]*(?:\s[^>]*)?\s*\/>/gs, '')

// Remove opening JSX component tags (possibly multi-line): <Component ...>
content = content.replace(/<[\$A-Z][\w.]*(?:\s[^>]*)?\s*>/gs, '')

// Remove closing JSX component tags: </Component>
content = content.replace(/<\/[\$A-Z][\w.]*>/g, '')

// Remove wrapper div and a elements used structurally in MDX (carry JSX props
// like className which are not valid HTML; inner content such as img is preserved)
content = content.replace(/<div(?:\s[^>]*)?\s*>/g, '')
content = content.replace(/<\/div>/g, '')
content = content.replace(/<a(?:\s[^>]*)?\s*>/g, '')
content = content.replace(/<\/a>/g, '')

// Split on fenced code blocks to handle prose and code separately.
// For prose (even segments): strip leading whitespace (removes JSX nesting indent).
// For code blocks (odd segments): dedent the body to remove JSX nesting indent while
// preserving relative code structure, then normalize the closing fence.
const segments = content.split(/(```[\s\S]*?```)/g)
content = segments
.map((seg, i) => {
if (i % 2 === 0) return seg.replace(/^[ \t]+/gm, '')
return seg.replace(
/^(```[^\n]*\n)([\s\S]*?)(\n[ \t]*```)$/,
(_, open, body) => open + dedentBlock(body) + '\n```'
)
})
.join('')

// Collapse lines that are only whitespace to empty lines, then deduplicate blank lines
content = content.replace(/^[^\S\n]+$/gm, '')
content = content.replace(/\n{3,}/g, '\n\n').trim()

return content
return header ? `${header}\n\n${body}` : body
}

async function generate() {
Expand All @@ -202,47 +164,30 @@ async function generate() {

await Promise.all(
files.map(async (filePath) => {
// content/guides/ai/vector-columns.mdx → public/markdown/guides/ai/vector-columns.md
// Placing under public/markdown/ ensures the file is served at /docs/guides/...
// matching the URL of the rendered page.
const outPath = filePath
.replace(/^content\/guides\//, 'public/markdown/guides/')
.replace(/\.mdx$/, '.md')

let output: string
try {
const raw = await fs.promises.readFile(filePath, 'utf8')
const { content: rawContent, data } = matter(raw)

const withPartials = await inlinePartials(rawContent)
const withSteps = convertStepHike(withPartials)
const withLinks = convertLinkPanels(withSteps)
const stripped = stripJsxTags(withLinks)
const processed = prefixInternalLinks(stripped, linkBaseUrl)

const header = [
data.title ? `# ${data.title}` : '',
data.subtitle || data.description ? `\n${data.subtitle ?? data.description}` : '',
]
.filter(Boolean)
.join('\n')

output = header ? `${header}\n\n${processed}` : processed
output = await generateOne(filePath, linkBaseUrl)
} catch (err) {
warnings++
console.warn(
`[warn] Failed to process ${filePath}: ${err instanceof Error ? err.message : err}`
)
// Fall back to raw file content so the route still serves something
try {
output = await fs.promises.readFile(filePath, 'utf8')
output = await fs.readFile(filePath, 'utf8')
} catch {
output = `<!-- failed to generate: ${filePath} -->`
}
}

// content/guides/ai/vector-columns.mdx → public/markdown/guides/ai/vector-columns.md
// Placing under public/markdown/ ensures the file is served at /docs/guides/...
// matching the exact URL of the rendered page.
await fs.promises.mkdir(path.dirname(outPath), { recursive: true })
await fs.promises.writeFile(outPath, output)
await fs.mkdir(path.dirname(outPath), { recursive: true })
await fs.writeFile(outPath, output)
})
)

Expand Down
Loading
Loading