llmxml
Version:
Convert between markdown and LLM-friendly pseudo-XML
176 lines (155 loc) • 5.44 kB
text/typescript
import { ASTNode, HeadingNode, TextNode, CodeBlockNode, ListNode, ListItemNode, ParagraphNode, EmphasisNode, LinkNode } from '../types';
import logger from '../utils/logger';
/**
 * Serializes a Markdown AST back to Markdown text.
 *
 * Block-level nodes (heading, code, list, paragraph, bare text) are emitted
 * one per block and separated by a single blank line. Inline nodes (text,
 * emphasis, strong, link) are concatenated without separators and may nest.
 * Malformed or unknown nodes are logged and serialized as an empty string
 * instead of throwing.
 */
export class MarkdownSerializer {
  /**
   * Matches a line that opens a fenced code block. An optional run of list
   * markers is allowed before the fence because the first block of a list
   * item shares its line with the item marker (e.g. "- ```js").
   */
  private static readonly FENCE_OPEN = /^[ \t]*(?:(?:[-*+]|\d+[.)])[ \t]+)*(`{3,}|~{3,})/;

  /** Matches a line that can close a fenced code block (fence characters only). */
  private static readonly FENCE_CLOSE = /^[ \t]*(`{3,}|~{3,})[ \t]*$/;

  /**
   * Convert AST to Markdown text.
   *
   * @param ast Top-level nodes; null/undefined entries are skipped.
   * @returns The Markdown document, with nodes separated by one blank line.
   *   Runs of blank lines are normalized everywhere except inside fenced code
   *   blocks, whose content is emitted verbatim.
   */
  public serialize(ast: ASTNode[]): string {
    if (!Array.isArray(ast)) {
      logger.warn('Malformed AST', { ast });
      return '';
    }

    const blocks = ast
      .filter((node): node is ASTNode => node != null)
      .map(node => this.serializeNode(node))
      .filter(Boolean);

    // Join nodes with a single blank line between them (two newlines), then
    // squeeze any longer newline runs produced by the nodes themselves.
    return this.collapseBlankLines(blocks.join('\n\n'));
  }

  /**
   * Limit every run of consecutive newlines to two (one blank line), leaving
   * the inside of fenced code blocks untouched so code content is preserved.
   */
  private collapseBlankLines(markdown: string): string {
    const parts: string[] = [];
    let fence: { char: string; length: number } | null = null;
    let newlineRun = 0;
    let first = true;

    for (const line of markdown.split('\n')) {
      if (first) {
        first = false;
      } else if (fence !== null || newlineRun < 2) {
        // Inside a fence every newline is content; outside, keep at most two.
        parts.push('\n');
        newlineRun += 1;
      }

      if (line === '') {
        continue;
      }
      parts.push(line);
      newlineRun = 0;

      if (fence !== null) {
        // A fence closes only on the same character with at least the same length.
        const closing = MarkdownSerializer.FENCE_CLOSE.exec(line)?.[1];
        if (closing && closing.charAt(0) === fence.char && closing.length >= fence.length) {
          fence = null;
        }
        continue;
      }

      const opened = MarkdownSerializer.FENCE_OPEN.exec(line);
      const run = opened?.[1];
      if (opened && run) {
        // A backtick fence cannot have backticks in its info string; such a
        // line is inline code, not a fence opener.
        const rest = line.slice(opened[0].length);
        if (run.charAt(0) !== '`' || !rest.includes('`')) {
          fence = { char: run.charAt(0), length: run.length };
        }
      }
    }

    return parts.join('');
  }

  /**
   * Dispatch a node to its type-specific serializer.
   * Unknown types and thrown errors are logged and yield an empty string.
   */
  private serializeNode(node: ASTNode): string {
    try {
      switch (node.type) {
        case 'heading':
          return this.serializeHeading(node as HeadingNode);
        case 'text':
          return this.serializeText(node as TextNode);
        case 'code':
          return this.serializeCode(node as CodeBlockNode);
        case 'list':
          return this.serializeList(node as ListNode);
        case 'paragraph':
          return this.serializeParagraph(node as ParagraphNode);
        case 'emphasis':
        case 'strong':
          return this.serializeEmphasis(node as EmphasisNode);
        case 'link':
          return this.serializeLink(node as LinkNode);
        default:
          logger.warn('Unknown node type during serialization', { type: node.type });
          return '';
      }
    } catch (error) {
      logger.error('Error serializing node', { node, error });
      return '';
    }
  }

  /**
   * Emit an ATX heading. The depth is truncated and clamped to 1..6 so the
   * result is always a valid heading marker; a missing or non-finite depth
   * falls back to 1.
   */
  private serializeHeading(node: HeadingNode): string {
    const { depth = 1, text = '' } = node;
    const level = Number.isFinite(depth) ? Math.min(6, Math.max(1, Math.trunc(depth))) : 1;
    return '#'.repeat(level) + ' ' + text;
  }

  /** Emit a text node's value, or an empty string when it has none. */
  private serializeText(node: TextNode): string {
    return node.value || '';
  }

  /**
   * Emit a fenced code block. Empty blocks are logged and skipped. The fence
   * is made one backtick longer than the longest backtick run in the code
   * (minimum three) so the content can never terminate its own fence.
   */
  private serializeCode(node: CodeBlockNode): string {
    if (!node.value) {
      logger.warn('Empty code block', { node });
      return '';
    }
    const backtickRuns: string[] = node.value.match(/`+/g) ?? [];
    const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = '`'.repeat(Math.max(3, longestRun + 1));
    return `${fence}${node.lang || ''}\n${node.value}\n${fence}`;
  }

  /**
   * Emit a list, one item per line group.
   *
   * @param node The list node; `ordered` selects "1." style markers over "-".
   * @param indent Prefix placed before every marker of this list.
   * @returns The list text; items that serialize to nothing are omitted and
   *   ordered items are numbered by emitted position, so no gaps appear.
   */
  private serializeList(node: ListNode, indent: string = ''): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed list node', { node });
      return '';
    }

    const lines: string[] = [];
    node.children.forEach((item: ListItemNode) => {
      try {
        const marker = node.ordered ? `${lines.length + 1}.` : '-';
        // Item content must align with the text after "<marker> ", so the
        // continuation indent depends on the marker width ("-" vs "10.").
        const contentIndent = indent + ' '.repeat(marker.length + 1);
        const itemText = this.serializeListItem(item, contentIndent);
        if (itemText) {
          lines.push(`${indent}${marker} ${itemText}`);
        }
      } catch (error) {
        logger.error('Error serializing list item', { item, error });
      }
    });
    return lines.join('\n');
  }

  /**
   * Emit the content of one list item, without its marker.
   *
   * @param node The list item node.
   * @param indent Indentation for every line after the first, i.e. the
   *   column where the item's content starts.
   * @returns The item text. Nested lists are preceded by one blank line and
   *   indented exactly once; blank lines are never padded with whitespace.
   */
  private serializeListItem(node: ListItemNode, indent: string): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed list item', { node });
      return '';
    }

    const pieces: string[] = [];
    node.children.forEach(child => {
      try {
        if (child.type === 'list') {
          // serializeList already prefixes each of its lines with `indent`.
          const nested = this.serializeList(child as ListNode, indent);
          if (nested) {
            pieces.push(pieces.length > 0 ? '\n' + nested : nested);
          }
          return;
        }
        const block = this.serializeNode(child);
        if (block) {
          // Indent every non-empty line; empty lines stay empty.
          pieces.push(block.replace(/^(?=[^\n])/gm, indent));
        }
      } catch (error) {
        logger.error('Error serializing list item child', { child, error });
      }
    });

    const body = pieces.join('\n');
    // The first line follows the item marker, so it carries no indent.
    return body.startsWith(indent) ? body.slice(indent.length) : body;
  }

  /** Emit a paragraph as the concatenation of its inline children. */
  private serializeParagraph(node: ParagraphNode): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed paragraph node', { node });
      return '';
    }
    return this.serializeInline(node.children);
  }

  /**
   * Concatenate inline children. Emphasis, strong and link children are
   * serialized recursively so nested formatting is kept. Any other child that
   * carries a string `value` contributes that value; the rest go through
   * serializeNode, which logs unknown types.
   */
  private serializeInline(children: ASTNode[]): string {
    return children
      .map(child => {
        try {
          if (child == null) {
            return '';
          }
          switch (child.type) {
            case 'text':
              return (child as TextNode).value || '';
            case 'emphasis':
            case 'strong':
              return this.serializeEmphasis(child as EmphasisNode);
            case 'link':
              return this.serializeLink(child as LinkNode);
            default: {
              const value = (child as TextNode).value;
              return typeof value === 'string' ? value : this.serializeNode(child);
            }
          }
        } catch (error) {
          logger.error('Error serializing inline child', { child, error });
          return '';
        }
      })
      .join('');
  }

  /**
   * Emit emphasis or strong text wrapped in its marker. Nodes whose content
   * serializes to nothing are dropped.
   */
  private serializeEmphasis(node: EmphasisNode): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed emphasis node', { node });
      return '';
    }
    // Use the original marker if available, otherwise fall back to * for emphasis and ** for strong
    const defaultMarker = node.type === 'emphasis' ? '*' : '**';
    const marker = (node as EmphasisNode & { marker?: string }).marker ?? defaultMarker;
    const content = this.serializeInline(node.children);
    return content ? `${marker}${content}${marker}` : '';
  }

  /**
   * Emit an inline link as "[text](url)". Links without a URL are logged and
   * dropped; links whose text serializes to nothing are dropped.
   */
  private serializeLink(node: LinkNode): string {
    if (!Array.isArray(node.children) || !node.url) {
      logger.warn('Malformed link node', { node });
      return '';
    }
    const text = this.serializeInline(node.children);
    return text ? `[${text}](${node.url})` : '';
  }
}
// Module-level singleton: a single MarkdownSerializer instance created at
// import time and exported alongside the class itself.
export const markdownSerializer = new MarkdownSerializer();