UNPKG

llmxml

Version:

Convert between markdown and LLM-friendly pseudo-XML

176 lines (155 loc) 5.44 kB
import { ASTNode, HeadingNode, TextNode, CodeBlockNode, ListNode, ListItemNode, ParagraphNode, EmphasisNode, LinkNode } from '../types';
import logger from '../utils/logger';

/**
 * Matches a line that opens a fenced code block. The fence may be preceded by
 * whitespace and by list markers, because a code block that is the first child
 * of a list item shares its line with the marker (e.g. "- ```js").
 * A backtick fence is only accepted when the rest of the line has no backticks.
 */
const FENCE_OPEN = /^[ \t]*(?:(?:[-*+]|\d+[.)])[ \t]+)*(`{3,}(?=[^`]*$)|~{3,})/;

/** Matches a line consisting solely of a (possibly indented) code fence. */
const FENCE_CLOSE = /^[ \t]*(`{3,}|~{3,})[ \t]*$/;

/**
 * Serializes Markdown AST back to text
 */
export class MarkdownSerializer {
  /**
   * Convert AST to Markdown text.
   *
   * Null/undefined entries are skipped, nodes that serialize to an empty
   * string are dropped, and the remaining blocks are separated by one blank
   * line.
   *
   * @param ast - Top-level nodes to serialize.
   * @returns The Markdown text.
   */
  public serialize(ast: ASTNode[]): string {
    // Filter out null/undefined nodes
    const validNodes = ast.filter((node): node is ASTNode => node != null);

    // Join nodes with a single blank line between them (two newlines)
    const markdown = validNodes
      .map(node => this.serializeNode(node))
      .filter(Boolean)
      .join('\n\n');

    // Normalize multiple blank lines down to one, but never inside fenced
    // code, where blank lines are part of the content.
    return this.collapseBlankLines(markdown);
  }

  /**
   * Reduce every run of three or more consecutive newlines to exactly two,
   * leaving the interior of fenced code blocks untouched.
   *
   * Outside fences this produces the same result as
   * `markdown.replace(/\n{3,}/g, '\n\n')`. A fence that is never closed is
   * treated as running to the end of the text.
   */
  private collapseBlankLines(markdown: string): string {
    let result = '';
    // Consecutive newlines seen outside a fence since the last non-empty line.
    let newlineRun = 0;
    // The fence string that opened the current code block, or null outside one.
    let openFence: string | null = null;

    markdown.split('\n').forEach((line, index) => {
      if (index > 0) {
        if (openFence !== null) {
          // Newlines inside a code block are content: always keep them.
          result += '\n';
        } else if (++newlineRun <= 2) {
          result += '\n';
        }
      }

      // An empty line contributes no text and cannot open or close a fence.
      if (line === '') {
        return;
      }

      newlineRun = 0;
      result += line;

      if (openFence === null) {
        openFence = FENCE_OPEN.exec(line)?.[1] ?? null;
      } else {
        const closing = FENCE_CLOSE.exec(line)?.[1];
        // A closing fence uses the same character and is at least as long.
        if (
          closing !== undefined &&
          closing.charAt(0) === openFence.charAt(0) &&
          closing.length >= openFence.length
        ) {
          openFence = null;
        }
      }
    });

    return result;
  }

  /**
   * Dispatch a node to the serializer for its type.
   *
   * Unknown types are logged and yield an empty string. Any exception thrown
   * while serializing is logged and also yields an empty string, so a single
   * bad node does not abort the whole document.
   */
  private serializeNode(node: ASTNode): string {
    try {
      switch (node.type) {
        case 'heading':
          return this.serializeHeading(node as HeadingNode);
        case 'text':
          return this.serializeText(node as TextNode);
        case 'code':
          return this.serializeCode(node as CodeBlockNode);
        case 'list':
          return this.serializeList(node as ListNode);
        case 'paragraph':
          return this.serializeParagraph(node as ParagraphNode);
        case 'emphasis':
        case 'strong':
          return this.serializeEmphasis(node as EmphasisNode);
        case 'link':
          return this.serializeLink(node as LinkNode);
        default:
          logger.warn('Unknown node type during serialization', { type: node.type });
          return '';
      }
    } catch (error) {
      logger.error('Error serializing node', { node, error });
      return '';
    }
  }

  /**
   * Serialize a heading as `#` markers followed by its text.
   *
   * The depth is truncated to an integer and clamped to 1..6. Without the
   * clamp a negative depth makes `repeat` throw (dropping the heading), and a
   * depth above 6 yields a line that Markdown does not treat as a heading.
   */
  private serializeHeading(node: HeadingNode): string {
    const { depth = 1, text = '' } = node;
    const level = Math.min(6, Math.max(1, Math.trunc(depth) || 1));
    return '#'.repeat(level) + ' ' + text;
  }

  /** Return the node's raw value, or an empty string when it has none. */
  private serializeText(node: TextNode): string {
    return node.value || '';
  }

  /**
   * Serialize a fenced code block.
   *
   * Empty blocks are logged and skipped. The fence is at least three
   * backticks and always longer than the longest backtick run in the code, so
   * backticks in the content can never terminate the block early.
   */
  private serializeCode(node: CodeBlockNode): string {
    if (!node.value) {
      logger.warn('Empty code block', { node });
      return '';
    }

    const backtickRuns: string[] = node.value.match(/`+/g) ?? [];
    const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = '`'.repeat(Math.max(3, longestRun + 1));

    return `${fence}${node.lang || ''}\n${node.value}\n${fence}`;
  }

  /**
   * Serialize a list, one item per line.
   *
   * @param node - The list node; `ordered` selects `1.`-style markers over `-`.
   * @param indent - Absolute indentation prefixed to every marker line.
   * @returns The list text, or an empty string for a malformed node.
   */
  private serializeList(node: ListNode, indent: string = ''): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed list node', { node });
      return '';
    }

    return node.children
      .map((item: ListItemNode, index: number) => {
        try {
          const marker = node.ordered ? `${index + 1}.` : '-';
          // Item content starts one column after the marker, so continuation
          // lines and nested lists are indented by the marker width plus one.
          const contentIndent = indent + ' '.repeat(marker.length + 1);
          const itemText = this.serializeListItem(item, contentIndent);
          return itemText ? `${indent}${marker} ${itemText}` : '';
        } catch (error) {
          logger.error('Error serializing list item', { item, error });
          return '';
        }
      })
      .filter(Boolean)
      .join('\n');
  }

  /**
   * Serialize the content of one list item (everything after its marker).
   *
   * The first line is returned without indentation because the caller places
   * it right after the marker; every later non-empty line carries `indent`.
   * Nested lists are serialized directly at `indent` and are not re-indented
   * here, so indentation grows by one level per nesting depth. A blank line
   * separates a nested list from neighbouring content.
   *
   * @param node - The list item.
   * @param indent - Absolute indentation of the item's content column.
   */
  private serializeListItem(node: ListItemNode, indent: string): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed list item', { node });
      return '';
    }

    let body = '';
    let previousWasList = false;

    for (const child of node.children) {
      let piece = '';
      let isList = false;

      try {
        isList = child.type === 'list';
        piece = isList
          ? this.serializeList(child as ListNode, indent)
          : // Indent continuation lines only; blank lines stay truly empty.
            this.serializeNode(child).replace(/\n(?=[^\n])/g, '\n' + indent);
      } catch (error) {
        logger.error('Error serializing list item child', { child, error });
        continue;
      }

      if (!piece) {
        continue;
      }

      if (body === '') {
        // A nested list that opens the item sits on the marker line, so its
        // own leading indentation must be removed.
        body = isList ? piece.slice(indent.length) : piece;
      } else {
        const separator = isList || previousWasList ? '\n\n' : '\n';
        body += separator + (isList ? piece : indent + piece);
      }
      previousWasList = isList;
    }

    return body;
  }

  /**
   * Serialize a paragraph by concatenating its inline children.
   *
   * Only text, emphasis/strong and link children are emitted; any other
   * child type contributes nothing.
   */
  private serializeParagraph(node: ParagraphNode): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed paragraph node', { node });
      return '';
    }

    return node.children
      .map(child => {
        try {
          switch (child.type) {
            case 'text':
              return (child as TextNode).value || '';
            case 'emphasis':
            case 'strong':
              return this.serializeEmphasis(child as EmphasisNode);
            case 'link':
              return this.serializeLink(child as LinkNode);
            default:
              return '';
          }
        } catch (error) {
          logger.error('Error serializing paragraph child', { child, error });
          return '';
        }
      })
      .filter(Boolean)
      .join('');
  }

  /**
   * Serialize a child of an emphasis or link node.
   *
   * Nested emphasis/strong and link nodes are serialized recursively so their
   * content is not lost; every other child contributes its `value`, if any.
   */
  private serializeInline(child: ASTNode): string {
    switch (child.type) {
      case 'emphasis':
      case 'strong':
        return this.serializeEmphasis(child as EmphasisNode);
      case 'link':
        return this.serializeLink(child as LinkNode);
      default:
        return (child as TextNode).value || '';
    }
  }

  /**
   * Serialize an emphasis or strong node, wrapping its content in a marker.
   *
   * Returns an empty string when the node has no content.
   */
  private serializeEmphasis(node: EmphasisNode): string {
    if (!Array.isArray(node.children)) {
      logger.warn('Malformed emphasis node', { node });
      return '';
    }

    // Use the original marker if available, otherwise fall back to * for emphasis and ** for strong
    const defaultMarker = node.type === 'emphasis' ? '*' : '**';
    const marker = (node as EmphasisNode & { marker?: string }).marker ?? defaultMarker;

    const content = node.children
      .map(child => this.serializeInline(child as ASTNode))
      .filter(Boolean)
      .join('');

    return content ? `${marker}${content}${marker}` : '';
  }

  /**
   * Serialize a link as `[text](url)`.
   *
   * A node without children or without a URL is logged and skipped; a link
   * whose text is empty yields an empty string.
   */
  private serializeLink(node: LinkNode): string {
    if (!Array.isArray(node.children) || !node.url) {
      logger.warn('Malformed link node', { node });
      return '';
    }

    const text = node.children
      .map(child => this.serializeInline(child as ASTNode))
      .filter(Boolean)
      .join('');

    return text ? `[${text}](${node.url})` : '';
  }
}

// Export a singleton instance
export const markdownSerializer = new MarkdownSerializer();