llm-stream-parser

Version:

A TypeScript library for parsing and processing structured data from LLM streaming responses with custom tag definitions and event-driven architecture

github.com/brankrts/llm-stream-parser

brankrts/llm-stream-parser

1 lines • 85.8 kB

Source Map (JSON)

View Raw

{"version":3,"sources":["../src/core/stream-parser.ts","../src/types/base.ts","../src/types/config.ts","../src/types/errors.ts","../src/core/buffer-manager.ts","../src/core/tag-matcher.ts","../src/core/validator.ts","../src/core/transformer.ts","../src/llm-stream-parser.ts","../src/index.ts"],"sourcesContent":["/**\r\n * Main Stream Parser - Core parsing engine\r\n * Supports both flat and nested XML parsing modes\r\n */\r\n\r\nimport { EventEmitter } from 'events';\r\nimport { BaseTag, NestedTag, ParserState, ParserStats, TagMatch } from '../types/base';\r\nimport { ParserConfig, mergeConfig, RequiredParserConfig } from '../types/config';\r\nimport { ParserError, ParserErrorCode } from '../types/errors';\r\nimport { TagDefinition } from '../types/schema';\r\n\r\nimport { BufferManager } from './buffer-manager';\r\nimport { TagMatcher } from './tag-matcher';\r\nimport { TagValidator } from './validator';\r\nimport { TagTransformer } from './transformer';\r\n\r\n/**\r\n * Stack entry for tracking open tags in nested mode\r\n */\r\ninterface TagStackEntry {\r\n tag: NestedTag;\r\n startIndex: number;\r\n depth: number;\r\n path: string;\r\n}\r\n\r\n/**\r\n * Main stream parser with support for both flat and nested XML parsing\r\n */\r\nexport class StreamParser<T extends BaseTag = BaseTag> extends EventEmitter {\r\n private readonly config: RequiredParserConfig;\r\n private readonly bufferManager: BufferManager;\r\n private readonly tagMatcher: TagMatcher;\r\n private readonly tagRegistry = new Map<string, TagDefinition<T>>();\r\n\r\n private state: ParserState = ParserState.IDLE;\r\n private stats: ParserStats;\r\n\r\n // Nested parsing state\r\n private tagStack: TagStackEntry[] = [];\r\n private currentDepth = 0;\r\n private currentPath = '';\r\n\r\n constructor(config: ParserConfig = {}) {\r\n super();\r\n this.config = mergeConfig(config);\r\n this.bufferManager = new BufferManager(this.config.maxBufferSize);\r\n this.tagMatcher = new TagMatcher(this.config.caseSensitive);\r\n this.stats = this.initializeStats();\r\n }\r\n\r\n /**\r\n * Register a tag definition\r\n */\r\n defineTag(definition: TagDefinition<T>): this {\r\n this.tagRegistry.set(definition.tagName, definition);\r\n this.updateStats();\r\n return this;\r\n }\r\n\r\n /**\r\n * Remove a tag definition\r\n */\r\n removeTag(tagName: string): boolean {\r\n const deleted = this.tagRegistry.delete(tagName);\r\n if (deleted) {\r\n this.updateStats();\r\n }\r\n return deleted;\r\n }\r\n\r\n /**\r\n * Check if a tag is registered\r\n */\r\n hasTag(tagName: string): boolean {\r\n return this.tagRegistry.has(tagName);\r\n }\r\n\r\n /**\r\n * Get all registered tag names\r\n */\r\n getRegisteredTags(): readonly string[] {\r\n return Array.from(this.tagRegistry.keys());\r\n }\r\n\r\n /**\r\n * Parse a chunk of streaming data\r\n */\r\n parse(chunk: string): void {\r\n try {\r\n this.state = ParserState.PARSING;\r\n this.bufferManager.append(chunk);\r\n\r\n // Choose parsing strategy based on configuration\r\n if (this.config.enableNested) {\r\n this.processBufferNested();\r\n } else {\r\n this.processBufferFlat();\r\n }\r\n\r\n this.state = ParserState.COMPLETED;\r\n } catch (error) {\r\n this.state = ParserState.ERROR;\r\n this.stats = { ...this.stats, errorCount: this.stats.errorCount + 1 };\r\n this.emitError(\r\n error instanceof ParserError\r\n ? error\r\n : new ParserError(\r\n `Unexpected error: ${error instanceof Error ? error.message : String(error)}`,\r\n ParserErrorCode.INVALID_TAG_FORMAT\r\n ),\r\n chunk\r\n );\r\n }\r\n }\r\n\r\n /**\r\n * Process buffer for flat parsing mode\r\n */\r\n private processBufferFlat(): void {\r\n const buffer = this.bufferManager.getContent();\r\n const completeTags = this.tagMatcher.findCompleteTags(buffer);\r\n const parsedTags: T[] = [];\r\n\r\n for (const match of completeTags) {\r\n try {\r\n const tag = this.processTagFlat(match);\r\n if (tag) {\r\n parsedTags.push(tag);\r\n this.stats = { ...this.stats, totalTagsParsed: this.stats.totalTagsParsed + 1 };\r\n }\r\n } catch (error) {\r\n this.emitError(\r\n error instanceof ParserError\r\n ? error\r\n : new ParserError('Failed to process tag', ParserErrorCode.TRANSFORMATION_FAILED),\r\n match\r\n );\r\n }\r\n }\r\n\r\n // Remove processed content from buffer\r\n this.removeProcessedContent(completeTags);\r\n\r\n if (parsedTags.length > 0) {\r\n this.emit('parsing_complete', parsedTags);\r\n this.emit('document_completed', parsedTags);\r\n }\r\n\r\n this.updateStats();\r\n }\r\n\r\n /**\r\n * Process buffer for nested parsing mode\r\n */\r\n private processBufferNested(): void {\r\n const buffer = this.bufferManager.getContent();\r\n let lastProcessedIndex = 0;\r\n\r\n while (lastProcessedIndex < buffer.length) {\r\n const nextTag = this.tagMatcher.findNextTag(buffer, lastProcessedIndex);\r\n\r\n if (!nextTag) break;\r\n // Check if the tag is registered BEFORE processing\r\n const isRegistered = this.tagRegistry.has(nextTag.tagName);\r\n\r\n if (!isRegistered) {\r\n // If tag is not registered, treat the entire tag (including content) as text content\r\n const unregisteredTagContent = this.tagMatcher.extractTextContent(\r\n buffer,\r\n lastProcessedIndex,\r\n nextTag.endIndex\r\n );\r\n this.handleTextContent(unregisteredTagContent);\r\n lastProcessedIndex = nextTag.endIndex;\r\n continue;\r\n }\r\n\r\n // Extract text content between tags\r\n if (nextTag.startIndex > lastProcessedIndex) {\r\n const textContent = this.tagMatcher.extractTextContent(\r\n buffer,\r\n lastProcessedIndex,\r\n nextTag.startIndex\r\n );\r\n this.handleTextContent(textContent);\r\n }\r\n\r\n // Process the tag\r\n this.processTagNested(nextTag);\r\n lastProcessedIndex = nextTag.endIndex;\r\n }\r\n\r\n // Update buffer to remaining content\r\n if (lastProcessedIndex > 0) {\r\n this.bufferManager.consume(lastProcessedIndex);\r\n }\r\n }\r\n\r\n /**\r\n * Process a single tag match in flat mode\r\n */\r\n private processTagFlat(match: TagMatch): T | null {\r\n const definition = this.tagRegistry.get(match.tagName);\r\n\r\n if (!definition) {\r\n throw ParserError.fromUnknownTag(match.tagName);\r\n }\r\n\r\n // Emit tag started event\r\n this.emit('tag_started', match.tagName as T['tagName'], match.attributes as T['attributes']);\r\n definition.onStart?.(match.tagName as T['tagName'], match.attributes as T['attributes']);\r\n\r\n // Create tag object\r\n const tag: T = {\r\n tagName: match.tagName,\r\n content: this.config.trimWhitespace ? match.content.trim() : match.content,\r\n attributes: match.attributes,\r\n } as T;\r\n\r\n // Apply defaults\r\n TagTransformer.applyDefaults(tag, definition);\r\n\r\n // Validate and transform\r\n TagValidator.validate(tag, definition);\r\n TagTransformer.transform(tag, definition);\r\n\r\n // Emit completion events\r\n this.emit('tag_completed', tag);\r\n definition.onComplete?.(tag);\r\n\r\n return tag;\r\n }\r\n\r\n /**\r\n * Process a tag in nested mode\r\n */\r\n private processTagNested(tagMatch: TagMatch): void {\r\n switch (tagMatch.type) {\r\n case 'opening':\r\n this.handleOpeningTag(tagMatch);\r\n break;\r\n case 'closing':\r\n this.handleClosingTag(tagMatch);\r\n break;\r\n case 'self-closing':\r\n this.handleSelfClosingTag(tagMatch);\r\n break;\r\n }\r\n }\r\n\r\n /**\r\n * Handle opening tag in nested mode\r\n */\r\n private handleOpeningTag(tagMatch: TagMatch): void {\r\n // Check depth limits\r\n if (this.currentDepth >= this.config.maxDepth) {\r\n throw ParserError.fromMaxDepth(this.config.maxDepth, tagMatch.path);\r\n }\r\n\r\n const definition = this.tagRegistry.get(tagMatch.tagName);\r\n\r\n // Create new nested tag\r\n const newTag: NestedTag = {\r\n tagName: tagMatch.tagName,\r\n content: definition?.defaultContent || '',\r\n children: [],\r\n attributes: { ...definition?.defaultAttributes, ...tagMatch.attributes },\r\n parent: this.getCurrentParent(),\r\n path: this.buildPath(tagMatch.tagName),\r\n depth: this.currentDepth + 1,\r\n isSelfClosing: false,\r\n };\r\n\r\n // Add to current parent's children if we have a parent\r\n const currentParent = this.getCurrentParent();\r\n if (currentParent && currentParent.children) {\r\n currentParent.children.push(newTag);\r\n }\r\n\r\n // Update state\r\n this.currentDepth++;\r\n this.currentPath = newTag.path || '';\r\n this.stats = {\r\n ...this.stats,\r\n maxDepthReached: Math.max(this.stats.maxDepthReached || 0, this.currentDepth),\r\n totalNestedTags: (this.stats.totalNestedTags || 0) + 1,\r\n };\r\n\r\n // Push to stack\r\n this.tagStack.push({\r\n tag: newTag,\r\n startIndex: tagMatch.startIndex,\r\n depth: this.currentDepth,\r\n path: newTag.path || '',\r\n });\r\n\r\n // Emit events\r\n this.emit('tag_opened', newTag as unknown as Partial<T>, this.currentDepth, newTag.path || '');\r\n this.emit('tag_started', newTag.tagName as T['tagName'], newTag.attributes as T['attributes']);\r\n definition?.onStart?.(newTag.tagName as T['tagName'], newTag.attributes as T['attributes']);\r\n }\r\n\r\n /**\r\n * Handle closing tag in nested mode\r\n */\r\n private handleClosingTag(tagMatch: TagMatch): void {\r\n if (this.tagStack.length === 0) {\r\n throw new ParserError(\r\n `Unexpected closing tag: ${tagMatch.tagName}`,\r\n ParserErrorCode.MISMATCHED_CLOSING_TAG\r\n );\r\n }\r\n\r\n const currentEntry = this.tagStack[this.tagStack.length - 1]!;\r\n\r\n // Check if closing tag matches the most recent opening tag\r\n if (currentEntry.tag.tagName !== tagMatch.tagName) {\r\n if (this.config.autoCloseUnclosed) {\r\n // Auto-close intervening tags\r\n while (\r\n this.tagStack.length > 0 &&\r\n this.tagStack[this.tagStack.length - 1]!.tag.tagName !== tagMatch.tagName\r\n ) {\r\n this.autoCloseTag();\r\n }\r\n } else {\r\n throw ParserError.fromMismatchedClosing(currentEntry.tag.tagName, tagMatch.tagName);\r\n }\r\n }\r\n\r\n // Pop from stack and complete the tag\r\n const completedEntry = this.tagStack.pop()!;\r\n this.completeTag(completedEntry.tag);\r\n\r\n // Update state\r\n this.currentDepth--;\r\n this.currentPath =\r\n this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1]!.path : '';\r\n }\r\n\r\n /**\r\n * Auto-close unclosed tag\r\n */\r\n private autoCloseTag(): void {\r\n if (this.tagStack.length === 0) return;\r\n\r\n const entry = this.tagStack.pop()!;\r\n this.completeTag(entry.tag);\r\n\r\n this.currentDepth--;\r\n this.currentPath =\r\n this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1]!.path : '';\r\n }\r\n\r\n /**\r\n * Complete a tag with validation and transformation\r\n */\r\n private completeTag(tag: NestedTag): void {\r\n const definition = this.tagRegistry.get(tag.tagName);\r\n\r\n if (definition) {\r\n TagValidator.validate(tag, definition);\r\n TagTransformer.transform(tag, definition);\r\n definition.onComplete?.(tag as unknown as T);\r\n }\r\n\r\n this.stats = { ...this.stats, totalTagsParsed: this.stats.totalTagsParsed + 1 };\r\n\r\n this.emit('tag_closed', tag as unknown as T, tag.depth || 0, tag.path || '');\r\n this.emit('tag_completed', tag as unknown as T);\r\n\r\n // Emit subtree_completed if tag has children (indicating a completed subtree)\r\n if (tag.children && tag.children.length > 0) {\r\n this.emit('subtree_completed', tag as unknown as T, tag.depth || 0);\r\n }\r\n }\r\n\r\n /**\r\n * Handle self-closing tag in nested mode\r\n */\r\n private handleSelfClosingTag(tagMatch: TagMatch): void {\r\n const definition = this.tagRegistry.get(tagMatch.tagName);\r\n\r\n // Create self-closing tag\r\n const tag: NestedTag = {\r\n tagName: tagMatch.tagName,\r\n content: definition?.defaultContent || '',\r\n children: [],\r\n attributes: { ...definition?.defaultAttributes, ...tagMatch.attributes },\r\n parent: this.getCurrentParent(),\r\n path: this.buildPath(tagMatch.tagName),\r\n depth: this.currentDepth + 1,\r\n isSelfClosing: true,\r\n };\r\n\r\n // Add to current parent's children if we have a parent\r\n const currentParent = this.getCurrentParent();\r\n if (currentParent && currentParent.children) {\r\n currentParent.children.push(tag);\r\n }\r\n\r\n // Complete the tag immediately\r\n this.completeTag(tag);\r\n }\r\n\r\n /**\r\n * Get current parent tag from stack\r\n */\r\n private getCurrentParent(): NestedTag | undefined {\r\n return this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1]!.tag : undefined;\r\n }\r\n\r\n /**\r\n * Build path string for current tag\r\n */\r\n private buildPath(tagName: string): string {\r\n return this.currentPath ? `${this.currentPath}/${tagName}` : tagName;\r\n }\r\n\r\n /**\r\n * Handle text content between tags\r\n */\r\n private handleTextContent(textContent: string): void {\r\n if (!textContent || textContent.trim() === '') return;\r\n\r\n const currentParent = this.getCurrentParent();\r\n if (currentParent) {\r\n // Add text content to current parent\r\n if (currentParent.content) {\r\n currentParent.content += textContent;\r\n } else {\r\n currentParent.content = textContent;\r\n }\r\n\r\n // Emit with correct parameters: tagName and partialContent\r\n this.emit('tag_content_update', currentParent.tagName as T['tagName'], textContent);\r\n }\r\n }\r\n\r\n /**\r\n * Remove processed content from buffer\r\n */\r\n private removeProcessedContent(matches: TagMatch[]): void {\r\n if (matches.length === 0) return;\r\n\r\n // Find the end of the last processed tag\r\n const lastMatch = matches[matches.length - 1]!;\r\n this.bufferManager.consume(lastMatch.endIndex);\r\n }\r\n\r\n /**\r\n * Initialize parser statistics\r\n */\r\n private initializeStats(): ParserStats {\r\n return {\r\n totalTagsParsed: 0,\r\n totalBytesProcessed: 0,\r\n errorCount: 0,\r\n bufferSize: 0,\r\n state: ParserState.IDLE,\r\n registeredTagsCount: 0,\r\n maxDepthReached: 0,\r\n totalNestedTags: 0,\r\n };\r\n }\r\n\r\n /**\r\n * Update parser statistics\r\n */\r\n private updateStats(): void {\r\n this.stats = {\r\n ...this.stats,\r\n totalBytesProcessed: this.bufferManager.getTotalProcessed(),\r\n bufferSize: this.bufferManager.getSize(),\r\n registeredTagsCount: this.tagRegistry.size,\r\n };\r\n\r\n this.emit('stats_updated', this.stats);\r\n }\r\n\r\n /**\r\n * Emit error event with context\r\n */\r\n private emitError(error: ParserError, context: unknown): void {\r\n this.emit('parse_error', error, context);\r\n }\r\n\r\n /**\r\n * Get current parser state\r\n */\r\n getState(): ParserState {\r\n return this.state;\r\n }\r\n\r\n /**\r\n * Get parser statistics\r\n */\r\n getStats(): Readonly<ParserStats> {\r\n return { ...this.stats };\r\n }\r\n\r\n /**\r\n * Get current parsing depth (nested mode only)\r\n */\r\n getCurrentDepth(): number {\r\n return this.currentDepth;\r\n }\r\n\r\n /**\r\n * Get current parsing path (nested mode only)\r\n */\r\n getCurrentPath(): string {\r\n return this.currentPath;\r\n }\r\n\r\n /**\r\n * Get buffer size\r\n */\r\n getBufferSize(): number {\r\n return this.bufferManager.getSize();\r\n }\r\n\r\n /**\r\n * Clear buffer and reset parser state\r\n */\r\n reset(): void {\r\n this.bufferManager.clear();\r\n this.emit('buffer_cleared');\r\n\r\n this.tagStack = [];\r\n this.currentDepth = 0;\r\n this.currentPath = '';\r\n this.state = ParserState.IDLE;\r\n this.stats = this.initializeStats();\r\n\r\n this.emit('parser_reset');\r\n }\r\n\r\n /**\r\n * Finalize parsing and auto-close any remaining open tags\r\n */\r\n finalize(): void {\r\n // Collect root tags before auto-closing\r\n const rootTags: T[] = [];\r\n for (const entry of this.tagStack) {\r\n if (entry.depth === 1) {\r\n // Root level tags\r\n rootTags.push(entry.tag as unknown as T);\r\n }\r\n }\r\n\r\n // Auto-close any remaining open tags in nested mode\r\n while (this.tagStack.length > 0) {\r\n this.autoCloseTag();\r\n }\r\n\r\n this.state = ParserState.COMPLETED;\r\n this.updateStats();\r\n\r\n // Emit document completed with root tags\r\n if (rootTags.length > 0) {\r\n this.emit('document_completed', rootTags);\r\n }\r\n\r\n this.emit('parsing_finalized', this.stats);\r\n }\r\n}\r\n","/**\r\n * Base types for LLM Stream Parser\r\n */\r\n\r\n/**\r\n * Base interface that all custom tag definitions must extend\r\n */\r\nexport interface BaseTag {\r\n readonly tagName: string;\r\n content: string;\r\n attributes?: Record<string, unknown>;\r\n}\r\n\r\n/**\r\n * Enhanced base tag interface that supports nested structure\r\n * Backward compatible with BaseTag\r\n */\r\nexport interface NestedTag extends BaseTag {\r\n children?: NestedTag[];\r\n parent: NestedTag | undefined;\r\n path: string | undefined;\r\n depth?: number;\r\n isSelfClosing?: boolean;\r\n}\r\n\r\n/**\r\n * Tag match information from the parser\r\n */\r\nexport interface TagMatch {\r\n readonly tagName: string;\r\n readonly content: string;\r\n readonly attributes: Record<string, unknown> | undefined;\r\n readonly startIndex: number;\r\n readonly endIndex: number;\r\n readonly fullMatch: string;\r\n readonly type?: 'opening' | 'closing' | 'self-closing' | 'complete';\r\n readonly depth?: number;\r\n readonly path?: string;\r\n}\r\n\r\n/**\r\n * Validation result for tag content or attributes\r\n */\r\nexport type ValidationResult = true | string;\r\n\r\n/**\r\n * Parser state enumeration\r\n */\r\nexport enum ParserState {\r\n IDLE = 'IDLE',\r\n PARSING = 'PARSING',\r\n ERROR = 'ERROR',\r\n COMPLETED = 'COMPLETED',\r\n}\r\n\r\n/**\r\n * Statistics about parsing operations\r\n */\r\nexport interface ParserStats {\r\n readonly totalTagsParsed: number;\r\n readonly totalBytesProcessed: number;\r\n readonly errorCount: number;\r\n readonly bufferSize: number;\r\n readonly state: ParserState;\r\n readonly registeredTagsCount: number;\r\n readonly maxDepthReached?: number;\r\n readonly totalNestedTags?: number;\r\n readonly selfClosingTags?: number;\r\n}\r\n\r\n/**\r\n * Parse result interface\r\n */\r\nexport interface ParsedResult<T = any> {\r\n success: boolean;\r\n data?: T;\r\n errors?: any[];\r\n warnings?: string[];\r\n stats: ParserStats;\r\n}\r\n","/**\r\n * Configuration types for LLM Stream Parser\r\n */\r\n\r\nimport { ParserError } from './errors';\r\n\r\n/**\r\n * Configuration options for the parser\r\n */\r\nexport interface ParserConfig {\r\n /** Whether tag names are case sensitive (default: false) */\r\n caseSensitive?: boolean;\r\n /** Whether to trim whitespace from content (default: true) */\r\n trimWhitespace?: boolean;\r\n /** Maximum buffer size in bytes (default: 1MB) */\r\n maxBufferSize?: number;\r\n /** Whether to preserve attributes order (default: false) */\r\n preserveAttributeOrder?: boolean;\r\n /** Custom error handler for parsing errors */\r\n errorHandler?: ((error: ParserError) => void) | undefined;\r\n\r\n // Enhanced options for nested parsing\r\n /** Maximum nesting depth (default: 50) */\r\n maxDepth?: number;\r\n /** Whether to preserve whitespace in nested content (default: false) */\r\n preserveWhitespace?: boolean;\r\n /** Whether to auto-close unclosed tags at EOF (default: true) */\r\n autoCloseUnclosed?: boolean;\r\n /** Enable nested parsing mode (default: false for backward compatibility) */\r\n enableNested?: boolean;\r\n}\r\n\r\n/**\r\n * Required configuration with all defaults applied\r\n */\r\nexport interface RequiredParserConfig {\r\n caseSensitive: boolean;\r\n trimWhitespace: boolean;\r\n maxBufferSize: number;\r\n preserveAttributeOrder: boolean;\r\n errorHandler: ((error: ParserError) => void) | undefined;\r\n maxDepth: number;\r\n preserveWhitespace: boolean;\r\n autoCloseUnclosed: boolean;\r\n enableNested: boolean;\r\n}\r\n\r\n/**\r\n * Default configuration values\r\n */\r\nexport const DEFAULT_CONFIG: RequiredParserConfig = {\r\n caseSensitive: false,\r\n trimWhitespace: true,\r\n maxBufferSize: 1024 * 1024, // 1MB\r\n preserveAttributeOrder: false,\r\n errorHandler: undefined,\r\n maxDepth: 50,\r\n preserveWhitespace: false,\r\n autoCloseUnclosed: true,\r\n enableNested: false,\r\n};\r\n\r\n/**\r\n * Merge user config with defaults\r\n */\r\nexport function mergeConfig(config: ParserConfig = {}): RequiredParserConfig {\r\n return {\r\n caseSensitive: config.caseSensitive ?? DEFAULT_CONFIG.caseSensitive,\r\n trimWhitespace: config.trimWhitespace ?? DEFAULT_CONFIG.trimWhitespace,\r\n maxBufferSize: config.maxBufferSize ?? DEFAULT_CONFIG.maxBufferSize,\r\n preserveAttributeOrder: config.preserveAttributeOrder ?? DEFAULT_CONFIG.preserveAttributeOrder,\r\n errorHandler: config.errorHandler ?? DEFAULT_CONFIG.errorHandler,\r\n maxDepth: config.maxDepth ?? DEFAULT_CONFIG.maxDepth,\r\n preserveWhitespace: config.preserveWhitespace ?? DEFAULT_CONFIG.preserveWhitespace,\r\n autoCloseUnclosed: config.autoCloseUnclosed ?? DEFAULT_CONFIG.autoCloseUnclosed,\r\n enableNested: config.enableNested ?? DEFAULT_CONFIG.enableNested,\r\n };\r\n}\r\n\r\n/**\r\n * Validate configuration\r\n */\r\nexport function validateConfig(config: ParserConfig): string[] {\r\n const errors: string[] = [];\r\n\r\n if (config.maxBufferSize !== undefined && config.maxBufferSize <= 0) {\r\n errors.push('maxBufferSize must be greater than 0');\r\n }\r\n\r\n if (config.maxDepth !== undefined && config.maxDepth <= 0) {\r\n errors.push('maxDepth must be greater than 0');\r\n }\r\n\r\n if (config.maxBufferSize !== undefined && config.maxBufferSize > 100 * 1024 * 1024) {\r\n errors.push('maxBufferSize should not exceed 100MB for performance reasons');\r\n }\r\n\r\n if (config.maxDepth !== undefined && config.maxDepth > 1000) {\r\n errors.push('maxDepth should not exceed 1000 for performance reasons');\r\n }\r\n\r\n return errors;\r\n}\r\n","/**\r\n * Error types for LLM Stream Parser\r\n */\r\n\r\n/**\r\n * Error codes for different types of parsing errors\r\n */\r\nexport enum ParserErrorCode {\r\n INVALID_TAG_FORMAT = 'INVALID_TAG_FORMAT',\r\n UNKNOWN_TAG = 'UNKNOWN_TAG',\r\n CONTENT_VALIDATION_FAILED = 'CONTENT_VALIDATION_FAILED',\r\n ATTRIBUTE_VALIDATION_FAILED = 'ATTRIBUTE_VALIDATION_FAILED',\r\n BUFFER_OVERFLOW = 'BUFFER_OVERFLOW',\r\n MALFORMED_ATTRIBUTES = 'MALFORMED_ATTRIBUTES',\r\n UNCLOSED_TAG = 'UNCLOSED_TAG',\r\n TRANSFORMATION_FAILED = 'TRANSFORMATION_FAILED',\r\n\r\n // Enhanced error codes for nested parsing\r\n INVALID_NESTING = 'INVALID_NESTING',\r\n MISMATCHED_CLOSING_TAG = 'MISMATCHED_CLOSING_TAG',\r\n INVALID_SELF_CLOSING = 'INVALID_SELF_CLOSING',\r\n MAX_DEPTH_EXCEEDED = 'MAX_DEPTH_EXCEEDED',\r\n INVALID_CHILDREN = 'INVALID_CHILDREN',\r\n SCHEMA_VIOLATION = 'SCHEMA_VIOLATION',\r\n}\r\n\r\n/**\r\n * Custom error class for parsing errors\r\n */\r\nexport class ParserError extends Error {\r\n constructor(\r\n message: string,\r\n public readonly code: ParserErrorCode,\r\n public readonly context?: unknown,\r\n public readonly path?: string,\r\n public readonly depth?: number\r\n ) {\r\n super(message);\r\n this.name = 'ParserError';\r\n }\r\n\r\n /**\r\n * Create error from validation failure\r\n */\r\n static fromValidation(\r\n tagName: string,\r\n validationMessage: string,\r\n type: 'content' | 'attributes' | 'children' = 'content'\r\n ): ParserError {\r\n const codeMap = {\r\n content: ParserErrorCode.CONTENT_VALIDATION_FAILED,\r\n attributes: ParserErrorCode.ATTRIBUTE_VALIDATION_FAILED,\r\n children: ParserErrorCode.INVALID_CHILDREN,\r\n };\r\n\r\n return new ParserError(\r\n `${type} validation failed for tag '${tagName}': ${validationMessage}`,\r\n codeMap[type]\r\n );\r\n }\r\n\r\n /**\r\n * Create error from transformation failure\r\n */\r\n static fromTransformation(tagName: string, error: Error): ParserError {\r\n return new ParserError(\r\n `Transformation failed for tag '${tagName}': ${error.message}`,\r\n ParserErrorCode.TRANSFORMATION_FAILED\r\n );\r\n }\r\n\r\n /**\r\n * Create error from unknown tag\r\n */\r\n static fromUnknownTag(tagName: string): ParserError {\r\n return new ParserError(`Unknown tag: ${tagName}`, ParserErrorCode.UNKNOWN_TAG);\r\n }\r\n\r\n /**\r\n * Create error from buffer overflow\r\n */\r\n static fromBufferOverflow(maxSize: number): ParserError {\r\n return new ParserError(\r\n `Buffer overflow: exceeds maximum size of ${maxSize} bytes`,\r\n ParserErrorCode.BUFFER_OVERFLOW\r\n );\r\n }\r\n\r\n /**\r\n * Create error from max depth exceeded\r\n */\r\n static fromMaxDepth(maxDepth: number, path?: string): ParserError {\r\n return new ParserError(\r\n `Maximum nesting depth of ${maxDepth} exceeded`,\r\n ParserErrorCode.MAX_DEPTH_EXCEEDED,\r\n undefined,\r\n path,\r\n maxDepth\r\n );\r\n }\r\n\r\n /**\r\n * Create error from mismatched closing tag\r\n */\r\n static fromMismatchedClosing(expected: string, actual: string, path?: string): ParserError {\r\n return new ParserError(\r\n `Mismatched closing tag: expected ${expected}, got ${actual}`,\r\n ParserErrorCode.MISMATCHED_CLOSING_TAG,\r\n { expected, actual },\r\n path\r\n );\r\n }\r\n}\r\n","/**\r\n * Buffer management for LLM Stream Parser\r\n */\r\n\r\nimport { ParserError, ParserErrorCode } from '../types/errors';\r\n\r\n/**\r\n * Buffer manager for handling streaming content\r\n */\r\nexport class BufferManager {\r\n private buffer = '';\r\n private readonly maxSize: number;\r\n private totalBytesProcessed = 0;\r\n\r\n constructor(maxSize: number = 1024 * 1024) {\r\n this.maxSize = maxSize;\r\n }\r\n\r\n /**\r\n * Append content to buffer\r\n */\r\n append(chunk: string): void {\r\n if (typeof chunk !== 'string') {\r\n throw new ParserError('Chunk must be a string', ParserErrorCode.INVALID_TAG_FORMAT);\r\n }\r\n\r\n // Check for buffer overflow before appending\r\n if (this.buffer.length + chunk.length > this.maxSize) {\r\n throw ParserError.fromBufferOverflow(this.maxSize);\r\n }\r\n\r\n this.buffer += chunk;\r\n this.totalBytesProcessed += chunk.length;\r\n }\r\n\r\n /**\r\n * Get current buffer content\r\n */\r\n getContent(): string {\r\n return this.buffer;\r\n }\r\n\r\n /**\r\n * Get buffer size\r\n */\r\n getSize(): number {\r\n return this.buffer.length;\r\n }\r\n\r\n /**\r\n * Get total bytes processed\r\n */\r\n getTotalProcessed(): number {\r\n return this.totalBytesProcessed;\r\n }\r\n\r\n /**\r\n * Clear the buffer\r\n */\r\n clear(): void {\r\n this.buffer = '';\r\n }\r\n\r\n /**\r\n * Remove content from start of buffer\r\n */\r\n consume(length: number): string {\r\n if (length <= 0) {\r\n return '';\r\n }\r\n\r\n if (length >= this.buffer.length) {\r\n const content = this.buffer;\r\n this.buffer = '';\r\n return content;\r\n }\r\n\r\n const content = this.buffer.slice(0, length);\r\n this.buffer = this.buffer.slice(length);\r\n return content;\r\n }\r\n\r\n /**\r\n * Remove content from buffer by index range\r\n */\r\n removeRange(startIndex: number, endIndex: number): void {\r\n if (startIndex < 0 || endIndex < startIndex || startIndex >= this.buffer.length) {\r\n return;\r\n }\r\n\r\n this.buffer = this.buffer.slice(0, startIndex) + this.buffer.slice(endIndex);\r\n }\r\n\r\n /**\r\n * Get a slice of buffer without modifying it\r\n */\r\n slice(start?: number, end?: number): string {\r\n return this.buffer.slice(start, end);\r\n }\r\n\r\n /**\r\n * Search for pattern in buffer\r\n */\r\n indexOf(searchValue: string, fromIndex?: number): number {\r\n return this.buffer.indexOf(searchValue, fromIndex);\r\n }\r\n\r\n /**\r\n * Check if buffer is empty\r\n */\r\n isEmpty(): boolean {\r\n return this.buffer.length === 0;\r\n }\r\n\r\n /**\r\n * Check if buffer has content\r\n */\r\n hasContent(): boolean {\r\n return this.buffer.length > 0;\r\n }\r\n\r\n /**\r\n * Get buffer utilization percentage\r\n */\r\n getUtilization(): number {\r\n return (this.buffer.length / this.maxSize) * 100;\r\n }\r\n\r\n /**\r\n * Get remaining capacity\r\n */\r\n getRemainingCapacity(): number {\r\n return this.maxSize - this.buffer.length;\r\n }\r\n\r\n /**\r\n * Check if buffer is near full (80% capacity)\r\n */\r\n isNearFull(): boolean {\r\n return this.getUtilization() >= 80;\r\n }\r\n\r\n /**\r\n * Get buffer statistics\r\n */\r\n getStats(): {\r\n size: number;\r\n maxSize: number;\r\n utilization: number;\r\n totalProcessed: number;\r\n remainingCapacity: number;\r\n } {\r\n return {\r\n size: this.buffer.length,\r\n maxSize: this.maxSize,\r\n utilization: this.getUtilization(),\r\n totalProcessed: this.totalBytesProcessed,\r\n remainingCapacity: this.getRemainingCapacity(),\r\n };\r\n }\r\n}\r\n","/**\r\n * Tag matching and pattern management for LLM Stream Parser\r\n */\r\n\r\nimport { TagMatch } from '../types/base';\r\n\r\n/**\r\n * Regular expression patterns for tag matching\r\n */\r\nexport class TagPatterns {\r\n // Self-closing tags: <tag />\r\n static readonly SELF_CLOSING = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\\s+[^>]*)?)\\s*\\/>/g;\r\n\r\n // Opening tags: <tag>\r\n static readonly OPENING = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\\s+[^>]*)?)\\s*>/g;\r\n\r\n // Closing tags: </tag>\r\n static readonly CLOSING = /<\\/([a-zA-Z][a-zA-Z0-9_-]*)\\s*>/g;\r\n\r\n // Complete flat tags: <tag>content</tag>\r\n static readonly COMPLETE = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\\s+[^>]*)?)\\s*>(.*?)<\\/\\1\\s*>/gs;\r\n\r\n // Attributes parsing\r\n static readonly ATTRIBUTES = /(\\w+)(?:=(?:\"([^\"]*)\"|'([^']*)'|([^\\s>]+)))?/g;\r\n\r\n /**\r\n * Reset all regex patterns to start from beginning\r\n */\r\n static resetAll(): void {\r\n this.SELF_CLOSING.lastIndex = 0;\r\n this.OPENING.lastIndex = 0;\r\n this.CLOSING.lastIndex = 0;\r\n this.COMPLETE.lastIndex = 0;\r\n this.ATTRIBUTES.lastIndex = 0;\r\n }\r\n}\r\n\r\n/**\r\n * Tag matcher for finding and parsing XML-like tags\r\n */\r\nexport class TagMatcher {\r\n private readonly caseSensitive: boolean;\r\n\r\n constructor(caseSensitive = false) {\r\n this.caseSensitive = caseSensitive;\r\n }\r\n\r\n /**\r\n * Find the next tag in the buffer starting from given index\r\n */\r\n findNextTag(buffer: string, startIndex = 0): TagMatch | null {\r\n const searchBuffer = buffer.slice(startIndex);\r\n let earliestMatch: TagMatch | null = null;\r\n let earliestIndex = Infinity;\r\n\r\n // Reset regex patterns\r\n TagPatterns.resetAll();\r\n\r\n // Check for self-closing tags\r\n const selfClosingMatch = TagPatterns.SELF_CLOSING.exec(searchBuffer);\r\n if (selfClosingMatch && selfClosingMatch.index < earliestIndex) {\r\n earliestIndex = selfClosingMatch.index;\r\n earliestMatch = this.createTagMatch(selfClosingMatch, startIndex, 'self-closing');\r\n }\r\n\r\n // Reset and check for opening tags\r\n TagPatterns.OPENING.lastIndex = 0;\r\n const openingMatch = TagPatterns.OPENING.exec(searchBuffer);\r\n if (openingMatch && openingMatch.index < earliestIndex) {\r\n earliestIndex = openingMatch.index;\r\n earliestMatch = this.createTagMatch(openingMatch, startIndex, 'opening');\r\n }\r\n\r\n // Reset and check for closing tags\r\n TagPatterns.CLOSING.lastIndex = 0;\r\n const closingMatch = TagPatterns.CLOSING.exec(searchBuffer);\r\n if (closingMatch && closingMatch.index < earliestIndex) {\r\n earliestIndex = closingMatch.index;\r\n earliestMatch = this.createTagMatch(closingMatch, startIndex, 'closing');\r\n }\r\n\r\n return earliestMatch;\r\n }\r\n\r\n /**\r\n * Find all complete tags in buffer (flat mode)\r\n */\r\n findCompleteTags(buffer: string): TagMatch[] {\r\n const matches: TagMatch[] = [];\r\n TagPatterns.COMPLETE.lastIndex = 0;\r\n\r\n let match: RegExpExecArray | null;\r\n while ((match = TagPatterns.COMPLETE.exec(buffer)) !== null) {\r\n const [fullMatch, tagName, attributesStr, content] = match;\r\n\r\n if (!tagName || content === undefined) continue;\r\n\r\n matches.push({\r\n tagName: this.normalizeTagName(tagName),\r\n content: content,\r\n attributes: this.parseAttributes(attributesStr || ''),\r\n startIndex: match.index,\r\n endIndex: match.index + fullMatch.length,\r\n fullMatch,\r\n type: 'complete',\r\n });\r\n }\r\n\r\n return matches;\r\n }\r\n\r\n /**\r\n * Parse attributes from attribute string\r\n */\r\n parseAttributes(attributesStr: string): Record<string, unknown> | undefined {\r\n if (!attributesStr.trim()) {\r\n return undefined;\r\n }\r\n\r\n const attributes: Record<string, unknown> = {};\r\n TagPatterns.ATTRIBUTES.lastIndex = 0;\r\n\r\n let match: RegExpExecArray | null;\r\n while ((match = TagPatterns.ATTRIBUTES.exec(attributesStr)) !== null) {\r\n const [, name, doubleQuotedValue, singleQuotedValue, unquotedValue] = match;\r\n if (!name) continue;\r\n\r\n const value = doubleQuotedValue ?? singleQuotedValue ?? unquotedValue ?? true;\r\n attributes[name] = this.parseAttributeValue(value);\r\n }\r\n\r\n return Object.keys(attributes).length > 0 ? attributes : undefined;\r\n }\r\n\r\n /**\r\n * Create TagMatch object from regex match\r\n */\r\n private createTagMatch(\r\n match: RegExpExecArray,\r\n startIndex: number,\r\n type: 'opening' | 'closing' | 'self-closing'\r\n ): TagMatch {\r\n const [fullMatch, tagName, attributesStr] = match;\r\n\r\n return {\r\n tagName: this.normalizeTagName(tagName!),\r\n content: '',\r\n attributes: type === 'closing' ? undefined : this.parseAttributes(attributesStr || ''),\r\n startIndex: startIndex + match.index,\r\n endIndex: startIndex + match.index + fullMatch.length,\r\n fullMatch,\r\n type,\r\n };\r\n }\r\n\r\n /**\r\n * Parse individual attribute value with type coercion\r\n */\r\n private parseAttributeValue(value: string | boolean): unknown {\r\n if (typeof value === 'boolean') {\r\n return value;\r\n }\r\n\r\n // Try to parse as number\r\n if (/^\\d+$/.test(value)) {\r\n return parseInt(value, 10);\r\n }\r\n\r\n if (/^\\d*\\.\\d+$/.test(value)) {\r\n return parseFloat(value);\r\n }\r\n\r\n // Try to parse as boolean\r\n if (value === 'true') return true;\r\n if (value === 'false') return false;\r\n\r\n return value;\r\n }\r\n\r\n /**\r\n * Normalize tag name according to case sensitivity\r\n */\r\n private normalizeTagName(tagName: string): string {\r\n return this.caseSensitive ? tagName : tagName.toLowerCase();\r\n }\r\n\r\n /**\r\n * Check if a string contains any XML-like tags\r\n */\r\n containsTags(content: string): boolean {\r\n return /<[a-zA-Z][a-zA-Z0-9_-]*/.test(content);\r\n }\r\n\r\n /**\r\n * Extract text content between tags\r\n */\r\n extractTextContent(buffer: string, startIndex: number, endIndex: number): string {\r\n return buffer.slice(startIndex, endIndex);\r\n }\r\n}\r\n","/**\r\n * Validation utilities for LLM Stream Parser\r\n */\r\n\r\nimport { BaseTag, NestedTag, ValidationResult } from '../types/base';\r\nimport { TagDefinition } from '../types/schema';\r\nimport { ParserError } from '../types/errors';\r\n\r\n/**\r\n * Content validators for common use cases\r\n */\r\nexport class ContentValidators {\r\n /**\r\n * Validate minimum length\r\n */\r\n static minLength(min: number): (content: string) => ValidationResult {\r\n return (content: string) => {\r\n return content.length >= min ? true : `Content must be at least ${min} characters`;\r\n };\r\n }\r\n\r\n /**\r\n * Validate maximum length\r\n */\r\n static maxLength(max: number): (content: string) => ValidationResult {\r\n return (content: string) => {\r\n return content.length <= max ? true : `Content must be no more than ${max} characters`;\r\n };\r\n }\r\n\r\n /**\r\n * Validate pattern match\r\n */\r\n static pattern(regex: RegExp, message?: string): (content: string) => ValidationResult {\r\n return (content: string) => {\r\n return regex.test(content) ? true : message || 'Content does not match required pattern';\r\n };\r\n }\r\n\r\n /**\r\n * Validate enumerated values\r\n */\r\n static enum(\r\n allowedValues: string[],\r\n caseSensitive = false\r\n ): (content: string) => ValidationResult {\r\n const values = caseSensitive ? allowedValues : allowedValues.map(v => v.toLowerCase());\r\n\r\n return (content: string) => {\r\n const testValue = caseSensitive ? content : content.toLowerCase();\r\n return values.includes(testValue)\r\n ? true\r\n : `Value must be one of: ${allowedValues.join(', ')}`;\r\n };\r\n }\r\n\r\n /**\r\n * Validate numeric content\r\n */\r\n static numeric(\r\n options: {\r\n min?: number;\r\n max?: number;\r\n integer?: boolean;\r\n } = {}\r\n ): (content: string) => ValidationResult {\r\n return (content: string) => {\r\n const num = parseFloat(content.trim());\r\n\r\n if (isNaN(num)) {\r\n return 'Content must be a valid number';\r\n }\r\n\r\n if (options.integer && !Number.isInteger(num)) {\r\n return 'Content must be an integer';\r\n }\r\n\r\n if (options.min !== undefined && num < options.min) {\r\n return `Value must be at least ${options.min}`;\r\n }\r\n\r\n if (options.max !== undefined && num > options.max) {\r\n return `Value must be no more than ${options.max}`;\r\n }\r\n\r\n return true;\r\n };\r\n }\r\n\r\n /**\r\n * Validate URL format\r\n */\r\n static url(\r\n allowedProtocols: string[] = ['http', 'https']\r\n ): (content: string) => ValidationResult {\r\n return (content: string) => {\r\n try {\r\n const url = new URL(content.trim());\r\n const protocol = url.protocol.slice(0, -1);\r\n\r\n if (!allowedProtocols.includes(protocol)) {\r\n return `URL must use one of these protocols: ${allowedProtocols.join(', ')}`;\r\n }\r\n\r\n return true;\r\n } catch {\r\n return 'Invalid URL format';\r\n }\r\n };\r\n }\r\n\r\n /**\r\n * Validate email format\r\n */\r\n static email(): (content: string) => ValidationResult {\r\n const emailPattern = /^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$/;\r\n return ContentValidators.pattern(emailPattern, 'Invalid email format');\r\n }\r\n\r\n /**\r\n * Validate required (non-empty)\r\n */\r\n static required(): (content: string) => ValidationResult {\r\n return (content: string) => {\r\n return content.trim().length > 0 ? true : 'Content is required';\r\n };\r\n }\r\n\r\n /**\r\n * Combine multiple validators\r\n */\r\n static combine(\r\n ...validators: Array<(content: string) => ValidationResult>\r\n ): (content: string) => ValidationResult {\r\n return (content: string) => {\r\n for (const validator of validators) {\r\n const result = validator(content);\r\n if (result !== true) {\r\n return result;\r\n }\r\n }\r\n return true;\r\n };\r\n }\r\n}\r\n\r\n/**\r\n * Attribute validators\r\n */\r\nexport class AttributeValidators {\r\n /**\r\n * Validate required attributes\r\n */\r\n static required(\r\n requiredAttrs: string[]\r\n ): (attributes?: Record<string, unknown>) => ValidationResult {\r\n return attributes => {\r\n if (!attributes) {\r\n return requiredAttrs.length > 0\r\n ? `Missing required attributes: ${requiredAttrs.join(', ')}`\r\n : true;\r\n }\r\n\r\n const missing = requiredAttrs.filter(attr => !(attr in attributes));\r\n return missing.length === 0 ? true : `Missing required attributes: ${missing.join(', ')}`;\r\n };\r\n }\r\n\r\n /**\r\n * Validate allowed attributes\r\n */\r\n static allowed(\r\n allowedAttrs: string[]\r\n ): (attributes?: Record<string, unknown>) => ValidationResult {\r\n return attributes => {\r\n if (!attributes) return true;\r\n\r\n const invalid = Object.keys(attributes).filter(attr => !allowedAttrs.includes(attr));\r\n return invalid.length === 0 ? true : `Invalid attributes: ${invalid.join(', ')}`;\r\n };\r\n }\r\n\r\n /**\r\n * Validate attribute types\r\n */\r\n static types(\r\n typeMap: Record<string, 'string' | 'number' | 'boolean'>\r\n ): (attributes?: Record<string, unknown>) => ValidationResult {\r\n return attributes => {\r\n if (!attributes) return true;\r\n\r\n for (const [attr, expectedType] of Object.entries(typeMap)) {\r\n if (attr in attributes) {\r\n const value = attributes[attr];\r\n const actualType = typeof value;\r\n\r\n if (expectedType === 'number' && typeof value === 'string' && !isNaN(Number(value))) {\r\n // Allow string numbers\r\n continue;\r\n }\r\n\r\n if (actualType !== expectedType) {\r\n return `Attribute '${attr}' must be of type ${expectedType}, got ${actualType}`;\r\n }\r\n }\r\n }\r\n\r\n return true;\r\n };\r\n }\r\n}\r\n\r\n/**\r\n * Tag validator that handles validation according to tag definition\r\n */\r\nexport class TagValidator {\r\n /**\r\n * Validate a tag according to its definition\r\n */\r\n static validate<T extends BaseTag>(tag: T | NestedTag, definition: TagDefinition<T>): void {\r\n // Content validation\r\n if (definition.validateContent && tag.content) {\r\n const result = definition.validateContent(tag.content);\r\n if (result !== true) {\r\n throw ParserError.fromValidation(tag.tagName, result, 'content');\r\n }\r\n }\r\n\r\n // Attributes validation\r\n if (definition.validateAttributes && tag.attributes) {\r\n const result = definition.validateAttributes(tag.attributes);\r\n if (result !== true) {\r\n throw ParserError.fromValidation(tag.tagName, result, 'attributes');\r\n }\r\n }\r\n\r\n // Children validation (for nested tags)\r\n if (definition.validateChildren && 'children' in tag && tag.children) {\r\n const result = definition.validateChildren(tag.children);\r\n if (result !== true) {\r\n throw ParserError.fromValidation(tag.tagName, result, 'children');\r\n }\r\n }\r\n }\r\n\r\n /**\r\n * Validate tag structure for nested parsing\r\n */\r\n static validateNested(tag: NestedTag, definition: TagDefinition): void {\r\n // Check if self-closing is allowed\r\n if (tag.isSelfClosing && definition.allowSelfClosing === false) {\r\n throw new ParserError(\r\n `Tag ${tag.tagName} cannot be self-closing`,\r\n 'INVALID_SELF_CLOSING' as any\r\n );\r\n }\r\n\r\n // Check if children are allowed\r\n if (tag.children && tag.children.length > 0 && definition.allowChildren === false) {\r\n throw new ParserError(`Tag ${tag.tagName} cannot have children`, 'INVALID_CHILDREN' as any);\r\n }\r\n }\r\n}\r\n","/**\r\n * Content and attribute transformation utilities\r\n */\r\n\r\nimport { BaseTag, NestedTag } from '../types/base';\r\nimport { TagDefinition } from '../types/schema';\r\nimport { ParserError } from '../types/errors';\r\n\r\n/**\r\n * Content transformers for common use cases\r\n */\r\nexport class ContentTransformers {\r\n /**\r\n * Trim whitespace from content\r\n */\r\n static trim(): (content: string) => string {\r\n return (content: string) => content.trim();\r\n }\r\n\r\n /**\r\n * Convert to lowercase\r\n */\r\n static toLowerCase(): (content: string) => string {\r\n return (content: string) => content.toLowerCase();\r\n }\r\n\r\n /**\r\n * Convert to uppercase\r\n */\r\n static toUpperCase(): (content: string) => string {\r\n return (content: string) => content.toUpperCase();\r\n }\r\n\r\n /**\r\n * Capitalize first letter\r\n */\r\n static capitalize(): (content: string) => string {\r\n return (content: string) => {\r\n if (content.length === 0) return content;\r\n return content.charAt(0).toUpperCase() + content.slice(1).toLowerCase();\r\n };\r\n }\r\n\r\n /**\r\n * Replace patterns\r\n */\r\n static replace(searchValue: string | RegExp, replaceValue: string): (content: string) => string {\r\n return (content: string) => content.replace(searchValue, replaceValue);\r\n }\r\n\r\n /**\r\n * Remove HTML tags\r\n */\r\n static stripHtml(): (content: string) => string {\r\n return (content: string) => content.replace(/<[^>]*>/g, '');\r\n }\r\n\r\n /**\r\n * Normalize whitespace (collapse multiple spaces/newlines)\r\n */\r\n static normalizeWhitespace(): (content: string) => string {\r\n return (content: string) => content.replace(/\\s+/g, ' ').trim();\r\n }\r\n\r\n /**\r\n * Parse as number\r\n */\r\n static toNumber(\r\n options: { integer?: boolean; defaultValue?: number } = {}\r\n ): (content: string) => string {\r\n return (content: string) => {\r\n const num = parseFloat(content.trim());\r\n\r\n if (isNaN(num)) {\r\n return options.defaultValue !== undefined ? options.defaultValue.toString() : content;\r\n }\r\n\r\n return options.integer ? Math.round(num).toString() : num.toString();\r\n };\r\n }\r\n\r\n /**\r\n * Parse as boolean\r\n */\r\n static toBoolean(\r\n options: { trueValues?: string[]; falseValues?: string[] } = {}\r\n ): (content: string) => string {\r\n const trueValues = options.trueValues || ['true', '1', 'yes', 'on'];\r\n const falseValues = options.falseValues || ['false', '0', 'no', 'off'];\r\n\r\n return (content: string) => {\r\n const normalized = content.toLowerCase().trim();\r\n\r\n if (trueValues.includes(normalized)) {\r\n return 'true';\r\n }\r\n\r\n if (falseValues.includes(normalized)) {\r\n return 'false';\r\n }\r\n\r\n return content;\r\n };\r\n }\r\n\r\n /**\r\n * Apply multiple transformers in sequence\r\n */\r\n static chain(...transformers: Array<(content: string) => string>): (content: string) => string {\r\n return (content: string) => {\r\n return transformers.reduce((result, transformer) => transformer(result), content);\r\n };\r\n }\r\n\r\n /**\r\n * Custom transformer with error handling\r\n */\r\n static custom(\r\n fn: (content: string) => string,\r\n errorMessage?: string\r\n ): (content: string) => string {\r\n return (content: string) => {\r\n try {\r\n return fn(content);\r\n } catch (error) {\r\n throw new ParserError(\r\n errorMessage ||\r\n `Content transformation failed: ${\r\n error instanceof Error ? error.message : String(error)\r\n }`,\r\n 'TRANSFORMATION_FAILED' as any\r\n );\r\n }\r\n };\r\n }\r\n}\r\n\r\n/**\r\n * Attribute transformers\r\n */\r\nexport class AttributeTransformers {\r\n /**\r\n * Convert attribute types\r\n */\r\n static convertTypes(\r\n typeMap: Record<string, 'string' | 'number' | 'boolean'>\r\n ): (attributes?: Record<string, unknown>) => Record<string, unknown> {\r\n return attributes => {\r\n if (!attributes) return {};\r\n\r\n const result: Record<string, unknown> = { ...attributes };\r\n\r\n for (const [attr, targetType] of Object.entries(typeMap)) {\r\n if (attr in result) {\r\n const value = result[attr];\r\n\r\n switch (targetType) {\r\n case 'number':\r\n result[attr] = typeof value === 'string' ? parseFloat(value) : Number(value);\r\n break;\r\n case 'boolean':\r\n result[attr] =\r\n typeof value === 'string'\r\n ? ['true', '1', 'yes', 'on'].includes(value.toLowerCase())\r\n : Boolean(value);\r\n break;\r\n case 'string':\r\n result[attr] = String(value);\r\n break;\r\n }\r\n }\r\n }\r\n\r\n return result;\r\n };\r\n }\r\n\r\n /**\r\n * Rename attributes\r\n */\r\n static rename(\r\n mapping: Recor