UNPKG

llm-stream-parser

Version:

A TypeScript library for parsing and processing structured data from LLM streaming responses with custom tag definitions and event-driven architecture

994 lines (971 loc) 29.2 kB
import { EventEmitter as EventEmitter$1 } from 'events';

/* ------------------------------------------------------------------ *
 * Base types for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Minimal shape every custom tag definition has to extend.
 */
interface BaseTag {
  readonly tagName: string;
  content: string;
  attributes?: Record<string, unknown>;
}

/**
 * Tag shape that additionally carries nested-structure information.
 * Remains backward compatible with {@link BaseTag}.
 */
interface NestedTag extends BaseTag {
  children?: Array<NestedTag>;
  parent: NestedTag | undefined;
  path: string | undefined;
  depth?: number;
  isSelfClosing?: boolean;
}

/**
 * Information the parser reports about a matched tag.
 */
interface TagMatch {
  readonly tagName: string;
  readonly content: string;
  readonly attributes: Record<string, unknown> | undefined;
  readonly startIndex: number;
  readonly endIndex: number;
  readonly fullMatch: string;
  readonly type?: 'opening' | 'closing' | 'self-closing' | 'complete';
  readonly depth?: number;
  readonly path?: string;
}

/**
 * Outcome of validating tag content or attributes:
 * either the literal `true` or a string.
 */
type ValidationResult = true | string;

/**
 * Enumeration of parser states.
 */
declare enum ParserState {
  IDLE = "IDLE",
  PARSING = "PARSING",
  ERROR = "ERROR",
  COMPLETED = "COMPLETED"
}

/**
 * Statistics describing parsing operations.
 */
interface ParserStats {
  readonly totalTagsParsed: number;
  readonly totalBytesProcessed: number;
  readonly errorCount: number;
  readonly bufferSize: number;
  readonly state: ParserState;
  readonly registeredTagsCount: number;
  readonly maxDepthReached?: number;
  readonly totalNestedTags?: number;
  readonly selfClosingTags?: number;
}

/**
 * Result object of a parse operation.
 */
interface ParsedResult<T = any> {
  success: boolean;
  data?: T;
  errors?: any[];
  warnings?: string[];
  stats: ParserStats;
}

/* ------------------------------------------------------------------ *
 * Error types for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Codes identifying the different kinds of parsing errors.
 */
declare enum ParserErrorCode {
  INVALID_TAG_FORMAT = "INVALID_TAG_FORMAT",
  UNKNOWN_TAG = "UNKNOWN_TAG",
  CONTENT_VALIDATION_FAILED = "CONTENT_VALIDATION_FAILED",
  ATTRIBUTE_VALIDATION_FAILED = "ATTRIBUTE_VALIDATION_FAILED",
  BUFFER_OVERFLOW = "BUFFER_OVERFLOW",
  MALFORMED_ATTRIBUTES = "MALFORMED_ATTRIBUTES",
  UNCLOSED_TAG = "UNCLOSED_TAG",
  TRANSFORMATION_FAILED = "TRANSFORMATION_FAILED",
  INVALID_NESTING = "INVALID_NESTING",
  MISMATCHED_CLOSING_TAG = "MISMATCHED_CLOSING_TAG",
  INVALID_SELF_CLOSING = "INVALID_SELF_CLOSING",
  MAX_DEPTH_EXCEEDED = "MAX_DEPTH_EXCEEDED",
  INVALID_CHILDREN = "INVALID_CHILDREN",
  SCHEMA_VIOLATION = "SCHEMA_VIOLATION"
}

/**
 * Custom error class used for parsing errors.
 */
declare class ParserError extends Error {
  readonly code: ParserErrorCode;
  readonly context?: unknown;
  readonly path?: string | undefined;
  readonly depth?: number | undefined;

  constructor(
    message: string,
    code: ParserErrorCode,
    context?: unknown,
    path?: string | undefined,
    depth?: number | undefined
  );

  /** Builds an error from a validation failure. */
  static fromValidation(
    tagName: string,
    validationMessage: string,
    type?: 'content' | 'attributes' | 'children'
  ): ParserError;

  /** Builds an error from a transformation failure. */
  static fromTransformation(tagName: string, error: Error): ParserError;

  /** Builds an error for an unknown tag. */
  static fromUnknownTag(tagName: string): ParserError;

  /** Builds an error for a buffer overflow. */
  static fromBufferOverflow(maxSize: number): ParserError;

  /** Builds an error for an exceeded maximum depth. */
  static fromMaxDepth(maxDepth: number, path?: string): ParserError;

  /** Builds an error for a mismatched closing tag. */
  static fromMismatchedClosing(expected: string, actual: string, path?: string): ParserError;
}

/* ------------------------------------------------------------------ *
 * Configuration types for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Options accepted by the parser.
 */
interface ParserConfig {
  /** Treat tag names as case sensitive (default: false). */
  caseSensitive?: boolean;
  /** Trim whitespace from content (default: true). */
  trimWhitespace?: boolean;
  /** Maximum buffer size in bytes (default: 1MB). */
  maxBufferSize?: number;
  /** Preserve the order of attributes (default: false). */
  preserveAttributeOrder?: boolean;
  /** Custom handler invoked for parsing errors. */
  errorHandler?: ((error: ParserError) => void) | undefined;
  /** Maximum nesting depth (default: 50). */
  maxDepth?: number;
  /** Preserve whitespace in nested content (default: false). */
  preserveWhitespace?: boolean;
  /** Auto-close unclosed tags at EOF (default: true). */
  autoCloseUnclosed?: boolean;
  /** Enable nested parsing mode (default: false, for backward compatibility). */
  enableNested?: boolean;
}

/**
 * Configuration with every default applied, so no option is optional.
 */
interface RequiredParserConfig {
  caseSensitive: boolean;
  trimWhitespace: boolean;
  maxBufferSize: number;
  preserveAttributeOrder: boolean;
  errorHandler: ((error: ParserError) => void) | undefined;
  maxDepth: number;
  preserveWhitespace: boolean;
  autoCloseUnclosed: boolean;
  enableNested: boolean;
}

/**
 * The default configuration values.
 */
declare const DEFAULT_CONFIG: RequiredParserConfig;

/**
 * Merges a user-supplied config with the defaults.
 */
declare function mergeConfig(config?: ParserConfig): RequiredParserConfig;

/**
 * Validates a configuration object.
 */
declare function validateConfig(config: ParserConfig): string[];

/* ------------------------------------------------------------------ *
 * Event types for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Parser events mapped to their type-safe handler signatures.
 */
interface ParserEvents<T extends BaseTag = BaseTag> {
  /** Fired when parsing of a tag begins. */
  tag_started: (tagName: T['tagName'], attributes?: T['attributes']) => void;
  /** Fired when a tag's content is updated while streaming. */
  tag_content_update: (tagName: T['tagName'], partialContent: string) => void;
  /** Fired when a tag has been parsed completely. */
  tag_completed: (tag: T) => void;
  /** Fired when a parsing error occurs. */
  parse_error: (error: ParserError, context: TagMatch | string) => void;
  /** Fired when parsing of the current buffer is complete. */
  parsing_complete: (parsedTags: T[]) => void;
  /** Fired when the buffer is cleared. */
  buffer_cleared: () => void;
  /** Fired when parser statistics are updated. */
  stats_updated: (stats: ParserStats) => void;
  /** Fired when the parser is reset. */
  parser_reset: () => void;
  /** Fired when parsing is finalized. */
  parsing_finalized: (stats: ParserStats) => void;
  /** Fired when a tag is opened (nested mode). */
  tag_opened: (tag: Partial<T>, depth: number, path: string) => void;
  /** Fired when a tag is closed (nested mode). */
  tag_closed: (tag: T, depth: number, path: string) => void;
  /** Fired when a subtree is completed (nested mode). */
  subtree_completed: (rootTag: T, depth: number) => void;
  /** Fired when the entire document is parsed (nested mode). */
  document_completed: (rootTags: T[]) => void;
}

/**
 * Handler function type for a given event key.
 */
type EventHandler<
  T extends BaseTag = BaseTag,
  K extends keyof ParserEvents<T> = keyof ParserEvents<T>
> = ParserEvents<T>[K];

/**
 * Typed event emitter interface.
 */
interface EventEmitter<T extends BaseTag = BaseTag> {
  on<K extends keyof ParserEvents<T>>(event: K, listener: ParserEvents<T>[K]): this;
  off<K extends keyof ParserEvents<T>>(event: K, listener: ParserEvents<T>[K]): this;
  once<K extends keyof ParserEvents<T>>(event: K, listener: ParserEvents<T>[K]): this;
  emit<K extends keyof ParserEvents<T>>(event: K, ...args: Parameters<ParserEvents<T>[K]>): boolean;
}

/**
 * Options for an event listener.
 */
interface ListenerOptions {
  /** Remove the listener after the first emission. */
  once?: boolean;
  /** Ordering priority for the listener (higher = called first). */
  priority?: number;
  /** Context object for the listener. */
  context?: any;
}

/**
 * Handle representing an event subscription.
 */
interface EventSubscription {
  /** Removes this event listener. */
  unsubscribe(): void;
  /** Reports whether the subscription is still active. */
  isActive(): boolean;
}

/**
 * Batch event payload for bulk processing.
 */
interface BatchEventData<T extends BaseTag = BaseTag> {
  type: 'batch_completed';
  tags: T[];
  totalProcessingTime: number;
  averageTagSize: number;
}

/**
 * Progress event payload for long-running operations.
 */
interface ProgressEventData {
  type: 'parsing_progress';
  processed: number;
  total: number;
  percentage: number;
  estimatedTimeRemaining?: number;
}

/**
 * Performance-metrics event payload.
 */
interface PerformanceEventData {
  type: 'performance_metrics';
  memoryUsage: number;
  processingSpeed: number;
  bufferUtilization: number;
  errorRate: number;
}

/**
 * Debug event payload for development.
 */
interface DebugEventData {
  type: 'debug_info';
  message: string;
  data?: any;
  timestamp: number;
  level: 'trace' | 'debug' | 'info' | 'warn';
}

/**
 * Extended event map for advanced use cases.
 */
interface ExtendedParserEvents<T extends BaseTag = BaseTag> extends ParserEvents<T> {
  /** Fired for batch operations. */
  batch_completed: (data: BatchEventData<T>) => void;
  /** Fired for progress tracking. */
  parsing_progress: (data: ProgressEventData) => void;
  /** Fired for performance monitoring. */
  performance_metrics: (data: PerformanceEventData) => void;
  /** Fired for debug information. */
  debug_info: (data: DebugEventData) => void;
}

/* ------------------------------------------------------------------ *
 * Schema types for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Schema definition used for automatic tag generation.
 */
interface SchemaDefinition {
  [tagName: string]: SchemaProperty;
}

/**
 * The forms a schema property may take.
 */
type SchemaProperty =
  | 'string'
  | 'number'
  | 'boolean'
  | SchemaDefinition
  | SchemaProperty[]
  | {
      type: 'string' | 'number' | 'boolean' | 'object' | 'array';
      required?: boolean;
      default?: unknown;
      validation?: (value: unknown) => ValidationResult;
      transform?: (value: unknown) => unknown;
      children?: SchemaDefinition;
      items?: SchemaProperty;
    };

/**
 * Definition used to register a custom tag.
 */
interface TagDefinition<T extends BaseTag = BaseTag> {
  readonly tagName: T['tagName'];
  /** Validates the tag content. */
  validateContent?: (content: string) => ValidationResult;
  /** Validates the tag attributes. */
  validateAttributes?: (attributes?: T['attributes']) => ValidationResult;
  /** Transforms content before events are emitted. */
  transformContent?: (content: string) => string;
  /** Transforms attributes before events are emitted. */
  transformAttributes?: (attributes?: Record<string, unknown>) => T['attributes'];
  /** Validates the children structure (nested mode only). */
  validateChildren?: (children: NestedTag[]) => ValidationResult;
  /** Whether the tag may have children (default: inferred from usage). */
  allowChildren?: boolean;
  /** Whether the tag may be self-closing (default: true). */
  allowSelfClosing?: boolean;
  /** Content used when the tag is empty. */
  defaultContent?: string;
  /** Attributes used when none are provided. */
  defaultAttributes?: Record<string, unknown>;
  /** Invoked when parsing of the tag starts. */
  onStart?: (tagName: T['tagName'], attributes?: T['attributes']) => void;
  /** Invoked while the tag content is being updated (streaming). */
  onContentUpdate?: (partialContent: string, tag: Partial<T>) => void;
  /** Invoked when parsing of the tag is completed. */
  onComplete?: (tag: T) => void;
  /** Invoked when a child tag is added (nested mode only). */
  onChildAdded?: (child: NestedTag, parent: T) => void;
}

/**
 * Options controlling schema generation.
 */
interface SchemaGenerationOptions {
  /** Enable strict validation for all generated tags. */
  strictValidation?: boolean;
  /** Auto-transform content based on its type. */
  autoTransform?: boolean;
  /** Default behavior for self-closing tags. */
  defaultSelfClosing?: boolean;
  /** Global event handlers. */
  globalHandlers?: {
    onTagStart?: (tagName: string, attributes?: Record<string, unknown>) => void;
    onTagComplete?: (tag: any) => void;
    onContentUpdate?: (content: string, tag: any) => void;
  };
}

/**
 * Built-in schema templates.
 */
interface SchemaTemplates {
  /** Quiz/assessment schema. */
  quiz: () => SchemaDefinition;
  /** Documentation schema. */
  documentation: () => SchemaDefinition;
  /** Form schema. */
  form: () => SchemaDefinition;
  /** Article/blog schema. */
  article: () => SchemaDefinition;
  /** Code example schema. */
  code: () => SchemaDefinition;
}

/**
 * Context passed along during schema validation.
 */
interface SchemaValidationContext {
  path: string[];
  depth: number;
  parent?: SchemaProperty;
  root: SchemaDefinition;
}

/**
 * Result of validating a schema.
 */
interface SchemaValidationResult {
  valid: boolean;
  errors: SchemaValidationError[];
  warnings: string[];
}

/**
 * A single schema validation error.
 */
interface SchemaValidationError {
  path: string;
  property: string;
  message: string;
  expectedType?: string;
  actualType?: string;
  value?: unknown;
}

/* ------------------------------------------------------------------ *
 * Main Stream Parser - core parsing engine
 * Supports both flat and nested XML parsing modes
 * ------------------------------------------------------------------ */

/**
 * Main stream parser supporting both flat and nested XML parsing.
 */
declare class StreamParser<T extends BaseTag = BaseTag> extends EventEmitter$1 {
  private readonly config;
  private readonly bufferManager;
  private readonly tagMatcher;
  private readonly tagRegistry;
  private state;
  private stats;
  private tagStack;
  private currentDepth;
  private currentPath;

  constructor(config?: ParserConfig);

  /** Registers a tag definition. */
  defineTag(definition: TagDefinition<T>): this;

  /** Removes a tag definition. */
  removeTag(tagName: string): boolean;

  /** Reports whether a tag is registered. */
  hasTag(tagName: string): boolean;

  /** Returns all registered tag names. */
  getRegisteredTags(): ReadonlyArray<string>;

  /** Parses one chunk of streaming data. */
  parse(chunk: string): void;

  /** Processes the buffer in flat parsing mode. */
  private processBufferFlat;

  /** Processes the buffer in nested parsing mode. */
  private processBufferNested;

  /** Processes a single tag match in flat mode. */
  private processTagFlat;

  /** Processes a tag in nested mode. */
  private processTagNested;

  /** Handles an opening tag in nested mode. */
  private handleOpeningTag;

  /** Handles a closing tag in nested mode. */
  private handleClosingTag;

  /** Auto-closes an unclosed tag. */
  private autoCloseTag;

  /** Completes a tag, with validation and transformation. */
  private completeTag;

  /** Handles a self-closing tag in nested mode. */
  private handleSelfClosingTag;

  /** Gets the current parent tag from the stack. */
  private getCurrentParent;

  /** Builds the path string for the current tag. */
  private buildPath;

  /** Handles text content between tags. */
  private handleTextContent;

  /** Removes processed content from the buffer. */
  private removeProcessedContent;

  /** Initializes parser statistics. */
  private initializeStats;

  /** Updates parser statistics. */
  private updateStats;

  /** Emits an error event with context. */
  private emitError;

  /** Returns the current parser state. */
  getState(): ParserState;

  /** Returns the parser statistics. */
  getStats(): Readonly<ParserStats>;

  /** Returns the current parsing depth (nested mode only). */
  getCurrentDepth(): number;

  /** Returns the current parsing path (nested mode only). */
  getCurrentPath(): string;

  /** Returns the buffer size. */
  getBufferSize(): number;

  /** Clears the buffer and resets the parser state. */
  reset(): void;

  /** Finalizes parsing and auto-closes any remaining open tags. */
  finalize(): void;
}

/* ------------------------------------------------------------------ *
 * LLM Stream Parser - main user-facing API
 * High-level interface with convenience methods
 * ------------------------------------------------------------------ */

/**
 * Main LLM Stream Parser: the simplified user-facing API.
 */
declare class LLMStreamParser<T extends BaseTag = BaseTag> {
  private readonly parser;

  constructor(config?: ParserConfig);

  /** Parses one chunk of streaming data. */
  parse(chunk: string): void;

  /** Registers a new tag definition. */
  defineTag(definition: TagDefinition<T>): this;

  /** Registers several tag definitions. */
  defineTags(definitions: Array<TagDefinition<T>>): this;

  /** Removes a tag definition. */
  removeTag(tagName: string): boolean;

  /** Reports whether a tag is registered. */
  hasTag(tagName: string): boolean;

  /** Returns all registered tag names. */
  getRegisteredTags(): ReadonlyArray<string>;

  /** Returns the current parser state. */
  getState(): ParserState;

  /** Returns the parser statistics. */
  getStats(): Readonly<ParserStats>;

  /** Returns the current parsing depth (nested mode only). */
  getCurrentDepth(): number;

  /** Returns the current parsing path (nested mode only). */
  getCurrentPath(): string;

  /** Returns the buffer size. */
  getBufferSize(): number;

  /** Resets the parser state and clears the buffer. */
  reset(): void;

  /** Finalizes parsing and auto-closes remaining tags. */
  finalize(): void;

  /* Event emitter methods (delegating to the internal parser). */
  on<K extends keyof ParserEvents<T>>(event: K, listener: ParserEvents<T>[K]): this;
  off<K extends keyof ParserEvents<T>>(event: K, listener: ParserEvents<T>[K]): this;
  once<K extends keyof ParserEvents<T>>(event: K, listener: ParserEvents<T>[K]): this;
  emit<K extends keyof ParserEvents<T>>(event: K, ...args: Parameters<ParserEvents<T>[K]>): boolean;

  /** Creates a new parser with the same configuration. */
  clone(): LLMStreamParser<T>;

  /** Convenience method: parses a complete string and finalizes. */
  parseComplete(content: string): void;

  /** Convenience method: adds a simple tag definition. */
  addSimpleTag(
    tagName: string,
    options?: {
      allowChildren?: boolean;
      allowSelfClosing?: boolean;
      defaultContent?: string;
      onComplete?: (tag: T) => void;
    }
  ): this;

  /** Convenience method: adds multiple simple tags. */
  addSimpleTags(tagNames: string[]): this;

  /** Returns a summary of the parser status. */
  getStatus(): {
    state: ParserState;
    registeredTags: number;
    bufferSize: number;
    totalParsed: number;
    errorCount: number;
  };
}

/**
 * Factory function that creates a simple parser.
 */
declare function createParser<T extends BaseTag = BaseTag>(config?: ParserConfig): LLMStreamParser<T>;

/**
 * Factory function that creates a parser with predefined tags.
 */
declare function createParserWithTags<T extends BaseTag = BaseTag>(
  tagNames: string[],
  config?: ParserConfig
): LLMStreamParser<T>;

/* ------------------------------------------------------------------ *
 * Buffer management for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Buffer manager that handles streaming content.
 */
declare class BufferManager {
  private buffer;
  private readonly maxSize;
  private totalBytesProcessed;

  constructor(maxSize?: number);

  /** Appends content to the buffer. */
  append(chunk: string): void;

  /** Returns the current buffer content. */
  getContent(): string;

  /** Returns the buffer size. */
  getSize(): number;

  /** Returns the total bytes processed. */
  getTotalProcessed(): number;

  /** Clears the buffer. */
  clear(): void;

  /** Removes content from the start of the buffer. */
  consume(length: number): string;

  /** Removes content from the buffer by index range. */
  removeRange(startIndex: number, endIndex: number): void;

  /** Returns a slice of the buffer without modifying it. */
  slice(start?: number, end?: number): string;

  /** Searches for a pattern in the buffer. */
  indexOf(searchValue: string, fromIndex?: number): number;

  /** Reports whether the buffer is empty. */
  isEmpty(): boolean;

  /** Reports whether the buffer has content. */
  hasContent(): boolean;

  /** Returns the buffer utilization percentage. */
  getUtilization(): number;

  /** Returns the remaining capacity. */
  getRemainingCapacity(): number;

  /** Reports whether the buffer is near full (80% capacity). */
  isNearFull(): boolean;

  /** Returns buffer statistics. */
  getStats(): {
    size: number;
    maxSize: number;
    utilization: number;
    totalProcessed: number;
    remainingCapacity: number;
  };
}

/* ------------------------------------------------------------------ *
 * Tag matching and pattern management for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Regular expression patterns used for tag matching.
 */
declare class TagPatterns {
  static readonly SELF_CLOSING: RegExp;
  static readonly OPENING: RegExp;
  static readonly CLOSING: RegExp;
  static readonly COMPLETE: RegExp;
  static readonly ATTRIBUTES: RegExp;

  /** Resets all regex patterns to start from the beginning. */
  static resetAll(): void;
}

/**
 * Tag matcher that finds and parses XML-like tags.
 */
declare class TagMatcher {
  private readonly caseSensitive;

  constructor(caseSensitive?: boolean);

  /** Finds the next tag in the buffer, starting from the given index. */
  findNextTag(buffer: string, startIndex?: number): TagMatch | null;

  /** Finds all complete tags in the buffer (flat mode). */
  findCompleteTags(buffer: string): TagMatch[];

  /** Parses attributes from an attribute string. */
  parseAttributes(attributesStr: string): Record<string, unknown> | undefined;

  /** Creates a TagMatch object from a regex match. */
  private createTagMatch;

  /** Parses an individual attribute value with type coercion. */
  private parseAttributeValue;

  /** Normalizes a tag name according to case sensitivity. */
  private normalizeTagName;

  /** Reports whether a string contains any XML-like tags. */
  containsTags(content: string): boolean;

  /** Extracts the text content between tags. */
  extractTextContent(buffer: string, startIndex: number, endIndex: number): string;
}

/* ------------------------------------------------------------------ *
 * Validation utilities for LLM Stream Parser
 * ------------------------------------------------------------------ */

/**
 * Content validators for common use cases.
 */
declare class ContentValidators {
  /** Validates a minimum length. */
  static minLength(min: number): (content: string) => ValidationResult;

  /** Validates a maximum length. */
  static maxLength(max: number): (content: string) => ValidationResult;

  /** Validates a pattern match. */
  static pattern(regex: RegExp, message?: string): (content: string) => ValidationResult;

  /** Validates against enumerated values. */
  static enum(allowedValues: string[], caseSensitive?: boolean): (content: string) => ValidationResult;

  /** Validates numeric content. */
  static numeric(options?: {
    min?: number;
    max?: number;
    integer?: boolean;
  }): (content: string) => ValidationResult;

  /** Validates URL format. */
  static url(allowedProtocols?: string[]): (content: string) => ValidationResult;

  /** Validates email format. */
  static email(): (content: string) => ValidationResult;

  /** Validates that content is present (non-empty). */
  static required(): (content: string) => ValidationResult;

  /** Combines multiple validators. */
  static combine(
    ...validators: ((content: string) => ValidationResult)[]
  ): (content: string) => ValidationResult;
}

/**
 * Attribute validators.
 */
declare class AttributeValidators {
  /** Validates required attributes. */
  static required(requiredAttrs: string[]): (attributes?: Record<string, unknown>) => ValidationResult;

  /** Validates allowed attributes. */
  static allowed(allowedAttrs: string[]): (attributes?: Record<string, unknown>) => ValidationResult;

  /** Validates attribute types. */
  static types(
    typeMap: Record<string, 'string' | 'number' | 'boolean'>
  ): (attributes?: Record<string, unknown>) => ValidationResult;
}

/**
 * Tag validator that performs validation according to a tag definition.
 */
declare class TagValidator {
  /** Validates a tag according to its definition. */
  static validate<T extends BaseTag>(tag: T | NestedTag, definition: TagDefinition<T>): void;

  /** Validates tag structure for nested parsing. */
  static validateNested(tag: NestedTag, definition: TagDefinition): void;
}

/* ------------------------------------------------------------------ *
 * Content and attribute transformation utilities
 * ------------------------------------------------------------------ */

/**
 * Content transformers for common use cases.
 */
declare class ContentTransformers {
  /** Trims whitespace from content. */
  static trim(): (content: string) => string;

  /** Converts to lowercase. */
  static toLowerCase(): (content: string) => string;

  /** Converts to uppercase. */
  static toUpperCase(): (content: string) => string;

  /** Capitalizes the first letter. */
  static capitalize(): (content: string) => string;

  /** Replaces patterns. */
  static replace(searchValue: string | RegExp, replaceValue: string): (content: string) => string;

  /** Removes HTML tags. */
  static stripHtml(): (content: string) => string;

  /** Normalizes whitespace (collapses multiple spaces/newlines). */
  static normalizeWhitespace(): (content: string) => string;

  /** Parses as a number. */
  static toNumber(options?: {
    integer?: boolean;
    defaultValue?: number;
  }): (content: string) => string;

  /** Parses as a boolean. */
  static toBoolean(options?: {
    trueValues?: string[];
    falseValues?: string[];
  }): (content: string) => string;

  /** Applies multiple transformers in sequence. */
  static chain(...transformers: ((content: string) => string)[]): (content: string) => string;

  /** Custom transformer with error handling. */
  static custom(fn: (content: string) => string, errorMessage?: string): (content: string) => string;
}

/**
 * Attribute transformers.
 */
declare class AttributeTransformers {
  /** Converts attribute types. */
  static convertTypes(
    typeMap: Record<string, 'string' | 'number' | 'boolean'>
  ): (attributes?: Record<string, unknown>) => Record<string, unknown>;

  /** Renames attributes. */
  static rename(
    mapping: Record<string, string>
  ): (attributes?: Record<string, unknown>) => Record<string, unknown>;

  /** Filters attributes (keeps only the specified ones). */
  static filter(
    allowedAttributes: string[]
  ): (attributes?: Record<string, unknown>) => Record<string, unknown>;

  /** Adds default attributes. */
  static addDefaults(
    defaults: Record<string, unknown>
  ): (attributes?: Record<string, unknown>) => Record<string, unknown>;
}

/**
 * Tag transformer that applies transformations according to a definition.
 */
declare class TagTransformer {
  /** Applies transformations to a tag according to its definition. */
  static transform<T extends BaseTag>(tag: T | NestedTag, definition: TagDefinition<T>): void;

  /** Applies default values to a tag. */
  static applyDefaults<T extends BaseTag>(tag: T | NestedTag, definition: TagDefinition<T>): void;

  /** Cleans tag content and attributes. */
  static clean<T extends BaseTag>(
    tag: T | NestedTag,
    options?: {
      trimContent?: boolean;
      normalizeWhitespace?: boolean;
      removeEmptyAttributes?: boolean;
    }
  ): void;
}

/* ------------------------------------------------------------------ *
 * LLM Stream Parser - main exports
 * ------------------------------------------------------------------ */

declare const version = "1.0.1";

export {
  AttributeTransformers,
  AttributeValidators,
  type BaseTag,
  type BatchEventData,
  BufferManager,
  ContentTransformers,
  ContentValidators,
  DEFAULT_CONFIG,
  type DebugEventData,
  type EventEmitter,
  type EventHandler,
  type EventSubscription,
  type ExtendedParserEvents,
  LLMStreamParser,
  type ListenerOptions,
  type NestedTag,
  type ParsedResult,
  type ParserConfig,
  ParserError,
  ParserErrorCode,
  type ParserEvents,
  ParserState,
  type ParserStats,
  type PerformanceEventData,
  type ProgressEventData,
  type RequiredParserConfig,
  type SchemaDefinition,
  type SchemaGenerationOptions,
  type SchemaProperty,
  type SchemaTemplates,
  type SchemaValidationContext,
  type SchemaValidationError,
  type SchemaValidationResult,
  StreamParser,
  type TagDefinition,
  type TagMatch,
  TagMatcher,
  TagPatterns,
  TagTransformer,
  TagValidator,
  type ValidationResult,
  createParser,
  createParserWithTags,
  mergeConfig,
  validateConfig,
  version
};