UNPKG

llm-stream-parser

Version:

A TypeScript library for parsing and processing structured data from LLM streaming responses with custom tag definitions and event-driven architecture

1,501 lines (1,489 loc) 42.6 kB
/*! * llm-stream-parser * A TypeScript library for parsing and processing structured data from LLM streaming responses * * @license MIT * @author Baran Karatas */ "use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // src/index.ts var index_exports = {}; __export(index_exports, { AttributeTransformers: () => AttributeTransformers, AttributeValidators: () => AttributeValidators, BufferManager: () => BufferManager, ContentTransformers: () => ContentTransformers, ContentValidators: () => ContentValidators, DEFAULT_CONFIG: () => DEFAULT_CONFIG, LLMStreamParser: () => LLMStreamParser, ParserError: () => ParserError, ParserErrorCode: () => ParserErrorCode, ParserState: () => ParserState, StreamParser: () => StreamParser, TagMatcher: () => TagMatcher, TagPatterns: () => TagPatterns, TagTransformer: () => TagTransformer, TagValidator: () => TagValidator, createParser: () => createParser, createParserWithTags: () => createParserWithTags, mergeConfig: () => mergeConfig, validateConfig: () => validateConfig, version: () => version }); module.exports = __toCommonJS(index_exports); // src/core/stream-parser.ts var import_events = require("events"); // src/types/base.ts var ParserState = /* @__PURE__ */ ((ParserState2) => { ParserState2["IDLE"] = "IDLE"; ParserState2["PARSING"] = "PARSING"; ParserState2["ERROR"] = "ERROR"; ParserState2["COMPLETED"] = "COMPLETED"; return ParserState2; })(ParserState || {}); // src/types/config.ts var DEFAULT_CONFIG = { caseSensitive: false, trimWhitespace: true, maxBufferSize: 1024 * 1024, // 1MB preserveAttributeOrder: false, errorHandler: void 0, maxDepth: 50, preserveWhitespace: false, autoCloseUnclosed: true, enableNested: false }; function mergeConfig(config = {}) { return { caseSensitive: config.caseSensitive ?? DEFAULT_CONFIG.caseSensitive, trimWhitespace: config.trimWhitespace ?? DEFAULT_CONFIG.trimWhitespace, maxBufferSize: config.maxBufferSize ?? DEFAULT_CONFIG.maxBufferSize, preserveAttributeOrder: config.preserveAttributeOrder ?? DEFAULT_CONFIG.preserveAttributeOrder, errorHandler: config.errorHandler ?? DEFAULT_CONFIG.errorHandler, maxDepth: config.maxDepth ?? DEFAULT_CONFIG.maxDepth, preserveWhitespace: config.preserveWhitespace ?? DEFAULT_CONFIG.preserveWhitespace, autoCloseUnclosed: config.autoCloseUnclosed ?? DEFAULT_CONFIG.autoCloseUnclosed, enableNested: config.enableNested ?? DEFAULT_CONFIG.enableNested }; } function validateConfig(config) { const errors = []; if (config.maxBufferSize !== void 0 && config.maxBufferSize <= 0) { errors.push("maxBufferSize must be greater than 0"); } if (config.maxDepth !== void 0 && config.maxDepth <= 0) { errors.push("maxDepth must be greater than 0"); } if (config.maxBufferSize !== void 0 && config.maxBufferSize > 100 * 1024 * 1024) { errors.push("maxBufferSize should not exceed 100MB for performance reasons"); } if (config.maxDepth !== void 0 && config.maxDepth > 1e3) { errors.push("maxDepth should not exceed 1000 for performance reasons"); } return errors; } // src/types/errors.ts var ParserErrorCode = /* @__PURE__ */ ((ParserErrorCode2) => { ParserErrorCode2["INVALID_TAG_FORMAT"] = "INVALID_TAG_FORMAT"; ParserErrorCode2["UNKNOWN_TAG"] = "UNKNOWN_TAG"; ParserErrorCode2["CONTENT_VALIDATION_FAILED"] = "CONTENT_VALIDATION_FAILED"; ParserErrorCode2["ATTRIBUTE_VALIDATION_FAILED"] = "ATTRIBUTE_VALIDATION_FAILED"; ParserErrorCode2["BUFFER_OVERFLOW"] = "BUFFER_OVERFLOW"; ParserErrorCode2["MALFORMED_ATTRIBUTES"] = "MALFORMED_ATTRIBUTES"; ParserErrorCode2["UNCLOSED_TAG"] = "UNCLOSED_TAG"; ParserErrorCode2["TRANSFORMATION_FAILED"] = "TRANSFORMATION_FAILED"; ParserErrorCode2["INVALID_NESTING"] = "INVALID_NESTING"; ParserErrorCode2["MISMATCHED_CLOSING_TAG"] = "MISMATCHED_CLOSING_TAG"; ParserErrorCode2["INVALID_SELF_CLOSING"] = "INVALID_SELF_CLOSING"; ParserErrorCode2["MAX_DEPTH_EXCEEDED"] = "MAX_DEPTH_EXCEEDED"; ParserErrorCode2["INVALID_CHILDREN"] = "INVALID_CHILDREN"; ParserErrorCode2["SCHEMA_VIOLATION"] = "SCHEMA_VIOLATION"; return ParserErrorCode2; })(ParserErrorCode || {}); var ParserError = class _ParserError extends Error { constructor(message, code, context, path, depth) { super(message); this.code = code; this.context = context; this.path = path; this.depth = depth; this.name = "ParserError"; } /** * Create error from validation failure */ static fromValidation(tagName, validationMessage, type = "content") { const codeMap = { content: "CONTENT_VALIDATION_FAILED" /* CONTENT_VALIDATION_FAILED */, attributes: "ATTRIBUTE_VALIDATION_FAILED" /* ATTRIBUTE_VALIDATION_FAILED */, children: "INVALID_CHILDREN" /* INVALID_CHILDREN */ }; return new _ParserError( `${type} validation failed for tag '${tagName}': ${validationMessage}`, codeMap[type] ); } /** * Create error from transformation failure */ static fromTransformation(tagName, error) { return new _ParserError( `Transformation failed for tag '${tagName}': ${error.message}`, "TRANSFORMATION_FAILED" /* TRANSFORMATION_FAILED */ ); } /** * Create error from unknown tag */ static fromUnknownTag(tagName) { return new _ParserError(`Unknown tag: ${tagName}`, "UNKNOWN_TAG" /* UNKNOWN_TAG */); } /** * Create error from buffer overflow */ static fromBufferOverflow(maxSize) { return new _ParserError( `Buffer overflow: exceeds maximum size of ${maxSize} bytes`, "BUFFER_OVERFLOW" /* BUFFER_OVERFLOW */ ); } /** * Create error from max depth exceeded */ static fromMaxDepth(maxDepth, path) { return new _ParserError( `Maximum nesting depth of ${maxDepth} exceeded`, "MAX_DEPTH_EXCEEDED" /* MAX_DEPTH_EXCEEDED */, void 0, path, maxDepth ); } /** * Create error from mismatched closing tag */ static fromMismatchedClosing(expected, actual, path) { return new _ParserError( `Mismatched closing tag: expected ${expected}, got ${actual}`, "MISMATCHED_CLOSING_TAG" /* MISMATCHED_CLOSING_TAG */, { expected, actual }, path ); } }; // src/core/buffer-manager.ts var BufferManager = class { constructor(maxSize = 1024 * 1024) { this.buffer = ""; this.totalBytesProcessed = 0; this.maxSize = maxSize; } /** * Append content to buffer */ append(chunk) { if (typeof chunk !== "string") { throw new ParserError("Chunk must be a string", "INVALID_TAG_FORMAT" /* INVALID_TAG_FORMAT */); } if (this.buffer.length + chunk.length > this.maxSize) { throw ParserError.fromBufferOverflow(this.maxSize); } this.buffer += chunk; this.totalBytesProcessed += chunk.length; } /** * Get current buffer content */ getContent() { return this.buffer; } /** * Get buffer size */ getSize() { return this.buffer.length; } /** * Get total bytes processed */ getTotalProcessed() { return this.totalBytesProcessed; } /** * Clear the buffer */ clear() { this.buffer = ""; } /** * Remove content from start of buffer */ consume(length) { if (length <= 0) { return ""; } if (length >= this.buffer.length) { const content2 = this.buffer; this.buffer = ""; return content2; } const content = this.buffer.slice(0, length); this.buffer = this.buffer.slice(length); return content; } /** * Remove content from buffer by index range */ removeRange(startIndex, endIndex) { if (startIndex < 0 || endIndex < startIndex || startIndex >= this.buffer.length) { return; } this.buffer = this.buffer.slice(0, startIndex) + this.buffer.slice(endIndex); } /** * Get a slice of buffer without modifying it */ slice(start, end) { return this.buffer.slice(start, end); } /** * Search for pattern in buffer */ indexOf(searchValue, fromIndex) { return this.buffer.indexOf(searchValue, fromIndex); } /** * Check if buffer is empty */ isEmpty() { return this.buffer.length === 0; } /** * Check if buffer has content */ hasContent() { return this.buffer.length > 0; } /** * Get buffer utilization percentage */ getUtilization() { return this.buffer.length / this.maxSize * 100; } /** * Get remaining capacity */ getRemainingCapacity() { return this.maxSize - this.buffer.length; } /** * Check if buffer is near full (80% capacity) */ isNearFull() { return this.getUtilization() >= 80; } /** * Get buffer statistics */ getStats() { return { size: this.buffer.length, maxSize: this.maxSize, utilization: this.getUtilization(), totalProcessed: this.totalBytesProcessed, remainingCapacity: this.getRemainingCapacity() }; } }; // src/core/tag-matcher.ts var TagPatterns = class { /** * Reset all regex patterns to start from beginning */ static resetAll() { this.SELF_CLOSING.lastIndex = 0; this.OPENING.lastIndex = 0; this.CLOSING.lastIndex = 0; this.COMPLETE.lastIndex = 0; this.ATTRIBUTES.lastIndex = 0; } }; // Self-closing tags: <tag /> TagPatterns.SELF_CLOSING = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\s+[^>]*)?)\s*\/>/g; // Opening tags: <tag> TagPatterns.OPENING = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\s+[^>]*)?)\s*>/g; // Closing tags: </tag> TagPatterns.CLOSING = /<\/([a-zA-Z][a-zA-Z0-9_-]*)\s*>/g; // Complete flat tags: <tag>content</tag> TagPatterns.COMPLETE = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\s+[^>]*)?)\s*>(.*?)<\/\1\s*>/gs; // Attributes parsing TagPatterns.ATTRIBUTES = /(\w+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g; var TagMatcher = class { constructor(caseSensitive = false) { this.caseSensitive = caseSensitive; } /** * Find the next tag in the buffer starting from given index */ findNextTag(buffer, startIndex = 0) { const searchBuffer = buffer.slice(startIndex); let earliestMatch = null; let earliestIndex = Infinity; TagPatterns.resetAll(); const selfClosingMatch = TagPatterns.SELF_CLOSING.exec(searchBuffer); if (selfClosingMatch && selfClosingMatch.index < earliestIndex) { earliestIndex = selfClosingMatch.index; earliestMatch = this.createTagMatch(selfClosingMatch, startIndex, "self-closing"); } TagPatterns.OPENING.lastIndex = 0; const openingMatch = TagPatterns.OPENING.exec(searchBuffer); if (openingMatch && openingMatch.index < earliestIndex) { earliestIndex = openingMatch.index; earliestMatch = this.createTagMatch(openingMatch, startIndex, "opening"); } TagPatterns.CLOSING.lastIndex = 0; const closingMatch = TagPatterns.CLOSING.exec(searchBuffer); if (closingMatch && closingMatch.index < earliestIndex) { earliestIndex = closingMatch.index; earliestMatch = this.createTagMatch(closingMatch, startIndex, "closing"); } return earliestMatch; } /** * Find all complete tags in buffer (flat mode) */ findCompleteTags(buffer) { const matches = []; TagPatterns.COMPLETE.lastIndex = 0; let match; while ((match = TagPatterns.COMPLETE.exec(buffer)) !== null) { const [fullMatch, tagName, attributesStr, content] = match; if (!tagName || content === void 0) continue; matches.push({ tagName: this.normalizeTagName(tagName), content, attributes: this.parseAttributes(attributesStr || ""), startIndex: match.index, endIndex: match.index + fullMatch.length, fullMatch, type: "complete" }); } return matches; } /** * Parse attributes from attribute string */ parseAttributes(attributesStr) { if (!attributesStr.trim()) { return void 0; } const attributes = {}; TagPatterns.ATTRIBUTES.lastIndex = 0; let match; while ((match = TagPatterns.ATTRIBUTES.exec(attributesStr)) !== null) { const [, name, doubleQuotedValue, singleQuotedValue, unquotedValue] = match; if (!name) continue; const value = doubleQuotedValue ?? singleQuotedValue ?? unquotedValue ?? true; attributes[name] = this.parseAttributeValue(value); } return Object.keys(attributes).length > 0 ? attributes : void 0; } /** * Create TagMatch object from regex match */ createTagMatch(match, startIndex, type) { const [fullMatch, tagName, attributesStr] = match; return { tagName: this.normalizeTagName(tagName), content: "", attributes: type === "closing" ? void 0 : this.parseAttributes(attributesStr || ""), startIndex: startIndex + match.index, endIndex: startIndex + match.index + fullMatch.length, fullMatch, type }; } /** * Parse individual attribute value with type coercion */ parseAttributeValue(value) { if (typeof value === "boolean") { return value; } if (/^\d+$/.test(value)) { return parseInt(value, 10); } if (/^\d*\.\d+$/.test(value)) { return parseFloat(value); } if (value === "true") return true; if (value === "false") return false; return value; } /** * Normalize tag name according to case sensitivity */ normalizeTagName(tagName) { return this.caseSensitive ? tagName : tagName.toLowerCase(); } /** * Check if a string contains any XML-like tags */ containsTags(content) { return /<[a-zA-Z][a-zA-Z0-9_-]*/.test(content); } /** * Extract text content between tags */ extractTextContent(buffer, startIndex, endIndex) { return buffer.slice(startIndex, endIndex); } }; // src/core/validator.ts var ContentValidators = class _ContentValidators { /** * Validate minimum length */ static minLength(min) { return (content) => { return content.length >= min ? true : `Content must be at least ${min} characters`; }; } /** * Validate maximum length */ static maxLength(max) { return (content) => { return content.length <= max ? true : `Content must be no more than ${max} characters`; }; } /** * Validate pattern match */ static pattern(regex, message) { return (content) => { return regex.test(content) ? true : message || "Content does not match required pattern"; }; } /** * Validate enumerated values */ static enum(allowedValues, caseSensitive = false) { const values = caseSensitive ? allowedValues : allowedValues.map((v) => v.toLowerCase()); return (content) => { const testValue = caseSensitive ? content : content.toLowerCase(); return values.includes(testValue) ? true : `Value must be one of: ${allowedValues.join(", ")}`; }; } /** * Validate numeric content */ static numeric(options = {}) { return (content) => { const num = parseFloat(content.trim()); if (isNaN(num)) { return "Content must be a valid number"; } if (options.integer && !Number.isInteger(num)) { return "Content must be an integer"; } if (options.min !== void 0 && num < options.min) { return `Value must be at least ${options.min}`; } if (options.max !== void 0 && num > options.max) { return `Value must be no more than ${options.max}`; } return true; }; } /** * Validate URL format */ static url(allowedProtocols = ["http", "https"]) { return (content) => { try { const url = new URL(content.trim()); const protocol = url.protocol.slice(0, -1); if (!allowedProtocols.includes(protocol)) { return `URL must use one of these protocols: ${allowedProtocols.join(", ")}`; } return true; } catch { return "Invalid URL format"; } }; } /** * Validate email format */ static email() { const emailPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/; return _ContentValidators.pattern(emailPattern, "Invalid email format"); } /** * Validate required (non-empty) */ static required() { return (content) => { return content.trim().length > 0 ? true : "Content is required"; }; } /** * Combine multiple validators */ static combine(...validators) { return (content) => { for (const validator of validators) { const result = validator(content); if (result !== true) { return result; } } return true; }; } }; var AttributeValidators = class { /** * Validate required attributes */ static required(requiredAttrs) { return (attributes) => { if (!attributes) { return requiredAttrs.length > 0 ? `Missing required attributes: ${requiredAttrs.join(", ")}` : true; } const missing = requiredAttrs.filter((attr) => !(attr in attributes)); return missing.length === 0 ? true : `Missing required attributes: ${missing.join(", ")}`; }; } /** * Validate allowed attributes */ static allowed(allowedAttrs) { return (attributes) => { if (!attributes) return true; const invalid = Object.keys(attributes).filter((attr) => !allowedAttrs.includes(attr)); return invalid.length === 0 ? true : `Invalid attributes: ${invalid.join(", ")}`; }; } /** * Validate attribute types */ static types(typeMap) { return (attributes) => { if (!attributes) return true; for (const [attr, expectedType] of Object.entries(typeMap)) { if (attr in attributes) { const value = attributes[attr]; const actualType = typeof value; if (expectedType === "number" && typeof value === "string" && !isNaN(Number(value))) { continue; } if (actualType !== expectedType) { return `Attribute '${attr}' must be of type ${expectedType}, got ${actualType}`; } } } return true; }; } }; var TagValidator = class { /** * Validate a tag according to its definition */ static validate(tag, definition) { if (definition.validateContent && tag.content) { const result = definition.validateContent(tag.content); if (result !== true) { throw ParserError.fromValidation(tag.tagName, result, "content"); } } if (definition.validateAttributes && tag.attributes) { const result = definition.validateAttributes(tag.attributes); if (result !== true) { throw ParserError.fromValidation(tag.tagName, result, "attributes"); } } if (definition.validateChildren && "children" in tag && tag.children) { const result = definition.validateChildren(tag.children); if (result !== true) { throw ParserError.fromValidation(tag.tagName, result, "children"); } } } /** * Validate tag structure for nested parsing */ static validateNested(tag, definition) { if (tag.isSelfClosing && definition.allowSelfClosing === false) { throw new ParserError( `Tag ${tag.tagName} cannot be self-closing`, "INVALID_SELF_CLOSING" ); } if (tag.children && tag.children.length > 0 && definition.allowChildren === false) { throw new ParserError(`Tag ${tag.tagName} cannot have children`, "INVALID_CHILDREN"); } } }; // src/core/transformer.ts var ContentTransformers = class { /** * Trim whitespace from content */ static trim() { return (content) => content.trim(); } /** * Convert to lowercase */ static toLowerCase() { return (content) => content.toLowerCase(); } /** * Convert to uppercase */ static toUpperCase() { return (content) => content.toUpperCase(); } /** * Capitalize first letter */ static capitalize() { return (content) => { if (content.length === 0) return content; return content.charAt(0).toUpperCase() + content.slice(1).toLowerCase(); }; } /** * Replace patterns */ static replace(searchValue, replaceValue) { return (content) => content.replace(searchValue, replaceValue); } /** * Remove HTML tags */ static stripHtml() { return (content) => content.replace(/<[^>]*>/g, ""); } /** * Normalize whitespace (collapse multiple spaces/newlines) */ static normalizeWhitespace() { return (content) => content.replace(/\s+/g, " ").trim(); } /** * Parse as number */ static toNumber(options = {}) { return (content) => { const num = parseFloat(content.trim()); if (isNaN(num)) { return options.defaultValue !== void 0 ? options.defaultValue.toString() : content; } return options.integer ? Math.round(num).toString() : num.toString(); }; } /** * Parse as boolean */ static toBoolean(options = {}) { const trueValues = options.trueValues || ["true", "1", "yes", "on"]; const falseValues = options.falseValues || ["false", "0", "no", "off"]; return (content) => { const normalized = content.toLowerCase().trim(); if (trueValues.includes(normalized)) { return "true"; } if (falseValues.includes(normalized)) { return "false"; } return content; }; } /** * Apply multiple transformers in sequence */ static chain(...transformers) { return (content) => { return transformers.reduce((result, transformer) => transformer(result), content); }; } /** * Custom transformer with error handling */ static custom(fn, errorMessage) { return (content) => { try { return fn(content); } catch (error) { throw new ParserError( errorMessage || `Content transformation failed: ${error instanceof Error ? error.message : String(error)}`, "TRANSFORMATION_FAILED" ); } }; } }; var AttributeTransformers = class { /** * Convert attribute types */ static convertTypes(typeMap) { return (attributes) => { if (!attributes) return {}; const result = { ...attributes }; for (const [attr, targetType] of Object.entries(typeMap)) { if (attr in result) { const value = result[attr]; switch (targetType) { case "number": result[attr] = typeof value === "string" ? parseFloat(value) : Number(value); break; case "boolean": result[attr] = typeof value === "string" ? ["true", "1", "yes", "on"].includes(value.toLowerCase()) : Boolean(value); break; case "string": result[attr] = String(value); break; } } } return result; }; } /** * Rename attributes */ static rename(mapping) { return (attributes) => { if (!attributes) return {}; const result = {}; for (const [key, value] of Object.entries(attributes)) { const newKey = mapping[key] || key; result[newKey] = value; } return result; }; } /** * Filter attributes (keep only specified ones) */ static filter(allowedAttributes) { return (attributes) => { if (!attributes) return {}; const result = {}; for (const attr of allowedAttributes) { if (attr in attributes) { result[attr] = attributes[attr]; } } return result; }; } /** * Add default attributes */ static addDefaults(defaults) { return (attributes) => { return { ...defaults, ...attributes }; }; } }; var TagTransformer = class { /** * Apply transformations to a tag according to its definition */ static transform(tag, definition) { try { if (definition.transformContent && tag.content) { tag.content = definition.transformContent(tag.content); } if (definition.transformAttributes && tag.attributes) { tag.attributes = definition.transformAttributes(tag.attributes); } } catch (error) { if (error instanceof ParserError) { throw error; } throw ParserError.fromTransformation( tag.tagName, error instanceof Error ? error : new Error(String(error)) ); } } /** * Apply default values to tag */ static applyDefaults(tag, definition) { if (definition.defaultContent && (!tag.content || tag.content.trim() === "")) { tag.content = definition.defaultContent; } if (definition.defaultAttributes) { tag.attributes = { ...definition.defaultAttributes, ...tag.attributes }; } } /** * Clean tag content and attributes */ static clean(tag, options = {}) { const { trimContent = true, normalizeWhitespace = false, removeEmptyAttributes = false } = options; if (tag.content) { if (trimContent) { tag.content = tag.content.trim(); } if (normalizeWhitespace) { tag.content = tag.content.replace(/\s+/g, " ").trim(); } } if (tag.attributes && removeEmptyAttributes) { const cleaned = {}; for (const [key, value] of Object.entries(tag.attributes)) { if (value !== null && value !== void 0 && value !== "") { cleaned[key] = value; } } if (Object.keys(cleaned).length > 0) { tag.attributes = cleaned; } else { delete tag.attributes; } } } }; // src/core/stream-parser.ts var StreamParser = class extends import_events.EventEmitter { constructor(config = {}) { super(); this.tagRegistry = /* @__PURE__ */ new Map(); this.state = "IDLE" /* IDLE */; // Nested parsing state this.tagStack = []; this.currentDepth = 0; this.currentPath = ""; this.config = mergeConfig(config); this.bufferManager = new BufferManager(this.config.maxBufferSize); this.tagMatcher = new TagMatcher(this.config.caseSensitive); this.stats = this.initializeStats(); } /** * Register a tag definition */ defineTag(definition) { this.tagRegistry.set(definition.tagName, definition); this.updateStats(); return this; } /** * Remove a tag definition */ removeTag(tagName) { const deleted = this.tagRegistry.delete(tagName); if (deleted) { this.updateStats(); } return deleted; } /** * Check if a tag is registered */ hasTag(tagName) { return this.tagRegistry.has(tagName); } /** * Get all registered tag names */ getRegisteredTags() { return Array.from(this.tagRegistry.keys()); } /** * Parse a chunk of streaming data */ parse(chunk) { try { this.state = "PARSING" /* PARSING */; this.bufferManager.append(chunk); if (this.config.enableNested) { this.processBufferNested(); } else { this.processBufferFlat(); } this.state = "COMPLETED" /* COMPLETED */; } catch (error) { this.state = "ERROR" /* ERROR */; this.stats = { ...this.stats, errorCount: this.stats.errorCount + 1 }; this.emitError( error instanceof ParserError ? error : new ParserError( `Unexpected error: ${error instanceof Error ? error.message : String(error)}`, "INVALID_TAG_FORMAT" /* INVALID_TAG_FORMAT */ ), chunk ); } } /** * Process buffer for flat parsing mode */ processBufferFlat() { const buffer = this.bufferManager.getContent(); const completeTags = this.tagMatcher.findCompleteTags(buffer); const parsedTags = []; for (const match of completeTags) { try { const tag = this.processTagFlat(match); if (tag) { parsedTags.push(tag); this.stats = { ...this.stats, totalTagsParsed: this.stats.totalTagsParsed + 1 }; } } catch (error) { this.emitError( error instanceof ParserError ? error : new ParserError("Failed to process tag", "TRANSFORMATION_FAILED" /* TRANSFORMATION_FAILED */), match ); } } this.removeProcessedContent(completeTags); if (parsedTags.length > 0) { this.emit("parsing_complete", parsedTags); this.emit("document_completed", parsedTags); } this.updateStats(); } /** * Process buffer for nested parsing mode */ processBufferNested() { const buffer = this.bufferManager.getContent(); let lastProcessedIndex = 0; while (lastProcessedIndex < buffer.length) { const nextTag = this.tagMatcher.findNextTag(buffer, lastProcessedIndex); if (!nextTag) break; const isRegistered = this.tagRegistry.has(nextTag.tagName); if (!isRegistered) { const unregisteredTagContent = this.tagMatcher.extractTextContent( buffer, lastProcessedIndex, nextTag.endIndex ); this.handleTextContent(unregisteredTagContent); lastProcessedIndex = nextTag.endIndex; continue; } if (nextTag.startIndex > lastProcessedIndex) { const textContent = this.tagMatcher.extractTextContent( buffer, lastProcessedIndex, nextTag.startIndex ); this.handleTextContent(textContent); } this.processTagNested(nextTag); lastProcessedIndex = nextTag.endIndex; } if (lastProcessedIndex > 0) { this.bufferManager.consume(lastProcessedIndex); } } /** * Process a single tag match in flat mode */ processTagFlat(match) { const definition = this.tagRegistry.get(match.tagName); if (!definition) { throw ParserError.fromUnknownTag(match.tagName); } this.emit("tag_started", match.tagName, match.attributes); definition.onStart?.(match.tagName, match.attributes); const tag = { tagName: match.tagName, content: this.config.trimWhitespace ? match.content.trim() : match.content, attributes: match.attributes }; TagTransformer.applyDefaults(tag, definition); TagValidator.validate(tag, definition); TagTransformer.transform(tag, definition); this.emit("tag_completed", tag); definition.onComplete?.(tag); return tag; } /** * Process a tag in nested mode */ processTagNested(tagMatch) { switch (tagMatch.type) { case "opening": this.handleOpeningTag(tagMatch); break; case "closing": this.handleClosingTag(tagMatch); break; case "self-closing": this.handleSelfClosingTag(tagMatch); break; } } /** * Handle opening tag in nested mode */ handleOpeningTag(tagMatch) { if (this.currentDepth >= this.config.maxDepth) { throw ParserError.fromMaxDepth(this.config.maxDepth, tagMatch.path); } const definition = this.tagRegistry.get(tagMatch.tagName); const newTag = { tagName: tagMatch.tagName, content: definition?.defaultContent || "", children: [], attributes: { ...definition?.defaultAttributes, ...tagMatch.attributes }, parent: this.getCurrentParent(), path: this.buildPath(tagMatch.tagName), depth: this.currentDepth + 1, isSelfClosing: false }; const currentParent = this.getCurrentParent(); if (currentParent && currentParent.children) { currentParent.children.push(newTag); } this.currentDepth++; this.currentPath = newTag.path || ""; this.stats = { ...this.stats, maxDepthReached: Math.max(this.stats.maxDepthReached || 0, this.currentDepth), totalNestedTags: (this.stats.totalNestedTags || 0) + 1 }; this.tagStack.push({ tag: newTag, startIndex: tagMatch.startIndex, depth: this.currentDepth, path: newTag.path || "" }); this.emit("tag_opened", newTag, this.currentDepth, newTag.path || ""); this.emit("tag_started", newTag.tagName, newTag.attributes); definition?.onStart?.(newTag.tagName, newTag.attributes); } /** * Handle closing tag in nested mode */ handleClosingTag(tagMatch) { if (this.tagStack.length === 0) { throw new ParserError( `Unexpected closing tag: ${tagMatch.tagName}`, "MISMATCHED_CLOSING_TAG" /* MISMATCHED_CLOSING_TAG */ ); } const currentEntry = this.tagStack[this.tagStack.length - 1]; if (currentEntry.tag.tagName !== tagMatch.tagName) { if (this.config.autoCloseUnclosed) { while (this.tagStack.length > 0 && this.tagStack[this.tagStack.length - 1].tag.tagName !== tagMatch.tagName) { this.autoCloseTag(); } } else { throw ParserError.fromMismatchedClosing(currentEntry.tag.tagName, tagMatch.tagName); } } const completedEntry = this.tagStack.pop(); this.completeTag(completedEntry.tag); this.currentDepth--; this.currentPath = this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1].path : ""; } /** * Auto-close unclosed tag */ autoCloseTag() { if (this.tagStack.length === 0) return; const entry = this.tagStack.pop(); this.completeTag(entry.tag); this.currentDepth--; this.currentPath = this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1].path : ""; } /** * Complete a tag with validation and transformation */ completeTag(tag) { const definition = this.tagRegistry.get(tag.tagName); if (definition) { TagValidator.validate(tag, definition); TagTransformer.transform(tag, definition); definition.onComplete?.(tag); } this.stats = { ...this.stats, totalTagsParsed: this.stats.totalTagsParsed + 1 }; this.emit("tag_closed", tag, tag.depth || 0, tag.path || ""); this.emit("tag_completed", tag); if (tag.children && tag.children.length > 0) { this.emit("subtree_completed", tag, tag.depth || 0); } } /** * Handle self-closing tag in nested mode */ handleSelfClosingTag(tagMatch) { const definition = this.tagRegistry.get(tagMatch.tagName); const tag = { tagName: tagMatch.tagName, content: definition?.defaultContent || "", children: [], attributes: { ...definition?.defaultAttributes, ...tagMatch.attributes }, parent: this.getCurrentParent(), path: this.buildPath(tagMatch.tagName), depth: this.currentDepth + 1, isSelfClosing: true }; const currentParent = this.getCurrentParent(); if (currentParent && currentParent.children) { currentParent.children.push(tag); } this.completeTag(tag); } /** * Get current parent tag from stack */ getCurrentParent() { return this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1].tag : void 0; } /** * Build path string for current tag */ buildPath(tagName) { return this.currentPath ? `${this.currentPath}/${tagName}` : tagName; } /** * Handle text content between tags */ handleTextContent(textContent) { if (!textContent || textContent.trim() === "") return; const currentParent = this.getCurrentParent(); if (currentParent) { if (currentParent.content) { currentParent.content += textContent; } else { currentParent.content = textContent; } this.emit("tag_content_update", currentParent.tagName, textContent); } } /** * Remove processed content from buffer */ removeProcessedContent(matches) { if (matches.length === 0) return; const lastMatch = matches[matches.length - 1]; this.bufferManager.consume(lastMatch.endIndex); } /** * Initialize parser statistics */ initializeStats() { return { totalTagsParsed: 0, totalBytesProcessed: 0, errorCount: 0, bufferSize: 0, state: "IDLE" /* IDLE */, registeredTagsCount: 0, maxDepthReached: 0, totalNestedTags: 0 }; } /** * Update parser statistics */ updateStats() { this.stats = { ...this.stats, totalBytesProcessed: this.bufferManager.getTotalProcessed(), bufferSize: this.bufferManager.getSize(), registeredTagsCount: this.tagRegistry.size }; this.emit("stats_updated", this.stats); } /** * Emit error event with context */ emitError(error, context) { this.emit("parse_error", error, context); } /** * Get current parser state */ getState() { return this.state; } /** * Get parser statistics */ getStats() { return { ...this.stats }; } /** * Get current parsing depth (nested mode only) */ getCurrentDepth() { return this.currentDepth; } /** * Get current parsing path (nested mode only) */ getCurrentPath() { return this.currentPath; } /** * Get buffer size */ getBufferSize() { return this.bufferManager.getSize(); } /** * Clear buffer and reset parser state */ reset() { this.bufferManager.clear(); this.emit("buffer_cleared"); this.tagStack = []; this.currentDepth = 0; this.currentPath = ""; this.state = "IDLE" /* IDLE */; this.stats = this.initializeStats(); this.emit("parser_reset"); } /** * Finalize parsing and auto-close any remaining open tags */ finalize() { const rootTags = []; for (const entry of this.tagStack) { if (entry.depth === 1) { rootTags.push(entry.tag); } } while (this.tagStack.length > 0) { this.autoCloseTag(); } this.state = "COMPLETED" /* COMPLETED */; this.updateStats(); if (rootTags.length > 0) { this.emit("document_completed", rootTags); } this.emit("parsing_finalized", this.stats); } }; // src/llm-stream-parser.ts var LLMStreamParser = class _LLMStreamParser { constructor(config) { this.parser = new StreamParser(config); } /** * Parse a chunk of streaming data */ parse(chunk) { this.parser.parse(chunk); } /** * Register a new tag definition */ defineTag(definition) { this.parser.defineTag(definition); return this; } /** * Register multiple tag definitions */ defineTags(definitions) { for (const definition of definitions) { this.parser.defineTag(definition); } return this; } /** * Remove a tag definition */ removeTag(tagName) { return this.parser.removeTag(tagName); } /** * Check if a tag is registered */ hasTag(tagName) { return this.parser.hasTag(tagName); } /** * Get all registered tag names */ getRegisteredTags() { return this.parser.getRegisteredTags(); } /** * Get current parser state */ getState() { return this.parser.getState(); } /** * Get parser statistics */ getStats() { return this.parser.getStats(); } /** * Get current parsing depth (nested mode only) */ getCurrentDepth() { return this.parser.getCurrentDepth(); } /** * Get current parsing path (nested mode only) */ getCurrentPath() { return this.parser.getCurrentPath(); } /** * Get buffer size */ getBufferSize() { return this.parser.getBufferSize(); } /** * Reset parser state and clear buffer */ reset() { this.parser.reset(); } /** * Finalize parsing and auto-close remaining tags */ finalize() { this.parser.finalize(); } /** * Event emitter methods (delegating to internal parser) */ on(event, listener) { this.parser.on(event, listener); return this; } off(event, listener) { this.parser.off(event, listener); return this; } once(event, listener) { this.parser.once(event, listener); return this; } emit(event, ...args) { return this.parser.emit(event, ...args); } /** * Create a new parser with the same configuration */ clone() { return new _LLMStreamParser(); } /** * Convenience method to parse a complete string and finalize */ parseComplete(content) { this.parse(content); this.finalize(); } /** * Convenience method to add a simple tag definition */ addSimpleTag(tagName, options = {}) { const definition = { tagName }; if (options.allowChildren !== void 0) { definition.allowChildren = options.allowChildren; } if (options.allowSelfClosing !== void 0) { definition.allowSelfClosing = options.allowSelfClosing; } if (options.defaultContent !== void 0) { definition.defaultContent = options.defaultContent; } if (options.onComplete !== void 0) { definition.onComplete = options.onComplete; } return this.defineTag(definition); } /** * Convenience method to add multiple simple tags */ addSimpleTags(tagNames) { for (const tagName of tagNames) { this.addSimpleTag(tagName); } return this; } /** * Get a summary of parser status */ getStatus() { const stats = this.getStats(); return { state: this.getState(), registeredTags: this.getRegisteredTags().length, bufferSize: this.getBufferSize(), totalParsed: stats.totalTagsParsed, errorCount: stats.errorCount }; } }; function createParser(config) { return new LLMStreamParser(config); } function createParserWithTags(tagNames, config) { const parser = new LLMStreamParser(config); parser.addSimpleTags(tagNames); return parser; } // src/index.ts var version = "1.0.1"; // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { AttributeTransformers, AttributeValidators, BufferManager, ContentTransformers, ContentValidators, DEFAULT_CONFIG, LLMStreamParser, ParserError, ParserErrorCode, ParserState, StreamParser, TagMatcher, TagPatterns, TagTransformer, TagValidator, createParser, createParserWithTags, mergeConfig, validateConfig, version }); //# sourceMappingURL=index.js.map