llm-stream-parser
Version:
A TypeScript library for parsing and processing structured data from LLM streaming responses with custom tag definitions and event-driven architecture
1,501 lines (1,489 loc) • 42.6 kB
JavaScript
/*!
* llm-stream-parser
* A TypeScript library for parsing and processing structured data from LLM streaming responses
*
* @license MIT
* @author Baran Karatas
*/
;
// Bundler interop helpers: short aliases for the reflection primitives used below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Install every entry of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all) {
    const getter = all[name];
    __defProp(target, name, { get: getter, enumerable: true });
  }
};
// Mirror the own properties of `from` onto `to` as live getters. Keys already
// present on `to`, and the single key named by `except`, are left alone.
var __copyProps = (to, from, except, desc) => {
  const isObjectLike = from && typeof from === "object" || typeof from === "function";
  if (!isObjectLike) {
    return to;
  }
  for (let key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    const read = () => from[key];
    desc = __getOwnPropDesc(from, key);
    // Keep the source property's enumerability; default to enumerable when no descriptor is found.
    __defProp(to, key, { get: read, enumerable: !desc || desc.enumerable });
  }
  return to;
};
// Wrap an export namespace in a fresh object flagged with a non-enumerable `__esModule`.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
// src/index.ts
// Public API surface of the bundle. __export installs each entry as an
// enumerable getter, so a name is resolved when it is read rather than when
// this object is built (the referenced bindings are assigned later in the file).
var index_exports = {};
__export(index_exports, {
AttributeTransformers: () => AttributeTransformers,
AttributeValidators: () => AttributeValidators,
BufferManager: () => BufferManager,
ContentTransformers: () => ContentTransformers,
ContentValidators: () => ContentValidators,
DEFAULT_CONFIG: () => DEFAULT_CONFIG,
LLMStreamParser: () => LLMStreamParser,
ParserError: () => ParserError,
ParserErrorCode: () => ParserErrorCode,
ParserState: () => ParserState,
StreamParser: () => StreamParser,
TagMatcher: () => TagMatcher,
TagPatterns: () => TagPatterns,
TagTransformer: () => TagTransformer,
TagValidator: () => TagValidator,
createParser: () => createParser,
createParserWithTags: () => createParserWithTags,
mergeConfig: () => mergeConfig,
validateConfig: () => validateConfig,
version: () => version
});
// Publish the namespace through CommonJS; __toCommonJS copies the getters onto
// a new object that also carries a non-enumerable `__esModule: true` flag.
module.exports = __toCommonJS(index_exports);
// src/core/stream-parser.ts
var import_events = require("events");
// src/types/base.ts
// Lifecycle states reported by the parser. Each member's value equals its own name.
var ParserState = /* @__PURE__ */ ((states) => {
  for (const label of ["IDLE", "PARSING", "ERROR", "COMPLETED"]) {
    states[label] = label;
  }
  return states;
})(ParserState || {});
// src/types/config.ts
/**
 * Baseline parser options. mergeConfig() falls back to these values for every
 * option the caller leaves null or undefined.
 */
var DEFAULT_CONFIG = {
// Handed to TagMatcher by StreamParser; when false, matched tag names are lower-cased.
caseSensitive: false,
// Flat mode: trim the content of each completed tag before validation.
trimWhitespace: true,
// Handed to BufferManager as its capacity; compared against the string length of the buffer.
maxBufferSize: 1024 * 1024,
// 1MB
// NOTE(review): not read anywhere in the visible part of this file; confirm where it is consumed.
preserveAttributeOrder: false,
// NOTE(review): not invoked in the visible part of this file; errors are emitted as "parse_error" events.
errorHandler: void 0,
// Nested mode: an opening tag is rejected once the current depth has reached this value.
maxDepth: 50,
// NOTE(review): not read anywhere in the visible part of this file; confirm where it is consumed.
preserveWhitespace: false,
// Nested mode: on a non-matching closing tag, auto-close open tags instead of throwing.
autoCloseUnclosed: true,
// Selects nested (stack-based) processing in StreamParser.parse(); flat processing otherwise.
enableNested: false
};
/**
 * Build a complete configuration from a partial one.
 *
 * Every known option is taken from `config` unless it is null or undefined,
 * in which case the DEFAULT_CONFIG value is used. Unknown keys on `config`
 * are not copied to the result.
 *
 * @param {object} [config={}] Partial parser options.
 * @returns {object} A new object holding exactly the nine known options.
 */
function mergeConfig(config = {}) {
  const resolve = (option) => config[option] ?? DEFAULT_CONFIG[option];
  return {
    caseSensitive: resolve("caseSensitive"),
    trimWhitespace: resolve("trimWhitespace"),
    maxBufferSize: resolve("maxBufferSize"),
    preserveAttributeOrder: resolve("preserveAttributeOrder"),
    errorHandler: resolve("errorHandler"),
    maxDepth: resolve("maxDepth"),
    preserveWhitespace: resolve("preserveWhitespace"),
    autoCloseUnclosed: resolve("autoCloseUnclosed"),
    enableNested: resolve("enableNested")
  };
}
/**
 * Check the numeric limits of a (partial) parser configuration.
 *
 * Options that are null or undefined are treated as "not set" and skipped,
 * matching how mergeConfig() falls back to defaults for them. A value that is
 * set but is not a real number (wrong type or NaN) is reported instead of
 * silently passing every range comparison.
 *
 * @param {object} [config={}] Partial parser options.
 * @returns {string[]} Human-readable problems; empty when the config is acceptable.
 */
function validateConfig(config = {}) {
  const errors = [];
  const { maxBufferSize, maxDepth } = config;
  const isRealNumber = (value) => typeof value === "number" && !Number.isNaN(value);
  const bufferSet = maxBufferSize != null;
  const depthSet = maxDepth != null;
  const bufferOk = bufferSet && isRealNumber(maxBufferSize);
  const depthOk = depthSet && isRealNumber(maxDepth);
  if (bufferSet && !bufferOk) {
    errors.push("maxBufferSize must be a number");
  }
  if (depthSet && !depthOk) {
    errors.push("maxDepth must be a number");
  }
  // The range checks keep their original order so existing callers see the
  // same messages in the same sequence for numeric input.
  if (bufferOk && maxBufferSize <= 0) {
    errors.push("maxBufferSize must be greater than 0");
  }
  if (depthOk && maxDepth <= 0) {
    errors.push("maxDepth must be greater than 0");
  }
  if (bufferOk && maxBufferSize > 100 * 1024 * 1024) {
    errors.push("maxBufferSize should not exceed 100MB for performance reasons");
  }
  if (depthOk && maxDepth > 1e3) {
    errors.push("maxDepth should not exceed 1000 for performance reasons");
  }
  return errors;
}
// src/types/errors.ts
// Machine-readable codes carried by ParserError. Each member's value equals its own name.
var ParserErrorCode = /* @__PURE__ */ ((codes) => {
  const names = [
    "INVALID_TAG_FORMAT",
    "UNKNOWN_TAG",
    "CONTENT_VALIDATION_FAILED",
    "ATTRIBUTE_VALIDATION_FAILED",
    "BUFFER_OVERFLOW",
    "MALFORMED_ATTRIBUTES",
    "UNCLOSED_TAG",
    "TRANSFORMATION_FAILED",
    "INVALID_NESTING",
    "MISMATCHED_CLOSING_TAG",
    "INVALID_SELF_CLOSING",
    "MAX_DEPTH_EXCEEDED",
    "INVALID_CHILDREN",
    "SCHEMA_VIOLATION"
  ];
  for (const name of names) {
    codes[name] = name;
  }
  return codes;
})(ParserErrorCode || {});
/**
 * Error type raised by the parser. Besides the message it carries a
 * machine-readable `code` and optional `context`, `path` and `depth` details.
 */
var ParserError = class _ParserError extends Error {
  /**
   * @param {string} message Human-readable description.
   * @param {string} code One of the ParserErrorCode values.
   * @param {*} [context] Extra data describing the failure.
   * @param {string} [path] Tag path at which the failure happened.
   * @param {number} [depth] Nesting depth associated with the failure.
   */
  constructor(message, code, context, path, depth) {
    super(message);
    this.code = code;
    this.context = context;
    this.path = path;
    this.depth = depth;
    this.name = "ParserError";
  }
  /**
   * Create error from validation failure
   * @param {string} tagName Tag whose validation failed.
   * @param {string} validationMessage Message returned by the validator.
   * @param {"content"|"attributes"|"children"} [type="content"] What was validated.
   * @returns {ParserError}
   */
  static fromValidation(tagName, validationMessage, type = "content") {
    const codeMap = {
      content: "CONTENT_VALIDATION_FAILED" /* CONTENT_VALIDATION_FAILED */,
      attributes: "ATTRIBUTE_VALIDATION_FAILED" /* ATTRIBUTE_VALIDATION_FAILED */,
      children: "INVALID_CHILDREN" /* INVALID_CHILDREN */
    };
    return new _ParserError(
      `${type} validation failed for tag '${tagName}': ${validationMessage}`,
      codeMap[type]
    );
  }
  /**
   * Create error from transformation failure
   *
   * The original failure is kept on the returned error's `cause` property so
   * its stack is not lost. Thrown values that are not Error objects are
   * stringified for the message.
   *
   * @param {string} tagName Tag whose transformation failed.
   * @param {*} error The value thrown by the transformer.
   * @returns {ParserError}
   */
  static fromTransformation(tagName, error) {
    const reason = error?.message ?? String(error);
    const wrapped = new _ParserError(
      `Transformation failed for tag '${tagName}': ${reason}`,
      "TRANSFORMATION_FAILED" /* TRANSFORMATION_FAILED */
    );
    // Non-enumerable, like the native `cause` option, so the error's
    // enumerable shape is unchanged.
    Object.defineProperty(wrapped, "cause", {
      value: error,
      writable: true,
      configurable: true,
      enumerable: false
    });
    return wrapped;
  }
  /**
   * Create error from unknown tag
   * @param {string} tagName Name of the unregistered tag.
   * @returns {ParserError}
   */
  static fromUnknownTag(tagName) {
    return new _ParserError(`Unknown tag: ${tagName}`, "UNKNOWN_TAG" /* UNKNOWN_TAG */);
  }
  /**
   * Create error from buffer overflow
   * @param {number} maxSize The configured buffer limit.
   * @returns {ParserError}
   */
  static fromBufferOverflow(maxSize) {
    return new _ParserError(
      `Buffer overflow: exceeds maximum size of ${maxSize} bytes`,
      "BUFFER_OVERFLOW" /* BUFFER_OVERFLOW */
    );
  }
  /**
   * Create error from max depth exceeded
   * @param {number} maxDepth The configured depth limit (also stored as `depth`).
   * @param {string} [path] Tag path at which the limit was hit.
   * @returns {ParserError}
   */
  static fromMaxDepth(maxDepth, path) {
    return new _ParserError(
      `Maximum nesting depth of ${maxDepth} exceeded`,
      "MAX_DEPTH_EXCEEDED" /* MAX_DEPTH_EXCEEDED */,
      void 0,
      path,
      maxDepth
    );
  }
  /**
   * Create error from mismatched closing tag
   * @param {string} expected Name of the tag that is currently open.
   * @param {string} actual Name found in the closing tag.
   * @param {string} [path] Tag path at which the mismatch happened.
   * @returns {ParserError} Error whose `context` is `{ expected, actual }`.
   */
  static fromMismatchedClosing(expected, actual, path) {
    return new _ParserError(
      `Mismatched closing tag: expected ${expected}, got ${actual}`,
      "MISMATCHED_CLOSING_TAG" /* MISMATCHED_CLOSING_TAG */,
      { expected, actual },
      path
    );
  }
};
// src/core/buffer-manager.ts
/**
 * String accumulator with a fixed capacity. Sizes are measured with
 * `String.prototype.length`; a running total of everything appended is kept
 * separately and is not reset by clear().
 */
var BufferManager = class {
  /**
   * @param {number} [maxSize=1048576] Largest allowed buffer length.
   */
  constructor(maxSize = 1024 * 1024) {
    this.buffer = "";
    this.totalBytesProcessed = 0;
    this.maxSize = maxSize;
  }
  /**
   * Add a chunk to the end of the buffer.
   * @param {string} chunk Text to append.
   * @throws {ParserError} When `chunk` is not a string, or the result would exceed `maxSize`.
   */
  append(chunk) {
    if (typeof chunk !== "string") {
      throw new ParserError("Chunk must be a string", "INVALID_TAG_FORMAT" /* INVALID_TAG_FORMAT */);
    }
    const projectedSize = this.buffer.length + chunk.length;
    if (projectedSize > this.maxSize) {
      throw ParserError.fromBufferOverflow(this.maxSize);
    }
    this.buffer += chunk;
    this.totalBytesProcessed += chunk.length;
  }
  /** @returns {string} The whole buffered text. */
  getContent() {
    return this.buffer;
  }
  /** @returns {number} Current buffer length. */
  getSize() {
    return this.buffer.length;
  }
  /** @returns {number} Total length of all chunks appended so far. */
  getTotalProcessed() {
    return this.totalBytesProcessed;
  }
  /** Drop all buffered text (the processed total is kept). */
  clear() {
    this.buffer = "";
  }
  /**
   * Cut `length` characters off the front of the buffer.
   * @param {number} length How many characters to remove.
   * @returns {string} The removed text; empty when `length` is zero or negative.
   */
  consume(length) {
    if (length <= 0) {
      return "";
    }
    // slice() clamps to the buffer length, so asking for more than is
    // buffered simply hands back everything and leaves the buffer empty.
    const head = this.buffer.slice(0, length);
    this.buffer = this.buffer.slice(length);
    return head;
  }
  /**
   * Delete the characters in [startIndex, endIndex). Out-of-range requests are ignored.
   * @param {number} startIndex First index to delete.
   * @param {number} endIndex Index just past the last character to delete.
   */
  removeRange(startIndex, endIndex) {
    const outOfRange = startIndex < 0 || endIndex < startIndex || startIndex >= this.buffer.length;
    if (!outOfRange) {
      const before = this.buffer.slice(0, startIndex);
      const after = this.buffer.slice(endIndex);
      this.buffer = before + after;
    }
  }
  /** Read part of the buffer without changing it (same arguments as String#slice). */
  slice(start, end) {
    return this.buffer.slice(start, end);
  }
  /** Locate `searchValue` in the buffer (same arguments as String#indexOf). */
  indexOf(searchValue, fromIndex) {
    return this.buffer.indexOf(searchValue, fromIndex);
  }
  /** @returns {boolean} True when nothing is buffered. */
  isEmpty() {
    return this.getSize() === 0;
  }
  /** @returns {boolean} True when at least one character is buffered. */
  hasContent() {
    return this.getSize() > 0;
  }
  /** @returns {number} Buffer length as a percentage of `maxSize`. */
  getUtilization() {
    const ratio = this.buffer.length / this.maxSize;
    return ratio * 100;
  }
  /** @returns {number} How many more characters fit before append() overflows. */
  getRemainingCapacity() {
    return this.maxSize - this.buffer.length;
  }
  /** @returns {boolean} True once utilization is 80% or more. */
  isNearFull() {
    const percentUsed = this.getUtilization();
    return percentUsed >= 80;
  }
  /** @returns {object} Snapshot of size, capacity, utilization and processed total. */
  getStats() {
    const { maxSize, totalBytesProcessed } = this;
    return {
      size: this.buffer.length,
      maxSize,
      utilization: this.getUtilization(),
      totalProcessed: totalBytesProcessed,
      remainingCapacity: this.getRemainingCapacity()
    };
  }
};
// src/core/tag-matcher.ts
/**
 * Shared regular expressions for recognising XML-like tags. All patterns are
 * global (`g`), so they carry `lastIndex` state between calls; reset it before
 * starting a fresh scan.
 */
var TagPatterns = class {
  /**
   * Reset all regex patterns to start from beginning
   */
  static resetAll() {
    this.SELF_CLOSING.lastIndex = 0;
    this.OPENING.lastIndex = 0;
    this.CLOSING.lastIndex = 0;
    this.COMPLETE.lastIndex = 0;
    this.ATTRIBUTES.lastIndex = 0;
  }
};
// Self-closing tags: <tag />
TagPatterns.SELF_CLOSING = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\s+[^>]*)?)\s*\/>/g;
// Opening tags: <tag>
TagPatterns.OPENING = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\s+[^>]*)?)\s*>/g;
// Closing tags: </tag>
TagPatterns.CLOSING = /<\/([a-zA-Z][a-zA-Z0-9_-]*)\s*>/g;
// Complete flat tags: <tag>content</tag>
TagPatterns.COMPLETE = /<([a-zA-Z][a-zA-Z0-9_-]*)((?:\s+[^>]*)?)\s*>(.*?)<\/\1\s*>/gs;
// Attributes parsing: name, then an optional ="double", ='single' or =bare value
// (capture groups 2, 3 and 4). After its first word character a name may also
// contain '-', ':' and '.', so `data-id` or `xml:lang` stays one attribute
// instead of being split into several.
TagPatterns.ATTRIBUTES = /(\w[\w:.-]*)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
/**
 * Locates tags in text using the shared TagPatterns expressions and turns
 * regex matches into plain match records.
 */
var TagMatcher = class {
  /**
   * @param {boolean} [caseSensitive=false] When false, tag names are lower-cased.
   */
  constructor(caseSensitive = false) {
    this.caseSensitive = caseSensitive;
  }
  /**
   * Return the tag (self-closing, opening or closing) that starts earliest
   * at or after `startIndex`, or null when there is none. When two kinds match
   * at the same position the one tried first wins, so `<tag />` is reported
   * as self-closing rather than opening.
   *
   * @param {string} buffer Text to scan.
   * @param {number} [startIndex=0] Where to start scanning.
   * @returns {object|null} Match record with indices relative to `buffer`.
   */
  findNextTag(buffer, startIndex = 0) {
    const remainder = buffer.slice(startIndex);
    const candidates = [
      ["self-closing", TagPatterns.SELF_CLOSING],
      ["opening", TagPatterns.OPENING],
      ["closing", TagPatterns.CLOSING]
    ];
    TagPatterns.resetAll();
    let best = null;
    let bestIndex = Infinity;
    for (const [kind, pattern] of candidates) {
      pattern.lastIndex = 0;
      const hit = pattern.exec(remainder);
      if (!hit || hit.index >= bestIndex) {
        continue;
      }
      bestIndex = hit.index;
      best = this.createTagMatch(hit, startIndex, kind);
    }
    return best;
  }
  /**
   * Collect every `<tag ...>content</tag>` pair in `buffer` (flat mode).
   * @param {string} buffer Text to scan.
   * @returns {object[]} Match records of type "complete", in order of appearance.
   */
  findCompleteTags(buffer) {
    const pattern = TagPatterns.COMPLETE;
    const found = [];
    pattern.lastIndex = 0;
    for (let hit = pattern.exec(buffer); hit !== null; hit = pattern.exec(buffer)) {
      const fullMatch = hit[0];
      const tagName = hit[1];
      const content = hit[3];
      if (!tagName || content === void 0) continue;
      found.push({
        tagName: this.normalizeTagName(tagName),
        content,
        attributes: this.parseAttributes(hit[2] || ""),
        startIndex: hit.index,
        endIndex: hit.index + fullMatch.length,
        fullMatch,
        type: "complete"
      });
    }
    return found;
  }
  /**
   * Turn the attribute portion of a tag into an object.
   * @param {string} attributesStr Text between the tag name and the closing bracket.
   * @returns {object|undefined} Attribute map, or undefined when no attribute was found.
   */
  parseAttributes(attributesStr) {
    if (!attributesStr.trim()) {
      return void 0;
    }
    const pattern = TagPatterns.ATTRIBUTES;
    const parsed = {};
    pattern.lastIndex = 0;
    for (let hit = pattern.exec(attributesStr); hit !== null; hit = pattern.exec(attributesStr)) {
      const name = hit[1];
      if (!name) continue;
      // Groups 2-4 hold the double-quoted, single-quoted and bare value;
      // an attribute without any value counts as boolean true.
      const raw = hit[2] ?? hit[3] ?? hit[4] ?? true;
      parsed[name] = this.parseAttributeValue(raw);
    }
    if (Object.keys(parsed).length === 0) {
      return void 0;
    }
    return parsed;
  }
  /**
   * Build a match record from a regex match found in a slice of the buffer.
   * @param {RegExpExecArray} match Result of one of the tag patterns.
   * @param {number} startIndex Offset of the scanned slice within the full buffer.
   * @param {string} type "opening", "closing" or "self-closing".
   * @returns {object} Match record; `attributes` is undefined for closing tags.
   */
  createTagMatch(match, startIndex, type) {
    const fullMatch = match[0];
    const absoluteStart = startIndex + match.index;
    let attributes = void 0;
    if (type !== "closing") {
      attributes = this.parseAttributes(match[2] || "");
    }
    return {
      tagName: this.normalizeTagName(match[1]),
      content: "",
      attributes,
      startIndex: absoluteStart,
      endIndex: absoluteStart + fullMatch.length,
      fullMatch,
      type
    };
  }
  /**
   * Coerce a raw attribute value: digit-only strings become integers,
   * decimal strings become floats, "true"/"false" become booleans.
   * @param {string|boolean} value Raw value.
   * @returns {string|number|boolean}
   */
  parseAttributeValue(value) {
    if (typeof value === "boolean") return value;
    if (/^\d+$/.test(value)) return parseInt(value, 10);
    if (/^\d*\.\d+$/.test(value)) return parseFloat(value);
    switch (value) {
      case "true":
        return true;
      case "false":
        return false;
      default:
        return value;
    }
  }
  /**
   * Apply the configured case policy to a tag name.
   * @param {string} tagName
   * @returns {string}
   */
  normalizeTagName(tagName) {
    if (this.caseSensitive) {
      return tagName;
    }
    return tagName.toLowerCase();
  }
  /**
   * Quick test for anything that looks like the start of a tag.
   * @param {string} content
   * @returns {boolean}
   */
  containsTags(content) {
    const tagStart = /<[a-zA-Z][a-zA-Z0-9_-]*/;
    return tagStart.test(content);
  }
  /**
   * Return the text between two buffer positions.
   * @param {string} buffer
   * @param {number} startIndex
   * @param {number} endIndex
   * @returns {string}
   */
  extractTextContent(buffer, startIndex, endIndex) {
    return buffer.slice(startIndex, endIndex);
  }
};
// src/core/validator.ts
/**
 * Factory functions for content validators. Every validator takes the tag
 * content as a string and returns `true` when it is acceptable, or an error
 * message string when it is not.
 */
var ContentValidators = class _ContentValidators {
  /**
   * Validate minimum length
   * @param {number} min Smallest accepted `content.length`.
   */
  static minLength(min) {
    return (content) => {
      return content.length >= min ? true : `Content must be at least ${min} characters`;
    };
  }
  /**
   * Validate maximum length
   * @param {number} max Largest accepted `content.length`.
   */
  static maxLength(max) {
    return (content) => {
      return content.length <= max ? true : `Content must be no more than ${max} characters`;
    };
  }
  /**
   * Validate pattern match
   * @param {RegExp} regex Pattern the content must match.
   * @param {string} [message] Message returned on failure.
   */
  static pattern(regex, message) {
    return (content) => {
      // `test` on a global or sticky regex resumes from `lastIndex`, so a
      // reused validator would otherwise alternate between pass and fail.
      if (regex.global || regex.sticky) {
        regex.lastIndex = 0;
      }
      return regex.test(content) ? true : message || "Content does not match required pattern";
    };
  }
  /**
   * Validate enumerated values
   * @param {string[]} allowedValues Accepted values.
   * @param {boolean} [caseSensitive=false] Compare case-sensitively when true.
   */
  static enum(allowedValues, caseSensitive = false) {
    const values = caseSensitive ? allowedValues : allowedValues.map((v) => v.toLowerCase());
    return (content) => {
      const testValue = caseSensitive ? content : content.toLowerCase();
      return values.includes(testValue) ? true : `Value must be one of: ${allowedValues.join(", ")}`;
    };
  }
  /**
   * Validate numeric content. The trimmed content is read with `parseFloat`.
   * @param {{min?: number, max?: number, integer?: boolean}} [options]
   */
  static numeric(options = {}) {
    return (content) => {
      const num = parseFloat(content.trim());
      if (isNaN(num)) {
        return "Content must be a valid number";
      }
      if (options.integer && !Number.isInteger(num)) {
        return "Content must be an integer";
      }
      if (options.min !== void 0 && num < options.min) {
        return `Value must be at least ${options.min}`;
      }
      if (options.max !== void 0 && num > options.max) {
        return `Value must be no more than ${options.max}`;
      }
      return true;
    };
  }
  /**
   * Validate URL format
   * @param {string[]} [allowedProtocols=["http", "https"]] Accepted schemes, without the trailing colon.
   */
  static url(allowedProtocols = ["http", "https"]) {
    return (content) => {
      try {
        const url = new URL(content.trim());
        // `url.protocol` ends with ':'; strip it before comparing.
        const protocol = url.protocol.slice(0, -1);
        if (!allowedProtocols.includes(protocol)) {
          return `URL must use one of these protocols: ${allowedProtocols.join(", ")}`;
        }
        return true;
      } catch {
        return "Invalid URL format";
      }
    };
  }
  /**
   * Validate email format (a single '@' with a dotted domain, no whitespace)
   */
  static email() {
    const emailPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
    return _ContentValidators.pattern(emailPattern, "Invalid email format");
  }
  /**
   * Validate required (non-empty after trimming)
   */
  static required() {
    return (content) => {
      return content.trim().length > 0 ? true : "Content is required";
    };
  }
  /**
   * Combine multiple validators; the first failure message is returned.
   * @param {...Function} validators Validators to run in order.
   */
  static combine(...validators) {
    return (content) => {
      for (const validator of validators) {
        const result = validator(content);
        if (result !== true) {
          return result;
        }
      }
      return true;
    };
  }
};
/**
 * Factory functions for attribute validators. Every validator takes the
 * attribute map (or undefined) and returns `true` or an error message string.
 * Only an attribute map's own properties count as attributes; names inherited
 * from Object.prototype (such as `constructor` or `toString`) do not.
 */
var AttributeValidators = class {
  /**
   * Validate required attributes
   * @param {string[]} requiredAttrs Names that must be present.
   */
  static required(requiredAttrs) {
    return (attributes) => {
      if (!attributes) {
        return requiredAttrs.length > 0 ? `Missing required attributes: ${requiredAttrs.join(", ")}` : true;
      }
      const missing = requiredAttrs.filter(
        (attr) => !Object.prototype.hasOwnProperty.call(attributes, attr)
      );
      return missing.length === 0 ? true : `Missing required attributes: ${missing.join(", ")}`;
    };
  }
  /**
   * Validate allowed attributes
   * @param {string[]} allowedAttrs Names that may be present.
   */
  static allowed(allowedAttrs) {
    return (attributes) => {
      if (!attributes) return true;
      const invalid = Object.keys(attributes).filter((attr) => !allowedAttrs.includes(attr));
      return invalid.length === 0 ? true : `Invalid attributes: ${invalid.join(", ")}`;
    };
  }
  /**
   * Validate attribute types
   * @param {Object<string, string>} typeMap Attribute name to expected `typeof` result.
   */
  static types(typeMap) {
    return (attributes) => {
      if (!attributes) return true;
      for (const [attr, expectedType] of Object.entries(typeMap)) {
        if (!Object.prototype.hasOwnProperty.call(attributes, attr)) {
          continue;
        }
        const value = attributes[attr];
        const actualType = typeof value;
        // A string that converts to a number is accepted where a number is expected.
        if (expectedType === "number" && typeof value === "string" && !isNaN(Number(value))) {
          continue;
        }
        if (actualType !== expectedType) {
          return `Attribute '${attr}' must be of type ${expectedType}, got ${actualType}`;
        }
      }
      return true;
    };
  }
};
/**
 * Runs the validators attached to a tag definition against a parsed tag.
 */
var TagValidator = class {
  /**
   * Validate a tag according to its definition
   *
   * Content validation runs for any string content, including the empty
   * string, so "required" and minimum-length rules can reject empty tags.
   * Attribute validation runs even when the tag carries no attributes; the
   * validator then receives an empty object, so required-attribute rules can
   * reject a bare tag.
   *
   * @param {object} tag Parsed tag (`tagName`, `content`, optional `attributes` / `children`).
   * @param {object} definition Tag definition with optional `validateContent`,
   *   `validateAttributes` and `validateChildren` functions, each returning
   *   `true` or an error message.
   * @throws {ParserError} For the first validator that does not return `true`.
   */
  static validate(tag, definition) {
    if (definition.validateContent && typeof tag.content === "string") {
      const result = definition.validateContent(tag.content);
      if (result !== true) {
        throw ParserError.fromValidation(tag.tagName, result, "content");
      }
    }
    if (definition.validateAttributes) {
      const result = definition.validateAttributes(tag.attributes ?? {});
      if (result !== true) {
        throw ParserError.fromValidation(tag.tagName, result, "attributes");
      }
    }
    if (definition.validateChildren && "children" in tag && tag.children) {
      const result = definition.validateChildren(tag.children);
      if (result !== true) {
        throw ParserError.fromValidation(tag.tagName, result, "children");
      }
    }
  }
  /**
   * Validate tag structure for nested parsing
   *
   * @param {object} tag Nested tag (`isSelfClosing`, `children`).
   * @param {object} definition Definition with optional `allowSelfClosing` / `allowChildren`
   *   flags; only an explicit `false` forbids the construct.
   * @throws {ParserError} INVALID_SELF_CLOSING or INVALID_CHILDREN.
   */
  static validateNested(tag, definition) {
    if (tag.isSelfClosing && definition.allowSelfClosing === false) {
      throw new ParserError(
        `Tag ${tag.tagName} cannot be self-closing`,
        "INVALID_SELF_CLOSING"
      );
    }
    if (tag.children && tag.children.length > 0 && definition.allowChildren === false) {
      throw new ParserError(`Tag ${tag.tagName} cannot have children`, "INVALID_CHILDREN");
    }
  }
};
// src/core/transformer.ts
/**
 * Factory functions for content transformers. Each factory returns a function
 * that maps a content string to a new content string.
 */
var ContentTransformers = class {
  /** Strip leading and trailing whitespace. */
  static trim() {
    return (content) => content.trim();
  }
  /** Lower-case the whole content. */
  static toLowerCase() {
    return (content) => content.toLowerCase();
  }
  /** Upper-case the whole content. */
  static toUpperCase() {
    return (content) => content.toUpperCase();
  }
  /** Upper-case the first character and lower-case the rest. */
  static capitalize() {
    return (content) => {
      if (content.length === 0) {
        return content;
      }
      const head = content.charAt(0).toUpperCase();
      const tail = content.slice(1).toLowerCase();
      return head + tail;
    };
  }
  /**
   * Forward to String#replace.
   * @param {string|RegExp} searchValue
   * @param {string|Function} replaceValue
   */
  static replace(searchValue, replaceValue) {
    return (content) => content.replace(searchValue, replaceValue);
  }
  /** Delete every `<...>` sequence. */
  static stripHtml() {
    return (content) => content.replace(/<[^>]*>/g, "");
  }
  /** Collapse each whitespace run to a single space, then trim. */
  static normalizeWhitespace() {
    return (content) => {
      const collapsed = content.replace(/\s+/g, " ");
      return collapsed.trim();
    };
  }
  /**
   * Re-serialise the content as a number string.
   * @param {{integer?: boolean, defaultValue?: number}} [options] `integer` rounds the
   *   value; `defaultValue` is used when the content does not parse, otherwise
   *   unparsable content is returned unchanged.
   */
  static toNumber(options = {}) {
    return (content) => {
      const parsed = parseFloat(content.trim());
      if (!isNaN(parsed)) {
        const value = options.integer ? Math.round(parsed) : parsed;
        return value.toString();
      }
      if (options.defaultValue !== void 0) {
        return options.defaultValue.toString();
      }
      return content;
    };
  }
  /**
   * Map recognised truthy/falsy words to "true"/"false"; anything else is
   * returned unchanged.
   * @param {{trueValues?: string[], falseValues?: string[]}} [options]
   */
  static toBoolean(options = {}) {
    const trueValues = options.trueValues || ["true", "1", "yes", "on"];
    const falseValues = options.falseValues || ["false", "0", "no", "off"];
    return (content) => {
      const word = content.toLowerCase().trim();
      if (trueValues.includes(word)) return "true";
      if (falseValues.includes(word)) return "false";
      return content;
    };
  }
  /**
   * Run several transformers one after another, left to right.
   * @param {...Function} transformers
   */
  static chain(...transformers) {
    return (content) => {
      let current = content;
      transformers.forEach((transformer) => {
        current = transformer(current);
      });
      return current;
    };
  }
  /**
   * Wrap a user function so anything it throws surfaces as a ParserError.
   * @param {Function} fn Transformer to run.
   * @param {string} [errorMessage] Message to use instead of the generated one.
   */
  static custom(fn, errorMessage) {
    return (content) => {
      try {
        return fn(content);
      } catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        throw new ParserError(
          errorMessage || `Content transformation failed: ${detail}`,
          "TRANSFORMATION_FAILED"
        );
      }
    };
  }
};
/**
 * Factory functions for attribute transformers. Each factory returns a
 * function that maps an attribute object (or undefined) to a new attribute
 * object. Lookups consider own properties only, so attribute names that
 * collide with Object.prototype members (`constructor`, `toString`, ...) are
 * treated like any other name.
 */
var AttributeTransformers = class {
  /**
   * Convert attribute types
   * @param {Object<string, "number"|"boolean"|"string">} typeMap Attribute name to target type.
   * @returns {Function} Transformer returning a converted copy (`{}` for missing attributes).
   */
  static convertTypes(typeMap) {
    return (attributes) => {
      if (!attributes) return {};
      const result = { ...attributes };
      for (const [attr, targetType] of Object.entries(typeMap)) {
        if (!Object.prototype.hasOwnProperty.call(result, attr)) {
          continue;
        }
        const value = result[attr];
        switch (targetType) {
          case "number":
            result[attr] = typeof value === "string" ? parseFloat(value) : Number(value);
            break;
          case "boolean":
            result[attr] = typeof value === "string" ? ["true", "1", "yes", "on"].includes(value.toLowerCase()) : Boolean(value);
            break;
          case "string":
            result[attr] = String(value);
            break;
        }
      }
      return result;
    };
  }
  /**
   * Rename attributes
   * @param {Object<string, string>} mapping Old name to new name; unmapped names are kept.
   * @returns {Function} Transformer returning a renamed copy (`{}` for missing attributes).
   */
  static rename(mapping) {
    return (attributes) => {
      if (!attributes) return {};
      const result = {};
      for (const [key, value] of Object.entries(attributes)) {
        const mapped = Object.prototype.hasOwnProperty.call(mapping, key) ? mapping[key] : void 0;
        // An empty or missing target keeps the original name.
        const newKey = mapped || key;
        result[newKey] = value;
      }
      return result;
    };
  }
  /**
   * Filter attributes (keep only specified ones)
   * @param {string[]} allowedAttributes Names to keep.
   * @returns {Function} Transformer returning only the listed attributes that are present.
   */
  static filter(allowedAttributes) {
    return (attributes) => {
      if (!attributes) return {};
      const result = {};
      for (const attr of allowedAttributes) {
        if (Object.prototype.hasOwnProperty.call(attributes, attr)) {
          result[attr] = attributes[attr];
        }
      }
      return result;
    };
  }
  /**
   * Add default attributes
   * @param {object} defaults Values used for attributes that are absent.
   * @returns {Function} Transformer returning defaults overlaid with the given attributes.
   */
  static addDefaults(defaults) {
    return (attributes) => {
      return { ...defaults, ...attributes };
    };
  }
};
/**
 * Applies definition-driven transformations, defaults and clean-up to a
 * parsed tag. All methods mutate the tag they are given.
 */
var TagTransformer = class {
  /**
   * Run the definition's `transformContent` / `transformAttributes` hooks.
   * A hook is skipped when the matching tag field is falsy. A ParserError
   * thrown by a hook is passed through; anything else is wrapped with
   * ParserError.fromTransformation.
   *
   * @param {object} tag Tag to update in place.
   * @param {object} definition Tag definition holding the optional hooks.
   * @throws {ParserError}
   */
  static transform(tag, definition) {
    try {
      if (definition.transformContent && tag.content) {
        const nextContent = definition.transformContent(tag.content);
        tag.content = nextContent;
      }
      if (definition.transformAttributes && tag.attributes) {
        const nextAttributes = definition.transformAttributes(tag.attributes);
        tag.attributes = nextAttributes;
      }
    } catch (failure) {
      if (!(failure instanceof ParserError)) {
        const asError = failure instanceof Error ? failure : new Error(String(failure));
        throw ParserError.fromTransformation(tag.tagName, asError);
      }
      throw failure;
    }
  }
  /**
   * Fill in `defaultContent` when the tag has no (non-blank) content and
   * merge `defaultAttributes` underneath the tag's own attributes.
   *
   * @param {object} tag Tag to update in place.
   * @param {object} definition Tag definition holding the optional defaults.
   */
  static applyDefaults(tag, definition) {
    if (definition.defaultContent && (!tag.content || tag.content.trim() === "")) {
      tag.content = definition.defaultContent;
    }
    if (!definition.defaultAttributes) {
      return;
    }
    tag.attributes = { ...definition.defaultAttributes, ...tag.attributes };
  }
  /**
   * Tidy a tag's content and attributes.
   *
   * @param {object} tag Tag to update in place.
   * @param {{trimContent?: boolean, normalizeWhitespace?: boolean, removeEmptyAttributes?: boolean}} [options]
   *   `trimContent` (default true) trims the content; `normalizeWhitespace`
   *   collapses whitespace runs; `removeEmptyAttributes` drops null, undefined
   *   and empty-string attributes and deletes `attributes` when none remain.
   */
  static clean(tag, options = {}) {
    const trimContent = options.trimContent === void 0 ? true : options.trimContent;
    const normalizeWhitespace = options.normalizeWhitespace === void 0 ? false : options.normalizeWhitespace;
    const removeEmptyAttributes = options.removeEmptyAttributes === void 0 ? false : options.removeEmptyAttributes;
    if (tag.content) {
      if (trimContent) {
        tag.content = tag.content.trim();
      }
      if (normalizeWhitespace) {
        tag.content = tag.content.replace(/\s+/g, " ").trim();
      }
    }
    if (!tag.attributes || !removeEmptyAttributes) {
      return;
    }
    const isPresent = (value) => value !== null && value !== void 0 && value !== "";
    const kept = {};
    let keptCount = 0;
    for (const [name, value] of Object.entries(tag.attributes)) {
      if (isPresent(value)) {
        kept[name] = value;
        keptCount += 1;
      }
    }
    if (keptCount === 0) {
      delete tag.attributes;
    } else {
      tag.attributes = kept;
    }
  }
};
// src/core/stream-parser.ts
var StreamParser = class extends import_events.EventEmitter {
constructor(config = {}) {
super();
this.tagRegistry = /* @__PURE__ */ new Map();
this.state = "IDLE" /* IDLE */;
// Nested parsing state
this.tagStack = [];
this.currentDepth = 0;
this.currentPath = "";
this.config = mergeConfig(config);
this.bufferManager = new BufferManager(this.config.maxBufferSize);
this.tagMatcher = new TagMatcher(this.config.caseSensitive);
this.stats = this.initializeStats();
}
/**
* Register a tag definition
*/
defineTag(definition) {
this.tagRegistry.set(definition.tagName, definition);
this.updateStats();
return this;
}
/**
* Remove a tag definition
*/
removeTag(tagName) {
const deleted = this.tagRegistry.delete(tagName);
if (deleted) {
this.updateStats();
}
return deleted;
}
/**
* Check if a tag is registered
*/
hasTag(tagName) {
return this.tagRegistry.has(tagName);
}
/**
* Get all registered tag names
*/
getRegisteredTags() {
return Array.from(this.tagRegistry.keys());
}
/**
* Parse a chunk of streaming data
*/
parse(chunk) {
try {
this.state = "PARSING" /* PARSING */;
this.bufferManager.append(chunk);
if (this.config.enableNested) {
this.processBufferNested();
} else {
this.processBufferFlat();
}
this.state = "COMPLETED" /* COMPLETED */;
} catch (error) {
this.state = "ERROR" /* ERROR */;
this.stats = { ...this.stats, errorCount: this.stats.errorCount + 1 };
this.emitError(
error instanceof ParserError ? error : new ParserError(
`Unexpected error: ${error instanceof Error ? error.message : String(error)}`,
"INVALID_TAG_FORMAT" /* INVALID_TAG_FORMAT */
),
chunk
);
}
}
/**
* Process buffer for flat parsing mode
*/
processBufferFlat() {
const buffer = this.bufferManager.getContent();
const completeTags = this.tagMatcher.findCompleteTags(buffer);
const parsedTags = [];
for (const match of completeTags) {
try {
const tag = this.processTagFlat(match);
if (tag) {
parsedTags.push(tag);
this.stats = { ...this.stats, totalTagsParsed: this.stats.totalTagsParsed + 1 };
}
} catch (error) {
this.emitError(
error instanceof ParserError ? error : new ParserError("Failed to process tag", "TRANSFORMATION_FAILED" /* TRANSFORMATION_FAILED */),
match
);
}
}
this.removeProcessedContent(completeTags);
if (parsedTags.length > 0) {
this.emit("parsing_complete", parsedTags);
this.emit("document_completed", parsedTags);
}
this.updateStats();
}
/**
* Process buffer for nested parsing mode
*/
processBufferNested() {
const buffer = this.bufferManager.getContent();
let lastProcessedIndex = 0;
while (lastProcessedIndex < buffer.length) {
const nextTag = this.tagMatcher.findNextTag(buffer, lastProcessedIndex);
if (!nextTag) break;
const isRegistered = this.tagRegistry.has(nextTag.tagName);
if (!isRegistered) {
const unregisteredTagContent = this.tagMatcher.extractTextContent(
buffer,
lastProcessedIndex,
nextTag.endIndex
);
this.handleTextContent(unregisteredTagContent);
lastProcessedIndex = nextTag.endIndex;
continue;
}
if (nextTag.startIndex > lastProcessedIndex) {
const textContent = this.tagMatcher.extractTextContent(
buffer,
lastProcessedIndex,
nextTag.startIndex
);
this.handleTextContent(textContent);
}
this.processTagNested(nextTag);
lastProcessedIndex = nextTag.endIndex;
}
if (lastProcessedIndex > 0) {
this.bufferManager.consume(lastProcessedIndex);
}
}
/**
* Process a single tag match in flat mode
*/
processTagFlat(match) {
const definition = this.tagRegistry.get(match.tagName);
if (!definition) {
throw ParserError.fromUnknownTag(match.tagName);
}
this.emit("tag_started", match.tagName, match.attributes);
definition.onStart?.(match.tagName, match.attributes);
const tag = {
tagName: match.tagName,
content: this.config.trimWhitespace ? match.content.trim() : match.content,
attributes: match.attributes
};
TagTransformer.applyDefaults(tag, definition);
TagValidator.validate(tag, definition);
TagTransformer.transform(tag, definition);
this.emit("tag_completed", tag);
definition.onComplete?.(tag);
return tag;
}
/**
* Process a tag in nested mode
*/
processTagNested(tagMatch) {
switch (tagMatch.type) {
case "opening":
this.handleOpeningTag(tagMatch);
break;
case "closing":
this.handleClosingTag(tagMatch);
break;
case "self-closing":
this.handleSelfClosingTag(tagMatch);
break;
}
}
/**
* Handle opening tag in nested mode
*/
handleOpeningTag(tagMatch) {
if (this.currentDepth >= this.config.maxDepth) {
throw ParserError.fromMaxDepth(this.config.maxDepth, tagMatch.path);
}
const definition = this.tagRegistry.get(tagMatch.tagName);
const newTag = {
tagName: tagMatch.tagName,
content: definition?.defaultContent || "",
children: [],
attributes: { ...definition?.defaultAttributes, ...tagMatch.attributes },
parent: this.getCurrentParent(),
path: this.buildPath(tagMatch.tagName),
depth: this.currentDepth + 1,
isSelfClosing: false
};
const currentParent = this.getCurrentParent();
if (currentParent && currentParent.children) {
currentParent.children.push(newTag);
}
this.currentDepth++;
this.currentPath = newTag.path || "";
this.stats = {
...this.stats,
maxDepthReached: Math.max(this.stats.maxDepthReached || 0, this.currentDepth),
totalNestedTags: (this.stats.totalNestedTags || 0) + 1
};
this.tagStack.push({
tag: newTag,
startIndex: tagMatch.startIndex,
depth: this.currentDepth,
path: newTag.path || ""
});
this.emit("tag_opened", newTag, this.currentDepth, newTag.path || "");
this.emit("tag_started", newTag.tagName, newTag.attributes);
definition?.onStart?.(newTag.tagName, newTag.attributes);
}
/**
* Handle closing tag in nested mode
*/
handleClosingTag(tagMatch) {
if (this.tagStack.length === 0) {
throw new ParserError(
`Unexpected closing tag: ${tagMatch.tagName}`,
"MISMATCHED_CLOSING_TAG" /* MISMATCHED_CLOSING_TAG */
);
}
const currentEntry = this.tagStack[this.tagStack.length - 1];
if (currentEntry.tag.tagName !== tagMatch.tagName) {
if (this.config.autoCloseUnclosed) {
while (this.tagStack.length > 0 && this.tagStack[this.tagStack.length - 1].tag.tagName !== tagMatch.tagName) {
this.autoCloseTag();
}
} else {
throw ParserError.fromMismatchedClosing(currentEntry.tag.tagName, tagMatch.tagName);
}
}
const completedEntry = this.tagStack.pop();
this.completeTag(completedEntry.tag);
this.currentDepth--;
this.currentPath = this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1].path : "";
}
/**
* Auto-close unclosed tag
*/
autoCloseTag() {
if (this.tagStack.length === 0) return;
const entry = this.tagStack.pop();
this.completeTag(entry.tag);
this.currentDepth--;
this.currentPath = this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1].path : "";
}
/**
* Complete a tag with validation and transformation
*/
completeTag(tag) {
const definition = this.tagRegistry.get(tag.tagName);
if (definition) {
TagValidator.validate(tag, definition);
TagTransformer.transform(tag, definition);
definition.onComplete?.(tag);
}
this.stats = { ...this.stats, totalTagsParsed: this.stats.totalTagsParsed + 1 };
this.emit("tag_closed", tag, tag.depth || 0, tag.path || "");
this.emit("tag_completed", tag);
if (tag.children && tag.children.length > 0) {
this.emit("subtree_completed", tag, tag.depth || 0);
}
}
/**
* Handle self-closing tag in nested mode
*/
handleSelfClosingTag(tagMatch) {
const definition = this.tagRegistry.get(tagMatch.tagName);
const tag = {
tagName: tagMatch.tagName,
content: definition?.defaultContent || "",
children: [],
attributes: { ...definition?.defaultAttributes, ...tagMatch.attributes },
parent: this.getCurrentParent(),
path: this.buildPath(tagMatch.tagName),
depth: this.currentDepth + 1,
isSelfClosing: true
};
const currentParent = this.getCurrentParent();
if (currentParent && currentParent.children) {
currentParent.children.push(tag);
}
this.completeTag(tag);
}
/**
* Get current parent tag from stack
*/
getCurrentParent() {
return this.tagStack.length > 0 ? this.tagStack[this.tagStack.length - 1].tag : void 0;
}
/**
* Build path string for current tag
*/
buildPath(tagName) {
return this.currentPath ? `${this.currentPath}/${tagName}` : tagName;
}
/**
* Handle text content between tags
*/
handleTextContent(textContent) {
if (!textContent || textContent.trim() === "") return;
const currentParent = this.getCurrentParent();
if (currentParent) {
if (currentParent.content) {
currentParent.content += textContent;
} else {
currentParent.content = textContent;
}
this.emit("tag_content_update", currentParent.tagName, textContent);
}
}
/**
* Remove processed content from buffer
*/
removeProcessedContent(matches) {
if (matches.length === 0) return;
const lastMatch = matches[matches.length - 1];
this.bufferManager.consume(lastMatch.endIndex);
}
/**
* Initialize parser statistics
*/
initializeStats() {
return {
totalTagsParsed: 0,
totalBytesProcessed: 0,
errorCount: 0,
bufferSize: 0,
state: "IDLE" /* IDLE */,
registeredTagsCount: 0,
maxDepthReached: 0,
totalNestedTags: 0
};
}
/**
* Update parser statistics
*/
updateStats() {
this.stats = {
...this.stats,
totalBytesProcessed: this.bufferManager.getTotalProcessed(),
bufferSize: this.bufferManager.getSize(),
registeredTagsCount: this.tagRegistry.size
};
this.emit("stats_updated", this.stats);
}
/**
* Emit error event with context
*/
emitError(error, context) {
  // Forwards the error and its context to "parse_error" listeners.
  const eventName = "parse_error";
  this.emit(eventName, error, context);
}
/**
* Get current parser state
*/
getState() {
return this.state;
}
/**
* Get parser statistics
*/
getStats() {
return { ...this.stats };
}
/**
* Get current parsing depth (nested mode only)
*/
getCurrentDepth() {
return this.currentDepth;
}
/**
* Get current parsing path (nested mode only)
*/
getCurrentPath() {
return this.currentPath;
}
/**
* Get buffer size
*/
getBufferSize() {
return this.bufferManager.getSize();
}
/**
* Clear buffer and reset parser state
*/
reset() {
this.bufferManager.clear();
this.emit("buffer_cleared");
this.tagStack = [];
this.currentDepth = 0;
this.currentPath = "";
this.state = "IDLE" /* IDLE */;
this.stats = this.initializeStats();
this.emit("parser_reset");
}
/**
* Finalize parsing and auto-close any remaining open tags
*/
finalize() {
const rootTags = [];
for (const entry of this.tagStack) {
if (entry.depth === 1) {
rootTags.push(entry.tag);
}
}
while (this.tagStack.length > 0) {
this.autoCloseTag();
}
this.state = "COMPLETED" /* COMPLETED */;
this.updateStats();
if (rootTags.length > 0) {
this.emit("document_completed", rootTags);
}
this.emit("parsing_finalized", this.stats);
}
};
// src/llm-stream-parser.ts
/**
 * Facade over StreamParser: every operation is delegated to an internal
 * StreamParser instance, with chainable helpers for registering tags and
 * listeners.
 */
var LLMStreamParser = class _LLMStreamParser {
  /**
   * @param {object} [config] - Forwarded unchanged to the StreamParser
   *   constructor and kept so that clone() can reuse it.
   */
  constructor(config) {
    // Retained because clone() must build its copy with the same config.
    this.config = config;
    this.parser = new StreamParser(config);
  }
  /**
   * Feed a chunk of streaming data to the internal parser.
   * @param {*} chunk - Passed through to StreamParser#parse.
   */
  parse(chunk) {
    this.parser.parse(chunk);
  }
  /**
   * Register a tag definition.
   * @param {object} definition - Passed through to StreamParser#defineTag.
   * @returns {this} This instance, for chaining.
   */
  defineTag(definition) {
    this.parser.defineTag(definition);
    return this;
  }
  /**
   * Register several tag definitions, in iteration order.
   * @param {Iterable<object>} definitions
   * @returns {this} This instance, for chaining.
   */
  defineTags(definitions) {
    for (const definition of definitions) {
      this.parser.defineTag(definition);
    }
    return this;
  }
  /**
   * Remove a tag definition.
   * @param {string} tagName
   * @returns {*} Whatever StreamParser#removeTag returns.
   */
  removeTag(tagName) {
    return this.parser.removeTag(tagName);
  }
  /**
   * Check whether a tag is registered.
   * @param {string} tagName
   * @returns {*} Whatever StreamParser#hasTag returns.
   */
  hasTag(tagName) {
    return this.parser.hasTag(tagName);
  }
  /**
   * Get the registered tag names, as reported by the internal parser.
   */
  getRegisteredTags() {
    return this.parser.getRegisteredTags();
  }
  /**
   * Get the current parser state.
   */
  getState() {
    return this.parser.getState();
  }
  /**
   * Get the parser statistics.
   */
  getStats() {
    return this.parser.getStats();
  }
  /**
   * Get the current parsing depth (nested mode only).
   */
  getCurrentDepth() {
    return this.parser.getCurrentDepth();
  }
  /**
   * Get the current parsing path (nested mode only).
   */
  getCurrentPath() {
    return this.parser.getCurrentPath();
  }
  /**
   * Get the current buffer size.
   */
  getBufferSize() {
    return this.parser.getBufferSize();
  }
  /**
   * Reset parser state and clear the buffer.
   */
  reset() {
    this.parser.reset();
  }
  /**
   * Finalize parsing and auto-close remaining tags.
   */
  finalize() {
    this.parser.finalize();
  }
  /**
   * Subscribe a listener on the internal parser.
   * @returns {this} This instance, for chaining.
   */
  on(event, listener) {
    this.parser.on(event, listener);
    return this;
  }
  /**
   * Unsubscribe a listener from the internal parser.
   * @returns {this} This instance, for chaining.
   */
  off(event, listener) {
    this.parser.off(event, listener);
    return this;
  }
  /**
   * Subscribe a one-shot listener on the internal parser.
   * @returns {this} This instance, for chaining.
   */
  once(event, listener) {
    this.parser.once(event, listener);
    return this;
  }
  /**
   * Emit an event on the internal parser.
   * @returns {*} Whatever the internal parser's emit returns.
   */
  emit(event, ...args) {
    return this.parser.emit(event, ...args);
  }
  /**
   * Create a new parser with the same configuration.
   *
   * The copy is constructed from the config this instance was created with.
   * Tag definitions and event listeners registered on this instance are not
   * copied.
   * @returns {LLMStreamParser} A new, independent parser.
   */
  clone() {
    return new _LLMStreamParser(this.config);
  }
  /**
   * Convenience method: parse a complete string, then finalize.
   * @param {*} content - Passed to parse().
   */
  parseComplete(content) {
    this.parse(content);
    this.finalize();
  }
  /**
   * Convenience method to register a simple tag definition.
   *
   * Only allowChildren, allowSelfClosing, defaultContent and onComplete are
   * taken from options, and only when they are not undefined.
   * @param {string} tagName
   * @param {object} [options]
   * @returns {this} This instance, for chaining.
   */
  addSimpleTag(tagName, options = {}) {
    const definition = {
      tagName
    };
    const copiedOptions = ["allowChildren", "allowSelfClosing", "defaultContent", "onComplete"];
    for (const key of copiedOptions) {
      // Explicit undefined check so falsy values such as false or "" survive.
      if (options[key] !== void 0) {
        definition[key] = options[key];
      }
    }
    return this.defineTag(definition);
  }
  /**
   * Convenience method to register several simple tags with no options.
   * @param {Iterable<string>} tagNames
   * @returns {this} This instance, for chaining.
   */
  addSimpleTags(tagNames) {
    for (const tagName of tagNames) {
      this.addSimpleTag(tagName);
    }
    return this;
  }
  /**
   * Get a summary of the parser status.
   * @returns {{state: *, registeredTags: number, bufferSize: *, totalParsed: *, errorCount: *}}
   */
  getStatus() {
    const stats = this.getStats();
    return {
      state: this.getState(),
      registeredTags: this.getRegisteredTags().length,
      bufferSize: this.getBufferSize(),
      totalParsed: stats.totalTagsParsed,
      errorCount: stats.errorCount
    };
  }
};
/**
 * Factory for an LLMStreamParser.
 * @param {object} [config] - Forwarded to the LLMStreamParser constructor.
 * @returns {LLMStreamParser} The newly constructed parser.
 */
function createParser(config) {
  const instance = new LLMStreamParser(config);
  return instance;
}
/**
 * Factory for an LLMStreamParser with simple tags already registered.
 * @param {Iterable<string>} tagNames - Handed to addSimpleTags on the new parser.
 * @param {object} [config] - Forwarded to the LLMStreamParser constructor.
 * @returns {LLMStreamParser} The newly constructed parser.
 */
function createParserWithTags(tagNames, config) {
  const preloadedParser = new LLMStreamParser(config);
  preloadedParser.addSimpleTags(tagNames);
  return preloadedParser;
}
// src/index.ts
// Library version string, exposed through the `version` export.
var version = "1.0.1";
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so the assignment below never executes;
// the object literal only spells out the export names in source form.
0 && (module.exports = {
AttributeTransformers,
AttributeValidators,
BufferManager,
ContentTransformers,
ContentValidators,
DEFAULT_CONFIG,
LLMStreamParser,
ParserError,
ParserErrorCode,
ParserState,
StreamParser,
TagMatcher,
TagPatterns,
TagTransformer,
TagValidator,
createParser,
createParserWithTags,
mergeConfig,
validateConfig,
version
});
//# sourceMappingURL=index.js.map