UNPKG

stax-xml

Version:

High-performance, pull-based XML parser for JavaScript/TypeScript with declarative converter API

1,647 lines (1,643 loc) 62.9 kB
//#region src/types.ts /** * Enumeration of XML stream event types used by the StAX parser * * @public */ const XmlEventType = { START_DOCUMENT: "START_DOCUMENT", END_DOCUMENT: "END_DOCUMENT", START_ELEMENT: "START_ELEMENT", END_ELEMENT: "END_ELEMENT", CHARACTERS: "CHARACTERS", CDATA: "CDATA", ERROR: "ERROR" }; /** * Type guard function - Check if the event is a START_ELEMENT event * @param event XML event to check * @returns true if the event is a START_ELEMENT event, false otherwise */ function isStartElement(event) { return event.type === XmlEventType.START_ELEMENT; } /** * Type guard function - Check if the event is an END_ELEMENT event * @param event XML event to check * @returns true if the event is an END_ELEMENT event, false otherwise */ function isEndElement(event) { return event.type === XmlEventType.END_ELEMENT; } /** * Type guard function - Check if the event is a CHARACTERS event * @param event XML event to check * @returns true if the event is a CHARACTERS event, false otherwise */ function isCharacters(event) { return event.type === XmlEventType.CHARACTERS; } /** * Type guard function - Check if the event is a CDATA event * @param event XML event to check * @returns true if the event is a CDATA event, false otherwise */ function isCdata(event) { return event.type === XmlEventType.CDATA; } /** * Type guard function - Check if the event is an ERROR event * @param event XML event to check * @returns true if the event is an ERROR event, false otherwise */ function isError(event) { return event.type === XmlEventType.ERROR; } /** * Type guard function - Check if the event is a START_DOCUMENT event * @param event XML event to check * @returns true if the event is a START_DOCUMENT event, false otherwise */ function isStartDocument(event) { return event.type === XmlEventType.START_DOCUMENT; } /** * Type guard function - Check if the event is an END_DOCUMENT event * @param event XML event to check * @returns true if the event is an END_DOCUMENT event, false otherwise */ function isEndDocument(event) { return event.type === XmlEventType.END_DOCUMENT; } //#endregion //#region src/StaxXmlParser.ts /** * High-performance asynchronous XML parser implementing the StAX (Streaming API for XML) pattern. * * This parser provides memory-efficient processing of large XML files through streaming * with support for pull-based parsing, custom entity handling, and namespace processing. * * @remarks * The parser uses UTF-8 safe processing with Boyer-Moore-Horspool pattern search optimization * and supports both single-event and batch processing modes for improved performance. * * @example * Basic usage: * ```typescript * const xmlContent = '<root><item>Hello</item></root>'; * const stream = new ReadableStream({ * start(controller) { * controller.enqueue(new TextEncoder().encode(xmlContent)); * controller.close(); * } * }); * * const parser = new StaxXmlParser(stream); * for await (const event of parser) { * console.log(event.type, event); * } * ``` * * @example * With custom options: * ```typescript * const options = { * autoDecodeEntities: true, * maxBufferSize: 128 * 1024, * addEntities: [{ entity: 'custom', value: 'replacement' }] * }; * const parser = new StaxXmlParser(stream, options); * ``` * * @public */ var StaxXmlParser = class StaxXmlParser { reader = null; decoder; buffer; bufferLength = 0; position = 0; eventQueue = []; resolveNext = null; error = null; isStreamEnded = false; parserFinished = false; currentTextBuffer = ""; elementStack = []; namespaceStack = []; options; static ASCII_TABLE = (() => { const table = new Uint8Array(128); table[9] = 1; table[10] = 1; table[13] = 1; table[32] = 1; table[60] = 2; table[62] = 3; table[47] = 4; table[61] = 5; table[33] = 6; table[63] = 7; table[34] = 8; table[39] = 9; table[38] = 10; table[91] = 11; table[93] = 12; return table; })(); static ENTITY_REGEX_CACHE = /* @__PURE__ */ new Map(); static DEFAULT_ENTITY_REGEX = /&(lt|gt|quot|apos|amp);/g; static DEFAULT_ENTITY_MAP = { "lt": "<", "gt": ">", "quot": "\"", "apos": "'", "amp": "&" }; entityDecoder; bmhCache = /* @__PURE__ */ new Map(); batchMetrics = { avgEventSize: 100, lastBatchTime: 0, eventCount: 0 }; /** * Creates a new StaxXmlParser instance. * * @param xmlStream - The ReadableStream containing XML data as Uint8Array chunks * @param options - Configuration options for the parser * @throws {Error} When xmlStream is not a valid ReadableStream * * @example * ```typescript * const xmlData = '<root><item>content</item></root>'; * const stream = new ReadableStream({ * start(controller) { * controller.enqueue(new TextEncoder().encode(xmlData)); * controller.close(); * } * }); * * const parser = new StaxXmlParser(stream, { * autoDecodeEntities: true, * maxBufferSize: 64 * 1024 * }); * ``` */ constructor(xmlStream, options = {}) { if (!(xmlStream instanceof ReadableStream)) throw new Error("xmlStream must be a web standard ReadableStream."); this.options = { encoding: "utf-8", autoDecodeEntities: true, maxBufferSize: 64 * 1024, enableBufferCompaction: true, batchSize: 10, batchTimeout: 10, ...options }; this.decoder = new TextDecoder(this.options.encoding, { fatal: false, ignoreBOM: true }); this.buffer = new Uint8Array(this.options.maxBufferSize || 64 * 1024); this.entityDecoder = this._compileEntityDecoder(); this.reader = xmlStream.getReader(); this._startReading(); this._addEvent({ type: XmlEventType.START_DOCUMENT, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }); } /** * Fast XML special character check */ getXmlCharType(byte) { return byte < 128 ? StaxXmlParser.ASCII_TABLE[byte] : 0; } /** * Check if UTF-8 byte is the start of a character * @param byte The byte to check * @returns true if it's the start of a character */ isUtf8CharStart(byte) { return (byte & 128) === 0 || (byte & 192) === 192; } /** * Calculate UTF-8 sequence length * @param byte The first byte * @returns Sequence length (1-4) */ getUtf8SequenceLength(byte) { if ((byte & 128) === 0) return 1; if ((byte & 224) === 192) return 2; if ((byte & 240) === 224) return 3; if ((byte & 248) === 240) return 4; return 1; } /** * Safely adjust position at UTF-8 character boundaries * @param pos The position to adjust * @param searchBackward Whether to search backwards * @returns Safe UTF-8 boundary position */ findSafeUtf8Boundary(pos, searchBackward = true) { if (pos <= 0 || pos >= this.bufferLength) return pos; if (searchBackward) { let safePos = pos; let backtrack = 0; while (safePos > 0 && backtrack < 4) { if (this.isUtf8CharStart(this.buffer[safePos])) { const seqLen = this.getUtf8SequenceLength(this.buffer[safePos]); if (safePos + seqLen > pos) return safePos; else return pos; } safePos--; backtrack++; } return pos; } else { while (pos < this.bufferLength && !this.isUtf8CharStart(this.buffer[pos])) pos++; return pos; } } /** * Safely extract UTF-8 string from buffer * @param start Starting position * @param end Ending position * @returns Decoded string */ safeDecodeRange(start, end) { const safeStart = this.findSafeUtf8Boundary(start, false); const safeEnd = this.findSafeUtf8Boundary(end, true); if (safeStart >= safeEnd) return ""; return this.decoder.decode(this.buffer.subarray(safeStart, safeEnd), { stream: false }); } /** * Build Boyer-Moore-Horspool bad character table */ _buildBMHTable(pattern) { const table = new Uint8Array(256); const patternLength = pattern.length; table.fill(patternLength); for (let i = 0; i < patternLength - 1; i++) table[pattern[i]] = patternLength - 1 - i; return table; } /** * Pattern search using Boyer-Moore-Horspool algorithm * XML delimiters are all ASCII, so no UTF-8 boundary issues */ _findPatternBMH(pattern, startPos) { const patternBytes = new TextEncoder().encode(pattern); const patternLength = patternBytes.length; if (patternLength === 0) return -1; if (patternLength === 1) return this._findSingleByte(patternBytes[0], startPos); let skipTable = this.bmhCache.get(pattern); if (!skipTable) { skipTable = this._buildBMHTable(patternBytes); if (this.bmhCache.size > 20) this.bmhCache.clear(); this.bmhCache.set(pattern, skipTable); } const start = startPos || this.position; const bufferEnd = this.bufferLength - patternLength; let pos = start; while (pos <= bufferEnd) { let i = patternLength - 1; while (i >= 0 && this.buffer[pos + i] === patternBytes[i]) i--; if (i < 0) return pos; pos += skipTable[this.buffer[pos + patternLength - 1]]; } return -1; } /** * Single byte search (optimized) */ _findSingleByte(byte, startPos) { const start = startPos || this.position; const buffer = this.buffer; const end = this.bufferLength; const end4 = end - 3; let i = start; for (; i < end4; i += 4) { if (buffer[i] === byte) return i; if (buffer[i + 1] === byte) return i + 1; if (buffer[i + 2] === byte) return i + 2; if (buffer[i + 3] === byte) return i + 3; } for (; i < end; i++) if (buffer[i] === byte) return i; return -1; } _compileEntityDecoder() { if (!this.options.autoDecodeEntities) return (text) => text; if (this.options.addEntities && this.options.addEntities.length > 0) { const entityMap = { ...StaxXmlParser.DEFAULT_ENTITY_MAP }; const patterns = [ "lt", "gt", "quot", "apos" ]; for (const { entity, value } of this.options.addEntities) if (entity && value) { const key = entity.startsWith("&") && entity.endsWith(";") ? entity.slice(1, -1) : entity; entityMap[key] = value; patterns.push(key); } patterns.push("amp"); const cacheKey = patterns.join(","); let regex = StaxXmlParser.ENTITY_REGEX_CACHE.get(cacheKey); if (!regex) { const pattern = patterns.sort((a, b) => b.length - a.length).map((e) => e.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|"); regex = new RegExp(`&(${pattern});`, "g"); StaxXmlParser.ENTITY_REGEX_CACHE.set(cacheKey, regex); } return (text) => { if (!text || text.indexOf("&") === -1) return text; regex.lastIndex = 0; return text.replace(regex, (_, entity) => entityMap[entity] || _); }; } return (text) => { if (!text || text.indexOf("&") === -1) return text; StaxXmlParser.DEFAULT_ENTITY_REGEX.lastIndex = 0; return text.replace(StaxXmlParser.DEFAULT_ENTITY_REGEX, (_, entity) => StaxXmlParser.DEFAULT_ENTITY_MAP[entity] || _); }; } _calculateOptimalBatchSize() { const MIN_BATCH = 1; const MAX_BATCH = this.options.batchSize || 10; if (this.bufferLength < 1024) return MIN_BATCH; if (this.bufferLength > 10240) return MAX_BATCH; if (this.eventQueue.length > 0) { if (this.eventQueue[this.eventQueue.length - 1]?.type === XmlEventType.CHARACTERS) return MIN_BATCH; } if (this.batchMetrics.eventCount > 100) { const avgSize = this.batchMetrics.avgEventSize; if (avgSize > 1e3) return MIN_BATCH; if (avgSize < 100) return MAX_BATCH; } return Math.min(MAX_BATCH, Math.max(MIN_BATCH, Math.floor(this.bufferLength / 1024))); } async nextBatch(size) { const batch = []; const targetSize = size || this._calculateOptimalBatchSize(); const startTime = Date.now(); const timeout = this.options.batchTimeout || 10; for (let i = 0; i < targetSize; i++) { if (Date.now() - startTime > timeout) break; const result = await this.next(); if (result.done) break; batch.push(result.value); } return batch; } async *batchedIterator(batchSize) { while (!this.parserFinished || this.eventQueue.length > 0) { const targetSize = batchSize || this._calculateOptimalBatchSize(); const batch = await this.nextBatch(targetSize); if (batch.length === 0) break; yield batch; } } _compactBufferIfNeeded() { if (!this.options.enableBufferCompaction) return; const maxSize = this.options.maxBufferSize || 64 * 1024; if (this.position > 8192 && this.bufferLength > 16384 || this.position > maxSize / 2 || this.bufferLength > maxSize && this.position > maxSize / 4) this._compactBuffer(); } _compactBuffer() { if (this.position > 0 && this.position < this.bufferLength) { const safePos = this.findSafeUtf8Boundary(this.position, true); const remainingLength = this.bufferLength - safePos; if (remainingLength < safePos) { const newBuffer = new Uint8Array(this.buffer.length); newBuffer.set(this.buffer.subarray(safePos, this.bufferLength)); this.buffer = newBuffer; } else this.buffer.copyWithin(0, safePos, this.bufferLength); this.bufferLength = remainingLength; this.position = this.position - safePos; if (this.bmhCache.size > 20) this.bmhCache.clear(); } } async _startReading() { try { while (true) { const { done, value } = await this.reader.read(); if (done) { this.isStreamEnded = true; this._parseBuffer(); if (!this.parserFinished && this.elementStack.length > 0) this._addError(/* @__PURE__ */ new Error("Unexpected end of document. Not all elements were closed.")); if (!this.parserFinished) { this._flushCharacters(); this._addEvent({ type: XmlEventType.END_DOCUMENT, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }); this.parserFinished = true; } if (this.resolveNext && this.eventQueue.length === 0) { this.resolveNext({ value: void 0, done: true }); this.resolveNext = null; } break; } this._appendToBuffer(value); this._parseBuffer(); this._compactBufferIfNeeded(); this._updateBatchMetrics(value.length); } } catch (err) { this._addError(err); if (this.resolveNext) { this.resolveNext({ value: void 0, done: true }); this.resolveNext = null; } } } _updateBatchMetrics(bytesProcessed) { const eventsDelta = this.eventQueue.length; if (eventsDelta > 0) { this.batchMetrics.eventCount += eventsDelta; this.batchMetrics.avgEventSize = this.batchMetrics.avgEventSize * .9 + bytesProcessed / eventsDelta * .1; } this.batchMetrics.lastBatchTime = Date.now(); } _parseBuffer() { while (this.position < this.bufferLength && !this.parserFinished) { const ltPos = this._findSingleByte(60, this.position); if (ltPos === -1) { if (this.isStreamEnded) { const remainingText = this._readBuffer(); this.currentTextBuffer += remainingText; this._flushCharacters(); } break; } if (ltPos > this.position) try { const textLength = ltPos - this.position; const text = this._readBuffer(textLength); this.currentTextBuffer += text; } catch (error) { if (!this.isStreamEnded) break; throw error; } this.position = ltPos; const nextByte = this.buffer[this.position + 1]; const charType = this.getXmlCharType(nextByte); if (charType === 4) { this._flushCharacters(); if (!this._parseEndTag()) break; } else if (charType === 6) if (this._matchesPattern("<!--")) { if (!this._parseComment()) break; } else if (this._matchesPattern("<![CDATA[")) { if (!this._parseCData()) break; } else { if (this.isStreamEnded) { this._addError(/* @__PURE__ */ new Error(`Malformed XML near position ${this.position}`)); return; } break; } else if (charType === 7) { if (this._matchesPattern("<?xml")) { if (!this._parseXmlDeclaration()) break; } else if (this._matchesPattern("<?")) { if (!this._parseProcessingInstruction()) break; } } else { this._flushCharacters(); if (!this._parseStartTag()) break; } this._compactBufferIfNeeded(); } } _flushCharacters() { if (this.currentTextBuffer.length > 0) { const decodedText = this.entityDecoder(this.currentTextBuffer); if (decodedText.trim().length > 0) this._addEvent({ type: XmlEventType.CHARACTERS, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: decodedText, error: void 0 }); this.currentTextBuffer = ""; } } _clearBuffers() { this.bufferLength = 0; this.position = 0; this.currentTextBuffer = ""; this.bmhCache.clear(); } _addEvent(event) { this.eventQueue.push(event); if (this.resolveNext) { this.resolveNext(this._popNextEvent()); this.resolveNext = null; } } _addError(err) { if (this.error === null) { this.error = err; this._addEvent({ type: XmlEventType.ERROR, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: err }); this.parserFinished = true; this._clearBuffers(); if (this.reader) { this.reader.releaseLock(); this.reader = null; } } } _popNextEvent() { if (this.eventQueue.length > 0) return { value: this.eventQueue.shift(), done: false }; if (this.parserFinished) return { value: void 0, done: true }; return null; } async next() { if (this.error) throw this.error; const nextEvent = this._popNextEvent(); if (nextEvent) return nextEvent; if (this.parserFinished) return { value: void 0, done: true }; return new Promise((resolve) => { this.resolveNext = resolve; }); } [Symbol.asyncIterator]() { return this; } _appendToBuffer(newData) { const requiredSize = this.bufferLength + newData.length; if (requiredSize > this.buffer.length) { const newSize = Math.max(this.buffer.length * 2, requiredSize); const newBuffer = new Uint8Array(newSize); newBuffer.set(this.buffer.subarray(0, this.bufferLength)); this.buffer = newBuffer; } this.buffer.set(newData, this.bufferLength); this.bufferLength += newData.length; } /** * UTF-8 safe buffer reading */ _readBuffer(length) { const originalPos = this.position; let endPos = length ? Math.min(this.position + length, this.bufferLength) : this.bufferLength; if (length && endPos < this.bufferLength) endPos = this.findSafeUtf8Boundary(endPos, true); const slice = this.buffer.subarray(this.position, endPos); try { const result = this.decoder.decode(slice, { stream: !this.isStreamEnded }); this.position = endPos; return result; } catch (error) { if (!this.isStreamEnded && endPos === this.bufferLength) for (let i = 1; i <= 4 && endPos - i > this.position; i++) { const testEnd = this.findSafeUtf8Boundary(endPos - i, true); if (testEnd > this.position) try { const safeSlice = this.buffer.subarray(this.position, testEnd); const result = this.decoder.decode(safeSlice, { stream: true }); this.position = testEnd; return result; } catch { continue; } } this.position = originalPos; throw error; } } _matchesPattern(pattern) { const patternBytes = new TextEncoder().encode(pattern); if (this.position + patternBytes.length > this.bufferLength) return false; for (let i = 0; i < patternBytes.length; i++) if (this.buffer[this.position + i] !== patternBytes[i]) return false; return true; } _parseXmlDeclaration() { const endPos = this._findPatternBMH("?>"); if (endPos === -1) return false; this.position = endPos + 2; return true; } _parseComment() { const endPos = this._findPatternBMH("-->"); if (endPos === -1) return false; this.position = endPos + 3; return true; } /** * UTF-8 safe CDATA parsing */ _parseCData() { const startPos = this.position + 9; const endPos = this._findPatternBMH("]]>"); if (endPos === -1) return false; try { const safeStart = this.findSafeUtf8Boundary(startPos, false); const safeEnd = this.findSafeUtf8Boundary(endPos, true); const cdataContent = this.decoder.decode(this.buffer.subarray(safeStart, safeEnd), { stream: false }); this._addEvent({ type: XmlEventType.CDATA, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: cdataContent, error: void 0 }); this.position = endPos + 3; return true; } catch (error) { if (!this.isStreamEnded) return false; throw error; } } _parseProcessingInstruction() { const endPos = this._findPatternBMH("?>"); if (endPos === -1) return false; this.position = endPos + 2; return true; } /** * UTF-8 safe end tag parsing */ _parseEndTag() { const gtPos = this._findSingleByte(62, this.position); if (gtPos === -1) return false; try { const closeTagMatch = this.safeDecodeRange(this.position, gtPos + 1).match(/^<\/([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)\s*>$/); if (!closeTagMatch) { this._addError(/* @__PURE__ */ new Error("Malformed closing tag")); return true; } const tagName = closeTagMatch[1]; if (this.elementStack.length === 0 || this.elementStack[this.elementStack.length - 1] !== tagName) { this._addError(/* @__PURE__ */ new Error(`Mismatched closing tag: </${tagName}>. Expected </${this.elementStack[this.elementStack.length - 1] || "nothing"}>`)); return true; } const currentNamespaces = this.namespaceStack.length > 0 ? this.namespaceStack[this.namespaceStack.length - 1] : /* @__PURE__ */ new Map(); const { localName, prefix, uri } = this._parseQualifiedName(tagName, currentNamespaces); this.elementStack.pop(); this.namespaceStack.pop(); this._addEvent({ type: XmlEventType.END_ELEMENT, name: tagName, localName, prefix, uri, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }); this.position = gtPos + 1; return true; } catch (error) { if (!this.isStreamEnded) return false; throw error; } } /** * UTF-8 safe start tag parsing (using ASCII table) */ _parseStartTag() { const gtPos = this._findSingleByte(62, this.position); if (gtPos === -1) return false; try { const tagMatch = this.safeDecodeRange(this.position, gtPos + 1).match(/^<([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)(\s+[^>]*?)?\s*(\/?)>$/); if (!tagMatch) { this._addError(/* @__PURE__ */ new Error("Malformed start tag")); return true; } const tagName = tagMatch[1]; const attributesString = tagMatch[2] || ""; const isSelfClosing = tagMatch[3] === "/"; const currentNamespaces = /* @__PURE__ */ new Map(); if (this.namespaceStack.length > 0) { const parentNamespaces = this.namespaceStack[this.namespaceStack.length - 1]; for (const [prefix$1, uri$1] of parentNamespaces) currentNamespaces.set(prefix$1, uri$1); } const attributes = {}; const attributesWithPrefix = {}; const attrRegex = /([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)(?:\s*=\s*"([^"]*)"|\s*=\s*'([^']*)')?/g; let attrMatch; while ((attrMatch = attrRegex.exec(attributesString)) !== null) { const attrName = attrMatch[1]; const attrValue = this.entityDecoder(attrMatch[2] || attrMatch[3] || "true"); attributes[attrName] = attrValue; const attrNamespaceInfo = this._parseQualifiedName(attrName, currentNamespaces, true); attributesWithPrefix[attrNamespaceInfo.localName] = { value: attrValue, prefix: attrNamespaceInfo.prefix, uri: attrNamespaceInfo.uri }; if (attrName === "xmlns") currentNamespaces.set("", attrValue); else if (attrName.startsWith("xmlns:")) { const prefix$1 = attrName.substring(6); currentNamespaces.set(prefix$1, attrValue); } } const { localName, prefix, uri } = this._parseQualifiedName(tagName, currentNamespaces); this._addEvent({ type: XmlEventType.START_ELEMENT, name: tagName, localName, prefix, uri, attributes, attributesWithPrefix, value: void 0, error: void 0 }); this.position = gtPos + 1; if (!isSelfClosing) { this.elementStack.push(tagName); this.namespaceStack.push(currentNamespaces); } else this._addEvent({ type: XmlEventType.END_ELEMENT, name: tagName, localName, prefix, uri, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }); return true; } catch (error) { if (!this.isStreamEnded) return false; throw error; } } _parseQualifiedName(qname, namespaces, isAttribute = false) { const colonIndex = qname.indexOf(":"); if (colonIndex === -1) if (isAttribute) return { localName: qname, prefix: void 0, uri: void 0 }; else { const defaultUri = namespaces.get(""); return { localName: qname, prefix: void 0, uri: defaultUri }; } else { const prefix = qname.substring(0, colonIndex); const localName = qname.substring(colonIndex + 1); const uri = namespaces.get(prefix); return { localName, prefix, uri }; } } get XmlEventType() { return XmlEventType; } }; //#endregion //#region src/StaxXmlParserSync.ts var StaxXmlParserSync = class StaxXmlParserSync { xml; xmlLength; pos = 0; elementStack = []; namespaceStack = []; options; internalIterator; static ASCII_TABLE = (() => { const table = new Uint8Array(128); table[9] = 1; table[10] = 1; table[13] = 1; table[32] = 1; table[60] = 2; table[62] = 3; table[47] = 4; table[61] = 5; table[33] = 6; table[63] = 7; table[34] = 8; table[39] = 9; return table; })(); static UNICODE_WHITESPACE = new Set([ 160, 5760, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8232, 8233, 8239, 8287, 12288, 65279 ]); static ENTITY_REGEX_CACHE = /* @__PURE__ */ new Map(); static DEFAULT_ENTITY_REGEX = /&(lt|gt|quot|apos|amp);/g; static DEFAULT_ENTITY_MAP = { "lt": "<", "gt": ">", "quot": "\"", "apos": "'", "amp": "&" }; entityDecoder; constructor(xml, options = {}) { this.xml = xml; this.xmlLength = xml.length; this.options = { autoDecodeEntities: true, ...options }; this.namespaceStack.push(/* @__PURE__ */ new Map()); this.entityDecoder = this.compileEntityDecoder(); } static isWhitespace(code) { if (code < 128) return StaxXmlParserSync.ASCII_TABLE[code] === 1; return code <= 32 || StaxXmlParserSync.UNICODE_WHITESPACE.has(code); } static isHighSurrogate(code) { return code >= 55296 && code <= 56319; } static isLowSurrogate(code) { return code >= 56320 && code <= 57343; } findChar(targetCode, start = this.pos) { const xml = this.xml; const len = this.xmlLength; const len16 = len - 15; let i = start; for (; i < len16; i += 16) { if (xml.charCodeAt(i) === targetCode) return i; if (xml.charCodeAt(i + 1) === targetCode) return i + 1; if (xml.charCodeAt(i + 2) === targetCode) return i + 2; if (xml.charCodeAt(i + 3) === targetCode) return i + 3; if (xml.charCodeAt(i + 4) === targetCode) return i + 4; if (xml.charCodeAt(i + 5) === targetCode) return i + 5; if (xml.charCodeAt(i + 6) === targetCode) return i + 6; if (xml.charCodeAt(i + 7) === targetCode) return i + 7; if (xml.charCodeAt(i + 8) === targetCode) return i + 8; if (xml.charCodeAt(i + 9) === targetCode) return i + 9; if (xml.charCodeAt(i + 10) === targetCode) return i + 10; if (xml.charCodeAt(i + 11) === targetCode) return i + 11; if (xml.charCodeAt(i + 12) === targetCode) return i + 12; if (xml.charCodeAt(i + 13) === targetCode) return i + 13; if (xml.charCodeAt(i + 14) === targetCode) return i + 14; if (xml.charCodeAt(i + 15) === targetCode) return i + 15; } for (; i < len; i++) if (xml.charCodeAt(i) === targetCode) return i; return -1; } matchesAt(str, pos) { const len = str.length; if (pos + len > this.xmlLength) return false; for (let i = 0; i < len; i++) if (this.xml.charCodeAt(pos + i) !== str.charCodeAt(i)) return false; return true; } trimmedSlice(start, end) { const xml = this.xml; while (start < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(start))) if (StaxXmlParserSync.isHighSurrogate(xml.charCodeAt(start))) start += 2; else start++; while (end > start && StaxXmlParserSync.isWhitespace(xml.charCodeAt(end - 1))) if (end > start + 1 && StaxXmlParserSync.isLowSurrogate(xml.charCodeAt(end - 1)) && StaxXmlParserSync.isHighSurrogate(xml.charCodeAt(end - 2))) end -= 2; else end--; return start < end ? xml.slice(start, end) : ""; } compileEntityDecoder() { if (!this.options.autoDecodeEntities) return (text) => text; if (this.options.addEntities && this.options.addEntities.length > 0) { const entityMap = { ...StaxXmlParserSync.DEFAULT_ENTITY_MAP }; const patterns = [ "lt", "gt", "quot", "apos" ]; for (const { entity, value } of this.options.addEntities) if (entity && value) { const key = entity.startsWith("&") && entity.endsWith(";") ? entity.slice(1, -1) : entity; entityMap[key] = value; patterns.push(key); } patterns.push("amp"); const cacheKey = patterns.join(","); let regex = StaxXmlParserSync.ENTITY_REGEX_CACHE.get(cacheKey); if (!regex) { const pattern = patterns.sort((a, b) => b.length - a.length).map((e) => e.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|"); regex = new RegExp(`&(${pattern});`, "g"); StaxXmlParserSync.ENTITY_REGEX_CACHE.set(cacheKey, regex); } return (text) => { if (!text || text.indexOf("&") === -1) return text; regex.lastIndex = 0; return text.replace(regex, (_, entity) => entityMap[entity] || _); }; } return (text) => { if (!text || text.indexOf("&") === -1) return text; StaxXmlParserSync.DEFAULT_ENTITY_REGEX.lastIndex = 0; return text.replace(StaxXmlParserSync.DEFAULT_ENTITY_REGEX, (_, entity) => StaxXmlParserSync.DEFAULT_ENTITY_MAP[entity] || _); }; } /** * Symbol.iterator implementation - returns this instance as iterator * This ensures for...of and explicit next() calls use the same iterator state */ [Symbol.iterator]() { return this; } /** * Internal generator that actually yields AnyXmlEvent * Important: Return type is same as before - Iterator<AnyXmlEvent> * Factory internally creates UnifiedXmlEvent, but * types are returned as StartElementEvent, EndElementEvent etc. so * perfectly compatible with AnyXmlEvent union type */ *internalGenerator() { yield { type: XmlEventType.START_DOCUMENT, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; while (this.pos < this.xmlLength) { const ltPos = this.findChar(60, this.pos); if (ltPos === -1) { if (this.pos < this.xmlLength) { const text = this.trimmedSlice(this.pos, this.xmlLength); if (text) yield { type: XmlEventType.CHARACTERS, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: this.entityDecoder(text), error: void 0 }; } break; } if (ltPos > this.pos) { const text = this.trimmedSlice(this.pos, ltPos); if (text) yield { type: XmlEventType.CHARACTERS, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: this.entityDecoder(text), error: void 0 }; } this.pos = ltPos; switch (this.xml.charCodeAt(this.pos + 1)) { case 47: yield* this.parseEndTag(); break; case 33: yield* this.parseCdataCommentDoctype(); break; case 63: yield* this.parseProcessingInstruction(); break; default: yield* this.parseStartTag(); break; } } yield { type: XmlEventType.END_DOCUMENT, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; } next() { if (!this.internalIterator) this.internalIterator = this.internalGenerator(); return this.internalIterator.next(); } *parseEndTag() { const tagClose = this.findChar(62, this.pos); if (tagClose === -1) throw new Error("Unclosed end tag"); const fullTagName = this.trimmedSlice(this.pos + 2, tagClose); if (this.elementStack.length === 0) throw new Error(`Mismatched closing tag: </${fullTagName}>. No open elements.`); const expectedTagName = this.elementStack[this.elementStack.length - 1]; if (fullTagName !== expectedTagName) throw new Error(`Mismatched closing tag: </${fullTagName}>. Expected </${expectedTagName}>.`); this.elementStack.pop(); const currentNamespaces = this.namespaceStack.pop(); const colonIndex = fullTagName.indexOf(":"); let localName, prefix, uri; if (colonIndex === -1) { localName = fullTagName; prefix = void 0; uri = currentNamespaces ? currentNamespaces.get("") : void 0; } else { prefix = fullTagName.slice(0, colonIndex); localName = fullTagName.slice(colonIndex + 1); uri = currentNamespaces ? currentNamespaces.get(prefix) : void 0; } yield { type: XmlEventType.END_ELEMENT, name: fullTagName, localName, prefix, uri, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; this.pos = tagClose + 1; } *parseCdataCommentDoctype() { if (this.matchesAt("<![CDATA[", this.pos)) { const cdataEnd = this.findSequence("]]>", this.pos + 9); if (cdataEnd === -1) throw new Error("Unclosed CDATA section"); const cdataContent = this.xml.slice(this.pos + 9, cdataEnd); yield { type: XmlEventType.CDATA, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: cdataContent, error: void 0 }; this.pos = cdataEnd + 3; } else if (this.matchesAt("<!--", this.pos)) { const commentEnd = this.findSequence("-->", this.pos + 4); if (commentEnd === -1) throw new Error("Unclosed comment"); this.pos = commentEnd + 3; } else if (this.matchesAt("<!DOCTYPE", this.pos)) { const doctypeEnd = this.findChar(62, this.pos); if (doctypeEnd === -1) throw new Error("Unclosed DOCTYPE declaration"); this.pos = doctypeEnd + 1; } } *parseProcessingInstruction() { const piEnd = this.findSequence("?>", this.pos); if (piEnd === -1) throw new Error("Unclosed processing instruction"); this.pos = piEnd + 2; } *parseStartTag() { const tagStart = this.pos + 1; const tagEnd = this.findTagEnd(tagStart); if (tagEnd === -1) throw new Error("Unclosed start tag"); let isSelfClosing = false; let actualEnd = tagEnd; if (this.xml.charCodeAt(tagEnd - 1) === 47) { isSelfClosing = true; actualEnd = tagEnd - 1; } let nameEnd = tagStart; const xml = this.xml; while (nameEnd < actualEnd) { const code = xml.charCodeAt(nameEnd); if (code <= 32) { if (StaxXmlParserSync.isWhitespace(code)) break; } else if (code === 62 || code === 47) break; nameEnd++; } const tagName = xml.slice(tagStart, nameEnd); const currentNamespaces = /* @__PURE__ */ new Map(); if (this.namespaceStack.length > 0) { const parentNamespaces = this.namespaceStack[this.namespaceStack.length - 1]; for (const [prefix$1, uri$1] of parentNamespaces) currentNamespaces.set(prefix$1, uri$1); } const { attributes, attributesWithPrefix } = this.parseAttributesFast(nameEnd, actualEnd, currentNamespaces); const colonIndex = tagName.indexOf(":"); let localName, prefix, uri; if (colonIndex === -1) { localName = tagName; prefix = void 0; uri = currentNamespaces.get(""); } else { prefix = tagName.slice(0, colonIndex); localName = tagName.slice(colonIndex + 1); uri = currentNamespaces.get(prefix); } yield { type: XmlEventType.START_ELEMENT, name: tagName, localName, prefix, uri, attributes, attributesWithPrefix, value: void 0, error: void 0 }; this.elementStack.push(tagName); if (!isSelfClosing) this.namespaceStack.push(currentNamespaces); else { yield { type: XmlEventType.END_ELEMENT, name: tagName, localName, prefix, uri, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; this.elementStack.pop(); } this.pos = tagEnd + 1; } parseAttributesFast(start, end, namespaces) { if (start >= end) return { attributes: {}, attributesWithPrefix: {} }; const attributes = {}; const attributesWithPrefix = {}; let i = start; const xml = this.xml; while (i < end) { while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++; if (i >= end) break; const nameStart = i; while (i < end) { const code = xml.charCodeAt(i); if (code === 61 || StaxXmlParserSync.isWhitespace(code)) break; i++; } if (i === nameStart) break; const attrName = xml.slice(nameStart, i); while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++; if (i >= end || xml.charCodeAt(i) !== 61) { attributes[attrName] = "true"; const colonIndex$1 = attrName.indexOf(":"); let localName$1, prefix$1, uri$1; if (colonIndex$1 === -1) { localName$1 = attrName; prefix$1 = void 0; uri$1 = void 0; } else { prefix$1 = attrName.slice(0, colonIndex$1); localName$1 = attrName.slice(colonIndex$1 + 1); uri$1 = namespaces.get(prefix$1); } attributesWithPrefix[attrName] = { value: "true", localName: localName$1, prefix: prefix$1, uri: uri$1 }; continue; } i++; while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++; if (i >= end) break; const quote = xml.charCodeAt(i); if (quote !== 34 && quote !== 39) break; i++; const valueStart = i; while (i < end && xml.charCodeAt(i) !== quote) i++; const rawValue = xml.slice(valueStart, i); const attrValue = this.entityDecoder(rawValue); attributes[attrName] = attrValue; if (attrName === "xmlns") namespaces.set("", attrValue); else if (attrName.startsWith("xmlns:")) namespaces.set(attrName.slice(6), attrValue); const colonIndex = attrName.indexOf(":"); let localName, prefix, uri; if (colonIndex === -1) { localName = attrName; prefix = void 0; uri = void 0; } else { prefix = attrName.slice(0, colonIndex); localName = attrName.slice(colonIndex + 1); uri = namespaces.get(prefix); } if (attrName.startsWith("xmlns")) if (attrName === "xmlns") { localName = "xmlns"; prefix = void 0; } else { localName = attrName.slice(6); prefix = "xmlns"; } attributesWithPrefix[attrName] = { value: attrValue, localName, prefix, uri }; i++; } return { attributes, attributesWithPrefix }; } findTagEnd(start) { let i = start; let inQuote = false; let quoteChar = 0; while (i < this.xmlLength) { const code = this.xml.charCodeAt(i); if (code === 34 || code === 39) { if (!inQuote) { inQuote = true; quoteChar = code; } else if (code === quoteChar) { inQuote = false; quoteChar = 0; } } else if (code === 62 && !inQuote) return i; i++; } return -1; } findSequence(sequence, start) { const seqLen = sequence.length; const maxPos = this.xmlLength - seqLen; for (let i = start; i <= maxPos; i++) { let match = true; for (let j = 0; j < seqLen; j++) if (this.xml.charCodeAt(i + j) !== sequence.charCodeAt(j)) { match = false; break; } if (match) return i; } return -1; } }; //#endregion //#region src/StaxXmlWriter.ts const WriterState$1 = { INITIAL: 0, START_ELEMENT_OPEN: 1, IN_ELEMENT: 2, AFTER_ELEMENT: 3, CLOSED: 4, ERROR: 5 }; /** * High-performance asynchronous XML writer implementing the StAX (Streaming API for XML) pattern. * * This writer provides efficient streaming XML generation using WritableStream for handling * large XML documents with automatic buffering, backpressure management, and namespace support. * * @remarks * The writer supports streaming output with configurable buffering, automatic entity encoding, * pretty printing with customizable indentation, and comprehensive namespace handling. * * @example * Basic usage: * ```typescript * const writableStream = new WritableStream({ * write(chunk) { * console.log(new TextDecoder().decode(chunk)); * } * }); * * const writer = new StaxXmlWriter(writableStream); * await writer.writeStartElement('root'); * await writer.writeElement('item', { id: '1' }, 'Hello World'); * await writer.writeEndElement(); * await writer.close(); * ``` * * @example * With pretty printing: * ```typescript * const options = { * prettyPrint: true, * indentString: ' ', * autoEncodeEntities: true * }; * const writer = new StaxXmlWriter(writableStream, options); * ``` * * @public */ var StaxXmlWriter = class { writer; encoder; buffer; bufferPosition = 0; state = WriterState$1.INITIAL; elementStack = []; hasTextContentStack = []; namespaceStack = []; options; currentIndentLevel = 0; needsIndent = false; entityMap = {}; metrics = { totalBytesWritten: 0, flushCount: 0, lastFlushTime: 0 }; constructor(stream, options = {}) { this.options = { encoding: options.encoding || "utf-8", prettyPrint: options.prettyPrint ?? false, indentString: options.indentString || " ", addEntities: options.addEntities ?? [], autoEncodeEntities: options.autoEncodeEntities ?? true, namespaces: options.namespaces ?? [], bufferSize: options.bufferSize ?? 16 * 1024, highWaterMark: options.highWaterMark ?? 64 * 1024, flushThreshold: options.flushThreshold ?? .8, enableAutoFlush: options.enableAutoFlush ?? true }; if (this.options.flushThreshold <= 1) this.options.flushThreshold = Math.floor(this.options.bufferSize * this.options.flushThreshold); this.writer = stream.getWriter(); this.encoder = new TextEncoder(); this.buffer = new Uint8Array(this.options.bufferSize); this.namespaceStack = [/* @__PURE__ */ new Map()]; this._initializeEntityMap(); } _initializeEntityMap() { if (this.options.addEntities) { for (const entity of this.options.addEntities) if (entity.entity && entity.value) this.entityMap[entity.entity] = entity.value; } } /** * Write data to buffer (with automatic flush) */ async _writeToBuffer(text) { const bytes = this.encoder.encode(text); if (bytes.length > this.options.bufferSize) { await this._flushBuffer(); await this.writer.write(bytes); this.metrics.totalBytesWritten += bytes.length; return; } if (this.bufferPosition + bytes.length > this.options.bufferSize) await this._flushBuffer(); this.buffer.set(bytes, this.bufferPosition); this.bufferPosition += bytes.length; if (this.options.enableAutoFlush && this.bufferPosition >= this.options.flushThreshold) await this._flushBuffer(); } /** * Buffer flush */ async _flushBuffer() { if (this.bufferPosition === 0) return; const chunk = this.buffer.slice(0, this.bufferPosition); await this.writer.write(chunk); this.metrics.totalBytesWritten += this.bufferPosition; this.metrics.flushCount++; this.metrics.lastFlushTime = Date.now(); this.bufferPosition = 0; } /** * Write XML declaration */ async writeStartDocument(version = "1.0", encoding) { if (this.state !== WriterState$1.INITIAL) throw new Error("writeStartDocument can only be called once at the beginning"); this.state = WriterState$1.AFTER_ELEMENT; const actualEncoding = encoding || this.options.encoding || "UTF-8"; const declaration = `<?xml version="${version}" encoding="${actualEncoding.toUpperCase()}"?>`; await this._writeToBuffer(declaration); if (this.options.prettyPrint) this.needsIndent = true; return this; } /** * End document (automatically close all elements) */ async writeEndDocument() { if (this.state === WriterState$1.CLOSED || this.state === WriterState$1.ERROR) return; while (this.elementStack.length > 0) await this.writeEndElement(); await this._flushBuffer(); await this.writer.close(); this.state = WriterState$1.CLOSED; } /** * Write start element */ async writeStartElement(localName, options) { if (this.state === WriterState$1.CLOSED || this.state === WriterState$1.ERROR) throw new Error("Cannot writeStartElement: Writer is closed or in error state"); await this._closeStartElementTag(); const prefix = options?.prefix; const uri = options?.uri; const attributes = options?.attributes; const selfClosing = options?.selfClosing ?? false; if (this.options.prettyPrint && this.needsIndent) await this._writeIndent(); const tagName = prefix ? `${prefix}:${localName}` : localName; await this._writeToBuffer(`<${tagName}`); const currentNamespaces = new Map(this.namespaceStack[this.namespaceStack.length - 1]); if (prefix && uri) { await this._writeToBuffer(` xmlns:${prefix}="${this._escapeXml(uri)}"`); currentNamespaces.set(prefix, uri); } if (attributes) for (const [key, value] of Object.entries(attributes)) if (typeof value === "string") await this._writeToBuffer(` ${key}="${this._escapeXml(value)}"`); else { const attrPrefix = value.prefix; const attrValue = value.value; if (attrPrefix) { if (!currentNamespaces.has(attrPrefix)) throw new Error(`Namespace prefix '${attrPrefix}' is not defined`); await this._writeToBuffer(` ${attrPrefix}:${key}="${this._escapeXml(attrValue)}"`); } else await this._writeToBuffer(` ${key}="${this._escapeXml(attrValue)}"`); } if (selfClosing) { await this._writeToBuffer("/>"); this.state = WriterState$1.AFTER_ELEMENT; if (this.options.prettyPrint) await this._writeNewline(); return this; } this.elementStack.push({ localName, prefix }); this.hasTextContentStack.push(false); this.namespaceStack.push(currentNamespaces); this.state = WriterState$1.START_ELEMENT_OPEN; this.currentIndentLevel++; return this; } /** * Write end element */ async writeEndElement() { if (this.elementStack.length === 0) throw new Error("No open element to close"); this.currentIndentLevel--; if (!(this.hasTextContentStack.pop() || false) && this.state !== WriterState$1.START_ELEMENT_OPEN) await this._writeIndent(); await this._closeStartElementTag(); const elementInfo = this.elementStack.pop(); this.namespaceStack.pop(); const closingTagName = elementInfo.prefix ? `${elementInfo.prefix}:${elementInfo.localName}` : elementInfo.localName; await this._writeToBuffer(`</${closingTagName}>`); this.state = WriterState$1.AFTER_ELEMENT; if (this.options.prettyPrint) this.needsIndent = true; return this; } /** * Write text */ async writeCharacters(text) { if (this.state === WriterState$1.CLOSED || this.state === WriterState$1.ERROR) throw new Error("Cannot writeCharacters: Writer is closed or in error state"); await this._closeStartElementTag(); await this._writeToBuffer(this._escapeXml(text)); this.state = WriterState$1.IN_ELEMENT; if (this.hasTextContentStack.length > 0) this.hasTextContentStack[this.hasTextContentStack.length - 1] = true; this.needsIndent = false; return this; } /** * Write CDATA section */ async writeCData(cdata) { if (cdata.includes("]]>")) throw new Error("CDATA section cannot contain \"]]>\" sequence"); await this._closeStartElementTag(); await this._writeToBuffer(`<![CDATA[${cdata}]]>`); this.state = WriterState$1.IN_ELEMENT; if (this.hasTextContentStack.length > 0) this.hasTextContentStack[this.hasTextContentStack.length - 1] = true; return this; } /** * Write comment */ async writeComment(comment) { if (comment.includes("--")) throw new Error("XML comment cannot contain \"--\" sequence"); await this._closeStartElementTag(); await this._writeIndent(); await this._writeToBuffer(`<!-- ${comment} -->`); this.state = WriterState$1.AFTER_ELEMENT; if (this.options.prettyPrint) await this._writeNewline(); return this; } /** * Write raw XML content without escaping * @param xml Raw XML string to write * @returns this (chainable) */ async writeRaw(xml) { await this._closeStartElementTag(); await this._writeToBuffer(xml); return this; } /** * Manual flush */ async flush() { await this._flushBuffer(); } /** * Return metrics */ getMetrics() { return { ...this.metrics, bufferUtilization: this.bufferPosition / this.options.bufferSize, averageFlushSize: this.metrics.flushCount > 0 ? this.metrics.totalBytesWritten / this.metrics.flushCount : 0 }; } async _closeStartElementTag() { if (this.state === WriterState$1.START_ELEMENT_OPEN) { await this._writeToBuffer(">"); this.state = WriterState$1.IN_ELEMENT; if (this.options.prettyPrint) this.needsIndent = true; } } async _writeIndent() { if (this.options.prettyPrint && this.needsIndent) { const indent = "\n" + this.options.indentString.repeat(this.currentIndentLevel); await this._writeToBuffer(indent); this.needsIndent = false; } } async _writeNewline() { if (this.options.prettyPrint) { await this._writeToBuffer("\n"); this.needsIndent = true; } } _escapeXml(text) { if (!text) return ""; if (!this.options.autoEncodeEntities) return text; let entityMap = { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&apos;", ...this.options.addEntities?.reduce((map, entity) => { if (entity.entity && entity.value) map[entity.entity] = entity.value; return map; }, {}) }; const regex = new RegExp(Object.keys(entityMap).join("|"), "g"); return text.replace(regex, (match) => { if (entityMap[match]) return entityMap[match]; else return match; }); } }; //#endregion //#region src/StaxXmlWriterSync.ts /** * States that occur during XML document writing *