UNPKG

stax-xml

Version:

High-performance, pull-based XML parser for JavaScript/TypeScript with declarative converter API

1,745 lines (1,736 loc) 147 kB
//#region src/converter/errors.ts
/**
 * XML parse error with detailed issue information
 *
 * @public
 */
var XmlParseError = class extends Error {
  /**
   * List of validation issues
   */
  issues;
  /**
   * @param issues - Validation issues; their `message` fields are joined into the error message
   * @param options - Optional standard `Error` options (for example `{ cause }`)
   */
  constructor(issues, options) {
    super(`XML Parse Error: ${issues.map((i) => i.message).join(", ")}`, options);
    this.name = "XmlParseError";
    this.issues = issues;
  }
};

//#endregion
//#region src/converter/base.ts
/**
 * Base abstract class for all XML schema types
 *
 * @remarks
 * This class provides the foundation for zod-style declarative XML parsing.
 * Each schema type extends this class and implements the parsing logic
 * (`_parse`, `_parseAsync`, `_write`, `_writeSync`).
 *
 * @public
 */
var XmlSchemaBase = class XmlSchemaBase {
  _output;
  _input;
  /**
   * Writer configuration for this schema
   * @internal
   */
  writeConfig;

  /**
   * Normalize anything thrown during parsing into an XmlParseError.
   * An existing XmlParseError is returned untouched; any other value is wrapped
   * in a single "parse_error" issue and kept reachable through `cause`.
   */
  static #toParseError(error) {
    if (error instanceof XmlParseError) return error;
    const message = error instanceof Error ? error.message : String(error);
    return new XmlParseError([{ path: [], message, code: "parse_error" }], { cause: error });
  }

  /**
   * Parse XML asynchronously (public API)
   * @param input - XML string, stream, or async iterator
   * @param options - Parse options
   * @returns Parsed output
   * @throws {XmlParseError} If parsing fails
   */
  async parse(input, options) {
    return this._parseAsync(input, options);
  }

  /**
   * Parse XML synchronously (public API)
   * @param input - XML string or sync iterator
   * @param options - Parse options
   * @returns Parsed output
   * @throws {XmlParseError} If parsing fails
   */
  parseSync(input, options) {
    return this._parse(input, options);
  }

  /**
   * Parse XML asynchronously with error handling
   * @param input - XML string, stream, or async iterator
   * @param options - Parse options
   * @returns `{ success: true, data }` or `{ success: false, error }`; never rejects
   */
  async safeParse(input, options) {
    try {
      return { success: true, data: await this._parseAsync(input, options) };
    } catch (error) {
      return { success: false, error: XmlSchemaBase.#toParseError(error) };
    }
  }

  /**
   * Parse XML synchronously with error handling
   * @param input - XML string or sync iterator
   * @param options - Parse options
   * @returns `{ success: true, data }` or `{ success: false, error }`; never throws
   */
  safeParseSync(input, options) {
    try {
      return { success: true, data: this._parse(input, options) };
    } catch (error) {
      return { success: false, error: XmlSchemaBase.#toParseError(error) };
    }
  }

  /**
   * Transform the parsed output
   * @param fn - Transform function
   * @returns New schema with transform applied
   */
  transform(fn) {
    return XmlSchemaBase._createTransform(this, fn);
  }

  /**
   * Make this schema optional
   * @returns New optional schema
   */
  optional() {
    return XmlSchemaBase._createOptional(this);
  }

  /**
   * Convert this schema to an array schema
   * @param xpath - XPath expression for array elements
   * @returns New array schema
   */
  array(xpath) {
    return XmlSchemaBase._createArray(this, xpath);
  }

  /**
   * Write data to XML string asynchronously (public API)
   * @param data - Data to write
   * @param options - Write options (`encoding` selects the decoder, default "utf-8")
   * @returns XML string
   */
  async write(data, options) {
    const chunks = [];
    const stream = new WritableStream({
      write(chunk) {
        chunks.push(chunk);
      }
    });
    await this._write(data, stream, options);
    // Decode in streaming mode so a multi-byte character split across two
    // chunks is reassembled; the final decode() flushes any buffered bytes.
    const decoder = new TextDecoder(options?.encoding || "utf-8");
    return chunks.map((chunk) => decoder.decode(chunk, { stream: true })).join("") + decoder.decode();
  }

  /**
   * Write data to WritableStream asynchronously (public API)
   * @param data - Data to write
   * @param stream - Writable stream to write to
   * @param options - Write options
   */
  async writeToStream(data, stream, options) {
    return this._write(data, stream, options);
  }

  /**
   * Write data to XML string synchronously (public API)
   * @param data - Data to write
   * @param options - Write options
   * @returns XML string
   */
  writeSync(data, options) {
    return this._writeSync(data, options);
  }

  /**
   * Configure writer settings for this schema
   * @param config - Writer configuration
   * @returns This schema with writer config
   */
  writer(config) {
    this.writeConfig = config;
    return this;
  }

  // Factory hooks: declared here without a value and invoked by
  // transform()/optional()/array().
  static _createTransform;
  static _createOptional;
  static _createArray;
};

//#endregion
//#region src/converter/types.ts
/**
 * Schema type constants for XML schema classification
 *
 * @public
 */
const SchemaType = {
  STRING: "STRING",
  NUMBER: "NUMBER",
  ARRAY: "ARRAY",
  OBJECT: "OBJECT",
  TRANSFORM: "TRANSFORM",
  OPTIONAL: "OPTIONAL"
};
/**
 * Type guard for string schema
 *
 * @public
 */
function isStringSchema(schema) {
  return schema.schemaType === SchemaType.STRING;
}
/**
 * Type guard for number schema
 *
 * @public
 */
function isNumberSchema(schema) {
  return schema.schemaType === SchemaType.NUMBER;
}
/**
 * Type guard for array schema
 *
 * @public
 */
function isArraySchema(schema) {
  return schema.schemaType === SchemaType.ARRAY;
}
/**
 * Type guard for object schema
 *
 * @public
 */
function isObjectSchema(schema) {
  return schema.schemaType === SchemaType.OBJECT;
}
/**
 * Type guard for transform schema
 *
 * @public
 */
function isTransformSchema(schema) {
  return schema.schemaType === SchemaType.TRANSFORM;
}
/**
 * Type guard for optional schema
 *
 * @public
 */
function isOptionalSchema(schema) {
  return schema.schemaType === SchemaType.OPTIONAL;
}

//#endregion
//#region src/converter/XmlTransformSchema.ts
/**
 * Schema for transforming parsed values
 *
 * @public
 */
var XmlTransformSchema = class extends XmlSchemaBase {
  schemaType = SchemaType.TRANSFORM;
  /** @internal */
  schema;
  /** @internal */
  transformFn;
  /**
   * @param schema - Base schema whose parsed output is transformed
   * @param transformFn - Function applied to the base schema's output
   */
  constructor(schema, transformFn) {
    super();
    this.schema = schema;
    this.transformFn = transformFn;
  }
  _parse(input, options) {
    const result = this.schema._parse(input, options);
    return this.transformFn(result);
  }
  async _parseAsync(input, options) {
    const result = await this.schema._parseAsync(input, options);
    return this.transformFn(result);
  }
  /**
   * Parse from current iterator position and apply transform
   * @throws {Error} If the base schema has no `_parseFromPosition`
   * @internal
   */
  _parseFromPosition(iterator, startEvent, startDepth, options) {
    if (this.schema._parseFromPosition) {
      const result = this.schema._parseFromPosition(iterator, startEvent, startDepth, options);
      // The base schema may answer synchronously or with a thenable; apply the transform either way.
      if (result && typeof result.then === "function") return result.then((r) => this.transformFn(r));
      return this.transformFn(result);
    }
    throw new Error("Transform schema requires base schema with _parseFromPosition");
  }
  /**
   * Parse a text value with the base schema and apply the transform
   * @throws {Error} If the base schema has no `_parseText`
   */
  _parseText(text) {
    if (this.schema._parseText) {
      const result = this.schema._parseText(text);
      return this.transformFn(result);
    }
    throw new Error("Transform schema requires base schema with _parseText");
  }
  /**
   * Write transformed data to XML synchronously
   * Note: Transform is not reversible, so writing is not supported
   * @internal
   */
  _writeSync(data, options) {
    throw new Error("Transform schema does not support writing. Use the base schema for writing.");
  }
  /**
   * Write transformed data to WritableStream asynchronously
   * Note: Transform is not reversible, so writing is not supported
   * @internal
   */
  async _write(data, stream, options) {
    throw new Error("Transform schema does not support writing. Use the base schema for writing.");
  }
};

//#endregion
//#region src/converter/XmlOptionalSchema.ts
/**
 * Schema for optional values
 *
 * Any failure of the wrapped schema, as well as an empty-string result,
 * is reported as `undefined`.
 *
 * @public
 */
var XmlOptionalSchema = class extends XmlSchemaBase {
  schemaType = SchemaType.OPTIONAL;
  /** @internal */
  schema;
  /**
   * @param schema - Schema whose value may be absent
   */
  constructor(schema) {
    super();
    this.schema = schema;
  }
  _parse(input, options) {
    try {
      const result = this.schema._parse(input, options);
      if (result === "") return;
      return result;
    } catch {
      // Deliberate best effort: a value that cannot be parsed counts as absent.
      return;
    }
  }
  async _parseAsync(input, options) {
    try {
      const result = await this.schema._parseAsync(input, options);
      if (result === "") return;
      return result;
    } catch {
      // Deliberate best effort: a value that cannot be parsed counts as absent.
      return;
    }
  }
  _parseText(text) {
    if (this.schema._parseText) try {
      const result = this.schema._parseText(text);
      if (result === "") return;
      return result;
    } catch {
      return;
    }
  }
  /**
   * Write optional data to XML synchronously
   * @returns "" when data is null/undefined, otherwise the wrapped schema's output
   * @internal
   */
  _writeSync(data, options) {
    if (data === void 0 || data === null) return "";
    return this.schema._writeSync(data, options);
  }
  /**
   * Write optional data to WritableStream asynchronously (no-op for null/undefined)
   * @internal
   */
  async _write(data, stream, options) {
    if (data === void 0 || data === null) return;
    return this.schema._write(data, stream, options);
  }
};

//#endregion
//#region src/types.ts
/**
 * Enumeration of XML stream event types used by the StAX parser
 *
 * @public
 */
const XmlEventType = {
  START_DOCUMENT: "START_DOCUMENT",
  END_DOCUMENT: "END_DOCUMENT",
  START_ELEMENT: "START_ELEMENT",
  END_ELEMENT: "END_ELEMENT",
  CHARACTERS: "CHARACTERS",
  CDATA: "CDATA",
  ERROR: "ERROR"
};
/**
 * Type guard function - Check if the event is a START_ELEMENT event
 * @param event XML event to check
 * @returns true if the event is a START_ELEMENT event, false otherwise
 */
function isStartElement(event) {
  return event.type === XmlEventType.START_ELEMENT;
}
/**
 * Type guard function - Check if the event is an END_ELEMENT event
 * @param event XML event to check
 * @returns true if the event is an END_ELEMENT event, false otherwise
 */
function isEndElement(event) {
  return event.type === XmlEventType.END_ELEMENT;
}
/**
 * Type guard function - Check if the event is a CHARACTERS event
 * @param event XML event to check
 * @returns true if the event is a CHARACTERS event, false otherwise
 */
function isCharacters(event) {
  return event.type === XmlEventType.CHARACTERS;
}
/**
 * Type guard function - Check if the event is a CDATA event
 * @param event XML event to check
 * @returns true if the event is a CDATA event, false otherwise
 */
function isCdata(event) {
  return event.type === XmlEventType.CDATA;
}

//#endregion
//#region src/StaxXmlParser.ts
/**
 * High-performance asynchronous XML parser implementing the StAX (Streaming API for XML) pattern.
 *
 * The parser consumes a web `ReadableStream` of `Uint8Array` chunks, accumulates
 * them in a growable byte buffer and turns them into a queue of events that is
 * drained through the async-iterator protocol (`next()` / `for await`).
 *
 * @remarks
 * Delimiters are located on raw bytes (Boyer-Moore-Horspool for multi-byte
 * patterns, an unrolled scan for single bytes); text is decoded only for the
 * ranges that are actually emitted. Whitespace-only text is not reported.
 * Text on both sides of a comment or processing instruction is merged into a
 * single CHARACTERS event.
 *
 * @example
 * ```typescript
 * const xmlContent = '<root><item>Hello</item></root>';
 * const stream = new ReadableStream({
 *   start(controller) {
 *     controller.enqueue(new TextEncoder().encode(xmlContent));
 *     controller.close();
 *   }
 * });
 *
 * const parser = new StaxXmlParser(stream, { autoDecodeEntities: true, maxBufferSize: 128 * 1024 });
 * for await (const event of parser) {
 *   console.log(event.type, event);
 * }
 * ```
 *
 * @public
 */
var StaxXmlParser = class StaxXmlParser {
  reader = null;
  decoder;
  buffer;
  bufferLength = 0;
  position = 0;
  eventQueue = [];
  resolveNext = null;
  error = null;
  isStreamEnded = false;
  parserFinished = false;
  currentTextBuffer = "";
  elementStack = [];
  namespaceStack = [];
  options;
  // Byte classification: 1 whitespace, 2 '<', 3 '>', 4 '/', 5 '=', 6 '!',
  // 7 '?', 8 '"', 9 "'", 10 '&', 11 '[', 12 ']'; 0 for everything else.
  static ASCII_TABLE = (() => {
    const table = new Uint8Array(128);
    table[9] = 1;
    table[10] = 1;
    table[13] = 1;
    table[32] = 1;
    table[60] = 2;
    table[62] = 3;
    table[47] = 4;
    table[61] = 5;
    table[33] = 6;
    table[63] = 7;
    table[34] = 8;
    table[39] = 9;
    table[38] = 10;
    table[91] = 11;
    table[93] = 12;
    return table;
  })();
  static ENTITY_REGEX_CACHE = /* @__PURE__ */ new Map();
  static DEFAULT_ENTITY_REGEX = /&(lt|gt|quot|apos|amp);/g;
  static DEFAULT_ENTITY_MAP = {
    "lt": "<",
    "gt": ">",
    "quot": "\"",
    "apos": "'",
    "amp": "&"
  };
  // One shared encoder for delimiter patterns instead of a fresh one per lookup.
  static #ENCODER = new TextEncoder();
  entityDecoder;
  bmhCache = /* @__PURE__ */ new Map();
  batchMetrics = {
    avgEventSize: 100,
    lastBatchTime: 0,
    eventCount: 0
  };

  /**
   * Build an event object with the fixed key set every event carries.
   * @param type - One of the XmlEventType values
   * @param fields - Values for the keys that are not undefined for this event
   */
  static #createEvent(type, fields = {}) {
    return {
      type,
      name: void 0,
      localName: void 0,
      prefix: void 0,
      uri: void 0,
      attributes: void 0,
      attributesWithPrefix: void 0,
      value: void 0,
      error: void 0,
      ...fields
    };
  }

  /**
   * Creates a new StaxXmlParser instance and immediately starts reading the stream.
   *
   * @param xmlStream - The ReadableStream containing XML data as Uint8Array chunks
   * @param options - Configuration options for the parser (encoding, autoDecodeEntities,
   *   maxBufferSize, enableBufferCompaction, batchSize, batchTimeout, addEntities)
   * @throws {Error} When xmlStream is not a valid ReadableStream
   */
  constructor(xmlStream, options = {}) {
    if (!(xmlStream instanceof ReadableStream)) throw new Error("xmlStream must be a web standard ReadableStream.");
    this.options = {
      encoding: "utf-8",
      autoDecodeEntities: true,
      maxBufferSize: 64 * 1024,
      enableBufferCompaction: true,
      batchSize: 10,
      batchTimeout: 10,
      ...options
    };
    this.decoder = new TextDecoder(this.options.encoding, {
      fatal: false,
      ignoreBOM: true
    });
    this.buffer = new Uint8Array(this.options.maxBufferSize || 64 * 1024);
    this.entityDecoder = this._compileEntityDecoder();
    this.reader = xmlStream.getReader();
    // Not awaited on purpose: _startReading handles its own failures and its
    // first await yields, so START_DOCUMENT below is queued before any data.
    this._startReading();
    this._addEvent(StaxXmlParser.#createEvent(XmlEventType.START_DOCUMENT));
  }

  /**
   * Fast XML special character check
   * @returns The ASCII_TABLE class of the byte, 0 for non-ASCII or unclassified bytes
   */
  getXmlCharType(byte) {
    return byte < 128 ? StaxXmlParser.ASCII_TABLE[byte] : 0;
  }

  /**
   * Check if UTF-8 byte is the start of a character
   * @param byte The byte to check
   * @returns true if it's the start of a character
   */
  isUtf8CharStart(byte) {
    return (byte & 128) === 0 || (byte & 192) === 192;
  }

  /**
   * Calculate UTF-8 sequence length
   * @param byte The first byte
   * @returns Sequence length (1-4); 1 for bytes that are not a valid lead byte
   */
  getUtf8SequenceLength(byte) {
    if ((byte & 128) === 0) return 1;
    if ((byte & 224) === 192) return 2;
    if ((byte & 240) === 224) return 3;
    if ((byte & 248) === 240) return 4;
    return 1;
  }

  /**
   * Safely adjust position at UTF-8 character boundaries
   * @param pos The position to adjust
   * @param searchBackward Whether to search backwards
   * @returns Safe UTF-8 boundary position
   */
  findSafeUtf8Boundary(pos, searchBackward = true) {
    if (pos <= 0 || pos >= this.bufferLength) return pos;
    if (!searchBackward) {
      while (pos < this.bufferLength && !this.isUtf8CharStart(this.buffer[pos])) pos++;
      return pos;
    }
    let safePos = pos;
    // A UTF-8 sequence is at most 4 bytes, so a lead byte is at most 3 steps back.
    for (let backtrack = 0; safePos > 0 && backtrack < 4; backtrack++) {
      if (this.isUtf8CharStart(this.buffer[safePos])) {
        const seqLen = this.getUtf8SequenceLength(this.buffer[safePos]);
        // Only move back when pos would cut this sequence in half.
        return safePos + seqLen > pos ? safePos : pos;
      }
      safePos--;
    }
    return pos;
  }

  /**
   * Safely extract UTF-8 string from buffer
   * @param start Starting position
   * @param end Ending position
   * @returns Decoded string ("" when the adjusted range is empty)
   */
  safeDecodeRange(start, end) {
    const safeStart = this.findSafeUtf8Boundary(start, false);
    const safeEnd = this.findSafeUtf8Boundary(end, true);
    if (safeStart >= safeEnd) return "";
    return this.decoder.decode(this.buffer.subarray(safeStart, safeEnd), { stream: false });
  }

  /**
   * Build Boyer-Moore-Horspool bad character table
   */
  _buildBMHTable(pattern) {
    const table = new Uint8Array(256);
    const patternLength = pattern.length;
    table.fill(patternLength);
    for (let i = 0; i < patternLength - 1; i++) table[pattern[i]] = patternLength - 1 - i;
    return table;
  }

  /**
   * Pattern search using Boyer-Moore-Horspool algorithm
   * XML delimiters are all ASCII, so no UTF-8 boundary issues
   * @param pattern Delimiter to look for
   * @param startPos Byte offset to start at; defaults to the current position
   * @returns Offset of the first match inside the valid buffer region, or -1
   */
  _findPatternBMH(pattern, startPos) {
    const patternBytes = StaxXmlParser.#ENCODER.encode(pattern);
    const patternLength = patternBytes.length;
    if (patternLength === 0) return -1;
    if (patternLength === 1) return this._findSingleByte(patternBytes[0], startPos);
    let skipTable = this.bmhCache.get(pattern);
    if (!skipTable) {
      skipTable = this._buildBMHTable(patternBytes);
      if (this.bmhCache.size > 20) this.bmhCache.clear();
      this.bmhCache.set(pattern, skipTable);
    }
    // `??` rather than `||` so an explicit offset of 0 is honoured.
    const start = startPos ?? this.position;
    const bufferEnd = this.bufferLength - patternLength;
    let pos = start;
    while (pos <= bufferEnd) {
      let i = patternLength - 1;
      while (i >= 0 && this.buffer[pos + i] === patternBytes[i]) i--;
      if (i < 0) return pos;
      pos += skipTable[this.buffer[pos + patternLength - 1]];
    }
    return -1;
  }

  /**
   * Single byte search (optimized, unrolled four bytes at a time)
   * @param byte Byte value to look for
   * @param startPos Byte offset to start at; defaults to the current position
   * @returns Offset of the first match inside the valid buffer region, or -1
   */
  _findSingleByte(byte, startPos) {
    const start = startPos ?? this.position;
    const buffer = this.buffer;
    const end = this.bufferLength;
    const end4 = end - 3;
    let i = start;
    for (; i < end4; i += 4) {
      if (buffer[i] === byte) return i;
      if (buffer[i + 1] === byte) return i + 1;
      if (buffer[i + 2] === byte) return i + 2;
      if (buffer[i + 3] === byte) return i + 3;
    }
    for (; i < end; i++) if (buffer[i] === byte) return i;
    return -1;
  }

  /**
   * Build the function used to expand entity references in text and attribute values.
   * Returns the identity function when `autoDecodeEntities` is off. Custom
   * entities from `options.addEntities` extend the five predefined ones.
   */
  _compileEntityDecoder() {
    if (!this.options.autoDecodeEntities) return (text) => text;
    if (this.options.addEntities && this.options.addEntities.length > 0) {
      const entityMap = { ...StaxXmlParser.DEFAULT_ENTITY_MAP };
      const patterns = ["lt", "gt", "quot", "apos"];
      for (const { entity, value } of this.options.addEntities) if (entity && value) {
        // Accept both "name" and "&name;" spellings.
        const key = entity.startsWith("&") && entity.endsWith(";") ? entity.slice(1, -1) : entity;
        entityMap[key] = value;
        patterns.push(key);
      }
      patterns.push("amp");
      const cacheKey = patterns.join(",");
      let regex = StaxXmlParser.ENTITY_REGEX_CACHE.get(cacheKey);
      if (!regex) {
        // Longest names first so a short name never shadows a longer one in the alternation.
        const pattern = patterns.sort((a, b) => b.length - a.length).map((e) => e.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|");
        regex = new RegExp(`&(${pattern});`, "g");
        StaxXmlParser.ENTITY_REGEX_CACHE.set(cacheKey, regex);
      }
      return (text) => {
        if (!text || text.indexOf("&") === -1) return text;
        regex.lastIndex = 0;
        return text.replace(regex, (_, entity) => entityMap[entity] || _);
      };
    }
    return (text) => {
      if (!text || text.indexOf("&") === -1) return text;
      StaxXmlParser.DEFAULT_ENTITY_REGEX.lastIndex = 0;
      return text.replace(StaxXmlParser.DEFAULT_ENTITY_REGEX, (_, entity) => StaxXmlParser.DEFAULT_ENTITY_MAP[entity] || _);
    };
  }

  /**
   * Heuristic batch size between 1 and `options.batchSize`, derived from the
   * amount of buffered data, the last queued event and the running metrics.
   */
  _calculateOptimalBatchSize() {
    const MIN_BATCH = 1;
    const MAX_BATCH = this.options.batchSize || 10;
    if (this.bufferLength < 1024) return MIN_BATCH;
    if (this.bufferLength > 10240) return MAX_BATCH;
    if (this.eventQueue.length > 0) {
      if (this.eventQueue[this.eventQueue.length - 1]?.type === XmlEventType.CHARACTERS) return MIN_BATCH;
    }
    if (this.batchMetrics.eventCount > 100) {
      const avgSize = this.batchMetrics.avgEventSize;
      if (avgSize > 1e3) return MIN_BATCH;
      if (avgSize < 100) return MAX_BATCH;
    }
    return Math.min(MAX_BATCH, Math.max(MIN_BATCH, Math.floor(this.bufferLength / 1024)));
  }

  /**
   * Collect up to `size` events (or a computed batch size), stopping early at
   * the end of the document or once `options.batchTimeout` ms have elapsed.
   * @returns Array of events, possibly empty
   */
  async nextBatch(size) {
    const batch = [];
    const targetSize = size || this._calculateOptimalBatchSize();
    const startTime = Date.now();
    const timeout = this.options.batchTimeout || 10;
    for (let i = 0; i < targetSize; i++) {
      if (Date.now() - startTime > timeout) break;
      const result = await this.next();
      if (result.done) break;
      batch.push(result.value);
    }
    return batch;
  }

  /**
   * Async generator yielding event batches until the parser is finished and drained.
   */
  async *batchedIterator(batchSize) {
    while (!this.parserFinished || this.eventQueue.length > 0) {
      const targetSize = batchSize || this._calculateOptimalBatchSize();
      const batch = await this.nextBatch(targetSize);
      if (batch.length === 0) break;
      yield batch;
    }
  }

  /**
   * Compact the buffer when enough consumed bytes have piled up in front of the read position.
   */
  _compactBufferIfNeeded() {
    if (!this.options.enableBufferCompaction) return;
    const maxSize = this.options.maxBufferSize || 64 * 1024;
    if (this.position > 8192 && this.bufferLength > 16384 || this.position > maxSize / 2 || this.bufferLength > maxSize && this.position > maxSize / 4) this._compactBuffer();
  }

  /**
   * Drop the already consumed prefix of the buffer and rebase `position`.
   */
  _compactBuffer() {
    if (this.position <= 0) return;
    if (this.position >= this.bufferLength) {
      // Everything has been consumed: rewind instead of letting the buffer
      // keep growing when chunk boundaries line up with tag boundaries.
      this.bufferLength = 0;
      this.position = 0;
      return;
    }
    const safePos = this.findSafeUtf8Boundary(this.position, true);
    const remainingLength = this.bufferLength - safePos;
    if (remainingLength < safePos) {
      const newBuffer = new Uint8Array(this.buffer.length);
      newBuffer.set(this.buffer.subarray(safePos, this.bufferLength));
      this.buffer = newBuffer;
    } else this.buffer.copyWithin(0, safePos, this.bufferLength);
    this.bufferLength = remainingLength;
    this.position = this.position - safePos;
    if (this.bmhCache.size > 20) this.bmhCache.clear();
  }

  /**
   * Pump the stream: append each chunk, parse what is complete, and finish the
   * document (or report an error) when the stream ends. Never rejects.
   */
  async _startReading() {
    try {
      while (true) {
        const { done, value } = await this.reader.read();
        if (done) {
          this.isStreamEnded = true;
          this._parseBuffer();
          if (!this.parserFinished && this.elementStack.length > 0) this._addError(/* @__PURE__ */ new Error("Unexpected end of document. Not all elements were closed."));
          if (!this.parserFinished) {
            this._flushCharacters();
            this._addEvent(StaxXmlParser.#createEvent(XmlEventType.END_DOCUMENT));
            this.parserFinished = true;
          }
          if (this.resolveNext && this.eventQueue.length === 0) {
            this.resolveNext({ value: void 0, done: true });
            this.resolveNext = null;
          }
          break;
        }
        this._appendToBuffer(value);
        this._parseBuffer();
        // A parse error released the reader; stop instead of calling read() on null.
        if (this.parserFinished) break;
        this._compactBufferIfNeeded();
        this._updateBatchMetrics(value.length);
      }
    } catch (err) {
      this._addError(err);
      if (this.resolveNext) {
        this.resolveNext({ value: void 0, done: true });
        this.resolveNext = null;
      }
    }
  }

  /**
   * Update the running metrics used by _calculateOptimalBatchSize.
   */
  _updateBatchMetrics(bytesProcessed) {
    const eventsDelta = this.eventQueue.length;
    if (eventsDelta > 0) {
      this.batchMetrics.eventCount += eventsDelta;
      this.batchMetrics.avgEventSize = this.batchMetrics.avgEventSize * .9 + bytesProcessed / eventsDelta * .1;
    }
    this.batchMetrics.lastBatchTime = Date.now();
  }

  /**
   * Consume as many complete constructs as the buffer currently holds.
   * Stops (without consuming) at the first construct that is still incomplete.
   */
  _parseBuffer() {
    while (this.position < this.bufferLength && !this.parserFinished) {
      const ltPos = this._findSingleByte(60, this.position);
      if (ltPos === -1) {
        if (this.isStreamEnded) {
          const remainingText = this._readBuffer();
          this.currentTextBuffer += remainingText;
          this._flushCharacters();
        }
        break;
      }
      if (ltPos > this.position) try {
        const textLength = ltPos - this.position;
        const text = this._readBuffer(textLength);
        this.currentTextBuffer += text;
      } catch (error) {
        if (!this.isStreamEnded) break;
        throw error;
      }
      this.position = ltPos;
      // The byte after '<' decides what follows. If it has not arrived yet,
      // wait: the slot past bufferLength holds stale data, not input.
      if (this.position + 1 >= this.bufferLength) break;
      const nextByte = this.buffer[this.position + 1];
      const charType = this.getXmlCharType(nextByte);
      if (charType === 4) {
        this._flushCharacters();
        if (!this._parseEndTag()) break;
      } else if (charType === 6) {
        if (this._matchesPattern("<!--")) {
          if (!this._parseComment()) break;
        } else if (this._matchesPattern("<![CDATA[")) {
          if (!this._parseCData()) break;
        } else {
          if (this.isStreamEnded) {
            this._addError(/* @__PURE__ */ new Error(`Malformed XML near position ${this.position}`));
            return;
          }
          break;
        }
      } else if (charType === 7) {
        if (this._matchesPattern("<?xml")) {
          if (!this._parseXmlDeclaration()) break;
        } else if (this._matchesPattern("<?")) {
          if (!this._parseProcessingInstruction()) break;
        }
      } else {
        this._flushCharacters();
        if (!this._parseStartTag()) break;
      }
      this._compactBufferIfNeeded();
    }
  }

  /**
   * Emit the accumulated text as one CHARACTERS event (entities decoded);
   * whitespace-only text is discarded.
   */
  _flushCharacters() {
    if (this.currentTextBuffer.length > 0) {
      const decodedText = this.entityDecoder(this.currentTextBuffer);
      if (decodedText.trim().length > 0) this._addEvent(StaxXmlParser.#createEvent(XmlEventType.CHARACTERS, { value: decodedText }));
      this.currentTextBuffer = "";
    }
  }

  _clearBuffers() {
    this.bufferLength = 0;
    this.position = 0;
    this.currentTextBuffer = "";
    this.bmhCache.clear();
  }

  /**
   * Queue an event and hand the oldest queued event to a consumer waiting in next().
   */
  _addEvent(event) {
    this.eventQueue.push(event);
    if (this.resolveNext) {
      this.resolveNext(this._popNextEvent());
      this.resolveNext = null;
    }
  }

  /**
   * Record the first error: queue an ERROR event, mark the parser finished,
   * drop buffered data and release the stream reader. Later errors are ignored.
   */
  _addError(err) {
    if (this.error === null) {
      this.error = err;
      this._addEvent(StaxXmlParser.#createEvent(XmlEventType.ERROR, { error: err }));
      this.parserFinished = true;
      this._clearBuffers();
      if (this.reader) {
        this.reader.releaseLock();
        this.reader = null;
      }
    }
  }

  /**
   * @returns The next iterator result, or null when nothing is available yet
   */
  _popNextEvent() {
    if (this.eventQueue.length > 0) return { value: this.eventQueue.shift(), done: false };
    if (this.parserFinished) return { value: void 0, done: true };
    return null;
  }

  /**
   * Async-iterator step.
   * @returns The next event, `{ done: true }` at the end, or a promise that
   *   settles when the next event is produced
   * @throws The recorded error once the parser has failed
   */
  async next() {
    if (this.error) throw this.error;
    const nextEvent = this._popNextEvent();
    if (nextEvent) return nextEvent;
    if (this.parserFinished) return { value: void 0, done: true };
    return new Promise((resolve) => {
      this.resolveNext = resolve;
    });
  }

  [Symbol.asyncIterator]() {
    return this;
  }

  /**
   * Append a chunk to the byte buffer, growing it (at least doubling) when needed.
   */
  _appendToBuffer(newData) {
    const requiredSize = this.bufferLength + newData.length;
    if (requiredSize > this.buffer.length) {
      const newSize = Math.max(this.buffer.length * 2, requiredSize);
      const newBuffer = new Uint8Array(newSize);
      newBuffer.set(this.buffer.subarray(0, this.bufferLength));
      this.buffer = newBuffer;
    }
    this.buffer.set(newData, this.bufferLength);
    this.bufferLength += newData.length;
  }

  /**
   * UTF-8 safe buffer reading
   * @param length Number of bytes to read; everything up to bufferLength when omitted
   * @returns Decoded text; `position` is advanced past the bytes that were read
   */
  _readBuffer(length) {
    const originalPos = this.position;
    let endPos = length ? Math.min(this.position + length, this.bufferLength) : this.bufferLength;
    if (length && endPos < this.bufferLength) endPos = this.findSafeUtf8Boundary(endPos, true);
    const slice = this.buffer.subarray(this.position, endPos);
    try {
      const result = this.decoder.decode(slice, { stream: !this.isStreamEnded });
      this.position = endPos;
      return result;
    } catch (error) {
      // Retry with a slightly shorter range in case the tail holds a partial character.
      if (!this.isStreamEnded && endPos === this.bufferLength) for (let i = 1; i <= 4 && endPos - i > this.position; i++) {
        const testEnd = this.findSafeUtf8Boundary(endPos - i, true);
        if (testEnd > this.position) try {
          const safeSlice = this.buffer.subarray(this.position, testEnd);
          const result = this.decoder.decode(safeSlice, { stream: true });
          this.position = testEnd;
          return result;
        } catch {
          continue;
        }
      }
      this.position = originalPos;
      throw error;
    }
  }

  /**
   * @returns true when the bytes at the current position spell `pattern`
   *   and all of them lie inside the valid buffer region
   */
  _matchesPattern(pattern) {
    const patternBytes = StaxXmlParser.#ENCODER.encode(pattern);
    if (this.position + patternBytes.length > this.bufferLength) return false;
    for (let i = 0; i < patternBytes.length; i++) if (this.buffer[this.position + i] !== patternBytes[i]) return false;
    return true;
  }

  /** Skip `<?xml ... ?>`; returns false while the terminator has not arrived. */
  _parseXmlDeclaration() {
    const endPos = this._findPatternBMH("?>");
    if (endPos === -1) return false;
    this.position = endPos + 2;
    return true;
  }

  /** Skip `<!-- ... -->`; returns false while the terminator has not arrived. */
  _parseComment() {
    const endPos = this._findPatternBMH("-->");
    if (endPos === -1) return false;
    this.position = endPos + 3;
    return true;
  }

  /**
   * UTF-8 safe CDATA parsing
   * @returns false while the closing `]]>` has not arrived, true once the section is consumed
   */
  _parseCData() {
    const startPos = this.position + 9;
    const endPos = this._findPatternBMH("]]>");
    if (endPos === -1) return false;
    // Text that precedes the section must be reported before it so events
    // stay in document order.
    this._flushCharacters();
    try {
      const safeStart = this.findSafeUtf8Boundary(startPos, false);
      const safeEnd = this.findSafeUtf8Boundary(endPos, true);
      const cdataContent = this.decoder.decode(this.buffer.subarray(safeStart, safeEnd), { stream: false });
      this._addEvent(StaxXmlParser.#createEvent(XmlEventType.CDATA, { value: cdataContent }));
      this.position = endPos + 3;
      return true;
    } catch (error) {
      if (!this.isStreamEnded) return false;
      throw error;
    }
  }

  /** Skip `<? ... ?>`; returns false while the terminator has not arrived. */
  _parseProcessingInstruction() {
    const endPos = this._findPatternBMH("?>");
    if (endPos === -1) return false;
    this.position = endPos + 2;
    return true;
  }

  /**
   * UTF-8 safe end tag parsing
   * @returns false while the tag is incomplete; true once it was consumed or an error was recorded
   */
  _parseEndTag() {
    const gtPos = this._findSingleByte(62, this.position);
    if (gtPos === -1) return false;
    try {
      const closeTagMatch = this.safeDecodeRange(this.position, gtPos + 1).match(/^<\/([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)\s*>$/);
      if (!closeTagMatch) {
        this._addError(/* @__PURE__ */ new Error("Malformed closing tag"));
        return true;
      }
      const tagName = closeTagMatch[1];
      const openTagName = this.elementStack[this.elementStack.length - 1];
      if (this.elementStack.length === 0 || openTagName !== tagName) {
        this._addError(/* @__PURE__ */ new Error(`Mismatched closing tag: </${tagName}>. Expected </${openTagName || "nothing"}>`));
        return true;
      }
      const currentNamespaces = this.namespaceStack.length > 0 ? this.namespaceStack[this.namespaceStack.length - 1] : /* @__PURE__ */ new Map();
      const { localName, prefix, uri } = this._parseQualifiedName(tagName, currentNamespaces);
      this.elementStack.pop();
      this.namespaceStack.pop();
      this._addEvent(StaxXmlParser.#createEvent(XmlEventType.END_ELEMENT, { name: tagName, localName, prefix, uri }));
      this.position = gtPos + 1;
      return true;
    } catch (error) {
      if (!this.isStreamEnded) return false;
      throw error;
    }
  }

  /**
   * UTF-8 safe start tag parsing
   * Emits START_ELEMENT (and END_ELEMENT right away for a self-closing tag).
   * @returns false while the tag is incomplete; true once it was consumed or an error was recorded
   */
  _parseStartTag() {
    const gtPos = this._findSingleByte(62, this.position);
    if (gtPos === -1) return false;
    try {
      const tagMatch = this.safeDecodeRange(this.position, gtPos + 1).match(/^<([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)(\s+[^>]*?)?\s*(\/?)>$/);
      if (!tagMatch) {
        this._addError(/* @__PURE__ */ new Error("Malformed start tag"));
        return true;
      }
      const tagName = tagMatch[1];
      const attributesString = tagMatch[2] || "";
      const isSelfClosing = tagMatch[3] === "/";
      // Start from a copy of the parent's bindings; xmlns attributes below extend it.
      const currentNamespaces = this.namespaceStack.length > 0 ? new Map(this.namespaceStack[this.namespaceStack.length - 1]) : /* @__PURE__ */ new Map();
      const attributes = {};
      const attributesWithPrefix = {};
      const attrRegex = /([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)(?:\s*=\s*"([^"]*)"|\s*=\s*'([^']*)')?/g;
      let attrMatch;
      while ((attrMatch = attrRegex.exec(attributesString)) !== null) {
        const attrName = attrMatch[1];
        // `??` keeps an explicitly empty value (a=""); only an attribute
        // written without any value falls back to "true".
        const attrValue = this.entityDecoder(attrMatch[2] ?? attrMatch[3] ?? "true");
        attributes[attrName] = attrValue;
        const attrNamespaceInfo = this._parseQualifiedName(attrName, currentNamespaces, true);
        attributesWithPrefix[attrNamespaceInfo.localName] = {
          value: attrValue,
          prefix: attrNamespaceInfo.prefix,
          uri: attrNamespaceInfo.uri
        };
        if (attrName === "xmlns") currentNamespaces.set("", attrValue);
        else if (attrName.startsWith("xmlns:")) currentNamespaces.set(attrName.substring(6), attrValue);
      }
      const { localName, prefix, uri } = this._parseQualifiedName(tagName, currentNamespaces);
      this._addEvent(StaxXmlParser.#createEvent(XmlEventType.START_ELEMENT, { name: tagName, localName, prefix, uri, attributes, attributesWithPrefix }));
      this.position = gtPos + 1;
      if (!isSelfClosing) {
        this.elementStack.push(tagName);
        this.namespaceStack.push(currentNamespaces);
      } else this._addEvent(StaxXmlParser.#createEvent(XmlEventType.END_ELEMENT, { name: tagName, localName, prefix, uri }));
      return true;
    } catch (error) {
      if (!this.isStreamEnded) return false;
      throw error;
    }
  }

  /**
   * Split a qualified name and resolve its namespace.
   * @param qname Name as written in the document (`prefix:local` or `local`)
   * @param namespaces Prefix-to-URI bindings in scope ("" holds the default namespace)
   * @param isAttribute Unprefixed attributes do not pick up the default namespace
   * @returns `{ localName, prefix, uri }`
   */
  _parseQualifiedName(qname, namespaces, isAttribute = false) {
    const colonIndex = qname.indexOf(":");
    if (colonIndex !== -1) {
      const prefix = qname.substring(0, colonIndex);
      const localName = qname.substring(colonIndex + 1);
      const uri = namespaces.get(prefix);
      return { localName, prefix, uri };
    }
    if (isAttribute) return { localName: qname, prefix: void 0, uri: void 0 };
    const defaultUri = namespaces.get("");
    return { localName: qname, prefix: void 0, uri: defaultUri };
  }

  get XmlEventType() {
    return XmlEventType;
  }
};
"lt": "<", "gt": ">", "quot": "\"", "apos": "'", "amp": "&" }; entityDecoder; constructor(xml, options = {}) { this.xml = xml; this.xmlLength = xml.length; this.options = { autoDecodeEntities: true, ...options }; this.namespaceStack.push(/* @__PURE__ */ new Map()); this.entityDecoder = this.compileEntityDecoder(); } static isWhitespace(code) { if (code < 128) return StaxXmlParserSync.ASCII_TABLE[code] === 1; return code <= 32 || StaxXmlParserSync.UNICODE_WHITESPACE.has(code); } static isHighSurrogate(code) { return code >= 55296 && code <= 56319; } static isLowSurrogate(code) { return code >= 56320 && code <= 57343; } findChar(targetCode, start = this.pos) { const xml = this.xml; const len = this.xmlLength; const len16 = len - 15; let i = start; for (; i < len16; i += 16) { if (xml.charCodeAt(i) === targetCode) return i; if (xml.charCodeAt(i + 1) === targetCode) return i + 1; if (xml.charCodeAt(i + 2) === targetCode) return i + 2; if (xml.charCodeAt(i + 3) === targetCode) return i + 3; if (xml.charCodeAt(i + 4) === targetCode) return i + 4; if (xml.charCodeAt(i + 5) === targetCode) return i + 5; if (xml.charCodeAt(i + 6) === targetCode) return i + 6; if (xml.charCodeAt(i + 7) === targetCode) return i + 7; if (xml.charCodeAt(i + 8) === targetCode) return i + 8; if (xml.charCodeAt(i + 9) === targetCode) return i + 9; if (xml.charCodeAt(i + 10) === targetCode) return i + 10; if (xml.charCodeAt(i + 11) === targetCode) return i + 11; if (xml.charCodeAt(i + 12) === targetCode) return i + 12; if (xml.charCodeAt(i + 13) === targetCode) return i + 13; if (xml.charCodeAt(i + 14) === targetCode) return i + 14; if (xml.charCodeAt(i + 15) === targetCode) return i + 15; } for (; i < len; i++) if (xml.charCodeAt(i) === targetCode) return i; return -1; } matchesAt(str, pos) { const len = str.length; if (pos + len > this.xmlLength) return false; for (let i = 0; i < len; i++) if (this.xml.charCodeAt(pos + i) !== str.charCodeAt(i)) return false; return true; } trimmedSlice(start, 
end) { const xml = this.xml; while (start < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(start))) if (StaxXmlParserSync.isHighSurrogate(xml.charCodeAt(start))) start += 2; else start++; while (end > start && StaxXmlParserSync.isWhitespace(xml.charCodeAt(end - 1))) if (end > start + 1 && StaxXmlParserSync.isLowSurrogate(xml.charCodeAt(end - 1)) && StaxXmlParserSync.isHighSurrogate(xml.charCodeAt(end - 2))) end -= 2; else end--; return start < end ? xml.slice(start, end) : ""; } compileEntityDecoder() { if (!this.options.autoDecodeEntities) return (text) => text; if (this.options.addEntities && this.options.addEntities.length > 0) { const entityMap = { ...StaxXmlParserSync.DEFAULT_ENTITY_MAP }; const patterns = [ "lt", "gt", "quot", "apos" ]; for (const { entity, value } of this.options.addEntities) if (entity && value) { const key = entity.startsWith("&") && entity.endsWith(";") ? entity.slice(1, -1) : entity; entityMap[key] = value; patterns.push(key); } patterns.push("amp"); const cacheKey = patterns.join(","); let regex = StaxXmlParserSync.ENTITY_REGEX_CACHE.get(cacheKey); if (!regex) { const pattern = patterns.sort((a, b) => b.length - a.length).map((e) => e.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|"); regex = new RegExp(`&(${pattern});`, "g"); StaxXmlParserSync.ENTITY_REGEX_CACHE.set(cacheKey, regex); } return (text) => { if (!text || text.indexOf("&") === -1) return text; regex.lastIndex = 0; return text.replace(regex, (_, entity) => entityMap[entity] || _); }; } return (text) => { if (!text || text.indexOf("&") === -1) return text; StaxXmlParserSync.DEFAULT_ENTITY_REGEX.lastIndex = 0; return text.replace(StaxXmlParserSync.DEFAULT_ENTITY_REGEX, (_, entity) => StaxXmlParserSync.DEFAULT_ENTITY_MAP[entity] || _); }; } /** * Symbol.iterator implementation - returns this instance as iterator * This ensures for...of and explicit next() calls use the same iterator state */ [Symbol.iterator]() { return this; } /** * Internal generator that actually 
yields AnyXmlEvent * Important: Return type is same as before - Iterator<AnyXmlEvent> * Factory internally creates UnifiedXmlEvent, but * types are returned as StartElementEvent, EndElementEvent etc. so * perfectly compatible with AnyXmlEvent union type */ *internalGenerator() { yield { type: XmlEventType.START_DOCUMENT, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; while (this.pos < this.xmlLength) { const ltPos = this.findChar(60, this.pos); if (ltPos === -1) { if (this.pos < this.xmlLength) { const text = this.trimmedSlice(this.pos, this.xmlLength); if (text) yield { type: XmlEventType.CHARACTERS, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: this.entityDecoder(text), error: void 0 }; } break; } if (ltPos > this.pos) { const text = this.trimmedSlice(this.pos, ltPos); if (text) yield { type: XmlEventType.CHARACTERS, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: this.entityDecoder(text), error: void 0 }; } this.pos = ltPos; switch (this.xml.charCodeAt(this.pos + 1)) { case 47: yield* this.parseEndTag(); break; case 33: yield* this.parseCdataCommentDoctype(); break; case 63: yield* this.parseProcessingInstruction(); break; default: yield* this.parseStartTag(); break; } } yield { type: XmlEventType.END_DOCUMENT, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; } next() { if (!this.internalIterator) this.internalIterator = this.internalGenerator(); return this.internalIterator.next(); } *parseEndTag() { const tagClose = this.findChar(62, this.pos); if (tagClose === -1) throw new Error("Unclosed end tag"); const fullTagName = this.trimmedSlice(this.pos + 2, tagClose); if (this.elementStack.length === 0) throw new Error(`Mismatched 
closing tag: </${fullTagName}>. No open elements.`); const expectedTagName = this.elementStack[this.elementStack.length - 1]; if (fullTagName !== expectedTagName) throw new Error(`Mismatched closing tag: </${fullTagName}>. Expected </${expectedTagName}>.`); this.elementStack.pop(); const currentNamespaces = this.namespaceStack.pop(); const colonIndex = fullTagName.indexOf(":"); let localName, prefix, uri; if (colonIndex === -1) { localName = fullTagName; prefix = void 0; uri = currentNamespaces ? currentNamespaces.get("") : void 0; } else { prefix = fullTagName.slice(0, colonIndex); localName = fullTagName.slice(colonIndex + 1); uri = currentNamespaces ? currentNamespaces.get(prefix) : void 0; } yield { type: XmlEventType.END_ELEMENT, name: fullTagName, localName, prefix, uri, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; this.pos = tagClose + 1; } *parseCdataCommentDoctype() { if (this.matchesAt("<![CDATA[", this.pos)) { const cdataEnd = this.findSequence("]]>", this.pos + 9); if (cdataEnd === -1) throw new Error("Unclosed CDATA section"); const cdataContent = this.xml.slice(this.pos + 9, cdataEnd); yield { type: XmlEventType.CDATA, name: void 0, localName: void 0, prefix: void 0, uri: void 0, attributes: void 0, attributesWithPrefix: void 0, value: cdataContent, error: void 0 }; this.pos = cdataEnd + 3; } else if (this.matchesAt("<!--", this.pos)) { const commentEnd = this.findSequence("-->", this.pos + 4); if (commentEnd === -1) throw new Error("Unclosed comment"); this.pos = commentEnd + 3; } else if (this.matchesAt("<!DOCTYPE", this.pos)) { const doctypeEnd = this.findChar(62, this.pos); if (doctypeEnd === -1) throw new Error("Unclosed DOCTYPE declaration"); this.pos = doctypeEnd + 1; } } *parseProcessingInstruction() { const piEnd = this.findSequence("?>", this.pos); if (piEnd === -1) throw new Error("Unclosed processing instruction"); this.pos = piEnd + 2; } *parseStartTag() { const tagStart = this.pos + 1; const tagEnd = 
this.findTagEnd(tagStart); if (tagEnd === -1) throw new Error("Unclosed start tag"); let isSelfClosing = false; let actualEnd = tagEnd; if (this.xml.charCodeAt(tagEnd - 1) === 47) { isSelfClosing = true; actualEnd = tagEnd - 1; } let nameEnd = tagStart; const xml = this.xml; while (nameEnd < actualEnd) { const code = xml.charCodeAt(nameEnd); if (code <= 32) { if (StaxXmlParserSync.isWhitespace(code)) break; } else if (code === 62 || code === 47) break; nameEnd++; } const tagName = xml.slice(tagStart, nameEnd); const currentNamespaces = /* @__PURE__ */ new Map(); if (this.namespaceStack.length > 0) { const parentNamespaces = this.namespaceStack[this.namespaceStack.length - 1]; for (const [prefix$1, uri$1] of parentNamespaces) currentNamespaces.set(prefix$1, uri$1); } const { attributes, attributesWithPrefix } = this.parseAttributesFast(nameEnd, actualEnd, currentNamespaces); const colonIndex = tagName.indexOf(":"); let localName, prefix, uri; if (colonIndex === -1) { localName = tagName; prefix = void 0; uri = currentNamespaces.get(""); } else { prefix = tagName.slice(0, colonIndex); localName = tagName.slice(colonIndex + 1); uri = currentNamespaces.get(prefix); } yield { type: XmlEventType.START_ELEMENT, name: tagName, localName, prefix, uri, attributes, attributesWithPrefix, value: void 0, error: void 0 }; this.elementStack.push(tagName); if (!isSelfClosing) this.namespaceStack.push(currentNamespaces); else { yield { type: XmlEventType.END_ELEMENT, name: tagName, localName, prefix, uri, attributes: void 0, attributesWithPrefix: void 0, value: void 0, error: void 0 }; this.elementStack.pop(); } this.pos = tagEnd + 1; } parseAttributesFast(start, end, namespaces) { if (start >= end) return { attributes: {}, attributesWithPrefix: {} }; const attributes = {}; const attributesWithPrefix = {}; let i = start; const xml = this.xml; while (i < end) { while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++; if (i >= end) break; const nameStart = i; while (i 
< end) { const code = xml.charCodeAt(i); if (code === 61 || StaxXmlParserSync.isWhitespace(code)) break; i++; } if (i === nameStart) break; const attrName = xml.slice(nameStart, i); while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++; if (i >= end || xml.charCodeAt(i) !== 61) { attributes[attrName] = "true"; const colonIndex$1 = attrName.indexOf(":"); let localName$1, prefix$1, uri$1; if (colonIndex$1 === -1) { localName$1 = attrName; prefix$1 = void 0; uri$1 = void 0; } else { prefix$1 = attrName.slice(0, colonIndex$1); localName$1 = attrName.slice(colonIndex$1 + 1); uri$1 = namespaces.get(prefix$1); } attributesWithPrefix[attrName] = { value: "true", localName: localName$1, prefix: prefix$1, uri: uri$1 }; continue; } i++; while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++; if (i >= end) break; const quote = xml.charCodeAt(i); if (quote !== 34 && quote !== 39) break; i++; const valueStart = i; while (i < end && xml.charCodeAt(i) !== quote) i++; const rawValue = xml.slice(valueStart, i); const attrValue = this.entityDecoder(rawValue); attributes[attrName] = attrValue; if (attrName === "xmlns") namespaces.set("", attrValue); else if (attrName.startsWith("xmlns:")) namespaces.set(attrName.slice(6), attrValue); const colonIndex = attrName.indexOf(":"); let localName, prefix, uri; if (colonIndex === -1) { localName = attrName; prefix = void 0; uri = void 0; } else { prefix = attrName.slice(0, colonIndex); localName = attrName.slice(colonIndex + 1); uri = namespaces.get(prefix); } if (attrName.startsWith("xmlns")) if (attrName === "xmlns") { localName = "xmlns"; prefix = void 0; } else { localName = attrName.slice(6); prefix = "xmlns"; } attributesWithPrefix[attrName] = { value: attrValue, localName, prefix, uri }; i++; } return { attributes, attributesWithPrefix }; } findTagEnd(start) { let i = start; let inQuote = false; let quoteChar = 0; while (i < this.xmlLength) { const code = this.xml.charCodeAt(i); if (code === 34 || 
code === 39) { if (!inQuote) { inQuote = true; quoteChar = code; } else if (code === quoteChar) { inQuote = false; quoteChar = 0; } } else if (code === 62 && !inQuote) return i; i++; } return -1; } findSequence(sequence, start) { const seqLen = sequence.length; const maxPos = this.xmlLength - seqLen; for (let i = start; i <= maxPos; i++) { let match = true; for (let j = 0; j < seqLen; j++) if (this.xml.charCodeAt(i + j) !== sequence.charCodeAt(j)) { match = false; break; } if (match) return i; } return -1; } }; //#endregion //#region src/converter/XPathEngine.ts /** * XPath compiler with caching * * @internal */ var XPathCompiler = class { static cache = /* @__PURE__ */ new Map(); static MAX_CACHE_SIZE = 1e3; static compile(xpath) { const cached = this.cache.get(xpath); if (cached) return cached; this.validateXPath(xpath); const compiled = this.compileInternal(xpath); if (this.cache.size >= this.MAX_CACHE_SIZE) { const firstKey = this.cache.keys().next().value; if (firstKey !== void 0) this.cache.delete(firstKey); } this.cache.set(xpath, compiled); return compiled; } static validateXPath(xpath) { if (!xpath || xpath.length === 0) throw new Error("XPath cannot be empty"); if (xpath.length > 1e3) throw new Error("XPath too long (max 1000 characters)"); if (/[;<>{}\\]/.test(xpath)) throw new Error("Invalid characters in XPath"); } static compileInternal(xpath) { const trimmed = xpath.trim(); const isRelative = trimmed.startsWith("./") || trimmed === "."; const isAbsolute = !isRelative && trimmed.startsWith("/"); const isDescendant = !isRelative && trimmed.startsWith("//"); let path = trimmed; if (isRelative && trimmed.startsWith("./")) path = path.slice(2); else if (isRelative && trimmed === ".") path = ""; else if (isDescendant) path = path.slice(2); else if (isAbsolute) path = path.slice(1); if (isDescendant && path.includes("//")) throw new Error("Nested descendant-or-self (//) is not supported. 
Use // only at the beginning of XPath expression, e.g., \"//element/path\""); const segments = []; const parts = path.split("/").filter((p) => p.length > 0); for (const part of parts) segments.push(this.compileSegment(part)); return { segments, isAbsolute, isDescendant }; } static compileSegment(segment) { if (segment.startsWith("@")) return { name: segment.slice(1).trim(), predicates: [], isWildcard: false, isAttribute: true, isTextNode: false }; if (segment === "text()") return { name: "text()", predicates: [], isWildcard: false, isAttribute: false, isTextNode: true }; const predicateMatch = segment.match(/^([^[]+)(\[.+\])?$/); if (!predicateMatch) throw new Error(`Invalid XPath segment: ${segment}`); const name = predicateMatch[1].trim(); const isWildcard = name === "*"; const predicates = []; if (predicateMatch[2]) { const predicateStr = predicateMatch[2]; const attrMatchSingle = predicateStr.match(/\[@([^=]+)='([^']+)'\]/); const attrMatchDouble = predi