/*
 * stax-xml
 * Version:
 * High-performance, pull-based XML parser for JavaScript/TypeScript with declarative converter API
 * 1,745 lines (1,736 loc) • 147 kB
 * JavaScript
 */
//#region src/converter/errors.ts
/**
 * Error thrown when XML parsing or validation fails.
 *
 * The error message is built from the `message` of every issue, joined with
 * ", ", and the original issue list stays available on `issues`.
 *
 * @public
 */
var XmlParseError = class extends Error {
	/**
	 * Issues that caused this error, exactly as passed to the constructor
	 */
	issues;
	constructor(issues) {
		super("XML Parse Error: " + issues.map((issue) => issue.message).join(", "));
		this.issues = issues;
		this.name = "XmlParseError";
	}
};
//#endregion
//#region src/converter/base.ts
/**
* Base abstract class for all XML schema types
*
* @remarks
* This class provides the foundation for zod-style declarative XML parsing.
* Each schema type extends this class and implements the parsing logic.
*
* @public
*/
var XmlSchemaBase = class XmlSchemaBase {
_output;
_input;
/**
* Writer configuration for this schema
* @internal
*/
writeConfig;
/**
* Parse XML asynchronously (public API)
* @param input - XML string, stream, or async iterator
* @param options - Parse options
* @returns Parsed output
* @throws {XmlParseError} If parsing fails
*/
async parse(input, options) {
return this._parseAsync(input, options);
}
/**
* Parse XML synchronously (public API)
* @param input - XML string or sync iterator
* @param options - Parse options
* @returns Parsed output
* @throws {XmlParseError} If parsing fails
*/
parseSync(input, options) {
return this._parse(input, options);
}
/**
* Parse XML asynchronously with error handling
* @param input - XML string, stream, or async iterator
* @param options - Parse options
* @returns Parse result with success flag
*/
async safeParse(input, options) {
try {
return {
success: true,
data: await this._parseAsync(input, options)
};
} catch (error) {
return {
success: false,
error: error instanceof XmlParseError ? error : new XmlParseError([{
path: [],
message: error instanceof Error ? error.message : String(error),
code: "parse_error"
}])
};
}
}
/**
* Parse XML synchronously with error handling
* @param input - XML string or sync iterator
* @param options - Parse options
* @returns Parse result with success flag
*/
safeParseSync(input, options) {
try {
return {
success: true,
data: this._parse(input, options)
};
} catch (error) {
return {
success: false,
error: error instanceof XmlParseError ? error : new XmlParseError([{
path: [],
message: error instanceof Error ? error.message : String(error),
code: "parse_error"
}])
};
}
}
/**
* Transform the parsed output
* @param fn - Transform function
* @returns New schema with transform applied
*/
transform(fn) {
return XmlSchemaBase._createTransform(this, fn);
}
/**
* Make this schema optional
* @returns New optional schema
*/
optional() {
return XmlSchemaBase._createOptional(this);
}
/**
* Convert this schema to an array schema
* @param xpath - XPath expression for array elements
* @returns New array schema
*/
array(xpath) {
return XmlSchemaBase._createArray(this, xpath);
}
/**
* Write data to XML string asynchronously (public API)
* @param data - Data to write
* @param options - Write options
* @returns XML string
*/
async write(data, options) {
const chunks = [];
const stream = new WritableStream({ write(chunk) {
chunks.push(chunk);
} });
await this._write(data, stream, options);
const encoder = new TextDecoder(options?.encoding || "utf-8");
return chunks.map((chunk) => encoder.decode(chunk, { stream: true })).join("") + encoder.decode();
}
/**
* Write data to WritableStream asynchronously (public API)
* @param data - Data to write
* @param stream - Writable stream to write to
* @param options - Write options
*/
async writeToStream(data, stream, options) {
return this._write(data, stream, options);
}
/**
* Write data to XML string synchronously (public API)
* @param data - Data to write
* @param options - Write options
* @returns XML string
*/
writeSync(data, options) {
return this._writeSync(data, options);
}
/**
* Configure writer settings for this schema
* @param config - Writer configuration
* @returns This schema with writer config
*/
writer(config) {
this.writeConfig = config;
return this;
}
static _createTransform;
static _createOptional;
static _createArray;
};
//#endregion
//#region src/converter/types.ts
/**
* Schema type constants for XML schema classification
*
* @public
*/
const SchemaType = {
STRING: "STRING",
NUMBER: "NUMBER",
ARRAY: "ARRAY",
OBJECT: "OBJECT",
TRANSFORM: "TRANSFORM",
OPTIONAL: "OPTIONAL"
};
/**
* Type guard for string schema
*
* @public
*/
function isStringSchema(schema) {
return schema.schemaType === SchemaType.STRING;
}
/**
* Type guard for number schema
*
* @public
*/
function isNumberSchema(schema) {
return schema.schemaType === SchemaType.NUMBER;
}
/**
* Type guard for array schema
*
* @public
*/
function isArraySchema(schema) {
return schema.schemaType === SchemaType.ARRAY;
}
/**
* Type guard for object schema
*
* @public
*/
function isObjectSchema(schema) {
return schema.schemaType === SchemaType.OBJECT;
}
/**
* Type guard for transform schema
*
* @public
*/
function isTransformSchema(schema) {
return schema.schemaType === SchemaType.TRANSFORM;
}
/**
* Type guard for optional schema
*
* @public
*/
function isOptionalSchema(schema) {
return schema.schemaType === SchemaType.OPTIONAL;
}
//#endregion
//#region src/converter/XmlTransformSchema.ts
/**
 * Schema wrapper that post-processes another schema's parsed value
 *
 * Every parse entry point delegates to the wrapped schema and then feeds the
 * result to the transform function. Writing always throws.
 *
 * @public
 */
var XmlTransformSchema = class extends XmlSchemaBase {
	schemaType = SchemaType.TRANSFORM;
	/** @internal Wrapped schema that performs the actual parsing */
	schema;
	/** @internal Function applied to the wrapped schema's output */
	transformFn;
	constructor(schema, transformFn) {
		super();
		this.schema = schema;
		this.transformFn = transformFn;
	}
	_parse(input, options) {
		const parsed = this.schema._parse(input, options);
		return this.transformFn(parsed);
	}
	async _parseAsync(input, options) {
		const parsed = await this.schema._parseAsync(input, options);
		return this.transformFn(parsed);
	}
	/**
	 * Parse from the current iterator position, then apply the transform.
	 * Works with both synchronous and thenable results from the wrapped schema.
	 * @throws {Error} If the wrapped schema has no `_parseFromPosition`
	 * @internal
	 */
	_parseFromPosition(iterator, startEvent, startDepth, options) {
		if (!this.schema._parseFromPosition) throw new Error("Transform schema requires base schema with _parseFromPosition");
		const parsed = this.schema._parseFromPosition(iterator, startEvent, startDepth, options);
		const isThenable = parsed && typeof parsed.then === "function";
		if (isThenable) return parsed.then((value) => this.transformFn(value));
		return this.transformFn(parsed);
	}
	/**
	 * Parse raw text with the wrapped schema, then apply the transform.
	 * @throws {Error} If the wrapped schema has no `_parseText`
	 */
	_parseText(text) {
		if (!this.schema._parseText) throw new Error("Transform schema requires base schema with _parseText");
		const parsed = this.schema._parseText(text);
		return this.transformFn(parsed);
	}
	/**
	 * Synchronous writing is unsupported because a transform cannot be reversed.
	 * @throws {Error} Always
	 * @internal
	 */
	_writeSync(data, options) {
		throw new Error("Transform schema does not support writing. Use the base schema for writing.");
	}
	/**
	 * Stream writing is unsupported because a transform cannot be reversed.
	 * The returned promise always rejects.
	 * @internal
	 */
	async _write(data, stream, options) {
		throw new Error("Transform schema does not support writing. Use the base schema for writing.");
	}
};
//#endregion
//#region src/converter/XmlOptionalSchema.ts
/**
 * Schema wrapper that tolerates a missing or unparsable value
 *
 * Parse failures of the wrapped schema and empty-string results both become
 * `undefined`; `undefined`/`null` data is written as nothing.
 *
 * @public
 */
var XmlOptionalSchema = class extends XmlSchemaBase {
	schemaType = SchemaType.OPTIONAL;
	constructor(schema) {
		super();
		this.schema = schema;
	}
	_parse(input, options) {
		let parsed;
		try {
			parsed = this.schema._parse(input, options);
		} catch {
			// Deliberate best-effort: any failure means "value absent".
			return;
		}
		return parsed === "" ? void 0 : parsed;
	}
	async _parseAsync(input, options) {
		let parsed;
		try {
			parsed = await this.schema._parseAsync(input, options);
		} catch {
			// Deliberate best-effort: any failure means "value absent".
			return;
		}
		return parsed === "" ? void 0 : parsed;
	}
	_parseText(text) {
		// Without a text parser on the wrapped schema there is nothing to return.
		if (!this.schema._parseText) return;
		let parsed;
		try {
			parsed = this.schema._parseText(text);
		} catch {
			return;
		}
		return parsed === "" ? void 0 : parsed;
	}
	/**
	 * Serialize optional data synchronously; absent data yields an empty string
	 * @internal
	 */
	_writeSync(data, options) {
		if (data == null) return "";
		return this.schema._writeSync(data, options);
	}
	/**
	 * Serialize optional data to a WritableStream; absent data writes nothing
	 * @internal
	 */
	async _write(data, stream, options) {
		if (data == null) return;
		return this.schema._write(data, stream, options);
	}
};
//#endregion
//#region src/types.ts
/**
* Enumeration of XML stream event types used by the StAX parser
*
* @public
*/
const XmlEventType = {
START_DOCUMENT: "START_DOCUMENT",
END_DOCUMENT: "END_DOCUMENT",
START_ELEMENT: "START_ELEMENT",
END_ELEMENT: "END_ELEMENT",
CHARACTERS: "CHARACTERS",
CDATA: "CDATA",
ERROR: "ERROR"
};
/**
* Type guard function - Check if the event is a START_ELEMENT event
* @param event XML event to check
* @returns true if the event is a START_ELEMENT event, false otherwise
*/
function isStartElement(event) {
return event.type === XmlEventType.START_ELEMENT;
}
/**
* Type guard function - Check if the event is an END_ELEMENT event
* @param event XML event to check
* @returns true if the event is an END_ELEMENT event, false otherwise
*/
function isEndElement(event) {
return event.type === XmlEventType.END_ELEMENT;
}
/**
* Type guard function - Check if the event is a CHARACTERS event
* @param event XML event to check
* @returns true if the event is a CHARACTERS event, false otherwise
*/
function isCharacters(event) {
return event.type === XmlEventType.CHARACTERS;
}
/**
* Type guard function - Check if the event is a CDATA event
* @param event XML event to check
* @returns true if the event is a CDATA event, false otherwise
*/
function isCdata(event) {
return event.type === XmlEventType.CDATA;
}
//#endregion
//#region src/StaxXmlParser.ts
/**
* High-performance asynchronous XML parser implementing the StAX (Streaming API for XML) pattern.
*
* This parser provides memory-efficient processing of large XML files through streaming
* with support for pull-based parsing, custom entity handling, and namespace processing.
*
* @remarks
* The parser uses UTF-8 safe processing with Boyer-Moore-Horspool pattern search optimization
* and supports both single-event and batch processing modes for improved performance.
*
* @example
* Basic usage:
* ```typescript
* const xmlContent = '<root><item>Hello</item></root>';
* const stream = new ReadableStream({
* start(controller) {
* controller.enqueue(new TextEncoder().encode(xmlContent));
* controller.close();
* }
* });
*
* const parser = new StaxXmlParser(stream);
* for await (const event of parser) {
* console.log(event.type, event);
* }
* ```
*
* @example
* With custom options:
* ```typescript
* const options = {
* autoDecodeEntities: true,
* maxBufferSize: 128 * 1024,
* addEntities: [{ entity: 'custom', value: 'replacement' }]
* };
* const parser = new StaxXmlParser(stream, options);
* ```
*
* @public
*/
var StaxXmlParser = class StaxXmlParser {
/** Reader obtained from the input stream; reset to null when released after an error */
reader = null;
/** TextDecoder that converts buffered bytes to strings */
decoder;
/** Byte buffer holding raw input that has not been discarded yet */
buffer;
/** Number of valid bytes currently stored in `buffer` */
bufferLength = 0;
/** Index in `buffer` of the next byte to parse */
position = 0;
/** Events produced by the parser but not yet handed to a consumer */
eventQueue = [];
/** Resolver of the promise returned by a pending `next()` call, if any */
resolveNext = null;
/** First error recorded by the parser, or null */
error = null;
/** Set once the input stream has reported `done` */
isStreamEnded = false;
/** Set once END_DOCUMENT was emitted or an error was recorded */
parserFinished = false;
/** Text collected since the last flush, still entity-encoded */
currentTextBuffer = "";
/** Qualified names of the currently open elements, innermost last */
elementStack = [];
/** One prefix-to-URI Map per open element, parallel to `elementStack` */
namespaceStack = [];
/** Effective options: defaults merged with the caller's options */
options;
/**
 * Lookup table mapping an ASCII byte to a small character-class code;
 * bytes not listed below map to 0.
 */
static ASCII_TABLE = (() => {
const table = new Uint8Array(128);
table[9] = 1; // tab
table[10] = 1; // line feed
table[13] = 1; // carriage return
table[32] = 1; // space
table[60] = 2; // <
table[62] = 3; // >
table[47] = 4; // /
table[61] = 5; // =
table[33] = 6; // !
table[63] = 7; // ?
table[34] = 8; // "
table[39] = 9; // '
table[38] = 10; // &
table[91] = 11; // [
table[93] = 12; // ]
return table;
})();
/** Compiled entity regexes shared by all instances, keyed by the joined entity names */
static ENTITY_REGEX_CACHE = /* @__PURE__ */ new Map();
/** Regex matching the five predefined XML entities */
static DEFAULT_ENTITY_REGEX = /&(lt|gt|quot|apos|amp);/g;
/** Replacement text for the five predefined XML entities */
static DEFAULT_ENTITY_MAP = {
"lt": "<",
"gt": ">",
"quot": "\"",
"apos": "'",
"amp": "&"
};
/** Function that decodes entities in a string; built once by `_compileEntityDecoder` */
entityDecoder;
/** Boyer-Moore-Horspool skip tables keyed by search pattern */
bmhCache = /* @__PURE__ */ new Map();
/** Running statistics consulted when choosing a batch size */
batchMetrics = {
avgEventSize: 100,
lastBatchTime: 0,
eventCount: 0
};
/**
* Creates a new StaxXmlParser instance.
*
* @param xmlStream - The ReadableStream containing XML data as Uint8Array chunks
* @param options - Configuration options for the parser
* @throws {Error} When xmlStream is not a valid ReadableStream
*
* @example
* ```typescript
* const xmlData = '<root><item>content</item></root>';
* const stream = new ReadableStream({
* start(controller) {
* controller.enqueue(new TextEncoder().encode(xmlData));
* controller.close();
* }
* });
*
* const parser = new StaxXmlParser(stream, {
* autoDecodeEntities: true,
* maxBufferSize: 64 * 1024
* });
* ```
*/
constructor(xmlStream, options = {}) {
if (!(xmlStream instanceof ReadableStream)) throw new Error("xmlStream must be a web standard ReadableStream.");
this.options = {
encoding: "utf-8",
autoDecodeEntities: true,
maxBufferSize: 64 * 1024,
enableBufferCompaction: true,
batchSize: 10,
batchTimeout: 10,
...options
};
this.decoder = new TextDecoder(this.options.encoding, {
fatal: false,
ignoreBOM: true
});
this.buffer = new Uint8Array(this.options.maxBufferSize || 64 * 1024);
this.entityDecoder = this._compileEntityDecoder();
this.reader = xmlStream.getReader();
this._startReading();
this._addEvent({
type: XmlEventType.START_DOCUMENT,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
});
}
/**
* Fast XML special character check
*/
getXmlCharType(byte) {
return byte < 128 ? StaxXmlParser.ASCII_TABLE[byte] : 0;
}
/**
* Check if UTF-8 byte is the start of a character
* @param byte The byte to check
* @returns true if it's the start of a character
*/
isUtf8CharStart(byte) {
return (byte & 128) === 0 || (byte & 192) === 192;
}
/**
* Calculate UTF-8 sequence length
* @param byte The first byte
* @returns Sequence length (1-4)
*/
getUtf8SequenceLength(byte) {
if ((byte & 128) === 0) return 1;
if ((byte & 224) === 192) return 2;
if ((byte & 240) === 224) return 3;
if ((byte & 248) === 240) return 4;
return 1;
}
/**
* Safely adjust position at UTF-8 character boundaries
* @param pos The position to adjust
* @param searchBackward Whether to search backwards
* @returns Safe UTF-8 boundary position
*/
findSafeUtf8Boundary(pos, searchBackward = true) {
if (pos <= 0 || pos >= this.bufferLength) return pos;
if (searchBackward) {
let safePos = pos;
let backtrack = 0;
while (safePos > 0 && backtrack < 4) {
if (this.isUtf8CharStart(this.buffer[safePos])) {
const seqLen = this.getUtf8SequenceLength(this.buffer[safePos]);
if (safePos + seqLen > pos) return safePos;
else return pos;
}
safePos--;
backtrack++;
}
return pos;
} else {
while (pos < this.bufferLength && !this.isUtf8CharStart(this.buffer[pos])) pos++;
return pos;
}
}
/**
* Safely extract UTF-8 string from buffer
* @param start Starting position
* @param end Ending position
* @returns Decoded string
*/
safeDecodeRange(start, end) {
const safeStart = this.findSafeUtf8Boundary(start, false);
const safeEnd = this.findSafeUtf8Boundary(end, true);
if (safeStart >= safeEnd) return "";
return this.decoder.decode(this.buffer.subarray(safeStart, safeEnd), { stream: false });
}
/**
* Build Boyer-Moore-Horspool bad character table
*/
_buildBMHTable(pattern) {
const table = new Uint8Array(256);
const patternLength = pattern.length;
table.fill(patternLength);
for (let i = 0; i < patternLength - 1; i++) table[pattern[i]] = patternLength - 1 - i;
return table;
}
/**
* Pattern search using Boyer-Moore-Horspool algorithm
* XML delimiters are all ASCII, so no UTF-8 boundary issues
*/
_findPatternBMH(pattern, startPos) {
const patternBytes = new TextEncoder().encode(pattern);
const patternLength = patternBytes.length;
if (patternLength === 0) return -1;
if (patternLength === 1) return this._findSingleByte(patternBytes[0], startPos);
let skipTable = this.bmhCache.get(pattern);
if (!skipTable) {
skipTable = this._buildBMHTable(patternBytes);
if (this.bmhCache.size > 20) this.bmhCache.clear();
this.bmhCache.set(pattern, skipTable);
}
const start = startPos || this.position;
const bufferEnd = this.bufferLength - patternLength;
let pos = start;
while (pos <= bufferEnd) {
let i = patternLength - 1;
while (i >= 0 && this.buffer[pos + i] === patternBytes[i]) i--;
if (i < 0) return pos;
pos += skipTable[this.buffer[pos + patternLength - 1]];
}
return -1;
}
/**
* Single byte search (optimized)
*/
_findSingleByte(byte, startPos) {
const start = startPos || this.position;
const buffer = this.buffer;
const end = this.bufferLength;
const end4 = end - 3;
let i = start;
for (; i < end4; i += 4) {
if (buffer[i] === byte) return i;
if (buffer[i + 1] === byte) return i + 1;
if (buffer[i + 2] === byte) return i + 2;
if (buffer[i + 3] === byte) return i + 3;
}
for (; i < end; i++) if (buffer[i] === byte) return i;
return -1;
}
_compileEntityDecoder() {
if (!this.options.autoDecodeEntities) return (text) => text;
if (this.options.addEntities && this.options.addEntities.length > 0) {
const entityMap = { ...StaxXmlParser.DEFAULT_ENTITY_MAP };
const patterns = [
"lt",
"gt",
"quot",
"apos"
];
for (const { entity, value } of this.options.addEntities) if (entity && value) {
const key = entity.startsWith("&") && entity.endsWith(";") ? entity.slice(1, -1) : entity;
entityMap[key] = value;
patterns.push(key);
}
patterns.push("amp");
const cacheKey = patterns.join(",");
let regex = StaxXmlParser.ENTITY_REGEX_CACHE.get(cacheKey);
if (!regex) {
const pattern = patterns.sort((a, b) => b.length - a.length).map((e) => e.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|");
regex = new RegExp(`&(${pattern});`, "g");
StaxXmlParser.ENTITY_REGEX_CACHE.set(cacheKey, regex);
}
return (text) => {
if (!text || text.indexOf("&") === -1) return text;
regex.lastIndex = 0;
return text.replace(regex, (_, entity) => entityMap[entity] || _);
};
}
return (text) => {
if (!text || text.indexOf("&") === -1) return text;
StaxXmlParser.DEFAULT_ENTITY_REGEX.lastIndex = 0;
return text.replace(StaxXmlParser.DEFAULT_ENTITY_REGEX, (_, entity) => StaxXmlParser.DEFAULT_ENTITY_MAP[entity] || _);
};
}
_calculateOptimalBatchSize() {
const MIN_BATCH = 1;
const MAX_BATCH = this.options.batchSize || 10;
if (this.bufferLength < 1024) return MIN_BATCH;
if (this.bufferLength > 10240) return MAX_BATCH;
if (this.eventQueue.length > 0) {
if (this.eventQueue[this.eventQueue.length - 1]?.type === XmlEventType.CHARACTERS) return MIN_BATCH;
}
if (this.batchMetrics.eventCount > 100) {
const avgSize = this.batchMetrics.avgEventSize;
if (avgSize > 1e3) return MIN_BATCH;
if (avgSize < 100) return MAX_BATCH;
}
return Math.min(MAX_BATCH, Math.max(MIN_BATCH, Math.floor(this.bufferLength / 1024)));
}
async nextBatch(size) {
const batch = [];
const targetSize = size || this._calculateOptimalBatchSize();
const startTime = Date.now();
const timeout = this.options.batchTimeout || 10;
for (let i = 0; i < targetSize; i++) {
if (Date.now() - startTime > timeout) break;
const result = await this.next();
if (result.done) break;
batch.push(result.value);
}
return batch;
}
async *batchedIterator(batchSize) {
while (!this.parserFinished || this.eventQueue.length > 0) {
const targetSize = batchSize || this._calculateOptimalBatchSize();
const batch = await this.nextBatch(targetSize);
if (batch.length === 0) break;
yield batch;
}
}
_compactBufferIfNeeded() {
if (!this.options.enableBufferCompaction) return;
const maxSize = this.options.maxBufferSize || 64 * 1024;
if (this.position > 8192 && this.bufferLength > 16384 || this.position > maxSize / 2 || this.bufferLength > maxSize && this.position > maxSize / 4) this._compactBuffer();
}
_compactBuffer() {
if (this.position > 0 && this.position < this.bufferLength) {
const safePos = this.findSafeUtf8Boundary(this.position, true);
const remainingLength = this.bufferLength - safePos;
if (remainingLength < safePos) {
const newBuffer = new Uint8Array(this.buffer.length);
newBuffer.set(this.buffer.subarray(safePos, this.bufferLength));
this.buffer = newBuffer;
} else this.buffer.copyWithin(0, safePos, this.bufferLength);
this.bufferLength = remainingLength;
this.position = this.position - safePos;
if (this.bmhCache.size > 20) this.bmhCache.clear();
}
}
/**
* Background read loop: pulls chunks from the reader, appends them to the
* buffer and parses after every chunk. When the stream ends it parses once
* more, reports unclosed elements as an error, and otherwise emits
* END_DOCUMENT. Any exception is recorded through `_addError`.
*/
async _startReading() {
try {
while (true) {
// NOTE(review): `_addError` sets `this.reader` to null, so after a parse
// error this call appears to throw a TypeError that lands in the catch
// below, which then only resolves a pending next() — confirm this is intended.
const { done, value } = await this.reader.read();
if (done) {
this.isStreamEnded = true;
// Final pass so that trailing text is flushed with the stream marked as ended.
this._parseBuffer();
if (!this.parserFinished && this.elementStack.length > 0) this._addError(/* @__PURE__ */ new Error("Unexpected end of document. Not all elements were closed."));
if (!this.parserFinished) {
this._flushCharacters();
this._addEvent({
type: XmlEventType.END_DOCUMENT,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
});
this.parserFinished = true;
}
// A consumer still waiting with nothing left in the queue is told the iteration is over.
if (this.resolveNext && this.eventQueue.length === 0) {
this.resolveNext({
value: void 0,
done: true
});
this.resolveNext = null;
}
break;
}
this._appendToBuffer(value);
this._parseBuffer();
this._compactBufferIfNeeded();
this._updateBatchMetrics(value.length);
}
} catch (err) {
// `_addError` ignores the error if one was already recorded.
this._addError(err);
if (this.resolveNext) {
this.resolveNext({
value: void 0,
done: true
});
this.resolveNext = null;
}
}
}
_updateBatchMetrics(bytesProcessed) {
const eventsDelta = this.eventQueue.length;
if (eventsDelta > 0) {
this.batchMetrics.eventCount += eventsDelta;
this.batchMetrics.avgEventSize = this.batchMetrics.avgEventSize * .9 + bytesProcessed / eventsDelta * .1;
}
this.batchMetrics.lastBatchTime = Date.now();
}
_parseBuffer() {
while (this.position < this.bufferLength && !this.parserFinished) {
const ltPos = this._findSingleByte(60, this.position);
if (ltPos === -1) {
if (this.isStreamEnded) {
const remainingText = this._readBuffer();
this.currentTextBuffer += remainingText;
this._flushCharacters();
}
break;
}
if (ltPos > this.position) try {
const textLength = ltPos - this.position;
const text = this._readBuffer(textLength);
this.currentTextBuffer += text;
} catch (error) {
if (!this.isStreamEnded) break;
throw error;
}
this.position = ltPos;
const nextByte = this.buffer[this.position + 1];
const charType = this.getXmlCharType(nextByte);
if (charType === 4) {
this._flushCharacters();
if (!this._parseEndTag()) break;
} else if (charType === 6) if (this._matchesPattern("<!--")) {
if (!this._parseComment()) break;
} else if (this._matchesPattern("<![CDATA[")) {
if (!this._parseCData()) break;
} else {
if (this.isStreamEnded) {
this._addError(/* @__PURE__ */ new Error(`Malformed XML near position ${this.position}`));
return;
}
break;
}
else if (charType === 7) {
if (this._matchesPattern("<?xml")) {
if (!this._parseXmlDeclaration()) break;
} else if (this._matchesPattern("<?")) {
if (!this._parseProcessingInstruction()) break;
}
} else {
this._flushCharacters();
if (!this._parseStartTag()) break;
}
this._compactBufferIfNeeded();
}
}
_flushCharacters() {
if (this.currentTextBuffer.length > 0) {
const decodedText = this.entityDecoder(this.currentTextBuffer);
if (decodedText.trim().length > 0) this._addEvent({
type: XmlEventType.CHARACTERS,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: decodedText,
error: void 0
});
this.currentTextBuffer = "";
}
}
_clearBuffers() {
this.bufferLength = 0;
this.position = 0;
this.currentTextBuffer = "";
this.bmhCache.clear();
}
_addEvent(event) {
this.eventQueue.push(event);
if (this.resolveNext) {
this.resolveNext(this._popNextEvent());
this.resolveNext = null;
}
}
_addError(err) {
if (this.error === null) {
this.error = err;
this._addEvent({
type: XmlEventType.ERROR,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: err
});
this.parserFinished = true;
this._clearBuffers();
if (this.reader) {
this.reader.releaseLock();
this.reader = null;
}
}
}
_popNextEvent() {
if (this.eventQueue.length > 0) return {
value: this.eventQueue.shift(),
done: false
};
if (this.parserFinished) return {
value: void 0,
done: true
};
return null;
}
async next() {
if (this.error) throw this.error;
const nextEvent = this._popNextEvent();
if (nextEvent) return nextEvent;
if (this.parserFinished) return {
value: void 0,
done: true
};
return new Promise((resolve) => {
this.resolveNext = resolve;
});
}
/**
* Makes the parser usable in `for await` loops; the parser is its own iterator.
* @returns This parser instance
*/
[Symbol.asyncIterator]() {
return this;
}
_appendToBuffer(newData) {
const requiredSize = this.bufferLength + newData.length;
if (requiredSize > this.buffer.length) {
const newSize = Math.max(this.buffer.length * 2, requiredSize);
const newBuffer = new Uint8Array(newSize);
newBuffer.set(this.buffer.subarray(0, this.bufferLength));
this.buffer = newBuffer;
}
this.buffer.set(newData, this.bufferLength);
this.bufferLength += newData.length;
}
/**
* Decodes bytes starting at the current position and advances the position.
* @param length Number of bytes to read; when omitted (or 0) everything up to
* the end of the valid data is read
* @returns The decoded text
* @throws Re-throws the decoder's error after restoring the original position
* when no shorter, decodable slice could be found
*/
_readBuffer(length) {
const originalPos = this.position;
let endPos = length ? Math.min(this.position + length, this.bufferLength) : this.bufferLength;
// A read that stops before the end of the data must not split a UTF-8 character.
if (length && endPos < this.bufferLength) endPos = this.findSafeUtf8Boundary(endPos, true);
const slice = this.buffer.subarray(this.position, endPos);
try {
// Streaming mode is used until the input stream has ended.
const result = this.decoder.decode(slice, { stream: !this.isStreamEnded });
this.position = endPos;
return result;
} catch (error) {
// NOTE(review): the decoder is created with `fatal: false`, so decode()
// presumably never throws and this recovery path is defensive — confirm.
// Retry with up to 4 bytes trimmed from the end, in case the data stops mid-character.
if (!this.isStreamEnded && endPos === this.bufferLength) for (let i = 1; i <= 4 && endPos - i > this.position; i++) {
const testEnd = this.findSafeUtf8Boundary(endPos - i, true);
if (testEnd > this.position) try {
const safeSlice = this.buffer.subarray(this.position, testEnd);
const result = this.decoder.decode(safeSlice, { stream: true });
this.position = testEnd;
return result;
} catch {
continue;
}
}
this.position = originalPos;
throw error;
}
}
_matchesPattern(pattern) {
const patternBytes = new TextEncoder().encode(pattern);
if (this.position + patternBytes.length > this.bufferLength) return false;
for (let i = 0; i < patternBytes.length; i++) if (this.buffer[this.position + i] !== patternBytes[i]) return false;
return true;
}
_parseXmlDeclaration() {
const endPos = this._findPatternBMH("?>");
if (endPos === -1) return false;
this.position = endPos + 2;
return true;
}
_parseComment() {
const endPos = this._findPatternBMH("-->");
if (endPos === -1) return false;
this.position = endPos + 3;
return true;
}
/**
* UTF-8 safe CDATA parsing
*/
_parseCData() {
const startPos = this.position + 9;
const endPos = this._findPatternBMH("]]>");
if (endPos === -1) return false;
try {
const safeStart = this.findSafeUtf8Boundary(startPos, false);
const safeEnd = this.findSafeUtf8Boundary(endPos, true);
const cdataContent = this.decoder.decode(this.buffer.subarray(safeStart, safeEnd), { stream: false });
this._addEvent({
type: XmlEventType.CDATA,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: cdataContent,
error: void 0
});
this.position = endPos + 3;
return true;
} catch (error) {
if (!this.isStreamEnded) return false;
throw error;
}
}
_parseProcessingInstruction() {
const endPos = this._findPatternBMH("?>");
if (endPos === -1) return false;
this.position = endPos + 2;
return true;
}
/**
* UTF-8 safe end tag parsing
*/
_parseEndTag() {
const gtPos = this._findSingleByte(62, this.position);
if (gtPos === -1) return false;
try {
const closeTagMatch = this.safeDecodeRange(this.position, gtPos + 1).match(/^<\/([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)\s*>$/);
if (!closeTagMatch) {
this._addError(/* @__PURE__ */ new Error("Malformed closing tag"));
return true;
}
const tagName = closeTagMatch[1];
if (this.elementStack.length === 0 || this.elementStack[this.elementStack.length - 1] !== tagName) {
this._addError(/* @__PURE__ */ new Error(`Mismatched closing tag: </${tagName}>. Expected </${this.elementStack[this.elementStack.length - 1] || "nothing"}>`));
return true;
}
const currentNamespaces = this.namespaceStack.length > 0 ? this.namespaceStack[this.namespaceStack.length - 1] : /* @__PURE__ */ new Map();
const { localName, prefix, uri } = this._parseQualifiedName(tagName, currentNamespaces);
this.elementStack.pop();
this.namespaceStack.pop();
this._addEvent({
type: XmlEventType.END_ELEMENT,
name: tagName,
localName,
prefix,
uri,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
});
this.position = gtPos + 1;
return true;
} catch (error) {
if (!this.isStreamEnded) return false;
throw error;
}
}
/**
* UTF-8 safe start tag parsing (using ASCII table)
*/
_parseStartTag() {
const gtPos = this._findSingleByte(62, this.position);
if (gtPos === -1) return false;
try {
const tagMatch = this.safeDecodeRange(this.position, gtPos + 1).match(/^<([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)(\s+[^>]*?)?\s*(\/?)>$/);
if (!tagMatch) {
this._addError(/* @__PURE__ */ new Error("Malformed start tag"));
return true;
}
const tagName = tagMatch[1];
const attributesString = tagMatch[2] || "";
const isSelfClosing = tagMatch[3] === "/";
const currentNamespaces = /* @__PURE__ */ new Map();
if (this.namespaceStack.length > 0) {
const parentNamespaces = this.namespaceStack[this.namespaceStack.length - 1];
for (const [prefix$1, uri$1] of parentNamespaces) currentNamespaces.set(prefix$1, uri$1);
}
const attributes = {};
const attributesWithPrefix = {};
const attrRegex = /([a-zA-Z0-9_:.\-\u0080-\uFFFF]+)(?:\s*=\s*"([^"]*)"|\s*=\s*'([^']*)')?/g;
let attrMatch;
while ((attrMatch = attrRegex.exec(attributesString)) !== null) {
const attrName = attrMatch[1];
const attrValue = this.entityDecoder(attrMatch[2] || attrMatch[3] || "true");
attributes[attrName] = attrValue;
const attrNamespaceInfo = this._parseQualifiedName(attrName, currentNamespaces, true);
attributesWithPrefix[attrNamespaceInfo.localName] = {
value: attrValue,
prefix: attrNamespaceInfo.prefix,
uri: attrNamespaceInfo.uri
};
if (attrName === "xmlns") currentNamespaces.set("", attrValue);
else if (attrName.startsWith("xmlns:")) {
const prefix$1 = attrName.substring(6);
currentNamespaces.set(prefix$1, attrValue);
}
}
const { localName, prefix, uri } = this._parseQualifiedName(tagName, currentNamespaces);
this._addEvent({
type: XmlEventType.START_ELEMENT,
name: tagName,
localName,
prefix,
uri,
attributes,
attributesWithPrefix,
value: void 0,
error: void 0
});
this.position = gtPos + 1;
if (!isSelfClosing) {
this.elementStack.push(tagName);
this.namespaceStack.push(currentNamespaces);
} else this._addEvent({
type: XmlEventType.END_ELEMENT,
name: tagName,
localName,
prefix,
uri,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
});
return true;
} catch (error) {
if (!this.isStreamEnded) return false;
throw error;
}
}
_parseQualifiedName(qname, namespaces, isAttribute = false) {
const colonIndex = qname.indexOf(":");
if (colonIndex === -1) if (isAttribute) return {
localName: qname,
prefix: void 0,
uri: void 0
};
else {
const defaultUri = namespaces.get("");
return {
localName: qname,
prefix: void 0,
uri: defaultUri
};
}
else {
const prefix = qname.substring(0, colonIndex);
const localName = qname.substring(colonIndex + 1);
const uri = namespaces.get(prefix);
return {
localName,
prefix,
uri
};
}
}
/**
* Exposes the module-level XmlEventType constants on the parser instance.
* @returns The XmlEventType object
*/
get XmlEventType() {
return XmlEventType;
}
};
//#endregion
//#region src/StaxXmlParserSync.ts
var StaxXmlParserSync = class StaxXmlParserSync {
xml;
xmlLength;
pos = 0;
elementStack = [];
namespaceStack = [];
options;
internalIterator;
static ASCII_TABLE = (() => {
const table = new Uint8Array(128);
table[9] = 1;
table[10] = 1;
table[13] = 1;
table[32] = 1;
table[60] = 2;
table[62] = 3;
table[47] = 4;
table[61] = 5;
table[33] = 6;
table[63] = 7;
table[34] = 8;
table[39] = 9;
return table;
})();
static UNICODE_WHITESPACE = new Set([
160,
5760,
8192,
8193,
8194,
8195,
8196,
8197,
8198,
8199,
8200,
8201,
8202,
8232,
8233,
8239,
8287,
12288,
65279
]);
static ENTITY_REGEX_CACHE = /* @__PURE__ */ new Map();
static DEFAULT_ENTITY_REGEX = /&(lt|gt|quot|apos|amp);/g;
static DEFAULT_ENTITY_MAP = {
"lt": "<",
"gt": ">",
"quot": "\"",
"apos": "'",
"amp": "&"
};
entityDecoder;
constructor(xml, options = {}) {
this.xml = xml;
this.xmlLength = xml.length;
this.options = {
autoDecodeEntities: true,
...options
};
this.namespaceStack.push(/* @__PURE__ */ new Map());
this.entityDecoder = this.compileEntityDecoder();
}
static isWhitespace(code) {
if (code < 128) return StaxXmlParserSync.ASCII_TABLE[code] === 1;
return code <= 32 || StaxXmlParserSync.UNICODE_WHITESPACE.has(code);
}
static isHighSurrogate(code) {
return code >= 55296 && code <= 56319;
}
static isLowSurrogate(code) {
return code >= 56320 && code <= 57343;
}
findChar(targetCode, start = this.pos) {
const xml = this.xml;
const len = this.xmlLength;
const len16 = len - 15;
let i = start;
for (; i < len16; i += 16) {
if (xml.charCodeAt(i) === targetCode) return i;
if (xml.charCodeAt(i + 1) === targetCode) return i + 1;
if (xml.charCodeAt(i + 2) === targetCode) return i + 2;
if (xml.charCodeAt(i + 3) === targetCode) return i + 3;
if (xml.charCodeAt(i + 4) === targetCode) return i + 4;
if (xml.charCodeAt(i + 5) === targetCode) return i + 5;
if (xml.charCodeAt(i + 6) === targetCode) return i + 6;
if (xml.charCodeAt(i + 7) === targetCode) return i + 7;
if (xml.charCodeAt(i + 8) === targetCode) return i + 8;
if (xml.charCodeAt(i + 9) === targetCode) return i + 9;
if (xml.charCodeAt(i + 10) === targetCode) return i + 10;
if (xml.charCodeAt(i + 11) === targetCode) return i + 11;
if (xml.charCodeAt(i + 12) === targetCode) return i + 12;
if (xml.charCodeAt(i + 13) === targetCode) return i + 13;
if (xml.charCodeAt(i + 14) === targetCode) return i + 14;
if (xml.charCodeAt(i + 15) === targetCode) return i + 15;
}
for (; i < len; i++) if (xml.charCodeAt(i) === targetCode) return i;
return -1;
}
matchesAt(str, pos) {
const len = str.length;
if (pos + len > this.xmlLength) return false;
for (let i = 0; i < len; i++) if (this.xml.charCodeAt(pos + i) !== str.charCodeAt(i)) return false;
return true;
}
trimmedSlice(start, end) {
const xml = this.xml;
while (start < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(start))) if (StaxXmlParserSync.isHighSurrogate(xml.charCodeAt(start))) start += 2;
else start++;
while (end > start && StaxXmlParserSync.isWhitespace(xml.charCodeAt(end - 1))) if (end > start + 1 && StaxXmlParserSync.isLowSurrogate(xml.charCodeAt(end - 1)) && StaxXmlParserSync.isHighSurrogate(xml.charCodeAt(end - 2))) end -= 2;
else end--;
return start < end ? xml.slice(start, end) : "";
}
compileEntityDecoder() {
if (!this.options.autoDecodeEntities) return (text) => text;
if (this.options.addEntities && this.options.addEntities.length > 0) {
const entityMap = { ...StaxXmlParserSync.DEFAULT_ENTITY_MAP };
const patterns = [
"lt",
"gt",
"quot",
"apos"
];
for (const { entity, value } of this.options.addEntities) if (entity && value) {
const key = entity.startsWith("&") && entity.endsWith(";") ? entity.slice(1, -1) : entity;
entityMap[key] = value;
patterns.push(key);
}
patterns.push("amp");
const cacheKey = patterns.join(",");
let regex = StaxXmlParserSync.ENTITY_REGEX_CACHE.get(cacheKey);
if (!regex) {
const pattern = patterns.sort((a, b) => b.length - a.length).map((e) => e.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|");
regex = new RegExp(`&(${pattern});`, "g");
StaxXmlParserSync.ENTITY_REGEX_CACHE.set(cacheKey, regex);
}
return (text) => {
if (!text || text.indexOf("&") === -1) return text;
regex.lastIndex = 0;
return text.replace(regex, (_, entity) => entityMap[entity] || _);
};
}
return (text) => {
if (!text || text.indexOf("&") === -1) return text;
StaxXmlParserSync.DEFAULT_ENTITY_REGEX.lastIndex = 0;
return text.replace(StaxXmlParserSync.DEFAULT_ENTITY_REGEX, (_, entity) => StaxXmlParserSync.DEFAULT_ENTITY_MAP[entity] || _);
};
}
/**
* Symbol.iterator implementation - returns this instance as iterator
* This ensures for...of and explicit next() calls use the same iterator state
*/
[Symbol.iterator]() {
return this;
}
/**
* Internal generator that actually yields AnyXmlEvent
* Important: Return type is same as before - Iterator<AnyXmlEvent>
* Factory internally creates UnifiedXmlEvent, but
* types are returned as StartElementEvent, EndElementEvent etc. so
* perfectly compatible with AnyXmlEvent union type
*/
*internalGenerator() {
yield {
type: XmlEventType.START_DOCUMENT,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
};
while (this.pos < this.xmlLength) {
const ltPos = this.findChar(60, this.pos);
if (ltPos === -1) {
if (this.pos < this.xmlLength) {
const text = this.trimmedSlice(this.pos, this.xmlLength);
if (text) yield {
type: XmlEventType.CHARACTERS,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: this.entityDecoder(text),
error: void 0
};
}
break;
}
if (ltPos > this.pos) {
const text = this.trimmedSlice(this.pos, ltPos);
if (text) yield {
type: XmlEventType.CHARACTERS,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: this.entityDecoder(text),
error: void 0
};
}
this.pos = ltPos;
switch (this.xml.charCodeAt(this.pos + 1)) {
case 47:
yield* this.parseEndTag();
break;
case 33:
yield* this.parseCdataCommentDoctype();
break;
case 63:
yield* this.parseProcessingInstruction();
break;
default:
yield* this.parseStartTag();
break;
}
}
yield {
type: XmlEventType.END_DOCUMENT,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
};
}
next() {
if (!this.internalIterator) this.internalIterator = this.internalGenerator();
return this.internalIterator.next();
}
*parseEndTag() {
const tagClose = this.findChar(62, this.pos);
if (tagClose === -1) throw new Error("Unclosed end tag");
const fullTagName = this.trimmedSlice(this.pos + 2, tagClose);
if (this.elementStack.length === 0) throw new Error(`Mismatched closing tag: </${fullTagName}>. No open elements.`);
const expectedTagName = this.elementStack[this.elementStack.length - 1];
if (fullTagName !== expectedTagName) throw new Error(`Mismatched closing tag: </${fullTagName}>. Expected </${expectedTagName}>.`);
this.elementStack.pop();
const currentNamespaces = this.namespaceStack.pop();
const colonIndex = fullTagName.indexOf(":");
let localName, prefix, uri;
if (colonIndex === -1) {
localName = fullTagName;
prefix = void 0;
uri = currentNamespaces ? currentNamespaces.get("") : void 0;
} else {
prefix = fullTagName.slice(0, colonIndex);
localName = fullTagName.slice(colonIndex + 1);
uri = currentNamespaces ? currentNamespaces.get(prefix) : void 0;
}
yield {
type: XmlEventType.END_ELEMENT,
name: fullTagName,
localName,
prefix,
uri,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
};
this.pos = tagClose + 1;
}
*parseCdataCommentDoctype() {
if (this.matchesAt("<![CDATA[", this.pos)) {
const cdataEnd = this.findSequence("]]>", this.pos + 9);
if (cdataEnd === -1) throw new Error("Unclosed CDATA section");
const cdataContent = this.xml.slice(this.pos + 9, cdataEnd);
yield {
type: XmlEventType.CDATA,
name: void 0,
localName: void 0,
prefix: void 0,
uri: void 0,
attributes: void 0,
attributesWithPrefix: void 0,
value: cdataContent,
error: void 0
};
this.pos = cdataEnd + 3;
} else if (this.matchesAt("<!--", this.pos)) {
const commentEnd = this.findSequence("-->", this.pos + 4);
if (commentEnd === -1) throw new Error("Unclosed comment");
this.pos = commentEnd + 3;
} else if (this.matchesAt("<!DOCTYPE", this.pos)) {
const doctypeEnd = this.findChar(62, this.pos);
if (doctypeEnd === -1) throw new Error("Unclosed DOCTYPE declaration");
this.pos = doctypeEnd + 1;
}
}
*parseProcessingInstruction() {
const piEnd = this.findSequence("?>", this.pos);
if (piEnd === -1) throw new Error("Unclosed processing instruction");
this.pos = piEnd + 2;
}
*parseStartTag() {
const tagStart = this.pos + 1;
const tagEnd = this.findTagEnd(tagStart);
if (tagEnd === -1) throw new Error("Unclosed start tag");
let isSelfClosing = false;
let actualEnd = tagEnd;
if (this.xml.charCodeAt(tagEnd - 1) === 47) {
isSelfClosing = true;
actualEnd = tagEnd - 1;
}
let nameEnd = tagStart;
const xml = this.xml;
while (nameEnd < actualEnd) {
const code = xml.charCodeAt(nameEnd);
if (code <= 32) {
if (StaxXmlParserSync.isWhitespace(code)) break;
} else if (code === 62 || code === 47) break;
nameEnd++;
}
const tagName = xml.slice(tagStart, nameEnd);
const currentNamespaces = /* @__PURE__ */ new Map();
if (this.namespaceStack.length > 0) {
const parentNamespaces = this.namespaceStack[this.namespaceStack.length - 1];
for (const [prefix$1, uri$1] of parentNamespaces) currentNamespaces.set(prefix$1, uri$1);
}
const { attributes, attributesWithPrefix } = this.parseAttributesFast(nameEnd, actualEnd, currentNamespaces);
const colonIndex = tagName.indexOf(":");
let localName, prefix, uri;
if (colonIndex === -1) {
localName = tagName;
prefix = void 0;
uri = currentNamespaces.get("");
} else {
prefix = tagName.slice(0, colonIndex);
localName = tagName.slice(colonIndex + 1);
uri = currentNamespaces.get(prefix);
}
yield {
type: XmlEventType.START_ELEMENT,
name: tagName,
localName,
prefix,
uri,
attributes,
attributesWithPrefix,
value: void 0,
error: void 0
};
this.elementStack.push(tagName);
if (!isSelfClosing) this.namespaceStack.push(currentNamespaces);
else {
yield {
type: XmlEventType.END_ELEMENT,
name: tagName,
localName,
prefix,
uri,
attributes: void 0,
attributesWithPrefix: void 0,
value: void 0,
error: void 0
};
this.elementStack.pop();
}
this.pos = tagEnd + 1;
}
parseAttributesFast(start, end, namespaces) {
if (start >= end) return {
attributes: {},
attributesWithPrefix: {}
};
const attributes = {};
const attributesWithPrefix = {};
let i = start;
const xml = this.xml;
while (i < end) {
while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++;
if (i >= end) break;
const nameStart = i;
while (i < end) {
const code = xml.charCodeAt(i);
if (code === 61 || StaxXmlParserSync.isWhitespace(code)) break;
i++;
}
if (i === nameStart) break;
const attrName = xml.slice(nameStart, i);
while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++;
if (i >= end || xml.charCodeAt(i) !== 61) {
attributes[attrName] = "true";
const colonIndex$1 = attrName.indexOf(":");
let localName$1, prefix$1, uri$1;
if (colonIndex$1 === -1) {
localName$1 = attrName;
prefix$1 = void 0;
uri$1 = void 0;
} else {
prefix$1 = attrName.slice(0, colonIndex$1);
localName$1 = attrName.slice(colonIndex$1 + 1);
uri$1 = namespaces.get(prefix$1);
}
attributesWithPrefix[attrName] = {
value: "true",
localName: localName$1,
prefix: prefix$1,
uri: uri$1
};
continue;
}
i++;
while (i < end && StaxXmlParserSync.isWhitespace(xml.charCodeAt(i))) i++;
if (i >= end) break;
const quote = xml.charCodeAt(i);
if (quote !== 34 && quote !== 39) break;
i++;
const valueStart = i;
while (i < end && xml.charCodeAt(i) !== quote) i++;
const rawValue = xml.slice(valueStart, i);
const attrValue = this.entityDecoder(rawValue);
attributes[attrName] = attrValue;
if (attrName === "xmlns") namespaces.set("", attrValue);
else if (attrName.startsWith("xmlns:")) namespaces.set(attrName.slice(6), attrValue);
const colonIndex = attrName.indexOf(":");
let localName, prefix, uri;
if (colonIndex === -1) {
localName = attrName;
prefix = void 0;
uri = void 0;
} else {
prefix = attrName.slice(0, colonIndex);
localName = attrName.slice(colonIndex + 1);
uri = namespaces.get(prefix);
}
if (attrName.startsWith("xmlns")) if (attrName === "xmlns") {
localName = "xmlns";
prefix = void 0;
} else {
localName = attrName.slice(6);
prefix = "xmlns";
}
attributesWithPrefix[attrName] = {
value: attrValue,
localName,
prefix,
uri
};
i++;
}
return {
attributes,
attributesWithPrefix
};
}
findTagEnd(start) {
let i = start;
let inQuote = false;
let quoteChar = 0;
while (i < this.xmlLength) {
const code = this.xml.charCodeAt(i);
if (code === 34 || code === 39) {
if (!inQuote) {
inQuote = true;
quoteChar = code;
} else if (code === quoteChar) {
inQuote = false;
quoteChar = 0;
}
} else if (code === 62 && !inQuote) return i;
i++;
}
return -1;
}
findSequence(sequence, start) {
const seqLen = sequence.length;
const maxPos = this.xmlLength - seqLen;
for (let i = start; i <= maxPos; i++) {
let match = true;
for (let j = 0; j < seqLen; j++) if (this.xml.charCodeAt(i + j) !== sequence.charCodeAt(j)) {
match = false;
break;
}
if (match) return i;
}
return -1;
}
};
//#endregion
//#region src/converter/XPathEngine.ts
/**
* XPath compiler with caching
*
* @internal
*/
var XPathCompiler = class {
static cache = /* @__PURE__ */ new Map();
static MAX_CACHE_SIZE = 1e3;
static compile(xpath) {
const cached = this.cache.get(xpath);
if (cached) return cached;
this.validateXPath(xpath);
const compiled = this.compileInternal(xpath);
if (this.cache.size >= this.MAX_CACHE_SIZE) {
const firstKey = this.cache.keys().next().value;
if (firstKey !== void 0) this.cache.delete(firstKey);
}
this.cache.set(xpath, compiled);
return compiled;
}
static validateXPath(xpath) {
if (!xpath || xpath.length === 0) throw new Error("XPath cannot be empty");
if (xpath.length > 1e3) throw new Error("XPath too long (max 1000 characters)");
if (/[;<>{}\\]/.test(xpath)) throw new Error("Invalid characters in XPath");
}
static compileInternal(xpath) {
const trimmed = xpath.trim();
const isRelative = trimmed.startsWith("./") || trimmed === ".";
const isAbsolute = !isRelative && trimmed.startsWith("/");
const isDescendant = !isRelative && trimmed.startsWith("//");
let path = trimmed;
if (isRelative && trimmed.startsWith("./")) path = path.slice(2);
else if (isRelative && trimmed === ".") path = "";
else if (isDescendant) path = path.slice(2);
else if (isAbsolute) path = path.slice(1);
if (isDescendant && path.includes("//")) throw new Error("Nested descendant-or-self (//) is not supported. Use // only at the beginning of XPath expression, e.g., \"//element/path\"");
const segments = [];
const parts = path.split("/").filter((p) => p.length > 0);
for (const part of parts) segments.push(this.compileSegment(part));
return {
segments,
isAbsolute,
isDescendant
};
}
static compileSegment(segment) {
if (segment.startsWith("@")) return {
name: segment.slice(1).trim(),
predicates: [],
isWildcard: false,
isAttribute: true,
isTextNode: false
};
if (segment === "text()") return {
name: "text()",
predicates: [],
isWildcard: false,
isAttribute: false,
isTextNode: true
};
const predicateMatch = segment.match(/^([^[]+)(\[.+\])?$/);
if (!predicateMatch) throw new Error(`Invalid XPath segment: ${segment}`);
const name = predicateMatch[1].trim();
const isWildcard = name === "*";
const predicates = [];
if (predicateMatch[2]) {
const predicateStr = predicateMatch[2];
const attrMatchSingle = predicateStr.match(/\[@([^=]+)='([^']+)'\]/);
const attrMatchDouble = predi