typesxml
Version:
Open source XML library written in TypeScript
1,188 lines • 96.4 kB
JavaScript
"use strict";
/*******************************************************************************
* Copyright (c) 2023-2026 Maxprograms.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse License 1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/epl-v10.html
*
* Contributors:
* Maxprograms - initial API and implementation
*******************************************************************************/
Object.defineProperty(exports, "__esModule", { value: true });
exports.SAXParser = void 0;
const node_fs_1 = require("node:fs");
const node_os_1 = require("node:os");
const node_path_1 = require("node:path");
const node_url_1 = require("node:url");
const Constants_js_1 = require("./Constants.js");
const FileReader_js_1 = require("./FileReader.js");
const NeedMoreDataError_js_1 = require("./NeedMoreDataError.js");
const RelaxNGParser_js_1 = require("./RelaxNGParser.js");
const StreamReader_js_1 = require("./StreamReader.js");
const StringReader_js_1 = require("./StringReader.js");
const XMLAttribute_js_1 = require("./XMLAttribute.js");
const XMLSchemaParser_js_1 = require("./XMLSchemaParser.js");
const XMLUtils_js_1 = require("./XMLUtils.js");
const DTDGrammar_js_1 = require("./dtd/DTDGrammar.js");
const DTDParser_js_1 = require("./dtd/DTDParser.js");
const fetchUrls_js_1 = require("./fetchUrls.js");
const Grammar_js_1 = require("./grammar/Grammar.js");
const SchemaBuilder_js_1 = require("./schema/SchemaBuilder.js");
const SchemaGrammar_js_1 = require("./schema/SchemaGrammar.js");
class SAXParser {
// Encoding names known to the parser; every listed name maps to itself.
// NOTE(review): the consumer of this map is outside the visible part of the file.
static SUPPORTED_ENCODINGS = new Map([
['UTF-8', 'UTF-8'],
['UTF-16', 'UTF-16'],
['UTF-16LE', 'UTF-16LE'],
['UTF-16BE', 'UTF-16BE']
]);
// An ASCII letter followed by any number of ASCII letters, digits, '.', '_' or '-'.
static ENCODING_NAME_PATTERN = /^[A-Za-z][A-Za-z0-9._-]*$/;
// Handler that receives the parsing events (startDocument, startElement, characters, ...).
contentHandler;
// Input source: a FileReader, StringReader or StreamReader.
reader;
// Index in `buffer` of the next character to be examined.
pointer;
// Text read from the reader and not yet discarded; line endings are normalized to '\n' on append.
buffer = '';
// Number of elements currently open.
elementStack;
// Names of the currently open elements, innermost last.
elementNameStack = [];
// One array per open element, collecting the names of its child elements.
childrenNames = [];
// Character data accumulated so far and not yet flushed.
characterRun;
// Becomes true once the first start tag has been processed.
rootParsed;
// XML version handed to the XMLUtils character validity checks; starts as '1.0'.
xmlVersion;
// Absolute path of the document being parsed (a virtual path for strings and streams).
currentFile;
// Catalog used for resolution; also forwarded to the content handler.
catalog;
// When true, names, attributes and element content are validated.
validating = false;
// Default attributes looked up by "namespace|localName", local name or qualified name.
relaxNGDefaultAttributes = new Map();
schemaDefaultAttributes = new Map();
// Schema locations already handled, successfully or not, so they are not processed twice.
processedSchemaLocations = new Set();
failedSchemaLocations = new Set();
// One prefix -> namespace URI map per open element, innermost last.
namespaceContextStack = [];
// Namespace URIs already handled, successfully or not, so they are not processed twice.
processedNamespaces = new Set();
failedNamespaces = new Set();
// When true, RelaxNG defaults are applied and end-tag content validation is skipped.
isRelaxNG = false;
// File name used to build a virtual path when the caller supplies none.
static DEFAULT_VIRTUAL_FILENAME = '__inmemory__.xml';
// True when the reader is a StreamReader; running out of data then raises NeedMoreDataError.
streamingMode = false;
// True once the reader has reported that no more input will arrive.
sourceEnded = false;
// True once startDocument() has been sent to the handler.
documentStarted = false;
// True once endDocument() has been sent to the handler.
documentEnded = false;
// True while the parser is between "<![CDATA[" and "]]>".
inCDATASection = false;
// True when the last chunk ended with '\r' and the decision between "\r\n" and a lone "\r" is pending.
pendingCR = false;
// True when the reader is a FileReader.
readingFromFile = false;
// NOTE(review): not referenced in the visible part of the file; presumably tracks DTD internal subset handling.
internalSubsetApplied = false;
// NOTE(review): presumably set by parseXMLDeclaration(), which is outside the visible part of the file.
xmlDeclarationParsed = false;
// Set when white space is seen in the prolog before any XML declaration has been parsed.
leadingContentBeforeXmlDeclaration = false;
// When false, no schema is loaded for namespaces or schemaLocation hints.
schemaLoadingEnabled = true;
constructor() {
this.characterRun = '';
this.elementStack = 0;
this.elementNameStack = [];
this.childrenNames = [];
this.pointer = 0;
this.rootParsed = false;
this.xmlVersion = '1.0';
this.streamingMode = false;
this.sourceEnded = false;
this.documentStarted = false;
this.documentEnded = false;
this.inCDATASection = false;
this.pendingCR = false;
this.readingFromFile = false;
this.internalSubsetApplied = false;
this.xmlDeclarationParsed = false;
this.leadingContentBeforeXmlDeclaration = false;
this.schemaLoadingEnabled = true;
}
setContentHandler(contentHandler) {
this.contentHandler = contentHandler;
if (this.catalog) {
this.contentHandler.setCatalog(this.catalog);
}
}
setCatalog(catalog) {
this.catalog = catalog;
this.contentHandler?.setCatalog(catalog);
}
/**
 * Turns validation on or off. The flag is consulted when start tags and
 * element content are processed.
 *
 * @param validating true to validate names, attributes and element content
 */
setValidating(validating) {
this.validating = validating;
}
/**
 * Enables or disables schema loading. When disabled, namespace declarations
 * and schema location hints do not trigger any schema lookup.
 * The setting is not reset between parses.
 *
 * @param enabled true to allow schema loading
 */
setSchemaLoadingEnabled(enabled) {
this.schemaLoadingEnabled = enabled;
}
parseFile(path, encoding) {
if (!this.contentHandler) {
throw new Error('ContentHandler not set');
}
const normalizedPath = (0, node_path_1.isAbsolute)(path) ? path : (0, node_path_1.resolve)(path);
const effectiveEncoding = encoding ?? FileReader_js_1.FileReader.detectEncoding(normalizedPath);
const reader = new FileReader_js_1.FileReader(normalizedPath, effectiveEncoding);
this.initializeParsing(reader, normalizedPath);
this.processSynchronous();
}
parseString(data, options) {
if (!this.contentHandler) {
throw new Error('ContentHandler not set');
}
const reader = new StringReader_js_1.StringReader(data);
const virtualPath = this.resolveVirtualPath(options);
this.initializeParsing(reader, virtualPath);
this.processSynchronous();
}
parseStream(stream, options) {
if (!this.contentHandler) {
return Promise.reject(new Error('ContentHandler not set'));
}
const encoding = options?.encoding ?? 'utf8';
const reader = new StreamReader_js_1.StreamReader(encoding);
const virtualPath = this.resolveVirtualPath(options);
this.initializeParsing(reader, virtualPath);
return new Promise((resolvePromise, rejectPromise) => {
const cleanup = () => {
stream.removeListener('data', onData);
stream.removeListener('end', onEnd);
stream.removeListener('error', onError);
};
const handleProcessing = (finalizing) => {
try {
this.processStreaming(finalizing);
if (this.documentEnded) {
cleanup();
this.reader?.closeFile();
resolvePromise();
}
else if (finalizing) {
cleanup();
this.reader?.closeFile();
rejectPromise(new Error('Malformed XML document: unexpected end of stream'));
}
}
catch (error) {
if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) {
return;
}
cleanup();
this.reader?.closeFile();
rejectPromise(error);
}
};
const onData = (chunk) => {
reader.enqueue(chunk);
handleProcessing(false);
};
const onEnd = () => {
reader.markFinished();
handleProcessing(true);
};
const onError = (error) => {
cleanup();
this.reader?.closeFile();
rejectPromise(error);
};
stream.setEncoding(encoding);
stream.on('data', onData);
stream.once('end', onEnd);
stream.once('error', onError);
});
}
initializeParsing(reader, currentFilePath) {
this.reader = reader;
const fallbackVirtualPath = (0, node_path_1.resolve)(process.cwd(), SAXParser.DEFAULT_VIRTUAL_FILENAME);
this.currentFile = currentFilePath || fallbackVirtualPath;
this.relaxNGDefaultAttributes = new Map();
this.schemaDefaultAttributes = new Map();
this.processedSchemaLocations = new Set();
this.failedSchemaLocations = new Set();
this.namespaceContextStack = [];
this.processedNamespaces = new Set();
this.failedNamespaces = new Set();
this.isRelaxNG = false;
this.pointer = 0;
this.buffer = '';
this.elementStack = 0;
this.elementNameStack = [];
this.childrenNames = [];
this.characterRun = '';
this.rootParsed = false;
this.xmlVersion = '1.0';
this.streamingMode = reader instanceof StreamReader_js_1.StreamReader;
this.sourceEnded = false;
this.documentStarted = false;
this.documentEnded = false;
this.inCDATASection = false;
this.pendingCR = false;
this.readingFromFile = reader instanceof FileReader_js_1.FileReader;
this.internalSubsetApplied = false;
this.xmlDeclarationParsed = false;
this.leadingContentBeforeXmlDeclaration = false;
this.contentHandler?.initialize();
}
processSynchronous() {
if (!this.reader) {
return;
}
try {
while (!this.documentEnded) {
this.readDocument();
if (!this.documentEnded) {
if (!this.tryReadMore()) {
break;
}
}
}
this.ensureDocumentClosed();
}
finally {
this.reader.closeFile();
}
}
processStreaming(finalizing) {
if (!this.reader) {
return;
}
while (true) {
try {
this.readDocument();
}
catch (error) {
if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) {
return;
}
throw error;
}
if (this.documentEnded) {
return;
}
let hasMoreData;
try {
hasMoreData = this.tryReadMore();
}
catch (error) {
if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) {
return;
}
throw error;
}
if (!hasMoreData) {
if (finalizing) {
this.ensureDocumentClosed();
}
return;
}
}
}
resolveVirtualPath(options) {
if (options?.virtualPath) {
const virtualPath = options.virtualPath;
return (0, node_path_1.isAbsolute)(virtualPath) ? virtualPath : (0, node_path_1.resolve)(virtualPath);
}
const pseudoFileName = options?.pseudoFileName ?? SAXParser.DEFAULT_VIRTUAL_FILENAME;
if (options?.basePath) {
const normalizedBase = (0, node_path_1.isAbsolute)(options.basePath) ? options.basePath : (0, node_path_1.resolve)(options.basePath);
return (0, node_path_1.resolve)(normalizedBase, pseudoFileName);
}
return (0, node_path_1.resolve)(process.cwd(), pseudoFileName);
}
tryReadMore() {
if (!this.reader || this.sourceEnded) {
return false;
}
if (this.reader instanceof StreamReader_js_1.StreamReader) {
if (this.reader.dataAvailable()) {
const chunk = this.reader.read();
if (chunk === '') {
if (this.reader.isFinished()) {
this.sourceEnded = true;
this.flushPendingCR();
}
return false;
}
this.appendToBuffer(chunk);
return true;
}
if (this.reader.isFinished()) {
this.sourceEnded = true;
this.flushPendingCR();
return false;
}
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
if (this.reader.dataAvailable()) {
const chunk = this.reader.read();
if (chunk === '') {
this.sourceEnded = true;
this.flushPendingCR();
return false;
}
this.appendToBuffer(chunk);
return true;
}
const chunk = this.reader.read();
if (chunk === '') {
this.sourceEnded = true;
this.flushPendingCR();
return false;
}
this.appendToBuffer(chunk);
return true;
}
appendToBuffer(chunk) {
if (chunk.length === 0) {
return;
}
let text = chunk;
if (this.pendingCR) {
text = '\r' + text;
this.pendingCR = false;
}
if (text.endsWith('\r')) {
this.pendingCR = true;
text = text.substring(0, text.length - 1);
}
text = text.replaceAll('\r\n', '\n').replaceAll('\r', '\n');
this.buffer += text;
}
flushPendingCR() {
if (this.pendingCR) {
this.buffer += '\n';
this.pendingCR = false;
}
}
ensureDocumentClosed() {
if (this.documentEnded) {
return;
}
if (!this.sourceEnded) {
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
throw new Error('Malformed XML document: unexpected end of input');
}
if (this.elementStack !== 0) {
throw new Error('Malformed XML document: unclosed elements');
}
this.cleanCharacterRun();
if (!this.rootParsed) {
throw new Error('Malformed XML document: missing document element');
}
if (this.rootParsed && !this.documentEnded) {
this.contentHandler?.endDocument();
this.documentEnded = true;
}
}
ensureLookahead(minRemaining) {
if (this.buffer.length - this.pointer >= minRemaining) {
return;
}
while (this.buffer.length - this.pointer < minRemaining) {
if (!this.tryReadMore()) {
break;
}
}
if (this.buffer.length - this.pointer < minRemaining) {
if (this.sourceEnded) {
return;
}
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
}
readDocument() {
if (!this.reader) {
return;
}
if (!this.documentStarted) {
this.contentHandler?.startDocument();
this.documentStarted = true;
}
while (true) {
if (this.pointer >= this.buffer.length) {
if (!this.tryReadMore()) {
break;
}
continue;
}
if (this.inCDATASection) {
const endIndex = this.buffer.indexOf(']]>', this.pointer);
if (endIndex === -1) {
const chunk = this.buffer.substring(this.pointer);
XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, chunk, 'CDATA section');
this.characterRun += chunk;
this.pointer = this.buffer.length;
if (!this.tryReadMore()) {
if (this.sourceEnded) {
throw new Error('Malformed XML document: unterminated CDATA section');
}
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
continue;
}
const chunk = this.buffer.substring(this.pointer, endIndex);
XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, chunk, 'CDATA section');
this.characterRun += chunk;
this.pointer = endIndex;
this.endCDATA();
continue;
}
if (this.lookingAt('<?xml ') || this.lookingAt('<?xml\t') || this.lookingAt('<?xml\r') || this.lookingAt('<?xml\n')) {
if (this.rootParsed && this.elementStack > 0) {
throw new Error('Malformed XML declaration: declaration cannot appear inside the document element');
}
this.parseXMLDeclaration();
continue;
}
if (this.lookingAt('<!DOCTYPE')) {
this.parseDoctype();
continue;
}
if (this.lookingAt('<!--')) {
this.parseComment();
continue;
}
if (this.lookingAt('<?')) {
this.parseProcessingInstruction();
continue;
}
if (this.lookingAt('</')) {
this.endElement();
continue;
}
if (this.lookingAt('<![CDATA[')) {
this.startCDATA();
continue;
}
if (this.lookingAt(']]>')) {
if (!this.inCDATASection) {
throw new Error('Malformed XML document: "]]>" cannot appear in character data');
}
this.endCDATA();
continue;
}
if (this.lookingAt('&')) {
if (!this.rootParsed || this.elementStack === 0) {
throw new Error('Malformed XML document: text found outside the document element');
}
this.parseEntityReference();
continue;
}
if (this.lookingAt('<')) {
if (this.rootParsed && this.elementStack === 0) {
throw new Error('Malformed XML document: multiple root elements');
}
this.startElement();
continue;
}
const codePoint = this.buffer.codePointAt(this.pointer);
XMLUtils_js_1.XMLUtils.ensureValidXmlCodePoint(this.xmlVersion, codePoint, 'character data');
const char = String.fromCodePoint(codePoint);
if (!this.rootParsed && !XMLUtils_js_1.XMLUtils.isXmlSpace(char)) {
throw new Error('Malformed XML document: text found in prolog');
}
if (!this.xmlDeclarationParsed && !this.rootParsed && XMLUtils_js_1.XMLUtils.isXmlSpace(char)) {
this.leadingContentBeforeXmlDeclaration = true;
}
if (this.rootParsed && this.elementStack === 0 && !XMLUtils_js_1.XMLUtils.isXmlSpace(char)) {
throw new Error('Malformed XML document: text found after root element');
}
this.characterRun += char;
this.pointer += char.length;
if (!this.inCDATASection && this.characterRun.endsWith(']]>')) {
throw new Error('Malformed XML document: "]]>" cannot appear in character data');
}
}
if (this.sourceEnded) {
this.ensureDocumentClosed();
}
else if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
parseEntityReference() {
this.cleanCharacterRun();
this.pointer++; // skip '&'
let name = '';
while (true) {
this.ensureLookahead(1);
if (this.pointer >= this.buffer.length) {
if (this.sourceEnded) {
throw new Error('Malformed XML document: unterminated entity reference');
}
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
if (this.buffer.charAt(this.pointer) === ';') {
break;
}
name += this.buffer.charAt(this.pointer++);
}
const grammar = this.contentHandler?.getGrammar();
const resolvedEntity = grammar?.resolveEntity(name);
if (name === 'lt') {
this.contentHandler?.characters('<');
}
else if (name === 'gt') {
this.contentHandler?.characters('>');
}
else if (name === 'amp') {
this.contentHandler?.characters('&');
}
else if (name === 'apos') {
this.contentHandler?.characters('\'');
}
else if (name === 'quot') {
this.contentHandler?.characters('"');
}
else if (name.startsWith('#x')) {
const codePoint = Number.parseInt(name.substring(2), 16);
XMLUtils_js_1.XMLUtils.ensureValidXmlCodePoint(this.xmlVersion, codePoint, `character reference &#x${name.substring(2)};`);
const char = String.fromCodePoint(codePoint);
this.contentHandler?.characters(char);
}
else if (name.startsWith('#')) {
const codePoint = Number.parseInt(name.substring(1), 10);
XMLUtils_js_1.XMLUtils.ensureValidXmlCodePoint(this.xmlVersion, codePoint, `character reference &#${name.substring(1)};`);
const char = String.fromCodePoint(codePoint);
this.contentHandler?.characters(char);
}
else if (resolvedEntity !== undefined) {
this.pointer++; // skip ';'
const remaining = this.buffer.substring(this.pointer);
const expandedReplacement = this.expandEntityReplacement(resolvedEntity, grammar, 0, new Set([name]));
XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, expandedReplacement, `expanded entity &${name};`);
this.buffer = expandedReplacement + remaining;
this.pointer = 0;
return;
}
else {
throw new Error(`Malformed XML document: undefined general entity &${name};`);
}
this.pointer++; // skip ';'
this.buffer = this.buffer.substring(this.pointer);
this.pointer = 0;
}
/**
 * Parses a start tag (or empty-element tag) beginning at the current '<'.
 *
 * Reads the element name and the raw attribute text, builds the namespace
 * context, triggers schema loading hints, decodes and normalizes attribute
 * values, adds default attributes, optionally validates, and finally
 * notifies the content handler. For an empty-element tag the matching
 * endElement() notification is sent as well.
 *
 * When NeedMoreDataError is raised, the pointer is rewound to the '<' so the
 * tag is parsed again from scratch once more data is available.
 *
 * @throws NeedMoreDataError in streaming mode when the tag is incomplete
 * @throws Error for malformed tags, invalid names (when validating) or failed validation
 */
startElement() {
this.cleanCharacterRun();
// Remembered so the pointer can be rewound if the tag turns out to be incomplete.
const tagStartPointer = this.pointer;
// Tracks whether a namespace context was pushed, so the catch block can undo it.
let namespacePushed = false;
try {
this.pointer++; // skip '<'
let name = '';
// Read the element name up to white space, '>' or '/>'.
while (true) {
this.ensureLookahead(1);
if (this.pointer >= this.buffer.length) {
if (this.sourceEnded) {
throw new Error('Malformed XML document: unterminated start tag');
}
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
if (XMLUtils_js_1.XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer)) || this.lookingAt('>') || this.lookingAt('/>')) {
break;
}
name += this.buffer.charAt(this.pointer++);
}
if (this.validating) {
if (!XMLUtils_js_1.XMLUtils.isValidXMLName(name)) {
throw new Error('Invalid XML name: ' + name);
}
}
// Collect the raw attribute text up to the tag terminator.
// '>' and '/>' inside a quoted attribute value do not end the tag.
let rest = '';
let inQuotes = false;
let quoteChar = '';
while (true) {
this.ensureLookahead(1);
if (this.pointer >= this.buffer.length) {
if (this.sourceEnded) {
throw new Error('Malformed XML document: unterminated start tag');
}
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
const currentChar = this.buffer.charAt(this.pointer);
const isQuote = currentChar === '"' || currentChar === '\'';
if (isQuote) {
if (inQuotes && currentChar === quoteChar) {
inQuotes = false;
quoteChar = '';
}
else if (!inQuotes) {
inQuotes = true;
quoteChar = currentChar;
}
}
if (!inQuotes && currentChar === '>') {
break;
}
if (!inQuotes && currentChar === '/' && this.lookingAt('/>')) {
break;
}
rest += currentChar;
this.pointer++;
}
rest = rest.trim();
let attributesMap = this.parseAttributes(rest);
// The new context inherits the bindings of the enclosing element, if any.
const previousContext = this.namespaceContextStack.length > 0 ? this.namespaceContextStack[this.namespaceContextStack.length - 1] : undefined;
const namespaceContext = this.buildNamespaceContext(attributesMap, previousContext);
this.handleSchemaLocationAttributes(attributesMap, namespaceContext);
this.namespaceContextStack.push(namespaceContext);
namespacePushed = true;
this.handleNamespaceDeclarations(attributesMap, namespaceContext, previousContext);
const grammarForEntities = this.contentHandler?.getGrammar();
const dtdGrammarForEntities = grammarForEntities instanceof DTDGrammar_js_1.DTDGrammar ? grammarForEntities : undefined;
// Every value is decoded and checked for invalid characters; the decoded value is
// stored here only when there is no DTD grammar.
// NOTE(review): with a DTD grammar, storing is presumably left to normalizeDTDAttributes() below; confirm.
attributesMap.forEach((value, key) => {
const decoded = this.decodeAttributeEntities(value, dtdGrammarForEntities);
XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, decoded, 'attribute value');
if (!dtdGrammarForEntities) {
attributesMap.set(key, decoded);
}
});
attributesMap = this.normalizeDTDAttributes(name, attributesMap);
let attributes = [];
attributesMap.forEach((value, key) => {
// TODO https://www.w3.org/TR/REC-xml/#AVNormalize
let attribute = new XMLAttribute_js_1.XMLAttribute(key, value);
attributes.push(attribute);
});
attributes = this.getDefaultAttributes(name, attributes);
// Mirror defaulted attributes into the map so validation sees them too.
attributes.forEach((attr) => {
if (!attributesMap.has(attr.getName())) {
attributesMap.set(attr.getName(), attr.getValue());
}
});
if (this.validating) {
attributes.forEach((attr) => {
if (!XMLUtils_js_1.XMLUtils.isValidXMLName(attr.getName())) {
throw new Error('Invalid XML attribute name: ' + attr.getName());
}
});
const grammar = this.contentHandler?.getGrammar();
if (grammar) {
let result = grammar.validateAttributes(name, attributesMap);
if (result.isValid === false) {
let errorMessages = '';
result.errors.forEach((error) => {
errorMessages += error.message + '\n';
});
throw new Error('Validation failed for element ' + name + ':\n' + errorMessages);
}
}
}
// Determine how the tag ends: '>' or '/>'.
this.ensureLookahead(1);
let isSelfClosing = false;
const terminatorChar = this.buffer.charAt(this.pointer);
if (terminatorChar === '/') {
this.ensureLookahead(2);
if (this.buffer.charAt(this.pointer + 1) !== '>') {
throw new Error('Malformed XML document: expected "/>" to close start tag for ' + name);
}
isSelfClosing = true;
}
else if (terminatorChar === '>') {
isSelfClosing = false;
}
else {
throw new Error('Malformed XML document: unexpected character "' + terminatorChar + '" at end of start tag');
}
// Add this element as a child of its parent (if parent exists)
if (this.childrenNames.length > 0) {
let parentChildren = this.childrenNames[this.childrenNames.length - 1];
parentChildren.push(name);
}
// Push a new empty array for this element's children
this.childrenNames.push([]);
this.contentHandler?.startElement(name, attributes);
this.elementStack++;
this.elementNameStack.push(name);
if (!this.rootParsed) {
this.rootParsed = true;
}
if (isSelfClosing) {
this.cleanCharacterRun();
// Pick the namespace for validation from the current context.
// NOTE(review): a later map entry overrides an earlier match, so when both the
// default namespace and the element's prefix are bound the result depends on insertion order.
let namespace = '';
if (this.namespaceContextStack.length > 0) {
this.namespaceContextStack[this.namespaceContextStack.length - 1].forEach((uri, prefix) => {
if (prefix === '') {
namespace = uri;
}
else if (name.startsWith(prefix + ':')) {
namespace = uri;
}
});
}
// validateElement() does nothing unless validating is on and a grammar is present.
this.validateElement(name, namespace);
this.contentHandler?.endElement(name);
// Undo the bookkeeping done above: the element is already closed.
this.elementStack--;
this.elementNameStack.pop();
this.childrenNames.pop();
if (namespacePushed && this.namespaceContextStack.length > 0) {
this.namespaceContextStack.pop();
namespacePushed = false;
}
this.pointer += 2; // skip '/>'
}
else {
this.pointer++; // skip '>'
}
// Drop the consumed tag from the buffer.
this.buffer = this.buffer.substring(this.pointer);
this.pointer = 0;
}
catch (error) {
if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) {
// Incomplete tag: rewind so the whole tag is parsed again later.
this.pointer = tagStartPointer;
}
// Remove the namespace context pushed for this element, whatever the error.
if (namespacePushed && this.namespaceContextStack.length > 0) {
this.namespaceContextStack.pop();
}
throw error;
}
}
endElement() {
this.cleanCharacterRun();
this.pointer += 2; // skip '</'
let name = '';
// Read tag name until whitespace or '>'
while (true) {
this.ensureLookahead(1);
if (this.pointer >= this.buffer.length) {
if (this.sourceEnded) {
throw new Error('Malformed XML document: unterminated end tag');
}
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
if (this.lookingAt('>') || XMLUtils_js_1.XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer))) {
break;
}
name += this.buffer.charAt(this.pointer);
this.pointer++;
}
// Skip optional whitespace before '>'
while (true) {
this.ensureLookahead(1);
if (this.pointer >= this.buffer.length) {
if (this.sourceEnded) {
throw new Error('Malformed XML document: unterminated end tag');
}
if (this.streamingMode) {
throw new NeedMoreDataError_js_1.NeedMoreDataError();
}
}
if (!XMLUtils_js_1.XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer))) {
break;
}
this.pointer++;
}
// Expect '>'
if (!this.lookingAt('>')) {
throw new Error('Well-formedness error: expected ">" in end tag "</' + name + '"');
}
// Well-formedness check: mismatched element tags
if (this.elementNameStack.length === 0) {
throw new Error('Mismatched element tags: found closing tag "' + name + '" but no elements are open');
}
const expectedName = this.elementNameStack.pop();
if (name !== expectedName) {
throw new Error('Mismatched element tags: expected closing tag for "' + expectedName + '" but found "' + name + '"');
}
// Validate element content when validating mode is enabled
if (this.validating && !this.isRelaxNG) {
let namespace = '';
if (this.namespaceContextStack.length > 0) {
this.namespaceContextStack[this.namespaceContextStack.length - 1].forEach((uri, prefix) => {
if (prefix === '') {
namespace = uri;
}
else if (name.startsWith(prefix + ':')) {
namespace = uri;
}
});
}
this.validateElement(name, namespace);
}
const grammar = this.contentHandler?.getGrammar();
if (grammar !== undefined && grammar.getGrammarType() === Grammar_js_1.GrammarType.XML_SCHEMA) {
const currentText = this.contentHandler ? this.contentHandler.getCurrentText() : '';
if (currentText.trim() === '') {
const defaultText = grammar.getElementTextDefault(name);
if (defaultText !== undefined) {
this.contentHandler?.characters(defaultText);
}
}
}
this.contentHandler?.endElement(name);
this.elementStack--;
if (this.childrenNames.length > 0) {
this.childrenNames.pop();
}
if (this.namespaceContextStack.length > 0) {
this.namespaceContextStack.pop();
}
this.pointer++; // skip '>'
this.buffer = this.buffer.substring(this.pointer);
this.pointer = 0;
}
validateElement(name, namespace) {
const grammar = this.contentHandler?.getGrammar();
if (grammar && this.validating) {
const text = this.contentHandler ? this.contentHandler.getCurrentText() : '';
const actualChildrenNames = this.childrenNames.length > 0 ? this.childrenNames[this.childrenNames.length - 1] : [];
const elementValidationResult = grammar.validateElement(name, namespace, actualChildrenNames, text);
if (!elementValidationResult.isValid) {
const errorMessages = elementValidationResult.errors.map(e => e.message).join('; ');
throw new Error('Element validation failed for element "' + name + '": ' + errorMessages);
}
}
}
getDefaultAttributes(elementName, attributes) {
const grammar = this.contentHandler?.getGrammar();
const namespaceContext = this.namespaceContextStack.length > 0 ? this.namespaceContextStack[this.namespaceContextStack.length - 1] : new Map();
const existingAttributeNames = new Set();
const existingAttributeKeys = new Set();
attributes.forEach((attr) => {
const attributeName = attr.getName();
existingAttributeNames.add(attributeName);
const attributeParts = this.splitQualifiedName(attributeName);
let attributeNamespaceUri = undefined;
if (attributeParts.prefix) {
attributeNamespaceUri = namespaceContext.get(attributeParts.prefix);
}
const attributeKey = this.buildSchemaAttributeKey(attributeParts.localName, attributeNamespaceUri);
existingAttributeKeys.add(attributeKey);
});
if (grammar) {
const grammarDefaults = grammar.getDefaultAttributes(elementName);
if (grammarDefaults) {
const dtdGrammar = grammar instanceof DTDGrammar_js_1.DTDGrammar ? grammar : undefined;
const declarations = dtdGrammar?.getElementAttributesMap(elementName);
grammarDefaults.forEach((value, key) => {
const grammarParts = this.splitQualifiedName(key);
const attributeKey = this.buildSchemaAttributeKey(grammarParts.localName);
if (existingAttributeKeys.has(attributeKey) || existingAttributeNames.has(key)) {
return;
}
let normalizedValue;
if (dtdGrammar) {
const expanded = this.decodeAttributeEntities(value, dtdGrammar);
const decl = declarations?.get(key);
normalizedValue = this.normalizeAttributeValue(value, expanded, decl);
}
else {
normalizedValue = this.normalizeAttributeValue(value, value);
}
attributes.push(new XMLAttribute_js_1.XMLAttribute(key, normalizedValue));
existingAttributeNames.add(key);
existingAttributeKeys.add(attributeKey);
});
}
}
const nameParts = this.splitQualifiedName(elementName);
const namespaceUri = this.getNamespaceUriForElement(elementName);
if (namespaceUri) {
const namespaceKey = namespaceUri + "|" + nameParts.localName;
this.appendSchemaDefaultsForElement(this.schemaDefaultAttributes.get(namespaceKey), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
}
this.appendSchemaDefaultsForElement(this.schemaDefaultAttributes.get(nameParts.localName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
if (nameParts.localName !== elementName) {
this.appendSchemaDefaultsForElement(this.schemaDefaultAttributes.get(elementName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
}
if (this.isRelaxNG) {
if (namespaceUri) {
const namespaceRelaxKey = namespaceUri + "|" + nameParts.localName;
this.appendSchemaDefaultsForElement(this.relaxNGDefaultAttributes.get(namespaceRelaxKey), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
}
this.appendSchemaDefaultsForElement(this.relaxNGDefaultAttributes.get(nameParts.localName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
if (nameParts.localName !== elementName) {
this.appendSchemaDefaultsForElement(this.relaxNGDefaultAttributes.get(elementName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
}
}
return attributes;
}
appendSchemaDefaultsForElement(defaults, attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, elementNameParts, namespaceContext) {
if (!defaults) {
return;
}
defaults.forEach((info) => {
const attributeKey = this.buildSchemaAttributeKey(info.localName, info.namespace);
if (existingAttributeKeys.has(attributeKey)) {
return;
}
const attributeName = this.resolveSchemaAttributeQualifiedName(info, namespaceUri, elementNameParts, namespaceContext);
if (existingAttributeNames.has(attributeName)) {
return;
}
const normalizedValue = this.normalizeAttributeValue(info.value, info.value);
attributes.push(new XMLAttribute_js_1.XMLAttribute(attributeName, normalizedValue));
existingAttributeNames.add(attributeName);
existingAttributeKeys.add(attributeKey);
});
}
resolveSchemaAttributeQualifiedName(info, namespaceUri, elementNameParts, namespaceContext) {
const lexicalName = info.lexicalName;
const attributeNamespace = info.namespace;
const parts = this.splitQualifiedName(lexicalName);
if (parts.prefix) {
const mappedNamespace = namespaceContext.get(parts.prefix);
if (!attributeNamespace && mappedNamespace) {
return lexicalName;
}
if (attributeNamespace && mappedNamespace === attributeNamespace) {
return lexicalName;
}
if (attributeNamespace) {
const prefix = this.findNamespacePrefix(attributeNamespace, namespaceContext);
if (prefix) {
return prefix + ":" + info.localName;
}
}
return info.localName;
}
if (attributeNamespace) {
const prefix = this.findNamespacePrefix(attributeNamespace, namespaceContext);
if (prefix) {
return prefix + ":" + info.localName;
}
if (namespaceUri && attributeNamespace === namespaceUri && elementNameParts.prefix) {
return elementNameParts.prefix + ":" + info.localName;
}
}
return info.lexicalName;
}
findNamespacePrefix(namespaceUri, context) {
for (const entry of context.entries()) {
const prefix = entry[0];
const uri = entry[1];
if (uri === namespaceUri && prefix !== '') {
return prefix;
}
}
return undefined;
}
buildSchemaAttributeKey(localName, namespace) {
if (namespace) {
return namespace + "|" + localName;
}
return localName;
}
buildNamespaceContext(attributes, previousContext) {
const context = previousContext ? new Map(previousContext) : new Map();
attributes.forEach((value, key) => {
if (key === 'xmlns') {
context.set('', value);
}
else if (key.startsWith('xmlns:') && key.length > 6) {
const prefix = key.substring(6);
context.set(prefix, value);
}
});
if (!context.has('xml')) {
context.set('xml', 'http://www.w3.org/XML/1998/namespace');
}
return context;
}
handleNamespaceDeclarations(attributes, namespaceContext, previousContext) {
attributes.forEach((value, key) => {
if (key === 'xmlns') {
const trimmed = value.trim();
const previousValue = previousContext ? previousContext.get('') : undefined;
if (trimmed !== '' && trimmed !== previousValue) {
this.tryLoadSchemaForNamespace(trimmed);
}
this.propagateNamespaceToGrammar('', trimmed);
return;
}
if (!key.startsWith('xmlns:') || key.length <= 6) {
return;
}
const prefix = key.substring(6);
const trimmed = value.trim();
const previousValue = previousContext ? previousContext.get(prefix) : undefined;
if (trimmed !== '' && trimmed !== previousValue) {
this.tryLoadSchemaForNamespace(trimmed);
}
this.propagateNamespaceToGrammar(prefix, trimmed);
});
// If no new declaration appears on the current element, ensure the default namespace is considered.
if (!previousContext && namespaceContext.has('')) {
const defaultNamespace = namespaceContext.get('');
if (defaultNamespace) {
this.tryLoadSchemaForNamespace(defaultNamespace);
this.propagateNamespaceToGrammar('', defaultNamespace);
}
}
}
propagateNamespaceToGrammar(prefix, uri) {
if (!uri) {
return;
}
const grammar = this.contentHandler?.getGrammar();
if (grammar instanceof SchemaGrammar_js_1.SchemaGrammar) {
grammar.addNamespaceDeclaration(prefix, uri);
}
}
handleSchemaLocationAttributes(attributes, namespaceContext) {
const schemaInstancePrefixes = new Set();
namespaceContext.forEach((uri, prefix) => {
if (uri === Constants_js_1.Constants.XML_SCHEMA_INSTANCE_NS_URI) {
schemaInstancePrefixes.add(prefix);
}
});
if (schemaInstancePrefixes.size === 0) {
return;
}
attributes.forEach((value, key) => {
if (key === 'xmlns' || key.startsWith('xmlns:')) {
return;
}
const { prefix, localName } = this.splitQualifiedName(key);
if (!prefix || !schemaInstancePrefixes.has(prefix)) {
return;
}
if (localName === 'schemaLocation') {
const tokens = value.trim().split(/\s+/).filter((token) => token.length > 0);
if (tokens.length < 2) {
return;
}
for (let index = 0; index + 1 < tokens.length; index += 2) {
const namespaceUri = tokens[index];
const location = tokens[index + 1];
this.processSchemaReference(namespaceUri, location);
}
}
else if (localName === 'noNamespaceSchemaLocation') {
const location = value.trim();
if (location !== '') {
this.processSchemaReference('', location);
}
}
});
}
tryLoadSchemaForNamespace(namespaceUri) {
if (!this.schemaLoadingEnabled) {
return;
}
if (namespaceUri === '') {
return;
}
if (XMLSchemaParser_js_1.XMLSchemaParser.shouldIgnoreNamespace(namespaceUri)) {
this.processedNamespaces.add(namespaceUri);
return;
}
if (this.processedNamespaces.has(namespaceUri) || this.failedNamespaces.has(namespaceUri)) {
return;
}
if (!this.catalog) {
return;
}
const candidates = [
this.catalog.matchURI(namespaceUri),
this.catalog.matchSystem(namespaceUri)
];
for (let index = 0; index < candidates.length; index++) {
const candidate = candidates[index];
if (!candidate) {
continue;
}
const normalized = candidate.startsWith('file://') ? (0, node_url_1.fileURLToPath)(candidate) : candidate;
if (!(0, node_fs_1.existsSync)(normalized)) {
continue;
}
if (this.loadSchemaDefaults(normalized, namespaceUri)) {
this.processedNamespaces.add(namespaceUri);
return;
}
}
this.failedNamespaces.add(namespaceUri);
}
processSchemaReference(namespaceUri, location) {
if (!this.schemaLoadingEnabled) {
return;
}
if (location === '') {
return;
}
if (this.processedSchemaLocations.has(location) || this.failedSchemaLocations.has(location)) {
return;
}
const resolvedPath = this.resolveSchemaLocation(namespaceUri, location);
if (!resolvedPath) {
this.failedSchemaLocations.add(location);
return;
}
const schemaLoaded = this.loadSchemaDefaults(resolvedPath, location);
if (resolvedPath.startsWith((0, node_os_1.tmpdir)())) {
try {
(0, node_fs_1.unlinkSync)(resolvedPath);
}
catch (e) { /* ignore */ }
}
if (!schemaLoaded) {
this.failedSchemaLocations.add(location);
}
}
loadSchemaDefaults(resolvedPath, identifier) {
if (this.processedSchemaLocations.has(resolvedPath)) {
this.processedSchemaLocations.add(identifier);
return true;
}
try {
const parser = XMLSchemaParser_js_1.XMLSchemaParser.getInstance(this.catalog);
const rawDefaults = parser.collectDefaultAttributes(resolvedPath);
const convertedDefaults = new Map();
rawDefaults.forEach((attributeMap, elementKey) => {
const converted = new Map();
attributeMap.forEach((info, attributeKey) => {
const copy = {
localName: info.localName,
namespace: info.namespace,
lexicalName: info.lexicalName,
value: info.value
};
converted.set(attributeKey, copy);
});
convertedDefaults.set(elementKey, converted);
});
this.mergeSchemaDefaults(convertedDefaults);
this.processedSchemaLocations.add(resolvedPath);
this.processedSchemaLocations.add(identifier);
if (this.validating) {
const builder = new SchemaBuilder_js_1.SchemaBuilder(this.catalog);
const grammar = builder.buildGrammar(resolvedPath);
const existing = this.contentHandler?.getGrammar();
if (existing instanceof SchemaGrammar_js_1.SchemaGrammar) {
existing.mergeFrom(grammar);
}
else {
this.contentHandler?.setGrammar(grammar);
}
}
return true;
}
catch (error) {
if (this.validating) {
throw error;
}
const message = error instanceof Error ? error.message : String(error);
console.warn(`Warning: Could not load XML Schema defaults from ${resolvedPath}: ${message}`);
return false;
}
}
resolveSchemaLocation(namespaceUri, location) {
let candidate = location;
if (candidate.startsWith('file://')) {
candidate = (0, node_url_1.fileURLToPath)(candidate);
if ((0, node_fs_1.existsSync)(candidate)) {
return candidate;
}
}
if ((0, node_path_1.isAbsolute)(location) && (0, node_fs_1.existsSync)(location)) {
return location;
}
if (!location.startsWith('http://') && !location.startsWith('https://') && !location.startsWith('urn:')) {
if (this.currentFile) {
const baseDir = (0, node_path_1.dirname)(this.currentFile);
const