UNPKG

typesxml

Version:

Open source XML library written in TypeScript

1,188 lines 96.4 kB
"use strict"; /******************************************************************************* * Copyright (c) 2023-2026 Maxprograms. * * This program and the accompanying materials * are made available under the terms of the Eclipse License 1.0 * which accompanies this distribution, and is available at * https://www.eclipse.org/org/documents/epl-v10.html * * Contributors: * Maxprograms - initial API and implementation *******************************************************************************/ Object.defineProperty(exports, "__esModule", { value: true }); exports.SAXParser = void 0; const node_fs_1 = require("node:fs"); const node_os_1 = require("node:os"); const node_path_1 = require("node:path"); const node_url_1 = require("node:url"); const Constants_js_1 = require("./Constants.js"); const FileReader_js_1 = require("./FileReader.js"); const NeedMoreDataError_js_1 = require("./NeedMoreDataError.js"); const RelaxNGParser_js_1 = require("./RelaxNGParser.js"); const StreamReader_js_1 = require("./StreamReader.js"); const StringReader_js_1 = require("./StringReader.js"); const XMLAttribute_js_1 = require("./XMLAttribute.js"); const XMLSchemaParser_js_1 = require("./XMLSchemaParser.js"); const XMLUtils_js_1 = require("./XMLUtils.js"); const DTDGrammar_js_1 = require("./dtd/DTDGrammar.js"); const DTDParser_js_1 = require("./dtd/DTDParser.js"); const fetchUrls_js_1 = require("./fetchUrls.js"); const Grammar_js_1 = require("./grammar/Grammar.js"); const SchemaBuilder_js_1 = require("./schema/SchemaBuilder.js"); const SchemaGrammar_js_1 = require("./schema/SchemaGrammar.js"); class SAXParser { static SUPPORTED_ENCODINGS = new Map([ ['UTF-8', 'UTF-8'], ['UTF-16', 'UTF-16'], ['UTF-16LE', 'UTF-16LE'], ['UTF-16BE', 'UTF-16BE'] ]); static ENCODING_NAME_PATTERN = /^[A-Za-z][A-Za-z0-9._-]*$/; contentHandler; reader; pointer; buffer = ''; elementStack; elementNameStack = []; childrenNames = []; characterRun; rootParsed; xmlVersion; currentFile; catalog; validating = false; relaxNGDefaultAttributes = new Map(); schemaDefaultAttributes = new Map(); processedSchemaLocations = new Set(); failedSchemaLocations = new Set(); namespaceContextStack = []; processedNamespaces = new Set(); failedNamespaces = new Set(); isRelaxNG = false; static DEFAULT_VIRTUAL_FILENAME = '__inmemory__.xml'; streamingMode = false; sourceEnded = false; documentStarted = false; documentEnded = false; inCDATASection = false; pendingCR = false; readingFromFile = false; internalSubsetApplied = false; xmlDeclarationParsed = false; leadingContentBeforeXmlDeclaration = false; schemaLoadingEnabled = true; constructor() { this.characterRun = ''; this.elementStack = 0; this.elementNameStack = []; this.childrenNames = []; this.pointer = 0; this.rootParsed = false; this.xmlVersion = '1.0'; this.streamingMode = false; this.sourceEnded = false; this.documentStarted = false; this.documentEnded = false; this.inCDATASection = false; this.pendingCR = false; this.readingFromFile = false; this.internalSubsetApplied = false; this.xmlDeclarationParsed = false; this.leadingContentBeforeXmlDeclaration = false; this.schemaLoadingEnabled = true; } setContentHandler(contentHandler) { this.contentHandler = contentHandler; if (this.catalog) { this.contentHandler.setCatalog(this.catalog); } } setCatalog(catalog) { this.catalog = catalog; this.contentHandler?.setCatalog(catalog); } setValidating(validating) { this.validating = validating; } setSchemaLoadingEnabled(enabled) { this.schemaLoadingEnabled = enabled; } parseFile(path, encoding) { if (!this.contentHandler) { throw new Error('ContentHandler not set'); } const normalizedPath = (0, node_path_1.isAbsolute)(path) ? path : (0, node_path_1.resolve)(path); const effectiveEncoding = encoding ?? FileReader_js_1.FileReader.detectEncoding(normalizedPath); const reader = new FileReader_js_1.FileReader(normalizedPath, effectiveEncoding); this.initializeParsing(reader, normalizedPath); this.processSynchronous(); } parseString(data, options) { if (!this.contentHandler) { throw new Error('ContentHandler not set'); } const reader = new StringReader_js_1.StringReader(data); const virtualPath = this.resolveVirtualPath(options); this.initializeParsing(reader, virtualPath); this.processSynchronous(); } parseStream(stream, options) { if (!this.contentHandler) { return Promise.reject(new Error('ContentHandler not set')); } const encoding = options?.encoding ?? 'utf8'; const reader = new StreamReader_js_1.StreamReader(encoding); const virtualPath = this.resolveVirtualPath(options); this.initializeParsing(reader, virtualPath); return new Promise((resolvePromise, rejectPromise) => { const cleanup = () => { stream.removeListener('data', onData); stream.removeListener('end', onEnd); stream.removeListener('error', onError); }; const handleProcessing = (finalizing) => { try { this.processStreaming(finalizing); if (this.documentEnded) { cleanup(); this.reader?.closeFile(); resolvePromise(); } else if (finalizing) { cleanup(); this.reader?.closeFile(); rejectPromise(new Error('Malformed XML document: unexpected end of stream')); } } catch (error) { if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) { return; } cleanup(); this.reader?.closeFile(); rejectPromise(error); } }; const onData = (chunk) => { reader.enqueue(chunk); handleProcessing(false); }; const onEnd = () => { reader.markFinished(); handleProcessing(true); }; const onError = (error) => { cleanup(); this.reader?.closeFile(); rejectPromise(error); }; stream.setEncoding(encoding); stream.on('data', onData); stream.once('end', onEnd); stream.once('error', onError); }); } initializeParsing(reader, currentFilePath) { this.reader = reader; const fallbackVirtualPath = (0, node_path_1.resolve)(process.cwd(), SAXParser.DEFAULT_VIRTUAL_FILENAME); this.currentFile = currentFilePath || fallbackVirtualPath; this.relaxNGDefaultAttributes = new Map(); this.schemaDefaultAttributes = new Map(); this.processedSchemaLocations = new Set(); this.failedSchemaLocations = new Set(); this.namespaceContextStack = []; this.processedNamespaces = new Set(); this.failedNamespaces = new Set(); this.isRelaxNG = false; this.pointer = 0; this.buffer = ''; this.elementStack = 0; this.elementNameStack = []; this.childrenNames = []; this.characterRun = ''; this.rootParsed = false; this.xmlVersion = '1.0'; this.streamingMode = reader instanceof StreamReader_js_1.StreamReader; this.sourceEnded = false; this.documentStarted = false; this.documentEnded = false; this.inCDATASection = false; this.pendingCR = false; this.readingFromFile = reader instanceof FileReader_js_1.FileReader; this.internalSubsetApplied = false; this.xmlDeclarationParsed = false; this.leadingContentBeforeXmlDeclaration = false; this.contentHandler?.initialize(); } processSynchronous() { if (!this.reader) { return; } try { while (!this.documentEnded) { this.readDocument(); if (!this.documentEnded) { if (!this.tryReadMore()) { break; } } } this.ensureDocumentClosed(); } finally { this.reader.closeFile(); } } processStreaming(finalizing) { if (!this.reader) { return; } while (true) { try { this.readDocument(); } catch (error) { if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) { return; } throw error; } if (this.documentEnded) { return; } let hasMoreData; try { hasMoreData = this.tryReadMore(); } catch (error) { if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) { return; } throw error; } if (!hasMoreData) { if (finalizing) { this.ensureDocumentClosed(); } return; } } } resolveVirtualPath(options) { if (options?.virtualPath) { const virtualPath = options.virtualPath; return (0, node_path_1.isAbsolute)(virtualPath) ? virtualPath : (0, node_path_1.resolve)(virtualPath); } const pseudoFileName = options?.pseudoFileName ?? SAXParser.DEFAULT_VIRTUAL_FILENAME; if (options?.basePath) { const normalizedBase = (0, node_path_1.isAbsolute)(options.basePath) ? options.basePath : (0, node_path_1.resolve)(options.basePath); return (0, node_path_1.resolve)(normalizedBase, pseudoFileName); } return (0, node_path_1.resolve)(process.cwd(), pseudoFileName); } tryReadMore() { if (!this.reader || this.sourceEnded) { return false; } if (this.reader instanceof StreamReader_js_1.StreamReader) { if (this.reader.dataAvailable()) { const chunk = this.reader.read(); if (chunk === '') { if (this.reader.isFinished()) { this.sourceEnded = true; this.flushPendingCR(); } return false; } this.appendToBuffer(chunk); return true; } if (this.reader.isFinished()) { this.sourceEnded = true; this.flushPendingCR(); return false; } throw new NeedMoreDataError_js_1.NeedMoreDataError(); } if (this.reader.dataAvailable()) { const chunk = this.reader.read(); if (chunk === '') { this.sourceEnded = true; this.flushPendingCR(); return false; } this.appendToBuffer(chunk); return true; } const chunk = this.reader.read(); if (chunk === '') { this.sourceEnded = true; this.flushPendingCR(); return false; } this.appendToBuffer(chunk); return true; } appendToBuffer(chunk) { if (chunk.length === 0) { return; } let text = chunk; if (this.pendingCR) { text = '\r' + text; this.pendingCR = false; } if (text.endsWith('\r')) { this.pendingCR = true; text = text.substring(0, text.length - 1); } text = text.replaceAll('\r\n', '\n').replaceAll('\r', '\n'); this.buffer += text; } flushPendingCR() { if (this.pendingCR) { this.buffer += '\n'; this.pendingCR = false; } } ensureDocumentClosed() { if (this.documentEnded) { return; } if (!this.sourceEnded) { if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } throw new Error('Malformed XML document: unexpected end of input'); } if (this.elementStack !== 0) { throw new Error('Malformed XML document: unclosed elements'); } this.cleanCharacterRun(); if (!this.rootParsed) { throw new Error('Malformed XML document: missing document element'); } if (this.rootParsed && !this.documentEnded) { this.contentHandler?.endDocument(); this.documentEnded = true; } } ensureLookahead(minRemaining) { if (this.buffer.length - this.pointer >= minRemaining) { return; } while (this.buffer.length - this.pointer < minRemaining) { if (!this.tryReadMore()) { break; } } if (this.buffer.length - this.pointer < minRemaining) { if (this.sourceEnded) { return; } if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } } readDocument() { if (!this.reader) { return; } if (!this.documentStarted) { this.contentHandler?.startDocument(); this.documentStarted = true; } while (true) { if (this.pointer >= this.buffer.length) { if (!this.tryReadMore()) { break; } continue; } if (this.inCDATASection) { const endIndex = this.buffer.indexOf(']]>', this.pointer); if (endIndex === -1) { const chunk = this.buffer.substring(this.pointer); XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, chunk, 'CDATA section'); this.characterRun += chunk; this.pointer = this.buffer.length; if (!this.tryReadMore()) { if (this.sourceEnded) { throw new Error('Malformed XML document: unterminated CDATA section'); } if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } continue; } const chunk = this.buffer.substring(this.pointer, endIndex); XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, chunk, 'CDATA section'); this.characterRun += chunk; this.pointer = endIndex; this.endCDATA(); continue; } if (this.lookingAt('<?xml ') || this.lookingAt('<?xml\t') || this.lookingAt('<?xml\r') || this.lookingAt('<?xml\n')) { if (this.rootParsed && this.elementStack > 0) { throw new Error('Malformed XML declaration: declaration cannot appear inside the document element'); } this.parseXMLDeclaration(); continue; } if (this.lookingAt('<!DOCTYPE')) { this.parseDoctype(); continue; } if (this.lookingAt('<!--')) { this.parseComment(); continue; } if (this.lookingAt('<?')) { this.parseProcessingInstruction(); continue; } if (this.lookingAt('</')) { this.endElement(); continue; } if (this.lookingAt('<![CDATA[')) { this.startCDATA(); continue; } if (this.lookingAt(']]>')) { if (!this.inCDATASection) { throw new Error('Malformed XML document: "]]>" cannot appear in character data'); } this.endCDATA(); continue; } if (this.lookingAt('&')) { if (!this.rootParsed || this.elementStack === 0) { throw new Error('Malformed XML document: text found outside the document element'); } this.parseEntityReference(); continue; } if (this.lookingAt('<')) { if (this.rootParsed && this.elementStack === 0) { throw new Error('Malformed XML document: multiple root elements'); } this.startElement(); continue; } const codePoint = this.buffer.codePointAt(this.pointer); XMLUtils_js_1.XMLUtils.ensureValidXmlCodePoint(this.xmlVersion, codePoint, 'character data'); const char = String.fromCodePoint(codePoint); if (!this.rootParsed && !XMLUtils_js_1.XMLUtils.isXmlSpace(char)) { throw new Error('Malformed XML document: text found in prolog'); } if (!this.xmlDeclarationParsed && !this.rootParsed && XMLUtils_js_1.XMLUtils.isXmlSpace(char)) { this.leadingContentBeforeXmlDeclaration = true; } if (this.rootParsed && this.elementStack === 0 && !XMLUtils_js_1.XMLUtils.isXmlSpace(char)) { throw new Error('Malformed XML document: text found after root element'); } this.characterRun += char; this.pointer += char.length; if (!this.inCDATASection && this.characterRun.endsWith(']]>')) { throw new Error('Malformed XML document: "]]>" cannot appear in character data'); } } if (this.sourceEnded) { this.ensureDocumentClosed(); } else if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } parseEntityReference() { this.cleanCharacterRun(); this.pointer++; // skip '&' let name = ''; while (true) { this.ensureLookahead(1); if (this.pointer >= this.buffer.length) { if (this.sourceEnded) { throw new Error('Malformed XML document: unterminated entity reference'); } if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } if (this.buffer.charAt(this.pointer) === ';') { break; } name += this.buffer.charAt(this.pointer++); } const grammar = this.contentHandler?.getGrammar(); const resolvedEntity = grammar?.resolveEntity(name); if (name === 'lt') { this.contentHandler?.characters('<'); } else if (name === 'gt') { this.contentHandler?.characters('>'); } else if (name === 'amp') { this.contentHandler?.characters('&'); } else if (name === 'apos') { this.contentHandler?.characters('\''); } else if (name === 'quot') { this.contentHandler?.characters('"'); } else if (name.startsWith('#x')) { const codePoint = Number.parseInt(name.substring(2), 16); XMLUtils_js_1.XMLUtils.ensureValidXmlCodePoint(this.xmlVersion, codePoint, `character reference &#x${name.substring(2)};`); const char = String.fromCodePoint(codePoint); this.contentHandler?.characters(char); } else if (name.startsWith('#')) { const codePoint = Number.parseInt(name.substring(1), 10); XMLUtils_js_1.XMLUtils.ensureValidXmlCodePoint(this.xmlVersion, codePoint, `character reference &#${name.substring(1)};`); const char = String.fromCodePoint(codePoint); this.contentHandler?.characters(char); } else if (resolvedEntity !== undefined) { this.pointer++; // skip ';' const remaining = this.buffer.substring(this.pointer); const expandedReplacement = this.expandEntityReplacement(resolvedEntity, grammar, 0, new Set([name])); XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, expandedReplacement, `expanded entity &${name};`); this.buffer = expandedReplacement + remaining; this.pointer = 0; return; } else { throw new Error(`Malformed XML document: undefined general entity &${name};`); } this.pointer++; // skip ';' this.buffer = this.buffer.substring(this.pointer); this.pointer = 0; } startElement() { this.cleanCharacterRun(); const tagStartPointer = this.pointer; let namespacePushed = false; try { this.pointer++; // skip '<' let name = ''; while (true) { this.ensureLookahead(1); if (this.pointer >= this.buffer.length) { if (this.sourceEnded) { throw new Error('Malformed XML document: unterminated start tag'); } if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } if (XMLUtils_js_1.XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer)) || this.lookingAt('>') || this.lookingAt('/>')) { break; } name += this.buffer.charAt(this.pointer++); } if (this.validating) { if (!XMLUtils_js_1.XMLUtils.isValidXMLName(name)) { throw new Error('Invalid XML name: ' + name); } } let rest = ''; let inQuotes = false; let quoteChar = ''; while (true) { this.ensureLookahead(1); if (this.pointer >= this.buffer.length) { if (this.sourceEnded) { throw new Error('Malformed XML document: unterminated start tag'); } if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } const currentChar = this.buffer.charAt(this.pointer); const isQuote = currentChar === '"' || currentChar === '\''; if (isQuote) { if (inQuotes && currentChar === quoteChar) { inQuotes = false; quoteChar = ''; } else if (!inQuotes) { inQuotes = true; quoteChar = currentChar; } } if (!inQuotes && currentChar === '>') { break; } if (!inQuotes && currentChar === '/' && this.lookingAt('/>')) { break; } rest += currentChar; this.pointer++; } rest = rest.trim(); let attributesMap = this.parseAttributes(rest); const previousContext = this.namespaceContextStack.length > 0 ? this.namespaceContextStack[this.namespaceContextStack.length - 1] : undefined; const namespaceContext = this.buildNamespaceContext(attributesMap, previousContext); this.handleSchemaLocationAttributes(attributesMap, namespaceContext); this.namespaceContextStack.push(namespaceContext); namespacePushed = true; this.handleNamespaceDeclarations(attributesMap, namespaceContext, previousContext); const grammarForEntities = this.contentHandler?.getGrammar(); const dtdGrammarForEntities = grammarForEntities instanceof DTDGrammar_js_1.DTDGrammar ? grammarForEntities : undefined; attributesMap.forEach((value, key) => { const decoded = this.decodeAttributeEntities(value, dtdGrammarForEntities); XMLUtils_js_1.XMLUtils.ensureValidXmlCharacters(this.xmlVersion, decoded, 'attribute value'); if (!dtdGrammarForEntities) { attributesMap.set(key, decoded); } }); attributesMap = this.normalizeDTDAttributes(name, attributesMap); let attributes = []; attributesMap.forEach((value, key) => { // TODO https://www.w3.org/TR/REC-xml/#AVNormalize let attribute = new XMLAttribute_js_1.XMLAttribute(key, value); attributes.push(attribute); }); attributes = this.getDefaultAttributes(name, attributes); attributes.forEach((attr) => { if (!attributesMap.has(attr.getName())) { attributesMap.set(attr.getName(), attr.getValue()); } }); if (this.validating) { attributes.forEach((attr) => { if (!XMLUtils_js_1.XMLUtils.isValidXMLName(attr.getName())) { throw new Error('Invalid XML attribute name: ' + attr.getName()); } }); const grammar = this.contentHandler?.getGrammar(); if (grammar) { let result = grammar.validateAttributes(name, attributesMap); if (result.isValid === false) { let errorMessages = ''; result.errors.forEach((error) => { errorMessages += error.message + '\n'; }); throw new Error('Validation failed for element ' + name + ':\n' + errorMessages); } } } this.ensureLookahead(1); let isSelfClosing = false; const terminatorChar = this.buffer.charAt(this.pointer); if (terminatorChar === '/') { this.ensureLookahead(2); if (this.buffer.charAt(this.pointer + 1) !== '>') { throw new Error('Malformed XML document: expected "/>" to close start tag for ' + name); } isSelfClosing = true; } else if (terminatorChar === '>') { isSelfClosing = false; } else { throw new Error('Malformed XML document: unexpected character "' + terminatorChar + '" at end of start tag'); } // Add this element as a child of its parent (if parent exists) if (this.childrenNames.length > 0) { let parentChildren = this.childrenNames[this.childrenNames.length - 1]; parentChildren.push(name); } // Push a new empty array for this element's children this.childrenNames.push([]); this.contentHandler?.startElement(name, attributes); this.elementStack++; this.elementNameStack.push(name); if (!this.rootParsed) { this.rootParsed = true; } if (isSelfClosing) { this.cleanCharacterRun(); let namespace = ''; if (this.namespaceContextStack.length > 0) { this.namespaceContextStack[this.namespaceContextStack.length - 1].forEach((uri, prefix) => { if (prefix === '') { namespace = uri; } else if (name.startsWith(prefix + ':')) { namespace = uri; } }); } this.validateElement(name, namespace); this.contentHandler?.endElement(name); this.elementStack--; this.elementNameStack.pop(); this.childrenNames.pop(); if (namespacePushed && this.namespaceContextStack.length > 0) { this.namespaceContextStack.pop(); namespacePushed = false; } this.pointer += 2; // skip '/>' } else { this.pointer++; // skip '>' } this.buffer = this.buffer.substring(this.pointer); this.pointer = 0; } catch (error) { if (error instanceof NeedMoreDataError_js_1.NeedMoreDataError) { this.pointer = tagStartPointer; } if (namespacePushed && this.namespaceContextStack.length > 0) { this.namespaceContextStack.pop(); } throw error; } } endElement() { this.cleanCharacterRun(); this.pointer += 2; // skip '</' let name = ''; // Read tag name until whitespace or '>' while (true) { this.ensureLookahead(1); if (this.pointer >= this.buffer.length) { if (this.sourceEnded) { throw new Error('Malformed XML document: unterminated end tag'); } if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } if (this.lookingAt('>') || XMLUtils_js_1.XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer))) { break; } name += this.buffer.charAt(this.pointer); this.pointer++; } // Skip optional whitespace before '>' while (true) { this.ensureLookahead(1); if (this.pointer >= this.buffer.length) { if (this.sourceEnded) { throw new Error('Malformed XML document: unterminated end tag'); } if (this.streamingMode) { throw new NeedMoreDataError_js_1.NeedMoreDataError(); } } if (!XMLUtils_js_1.XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer))) { break; } this.pointer++; } // Expect '>' if (!this.lookingAt('>')) { throw new Error('Well-formedness error: expected ">" in end tag "</' + name + '"'); } // Well-formedness check: mismatched element tags if (this.elementNameStack.length === 0) { throw new Error('Mismatched element tags: found closing tag "' + name + '" but no elements are open'); } const expectedName = this.elementNameStack.pop(); if (name !== expectedName) { throw new Error('Mismatched element tags: expected closing tag for "' + expectedName + '" but found "' + name + '"'); } // Validate element content when validating mode is enabled if (this.validating && !this.isRelaxNG) { let namespace = ''; if (this.namespaceContextStack.length > 0) { this.namespaceContextStack[this.namespaceContextStack.length - 1].forEach((uri, prefix) => { if (prefix === '') { namespace = uri; } else if (name.startsWith(prefix + ':')) { namespace = uri; } }); } this.validateElement(name, namespace); } const grammar = this.contentHandler?.getGrammar(); if (grammar !== undefined && grammar.getGrammarType() === Grammar_js_1.GrammarType.XML_SCHEMA) { const currentText = this.contentHandler ? this.contentHandler.getCurrentText() : ''; if (currentText.trim() === '') { const defaultText = grammar.getElementTextDefault(name); if (defaultText !== undefined) { this.contentHandler?.characters(defaultText); } } } this.contentHandler?.endElement(name); this.elementStack--; if (this.childrenNames.length > 0) { this.childrenNames.pop(); } if (this.namespaceContextStack.length > 0) { this.namespaceContextStack.pop(); } this.pointer++; // skip '>' this.buffer = this.buffer.substring(this.pointer); this.pointer = 0; } validateElement(name, namespace) { const grammar = this.contentHandler?.getGrammar(); if (grammar && this.validating) { const text = this.contentHandler ? this.contentHandler.getCurrentText() : ''; const actualChildrenNames = this.childrenNames.length > 0 ? this.childrenNames[this.childrenNames.length - 1] : []; const elementValidationResult = grammar.validateElement(name, namespace, actualChildrenNames, text); if (!elementValidationResult.isValid) { const errorMessages = elementValidationResult.errors.map(e => e.message).join('; '); throw new Error('Element validation failed for element "' + name + '": ' + errorMessages); } } } getDefaultAttributes(elementName, attributes) { const grammar = this.contentHandler?.getGrammar(); const namespaceContext = this.namespaceContextStack.length > 0 ? this.namespaceContextStack[this.namespaceContextStack.length - 1] : new Map(); const existingAttributeNames = new Set(); const existingAttributeKeys = new Set(); attributes.forEach((attr) => { const attributeName = attr.getName(); existingAttributeNames.add(attributeName); const attributeParts = this.splitQualifiedName(attributeName); let attributeNamespaceUri = undefined; if (attributeParts.prefix) { attributeNamespaceUri = namespaceContext.get(attributeParts.prefix); } const attributeKey = this.buildSchemaAttributeKey(attributeParts.localName, attributeNamespaceUri); existingAttributeKeys.add(attributeKey); }); if (grammar) { const grammarDefaults = grammar.getDefaultAttributes(elementName); if (grammarDefaults) { const dtdGrammar = grammar instanceof DTDGrammar_js_1.DTDGrammar ? grammar : undefined; const declarations = dtdGrammar?.getElementAttributesMap(elementName); grammarDefaults.forEach((value, key) => { const grammarParts = this.splitQualifiedName(key); const attributeKey = this.buildSchemaAttributeKey(grammarParts.localName); if (existingAttributeKeys.has(attributeKey) || existingAttributeNames.has(key)) { return; } let normalizedValue; if (dtdGrammar) { const expanded = this.decodeAttributeEntities(value, dtdGrammar); const decl = declarations?.get(key); normalizedValue = this.normalizeAttributeValue(value, expanded, decl); } else { normalizedValue = this.normalizeAttributeValue(value, value); } attributes.push(new XMLAttribute_js_1.XMLAttribute(key, normalizedValue)); existingAttributeNames.add(key); existingAttributeKeys.add(attributeKey); }); } } const nameParts = this.splitQualifiedName(elementName); const namespaceUri = this.getNamespaceUriForElement(elementName); if (namespaceUri) { const namespaceKey = namespaceUri + "|" + nameParts.localName; this.appendSchemaDefaultsForElement(this.schemaDefaultAttributes.get(namespaceKey), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext); } this.appendSchemaDefaultsForElement(this.schemaDefaultAttributes.get(nameParts.localName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext); if (nameParts.localName !== elementName) { this.appendSchemaDefaultsForElement(this.schemaDefaultAttributes.get(elementName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext); } if (this.isRelaxNG) { if (namespaceUri) { const namespaceRelaxKey = namespaceUri + "|" + nameParts.localName; this.appendSchemaDefaultsForElement(this.relaxNGDefaultAttributes.get(namespaceRelaxKey), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext); } this.appendSchemaDefaultsForElement(this.relaxNGDefaultAttributes.get(nameParts.localName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext); if (nameParts.localName !== elementName) { this.appendSchemaDefaultsForElement(this.relaxNGDefaultAttributes.get(elementName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext); } } return attributes; } appendSchemaDefaultsForElement(defaults, attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, elementNameParts, namespaceContext) { if (!defaults) { return; } defaults.forEach((info) => { const attributeKey = this.buildSchemaAttributeKey(info.localName, info.namespace); if (existingAttributeKeys.has(attributeKey)) { return; } const attributeName = this.resolveSchemaAttributeQualifiedName(info, namespaceUri, elementNameParts, namespaceContext); if (existingAttributeNames.has(attributeName)) { return; } const normalizedValue = this.normalizeAttributeValue(info.value, info.value); attributes.push(new XMLAttribute_js_1.XMLAttribute(attributeName, normalizedValue)); existingAttributeNames.add(attributeName); existingAttributeKeys.add(attributeKey); }); } resolveSchemaAttributeQualifiedName(info, namespaceUri, elementNameParts, namespaceContext) { const lexicalName = info.lexicalName; const attributeNamespace = info.namespace; const parts = this.splitQualifiedName(lexicalName); if (parts.prefix) { const mappedNamespace = namespaceContext.get(parts.prefix); if (!attributeNamespace && mappedNamespace) { return lexicalName; } if (attributeNamespace && mappedNamespace === attributeNamespace) { return lexicalName; } if (attributeNamespace) { const prefix = this.findNamespacePrefix(attributeNamespace, namespaceContext); if (prefix) { return prefix + ":" + info.localName; } } return info.localName; } if (attributeNamespace) { const prefix = this.findNamespacePrefix(attributeNamespace, namespaceContext); if (prefix) { return prefix + ":" + info.localName; } if (namespaceUri && attributeNamespace === namespaceUri && elementNameParts.prefix) { return elementNameParts.prefix + ":" + info.localName; } } return info.lexicalName; } findNamespacePrefix(namespaceUri, context) { for (const entry of context.entries()) { const prefix = entry[0]; const uri = entry[1]; if (uri === namespaceUri && prefix !== '') { return prefix; } } return undefined; } buildSchemaAttributeKey(localName, namespace) { if (namespace) { return namespace + "|" + localName; } return localName; } buildNamespaceContext(attributes, previousContext) { const context = previousContext ? new Map(previousContext) : new Map(); attributes.forEach((value, key) => { if (key === 'xmlns') { context.set('', value); } else if (key.startsWith('xmlns:') && key.length > 6) { const prefix = key.substring(6); context.set(prefix, value); } }); if (!context.has('xml')) { context.set('xml', 'http://www.w3.org/XML/1998/namespace'); } return context; } handleNamespaceDeclarations(attributes, namespaceContext, previousContext) { attributes.forEach((value, key) => { if (key === 'xmlns') { const trimmed = value.trim(); const previousValue = previousContext ? previousContext.get('') : undefined; if (trimmed !== '' && trimmed !== previousValue) { this.tryLoadSchemaForNamespace(trimmed); } this.propagateNamespaceToGrammar('', trimmed); return; } if (!key.startsWith('xmlns:') || key.length <= 6) { return; } const prefix = key.substring(6); const trimmed = value.trim(); const previousValue = previousContext ? previousContext.get(prefix) : undefined; if (trimmed !== '' && trimmed !== previousValue) { this.tryLoadSchemaForNamespace(trimmed); } this.propagateNamespaceToGrammar(prefix, trimmed); }); // If no new declaration appears on the current element, ensure the default namespace is considered. if (!previousContext && namespaceContext.has('')) { const defaultNamespace = namespaceContext.get(''); if (defaultNamespace) { this.tryLoadSchemaForNamespace(defaultNamespace); this.propagateNamespaceToGrammar('', defaultNamespace); } } } propagateNamespaceToGrammar(prefix, uri) { if (!uri) { return; } const grammar = this.contentHandler?.getGrammar(); if (grammar instanceof SchemaGrammar_js_1.SchemaGrammar) { grammar.addNamespaceDeclaration(prefix, uri); } } handleSchemaLocationAttributes(attributes, namespaceContext) { const schemaInstancePrefixes = new Set(); namespaceContext.forEach((uri, prefix) => { if (uri === Constants_js_1.Constants.XML_SCHEMA_INSTANCE_NS_URI) { schemaInstancePrefixes.add(prefix); } }); if (schemaInstancePrefixes.size === 0) { return; } attributes.forEach((value, key) => { if (key === 'xmlns' || key.startsWith('xmlns:')) { return; } const { prefix, localName } = this.splitQualifiedName(key); if (!prefix || !schemaInstancePrefixes.has(prefix)) { return; } if (localName === 'schemaLocation') { const tokens = value.trim().split(/\s+/).filter((token) => token.length > 0); if (tokens.length < 2) { return; } for (let index = 0; index + 1 < tokens.length; index += 2) { const namespaceUri = tokens[index]; const location = tokens[index + 1]; this.processSchemaReference(namespaceUri, location); } } else if (localName === 'noNamespaceSchemaLocation') { const location = value.trim(); if (location !== '') { this.processSchemaReference('', location); } } }); } tryLoadSchemaForNamespace(namespaceUri) { if (!this.schemaLoadingEnabled) { return; } if (namespaceUri === '') { return; } if (XMLSchemaParser_js_1.XMLSchemaParser.shouldIgnoreNamespace(namespaceUri)) { this.processedNamespaces.add(namespaceUri); return; } if (this.processedNamespaces.has(namespaceUri) || this.failedNamespaces.has(namespaceUri)) { return; } if (!this.catalog) { return; } const candidates = [ this.catalog.matchURI(namespaceUri), this.catalog.matchSystem(namespaceUri) ]; for (let index = 0; index < candidates.length; index++) { const candidate = candidates[index]; if (!candidate) { continue; } const normalized = candidate.startsWith('file://') ? (0, node_url_1.fileURLToPath)(candidate) : candidate; if (!(0, node_fs_1.existsSync)(normalized)) { continue; } if (this.loadSchemaDefaults(normalized, namespaceUri)) { this.processedNamespaces.add(namespaceUri); return; } } this.failedNamespaces.add(namespaceUri); } processSchemaReference(namespaceUri, location) { if (!this.schemaLoadingEnabled) { return; } if (location === '') { return; } if (this.processedSchemaLocations.has(location) || this.failedSchemaLocations.has(location)) { return; } const resolvedPath = this.resolveSchemaLocation(namespaceUri, location); if (!resolvedPath) { this.failedSchemaLocations.add(location); return; } const schemaLoaded = this.loadSchemaDefaults(resolvedPath, location); if (resolvedPath.startsWith((0, node_os_1.tmpdir)())) { try { (0, node_fs_1.unlinkSync)(resolvedPath); } catch (e) { /* ignore */ } } if (!schemaLoaded) { this.failedSchemaLocations.add(location); } } loadSchemaDefaults(resolvedPath, identifier) { if (this.processedSchemaLocations.has(resolvedPath)) { this.processedSchemaLocations.add(identifier); return true; } try { const parser = XMLSchemaParser_js_1.XMLSchemaParser.getInstance(this.catalog); const rawDefaults = parser.collectDefaultAttributes(resolvedPath); const convertedDefaults = new Map(); rawDefaults.forEach((attributeMap, elementKey) => { const converted = new Map(); attributeMap.forEach((info, attributeKey) => { const copy = { localName: info.localName, namespace: info.namespace, lexicalName: info.lexicalName, value: info.value }; converted.set(attributeKey, copy); }); convertedDefaults.set(elementKey, converted); }); this.mergeSchemaDefaults(convertedDefaults); this.processedSchemaLocations.add(resolvedPath); this.processedSchemaLocations.add(identifier); if (this.validating) { const builder = new SchemaBuilder_js_1.SchemaBuilder(this.catalog); const grammar = builder.buildGrammar(resolvedPath); const existing = this.contentHandler?.getGrammar(); if (existing instanceof SchemaGrammar_js_1.SchemaGrammar) { existing.mergeFrom(grammar); } else { this.contentHandler?.setGrammar(grammar); } } return true; } catch (error) { if (this.validating) { throw error; } const message = error instanceof Error ? error.message : String(error); console.warn(`Warning: Could not load XML Schema defaults from ${resolvedPath}: ${message}`); return false; } } resolveSchemaLocation(namespaceUri, location) { let candidate = location; if (candidate.startsWith('file://')) { candidate = (0, node_url_1.fileURLToPath)(candidate); if ((0, node_fs_1.existsSync)(candidate)) { return candidate; } } if ((0, node_path_1.isAbsolute)(location) && (0, node_fs_1.existsSync)(location)) { return location; } if (!location.startsWith('http://') && !location.startsWith('https://') && !location.startsWith('urn:')) { if (this.currentFile) { const baseDir = (0, node_path_1.dirname)(this.currentFile); const