UNPKG

typesxml

Version:

Open source XML library written in TypeScript

192 lines 7.3 kB
"use strict"; /******************************************************************************* * Copyright (c) 2023-2026 Maxprograms. * * This program and the accompanying materials * are made available under the terms of the Eclipse License 1.0 * which accompanies this distribution, and is available at * https://www.eclipse.org/org/documents/epl-v10.html * * Contributors: * Maxprograms - initial API and implementation *******************************************************************************/ Object.defineProperty(exports, "__esModule", { value: true }); exports.XMLCanonicalizer = void 0; const CData_js_1 = require("./CData.js"); const DOMBuilder_js_1 = require("./DOMBuilder.js"); const ProcessingInstruction_js_1 = require("./ProcessingInstruction.js"); const SAXParser_js_1 = require("./SAXParser.js"); const TextNode_js_1 = require("./TextNode.js"); const XMLComment_js_1 = require("./XMLComment.js"); const XMLDeclaration_js_1 = require("./XMLDeclaration.js"); const XMLDocumentType_js_1 = require("./XMLDocumentType.js"); const XMLElement_js_1 = require("./XMLElement.js"); /** * Generates the canonical XML representation defined by the W3C XML Test Suite. * * Canonicalization rules: * - Attribute order is lexical (Unicode code point order). * - Character data is escaped using the Datachar productions (&amp;, &lt;, &gt;, &quot;, &#9;, &#10;, &#13;). * - CDATA sections are treated as their character content. * - Comments and document type declarations are omitted. * - Processing instructions are preserved in document order with their data escaped as Datachar. */ class XMLCanonicalizer { document; parseFile(path, encoding) { const builder = new DOMBuilder_js_1.DOMBuilder(); const parser = new SAXParser_js_1.SAXParser(); parser.setContentHandler(builder); parser.parseFile(path, encoding); this.document = builder.getDocument(); } parseString(xml, options) { const builder = new DOMBuilder_js_1.DOMBuilder(); const parser = new SAXParser_js_1.SAXParser(); parser.setContentHandler(builder); parser.parseString(xml, options); this.document = builder.getDocument(); } async parseStream(stream, options) { const builder = new DOMBuilder_js_1.DOMBuilder(); const parser = new SAXParser_js_1.SAXParser(); parser.setContentHandler(builder); await parser.parseStream(stream, options); this.document = builder.getDocument(); } setDocument(document) { this.document = document; } getDocument() { return this.document; } toString() { if (!this.document) { throw new Error("Canonicalizer has no document. Parse an XML source first."); } return this.renderDocument(this.document); } renderDocument(document) { const parts = []; for (const node of document.contentIterator()) { parts.push(this.renderTopLevelNode(node)); } return parts.join(""); } renderTopLevelNode(node) { if (node instanceof XMLDeclaration_js_1.XMLDeclaration || node instanceof XMLComment_js_1.XMLComment || node instanceof XMLDocumentType_js_1.XMLDocumentType) { return ""; // omitted from canonical form } if (node instanceof ProcessingInstruction_js_1.ProcessingInstruction) { return this.renderProcessingInstruction(node); } if (node instanceof XMLElement_js_1.XMLElement) { return this.renderElement(node); } if (node instanceof TextNode_js_1.TextNode || node instanceof CData_js_1.CData) { const value = this.getNodeValue(node); if (this.isWhitespaceOnly(value)) { return ""; } return this.escapeData(value); } return ""; } renderElement(element) { const builder = []; builder.push("<" + element.getName()); const attributes = [...element.getAttributes()].sort((a, b) => a.getName().localeCompare(b.getName())); attributes.forEach((attribute) => { builder.push(" " + attribute.getName() + "=\"" + this.escapeData(attribute.getValue()) + "\""); }); builder.push(">"); element.getContent().forEach((child) => { if (child instanceof XMLElement_js_1.XMLElement) { builder.push(this.renderElement(child)); } else if (child instanceof TextNode_js_1.TextNode || child instanceof CData_js_1.CData) { builder.push(this.escapeData(this.getNodeValue(child))); } else if (child instanceof ProcessingInstruction_js_1.ProcessingInstruction) { builder.push(this.renderProcessingInstruction(child)); } // comments and other node types are ignored in canonical form }); builder.push("</" + element.getName() + ">"); return builder.join(""); } renderProcessingInstruction(pi) { const data = this.escapeProcessingInstructionData(pi.getData()); return `<?${pi.getTarget()} ${data}?>`; } getNodeValue(node) { if (node instanceof TextNode_js_1.TextNode) { return node.getValue(); } return node.getValue(); } escapeData(data) { const normalized = data.replaceAll('\r\n', "\n"); let result = ""; for (let i = 0; i < normalized.length; i++) { const char = normalized.charAt(i); switch (char) { case "&": result += "&amp;"; break; case "<": result += "&lt;"; break; case ">": result += "&gt;"; break; case '"': result += "&quot;"; break; case "\t": result += "&#9;"; break; case "\n": result += "&#10;"; break; case "\r": result += "&#13;"; break; default: result += char; } } return result; } escapeProcessingInstructionData(data) { const normalized = data.replaceAll('\r\n', "\n"); let result = ""; for (let i = 0; i < normalized.length; i++) { const char = normalized.charAt(i); if (char === "&") { result += "&amp;"; } else if (char === "\r") { result += "&#13;"; } else { result += char; } } return result; } isWhitespaceOnly(value) { if (value.length === 0) { return true; } for (let i = 0; i < value.length; i++) { const char = value.charAt(i); if (char !== " " && char !== "\t" && char !== "\n" && char !== "\r") { return false; } } return true; } } exports.XMLCanonicalizer = XMLCanonicalizer; //# sourceMappingURL=XMLCanonicalizer.js.map