UNPKG

typesxml

Version:

Open source XML library written in TypeScript

github.com/maxprograms-com/TypesXML

maxprograms-com/TypesXML

188 lines • 6.8 kB

JavaScript

/******************************************************************************* * Copyright (c) 2023-2026 Maxprograms. * * This program and the accompanying materials * are made available under the terms of the Eclipse License 1.0 * which accompanies this distribution, and is available at * https://www.eclipse.org/org/documents/epl-v10.html * * Contributors: * Maxprograms - initial API and implementation *******************************************************************************/ import { CData } from "./CData.js"; import { DOMBuilder } from "./DOMBuilder.js"; import { ProcessingInstruction } from "./ProcessingInstruction.js"; import { SAXParser } from "./SAXParser.js"; import { TextNode } from "./TextNode.js"; import { XMLComment } from "./XMLComment.js"; import { XMLDeclaration } from "./XMLDeclaration.js"; import { XMLDocumentType } from "./XMLDocumentType.js"; import { XMLElement } from "./XMLElement.js"; /** * Generates the canonical XML representation defined by the W3C XML Test Suite. * * Canonicalization rules: * - Attribute order is lexical (Unicode code point order). * - Character data is escaped using the Datachar productions (&, <, >, ", 	, 
/**
 * Generates the canonical XML representation defined by the W3C XML Test Suite.
 *
 * Canonicalization rules:
 * - Attribute order is lexical (Unicode code point order).
 * - Character data is escaped using the Datachar productions
 *   (&amp;, &lt;, &gt;, &quot;, &#9;, &#10;, &#13;).
 * - CDATA sections are treated as their character content.
 * - Comments and document type declarations are omitted.
 * - Processing instructions are preserved in document order; "&" and carriage
 *   returns in their data are escaped.
 */
export class XMLCanonicalizer {
    /** Document rendered by toString(); stays undefined until a parse* call or setDocument(). */
    document;

    /**
     * Parses a file with a SAXParser feeding a DOMBuilder and keeps the built document.
     *
     * @param path forwarded unchanged to SAXParser.parseFile
     * @param encoding forwarded unchanged to SAXParser.parseFile
     */
    parseFile(path, encoding) {
        const builder = new DOMBuilder();
        const parser = new SAXParser();
        parser.setContentHandler(builder);
        parser.parseFile(path, encoding);
        this.document = builder.getDocument();
    }

    /**
     * Parses an in-memory XML string and keeps the built document.
     *
     * @param xml forwarded unchanged to SAXParser.parseString
     * @param options forwarded unchanged to SAXParser.parseString
     */
    parseString(xml, options) {
        const builder = new DOMBuilder();
        const parser = new SAXParser();
        parser.setContentHandler(builder);
        parser.parseString(xml, options);
        this.document = builder.getDocument();
    }

    /**
     * Parses a stream and keeps the built document once parsing has finished.
     *
     * @param stream forwarded unchanged to SAXParser.parseStream
     * @param options forwarded unchanged to SAXParser.parseStream
     * @returns {Promise<void>} settles after the parser's promise settles
     */
    async parseStream(stream, options) {
        const builder = new DOMBuilder();
        const parser = new SAXParser();
        parser.setContentHandler(builder);
        await parser.parseStream(stream, options);
        this.document = builder.getDocument();
    }

    /** Replaces the document that toString() will render. */
    setDocument(document) {
        this.document = document;
    }

    /** Returns the document currently held, or undefined when none was set. */
    getDocument() {
        return this.document;
    }

    /**
     * Renders the held document in canonical form.
     *
     * @returns {string} canonical XML text
     * @throws {Error} when no document has been parsed or set
     */
    toString() {
        if (!this.document) {
            throw new Error("Canonicalizer has no document. Parse an XML source first.");
        }
        return this.renderDocument(this.document);
    }

    /** Concatenates the canonical form of every node yielded by document.contentIterator(). */
    renderDocument(document) {
        const parts = [];
        for (const node of document.contentIterator()) {
            parts.push(this.renderTopLevelNode(node));
        }
        return parts.join("");
    }

    /**
     * Renders one node that sits directly under the document.
     *
     * @returns {string} canonical text, or "" for nodes that canonical form omits
     */
    renderTopLevelNode(node) {
        if (node instanceof XMLDeclaration || node instanceof XMLComment || node instanceof XMLDocumentType) {
            return ""; // omitted from canonical form
        }
        if (node instanceof ProcessingInstruction) {
            return this.renderProcessingInstruction(node);
        }
        if (node instanceof XMLElement) {
            return this.renderElement(node);
        }
        if (node instanceof TextNode || node instanceof CData) {
            const value = this.getNodeValue(node);
            // Whitespace between top-level constructs is formatting only and is dropped.
            if (this.isWhitespaceOnly(value)) {
                return "";
            }
            return this.escapeData(value);
        }
        return "";
    }

    /**
     * Renders an element, its attributes sorted by name in code point order,
     * and its children recursively. Empty elements are written as a start tag
     * followed by an end tag.
     *
     * @returns {string} canonical text of the element
     */
    renderElement(element) {
        const builder = [];
        builder.push("<" + element.getName());
        // Sort a copy so the element's own attribute order is left untouched.
        // localeCompare would apply locale collation (e.g. "a" before "B"),
        // which is not the code point order the canonical form requires.
        const attributes = [...element.getAttributes()].sort((a, b) => this.compareCodePoints(a.getName(), b.getName()));
        for (const attribute of attributes) {
            builder.push(" " + attribute.getName() + "=\"" + this.escapeData(attribute.getValue()) + "\"");
        }
        builder.push(">");
        element.getContent().forEach((child) => {
            if (child instanceof XMLElement) {
                builder.push(this.renderElement(child));
            } else if (child instanceof TextNode || child instanceof CData) {
                builder.push(this.escapeData(this.getNodeValue(child)));
            } else if (child instanceof ProcessingInstruction) {
                builder.push(this.renderProcessingInstruction(child));
            }
            // comments and other node types are ignored in canonical form
        });
        builder.push("</" + element.getName() + ">");
        return builder.join("");
    }

    /** Renders a processing instruction as `<?target data?>` with its data escaped. */
    renderProcessingInstruction(pi) {
        const data = this.escapeProcessingInstructionData(pi.getData());
        return `<?${pi.getTarget()} ${data}?>`;
    }

    /** Returns the character content of a TextNode or CData node via its getValue(). */
    getNodeValue(node) {
        return node.getValue();
    }

    /**
     * Orders two strings by Unicode code point.
     *
     * Plain `<` on strings compares UTF-16 code units, which places
     * supplementary-plane characters before U+E000..U+FFFF; comparing decoded
     * code points avoids that.
     *
     * @param {string} left
     * @param {string} right
     * @returns {number} negative, zero or positive, suitable for Array.prototype.sort
     */
    compareCodePoints(left, right) {
        const leftPoints = Array.from(left, (char) => char.codePointAt(0));
        const rightPoints = Array.from(right, (char) => char.codePointAt(0));
        const shared = Math.min(leftPoints.length, rightPoints.length);
        for (let i = 0; i < shared; i++) {
            if (leftPoints[i] !== rightPoints[i]) {
                return leftPoints[i] - rightPoints[i];
            }
        }
        return leftPoints.length - rightPoints.length;
    }

    /**
     * Escapes character data and attribute values with the Datachar productions.
     *
     * @param {string} data raw text
     * @returns {string} text with &, <, >, ", tab, line feed and carriage return
     *     replaced by entity or character references
     */
    escapeData(data) {
        // CR LF pairs collapse to a single LF before escaping.
        const normalized = data.replaceAll("\r\n", "\n");
        let result = "";
        for (const char of normalized) {
            switch (char) {
                case "&":
                    result += "&amp;";
                    break;
                case "<":
                    result += "&lt;";
                    break;
                case ">":
                    result += "&gt;";
                    break;
                case "\"":
                    result += "&quot;";
                    break;
                case "\t":
                    result += "&#9;";
                    break;
                case "\n":
                    result += "&#10;";
                    break;
                case "\r":
                    result += "&#13;";
                    break;
                default:
                    result += char;
            }
        }
        return result;
    }

    /**
     * Escapes processing instruction data: only "&" and carriage returns are
     * replaced; every other character is copied through unchanged.
     *
     * @param {string} data raw processing instruction data
     * @returns {string} escaped data
     */
    escapeProcessingInstructionData(data) {
        // CR LF pairs collapse to a single LF before escaping.
        const normalized = data.replaceAll("\r\n", "\n");
        let result = "";
        for (const char of normalized) {
            if (char === "&") {
                result += "&amp;";
            } else if (char === "\r") {
                result += "&#13;";
            } else {
                result += char;
            }
        }
        return result;
    }

    /**
     * Tells whether a string is empty or made only of space, tab, line feed
     * and carriage return characters.
     *
     * @param {string} value
     * @returns {boolean}
     */
    isWhitespaceOnly(value) {
        return /^[ \t\n\r]*$/.test(value);
    }
}
//# sourceMappingURL=XMLCanonicalizer.js.map