// typesxml
// Version:
// Open source XML library written in TypeScript
// 192 lines • 7.3 kB
// JavaScript
;
/*******************************************************************************
* Copyright (c) 2023-2026 Maxprograms.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse License 1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/epl-v10.html
*
* Contributors:
* Maxprograms - initial API and implementation
*******************************************************************************/
// Define the "__esModule" property (value true) on the CommonJS exports object.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot as undefined up front; it is assigned after the class definition.
exports.XMLCanonicalizer = void 0;
const CData_js_1 = require("./CData.js");
const DOMBuilder_js_1 = require("./DOMBuilder.js");
const ProcessingInstruction_js_1 = require("./ProcessingInstruction.js");
const SAXParser_js_1 = require("./SAXParser.js");
const TextNode_js_1 = require("./TextNode.js");
const XMLComment_js_1 = require("./XMLComment.js");
const XMLDeclaration_js_1 = require("./XMLDeclaration.js");
const XMLDocumentType_js_1 = require("./XMLDocumentType.js");
const XMLElement_js_1 = require("./XMLElement.js");
/**
 * Generates the canonical XML representation defined by the W3C XML Test Suite.
 *
 * Canonicalization rules:
 * - Attribute order is lexical (Unicode code point order).
 * - Character data and attribute values are escaped using the Datachar
 *   productions (&amp;, &lt;, &gt;, &quot;, &#9;, &#10;, &#13;).
 * - CDATA sections are treated as their character content.
 * - XML declarations, comments and document type declarations are omitted.
 * - Processing instructions are preserved in document order; in their data
 *   only "&" and carriage return are escaped.
 */
class XMLCanonicalizer {
    /** Replacement text for every character that must be escaped in character data. */
    static DATACHAR_ESCAPES = Object.freeze({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "\t": "&#9;",
        "\n": "&#10;",
        "\r": "&#13;",
    });

    /** Matches exactly the keys of DATACHAR_ESCAPES. */
    static DATACHAR_PATTERN = /[&<>"\t\n\r]/g;

    /** Document to canonicalize; undefined until a parse method or setDocument() runs. */
    document;

    /**
     * Parses the file at `path` and stores the resulting document.
     * Both arguments are forwarded unchanged to SAXParser.parseFile().
     */
    parseFile(path, encoding) {
        const { parser, builder } = this.createParser();
        parser.parseFile(path, encoding);
        this.document = builder.getDocument();
    }

    /**
     * Parses XML held in a string and stores the resulting document.
     * Both arguments are forwarded unchanged to SAXParser.parseString().
     */
    parseString(xml, options) {
        const { parser, builder } = this.createParser();
        parser.parseString(xml, options);
        this.document = builder.getDocument();
    }

    /**
     * Parses XML from a stream and stores the resulting document once
     * SAXParser.parseStream() has settled.
     */
    async parseStream(stream, options) {
        const { parser, builder } = this.createParser();
        await parser.parseStream(stream, options);
        this.document = builder.getDocument();
    }

    /** Uses an already built document instead of parsing one. */
    setDocument(document) {
        this.document = document;
    }

    /** Returns the current document, or undefined when none has been set. */
    getDocument() {
        return this.document;
    }

    /**
     * Returns the canonical form of the current document.
     *
     * @throws {Error} When no document has been parsed or set.
     */
    toString() {
        if (!this.document) {
            throw new Error("Canonicalizer has no document. Parse an XML source first.");
        }
        return this.renderDocument(this.document);
    }

    /** Creates a SAX parser wired to a fresh DOM builder. */
    createParser() {
        const builder = new DOMBuilder_js_1.DOMBuilder();
        const parser = new SAXParser_js_1.SAXParser();
        parser.setContentHandler(builder);
        return { parser, builder };
    }

    /** Concatenates the canonical form of every top-level node, in document order. */
    renderDocument(document) {
        const parts = [];
        for (const node of document.contentIterator()) {
            parts.push(this.renderTopLevelNode(node));
        }
        return parts.join("");
    }

    /**
     * Renders a node that sits directly under the document.
     * Declarations, comments, doctypes, whitespace-only text and unknown
     * node types all yield an empty string.
     */
    renderTopLevelNode(node) {
        if (node instanceof XMLDeclaration_js_1.XMLDeclaration
            || node instanceof XMLComment_js_1.XMLComment
            || node instanceof XMLDocumentType_js_1.XMLDocumentType) {
            return ""; // omitted from canonical form
        }
        if (node instanceof ProcessingInstruction_js_1.ProcessingInstruction) {
            return this.renderProcessingInstruction(node);
        }
        if (node instanceof XMLElement_js_1.XMLElement) {
            return this.renderElement(node);
        }
        if (node instanceof TextNode_js_1.TextNode || node instanceof CData_js_1.CData) {
            const value = this.getNodeValue(node);
            return this.isWhitespaceOnly(value) ? "" : this.escapeData(value);
        }
        return "";
    }

    /**
     * Renders an element as start tag, sorted attributes, content and end tag.
     * Child elements recurse; text and CDATA are escaped; processing
     * instructions are kept; every other child type is dropped.
     */
    renderElement(element) {
        const name = element.getName();
        // Sort a copy so the element's own attribute collection is left untouched.
        const attributes = [...element.getAttributes()]
            .sort((a, b) => this.compareCodePoints(a.getName(), b.getName()))
            .map((attribute) => ` ${attribute.getName()}="${this.escapeData(attribute.getValue())}"`);
        const children = [];
        element.getContent().forEach((child) => {
            if (child instanceof XMLElement_js_1.XMLElement) {
                children.push(this.renderElement(child));
            }
            else if (child instanceof TextNode_js_1.TextNode || child instanceof CData_js_1.CData) {
                children.push(this.escapeData(this.getNodeValue(child)));
            }
            else if (child instanceof ProcessingInstruction_js_1.ProcessingInstruction) {
                children.push(this.renderProcessingInstruction(child));
            }
            // comments and other node types are ignored in canonical form
        });
        return `<${name}${attributes.join("")}>${children.join("")}</${name}>`;
    }

    /** Renders a processing instruction as `<?target data?>` with escaped data. */
    renderProcessingInstruction(pi) {
        const data = this.escapeProcessingInstructionData(pi.getData());
        return `<?${pi.getTarget()} ${data}?>`;
    }

    /** Returns the character content of a text or CDATA node. */
    getNodeValue(node) {
        return node.getValue();
    }

    /**
     * Orders two strings by Unicode code point rather than by locale collation
     * or UTF-16 code unit, so the result is the same on every runtime.
     *
     * @returns A negative number, zero or a positive number, as Array.prototype.sort expects.
     */
    compareCodePoints(left, right) {
        const limit = Math.min(left.length, right.length);
        let index = 0;
        while (index < limit) {
            const leftPoint = left.codePointAt(index);
            const rightPoint = right.codePointAt(index);
            if (leftPoint !== rightPoint) {
                return leftPoint < rightPoint ? -1 : 1;
            }
            // Astral code points occupy two UTF-16 code units.
            index += leftPoint > 0xffff ? 2 : 1;
        }
        return left.length - right.length;
    }

    /**
     * Escapes character data and attribute values.
     * CR LF pairs are collapsed to LF first, then each of & < > " TAB LF CR
     * is replaced by its entity or character reference.
     */
    escapeData(data) {
        return data
            .replaceAll("\r\n", "\n")
            .replace(XMLCanonicalizer.DATACHAR_PATTERN, (char) => XMLCanonicalizer.DATACHAR_ESCAPES[char]);
    }

    /**
     * Escapes processing instruction data.
     * CR LF pairs are collapsed to LF; "&" and any remaining CR are escaped.
     */
    escapeProcessingInstructionData(data) {
        // "&" must be handled first because the CR replacement itself contains "&".
        return data
            .replaceAll("\r\n", "\n")
            .replaceAll("&", "&amp;")
            .replaceAll("\r", "&#13;");
    }

    /** Reports whether the value is empty or made only of space, TAB, LF and CR. */
    isWhitespaceOnly(value) {
        return /^[ \t\n\r]*$/.test(value);
    }
}
// Publish the class on the CommonJS exports object.
exports.XMLCanonicalizer = XMLCanonicalizer;
//# sourceMappingURL=XMLCanonicalizer.js.map