salve-annos
Version:
A fork of Salve with support for documentation. Salve is a JavaScript library that implements a validator able to validate an XML document on the basis of a subset of Relax NG.
621 lines • 20.9 kB
JavaScript
"use strict";
/**
* This module contains classes for a conversion parser.
* @author Louis-Dominique Dubeau
* @license MPL 2.0
* @copyright Mangalam Research Center for Buddhist Languages
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.BasicParser = exports.Validator = exports.Text = exports.Element = void 0;
exports.isElement = isElement;
exports.isText = isText;
exports.parseSimplifiedSchema = parseSimplifiedSchema;
exports.dependsOnExternalFile = dependsOnExternalFile;
const saxes_1 = require("saxes");
const ename_1 = require("../ename");
const name_resolver_1 = require("../name_resolver");
const tools_1 = require("../tools");
const util_1 = require("./simplifier/util");
const emptyNS = Object.create(null);
/**
 * An element of the XML tree built by [[BasicParser]].
 *
 * Constructing an element adopts the children passed to it: each child is
 * removed from its previous parent, if it had one, and reparented to the new
 * element.
 */
class Element {
  /**
   * @param prefix The namespace prefix of the element name.
   *
   * @param local The local part of the element name.
   *
   * @param uri The namespace URI of the element.
   *
   * @param ns The namespace declarations made on this element, as a map from
   * prefix to URI. It is stored as is and never modified by this class.
   *
   * @param attributes A map from attribute name to attribute record. It is
   * stored as is.
   *
   * @param documentation The documentation text attached to this element.
   *
   * @param children The children of this element. The array is stored as is,
   * and each child is detached from any previous parent.
   */
  constructor(prefix, local, uri, ns, attributes, documentation, children) {
    this.children = children;
    this.kind = "element";
    this.prefix = prefix;
    this.local = local;
    this.uri = uri;
    // Namespace declarations are immutable.
    this.ns = ns;
    this.attributes = attributes;
    this.documentation = documentation;
    for (const child of children) {
      if (child.parent !== undefined) {
        child.parent.removeChild(child);
      }
      child.parent = this;
    }
  }

  /**
   * Build an element from a tag node produced by the SAX parser.
   *
   * @param node The tag node. Missing ``prefix``, ``local`` and ``uri`` values
   * become empty strings, and a missing ``ns`` becomes an empty object.
   *
   * @param children The children of the new element.
   *
   * @param documentation The documentation text of the new element.
   *
   * @returns The new element.
   */
  static fromSax(node, children, documentation = "") {
    return new Element(node.prefix || "", node.local || "", node.uri || "",
      node.ns || {}, node.attributes, documentation, children);
  }

  /**
   * Build an element that has no prefix, no namespace URI and no attributes.
   *
   * @param name The local name of the new element.
   *
   * @param children The children of the new element.
   *
   * @param documentation The documentation text of the new element.
   *
   * @returns The new element.
   */
  static makeElement(name, children, documentation = "") {
    return new Element("", name, "",
      // We always pass the same object as ns. So we save an unnecessary object
      // creation.
      emptyNS, Object.create(null), documentation, children);
  }

  /** The parent of this element, or ``undefined`` if it has none. */
  get parent() {
    return this._parent;
  }

  set parent(value) {
    this.setParent(value);
  }

  /**
   * Record a new parent for this element. This does not touch the children
   * list of either the old or the new parent.
   *
   * @param value The new parent, or ``undefined``.
   */
  setParent(value) {
    // Looking for reference cycles here has a noticeable cost, so no such check
    // is performed.
    this._path = undefined; // The cached path depends on the parent.
    this._parent = value;
  }

  /**
   * Resolve a namespace prefix to a URI.
   *
   * @param name The prefix to resolve.
   *
   * @returns The URI, or ``undefined`` if neither this element nor any of its
   * ancestors declares the prefix. The ``xml`` and ``xmlns`` prefixes always
   * resolve to their fixed namespaces.
   */
  resolve(name) {
    if (name === "xml") {
      return name_resolver_1.XML1_NAMESPACE;
    }
    if (name === "xmlns") {
      return name_resolver_1.XMLNS_NAMESPACE;
    }
    return this._resolve(name);
  }

  /**
   * Look a prefix up in the declarations of this element, then in those of its
   * ancestors.
   *
   * @param name The prefix to resolve.
   *
   * @returns The URI, or ``undefined`` if the prefix is not declared.
   */
  _resolve(name) {
    const ret = this.ns[name];
    if (ret !== undefined) {
      return ret;
    }
    return (this.parent === undefined) ? undefined : this.parent._resolve(name);
  }

  /** The concatenation of the ``text`` of all the children, in order. */
  get text() {
    // Testing for this special case does payoff.
    if (this.children.length === 1) {
      return this.children[0].text;
    }
    let ret = "";
    for (const child of this.children) {
      ret += child.text;
    }
    return ret;
  }

  /**
   * A path describing the location of the element in the XML. Note that this is
   * meant to be used **only** after the simplification is complete. The value
   * is cached on first access; the cache is cleared when the parent changes.
   */
  get path() {
    if (this._path === undefined) {
      this._path = this.makePath();
    }
    return this._path;
  }

  /**
   * Compute the path of this element: the path of the parent, followed by
   * ``/`` and the local name, followed by a ``[@name='...']`` predicate when a
   * name is available.
   *
   * @returns The path.
   */
  makePath() {
    let ret = `${(this.parent !== undefined) ? this.parent.path : ""}/${this.local}`;
    const name = this.getAttribute("name");
    if (name !== undefined) {
      ret += `[@name='${name}']`;
    }
    // Name classes are only valid on elements and attributes. So don't go
    // searching for it on other elements.
    else if (this.local === "element" || this.local === "attribute") {
      // By the time path is used, the name class is the first child. An
      // element without children simply gets no predicate.
      const first = this.children[0];
      if (first !== undefined && isElement(first) && first.local === "name") {
        ret += `[@name='${first.text}']`;
      }
    }
    return ret;
  }

  /**
   * Remove a child from this element and clear its parent.
   *
   * @param child The child to remove.
   *
   * @throws {Error} If ``child`` is not a child of this element.
   */
  removeChild(child) {
    // We purposely don't call removeChildAt, so as to save a call.
    this.children.splice(this.indexOfChild(child), 1)[0].parent = undefined;
  }

  /**
   * Remove the child at a given index and clear its parent.
   *
   * @param i The index of the child to remove. There is no bounds check: an
   * index at which there is no child results in a hard failure.
   */
  removeChildAt(i) {
    this.children.splice(i, 1)[0].parent = undefined;
  }

  /**
   * Replace a child of this element with another node.
   *
   * @param child The child to replace.
   *
   * @param replacement The node that takes its place.
   *
   * @throws {Error} If ``child`` is not a child of this element.
   */
  replaceChildWith(child, replacement) {
    this.replaceChildAt(this.indexOfChild(child), replacement);
  }

  /**
   * Replace the child at a given index with another node. The replacement is
   * first detached from its current parent, if any.
   *
   * @param i The index of the child to replace.
   *
   * @param replacement The node that takes its place.
   */
  replaceChildAt(i, replacement) {
    const child = this.children[i];
    // Replacing a child with itself must leave the tree untouched. Going
    // through the detach/reattach sequence below would overwrite the next
    // sibling.
    if (child === replacement) {
      return;
    }
    let index = i;
    const previousParent = replacement.parent;
    if (previousParent !== undefined) {
      // If the replacement is an earlier sibling of the child being replaced,
      // detaching it shifts that child one slot to the left.
      if (previousParent === this && this.children.indexOf(replacement) < i) {
        index--;
      }
      previousParent.removeChild(replacement);
    }
    this.children[index] = replacement;
    child.parent = undefined;
    replacement.parent = this;
  }

  /**
   * Add a node at the end of the children of this element, detaching it from
   * its current parent if needed.
   *
   * @param child The node to append.
   */
  appendChild(child) {
    // It is faster to use custom code than to rely on insertAt: splice
    // operations are costly.
    if (child.parent !== undefined) {
      child.parent.removeChild(child);
    }
    child.parent = this;
    this.children.push(child);
  }

  /**
   * Add nodes at the end of the children of this element, detaching each of
   * them from its current parent if needed.
   *
   * @param children The nodes to append.
   */
  appendChildren(children) {
    // It is faster to use custom code than to rely on insertAt: splice
    // operations are costly.
    for (const el of children) {
      if (el.parent !== undefined) {
        el.parent.removeChild(el);
      }
      el.parent = this;
    }
    this.children.push(...children);
  }

  /**
   * Add a node at the start of the children of this element, detaching it from
   * its current parent if needed.
   *
   * @param child The node to prepend.
   */
  prependChild(child) {
    // It is faster to do this than to rely on insertAt: splice operations
    // are costly.
    if (child.parent !== undefined) {
      child.parent.removeChild(child);
    }
    child.parent = this;
    this.children.unshift(child);
  }

  /**
   * Insert nodes among the children of this element, detaching each of them
   * from its current parent if needed.
   *
   * @param index The index at which to insert.
   *
   * @param toInsert The nodes to insert.
   */
  insertAt(index, toInsert) {
    for (const el of toInsert) {
      if (el.parent !== undefined) {
        el.parent.removeChild(el);
      }
      el.parent = this;
    }
    this.children.splice(index, 0, ...toInsert);
  }

  /**
   * Gets all the children from another element and append them to this
   * element. The source element is left without children.
   *
   * @param src The element from which to get the children.
   */
  grabChildren(src) {
    const children = src.children.splice(0, src.children.length);
    this.children.push(...children);
    for (const child of children) {
      child.parent = this;
    }
  }

  /**
   * Replace all the children of this element. The previous children get their
   * parent cleared. The new children are **not** detached from the children
   * list of their previous parent.
   *
   * @param children The new children.
   */
  replaceContent(children) {
    const prev = this.children.splice(0, this.children.length, ...children);
    for (const child of prev) {
      child.parent = undefined;
    }
    for (const child of children) {
      child.parent = this;
    }
  }

  /**
   * Find the index of a child of this element.
   *
   * @param child The child to search for.
   *
   * @returns The index of the child.
   *
   * @throws {Error} If the parent of ``child`` is not this element, or if the
   * child is not in the list of children.
   */
  indexOfChild(child) {
    const parent = child.parent;
    if (parent !== this) {
      throw new Error("the child is not a child of this");
    }
    const index = parent.children.indexOf(child);
    if (index === -1) {
      throw new Error("child not among children");
    }
    return index;
  }

  /**
   * Set an attribute on an element.
   *
   * @param name The attribute name. It must not contain a colon.
   *
   * @param value The new value of the attribute.
   *
   * @throws {Error} If ``name`` contains a colon.
   */
  setAttribute(name, value) {
    if (name.includes(":")) {
      throw new Error("we don't support namespaces on this function");
    }
    this.attributes[name] = {
      name,
      prefix: "",
      local: name,
      uri: "",
      value,
    };
  }

  /**
   * Set the ``xmlns`` attribute of this element. Only the attribute record is
   * updated; ``ns`` is left as is.
   *
   * @param value The new value of the attribute.
   */
  setXMLNS(value) {
    this.attributes.xmlns = {
      name: "xmlns",
      prefix: "",
      uri: name_resolver_1.XMLNS_NAMESPACE,
      value,
      local: "xmlns",
    };
  }

  /**
   * Remove an attribute. Removing an attribute that does not exist is a no-op.
   *
   * @param name The name of the attribute to remove.
   */
  removeAttribute(name) {
    delete this.attributes[name];
  }

  /**
   * Get the value of an attribute.
   *
   * @param name The name of the attribute.
   *
   * @returns The value, or ``undefined`` if there is no such attribute.
   */
  getAttribute(name) {
    const attr = this.attributes[name];
    return (attr !== undefined) ? attr.value : undefined;
  }

  /**
   * @returns The attribute map of this element. This is the live object, not a
   * copy.
   */
  getRawAttributes() {
    return this.attributes;
  }

  /**
   * Get the value of an attribute which must exist.
   *
   * @param name The name of the attribute.
   *
   * @returns The value of the attribute.
   *
   * @throws {Error} If there is no such attribute.
   */
  mustGetAttribute(name) {
    const attr = this.getAttribute(name);
    if (attr === undefined) {
      throw new Error(`no attribute named ${name}`);
    }
    return attr;
  }

  /**
   * Make a deep copy of this element. Attribute records and children are
   * copied; ``ns`` is shared with the original. The copy has no parent.
   *
   * @returns The copy.
   */
  clone() {
    const newAttributes = Object.create(null);
    const { attributes } = this;
    for (const key of Object.keys(attributes)) {
      // We do not use Object.create(null) here because there's no advantage
      // to it.
      newAttributes[key] = Object.assign({}, attributes[key]);
    }
    // This switch provides a significant improvement.
    let { children } = this;
    switch (children.length) {
      case 0:
        // The copy needs its own array. Sharing the original's array would
        // make children added to one element show up in the other.
        children = [];
        break;
      case 1:
        children = [children[0].clone()];
        break;
      case 2:
        children = [children[0].clone(), children[1].clone()];
        break;
      default:
        children = children.map(child => child.clone());
    }
    return new Element(this.prefix, this.local, this.uri, this.ns,
      newAttributes, this.documentation, children);
  }
}
exports.Element = Element;
/**
 * A text node of the XML tree built by [[BasicParser]].
 */
class Text {
  /**
   * @param text The string held by this node.
   */
  constructor(text) {
    Object.assign(this, { text, kind: "text" });
  }

  /**
   * @returns A new text node holding the same string.
   */
  clone() {
    const { text } = this;
    return new Text(text);
  }
}
exports.Text = Text;
/**
 * Test whether a node is an element.
 *
 * @param node The node to test.
 *
 * @returns ``true`` if the ``kind`` of the node is ``"element"``.
 */
function isElement(node) {
  const { kind } = node;
  return kind === "element";
}
/**
 * Test whether a node is a text node.
 *
 * @param node The node to test.
 *
 * @returns ``true`` if the ``kind`` of the node is ``"text"``.
 */
function isText(node) {
  const { kind } = node;
  return kind === "text";
}
/**
 * A name resolver that delegates prefix resolution to a saxes parser.
 */
class SaxesNameResolver {
  /**
   * @param saxesParser The parser whose ``resolve`` method is used to map
   * prefixes to namespace URIs.
   */
  constructor(saxesParser) {
    this.saxesParser = saxesParser;
  }

  /**
   * Resolve a qualified name to an expanded name.
   *
   * @param name The name to resolve, with or without a prefix.
   *
   * @param attribute Whether the name is an attribute name. An unprefixed
   * attribute name is in no namespace.
   *
   * @returns The expanded name, or ``undefined`` if the name has a prefix that
   * the parser cannot resolve.
   *
   * @throws {Error} If the name contains more than one colon.
   */
  resolveName(name, attribute = false) {
    const separator = name.indexOf(":");
    const hasPrefix = separator !== -1;
    if (!hasPrefix && attribute) {
      // Attribute in undefined namespace
      return new ename_1.EName("", name);
    }
    // Without a prefix, we search for the default namespace currently in
    // effect.
    const prefix = hasPrefix ? name.substring(0, separator) : "";
    const local = hasPrefix ? name.substring(separator + 1) : name;
    if (hasPrefix && local.includes(":")) {
      throw new Error("invalid name passed to resolveName");
    }
    const uri = this.saxesParser.resolve(prefix);
    if (uri === undefined) {
      // No default namespace in effect means no namespace at all, but an
      // unknown prefix cannot be resolved.
      return (prefix === "") ? new ename_1.EName("", local) : undefined;
    }
    return new ename_1.EName(uri, local);
  }

  /**
   * Cloning is not supported.
   *
   * @throws {Error} Always.
   */
  clone() {
    throw new Error("cannot clone a SaxesNameResolver");
  }
}
/**
 * Turns the SAX events of a document into events for a grammar walker, and
 * collects the validation errors that the walker reports.
 */
class Validator {
  /**
   * @param grammar The grammar to validate against. Its ``newWalker`` method is
   * called to create the walker.
   *
   * @param parser The saxes parser producing the events. It is used to resolve
   * namespace prefixes.
   */
  constructor(grammar, parser) {
    /** The validation errors reported so far. */
    this.errors = [];
    this.walker = grammar.newWalker(new SaxesNameResolver(parser));
  }

  /**
   * Fire an event on the walker and record any errors it returns.
   *
   * @param name The name of the event.
   *
   * @param args The parameters of the event.
   */
  fireEvent(name, args) {
    const ret = this.walker.fireEvent(name, args);
    if (ret) {
      this.errors.push(...ret);
    }
  }

  /**
   * Handle a start tag: fires ``startTagAndAttributes`` with the element's URI
   * and local name, followed by the URI, local name and value of each
   * attribute. Namespace declarations are not passed to the walker.
   *
   * @param node The tag node produced by the SAX parser.
   */
  onopentag(node) {
    const { attributes } = node;
    // The array is grown as needed rather than pre-sized: each attribute takes
    // three slots and namespace declarations take none, so a size computed
    // from the attribute count would leave empty trailing slots that the
    // walker would receive as parameters.
    const params = [node.uri || "", node.local || ""];
    for (const name of Object.keys(attributes)) {
      const { uri, local, value } = attributes[name];
      // Skip XML namespace declarations
      if (uri !== name_resolver_1.XMLNS_NAMESPACE) {
        params.push(uri, local, value);
      }
    }
    this.fireEvent("startTagAndAttributes", params);
  }

  /**
   * Handle an end tag: fires ``endTag`` with the element's URI and local name.
   *
   * @param node The tag node produced by the SAX parser.
   */
  onclosetag(node) {
    this.fireEvent("endTag", [node.uri || "", node.local || ""]);
  }

  /**
   * Handle text: fires ``text`` with the text.
   *
   * @param text The text.
   */
  ontext(text) {
    this.fireEvent("text", [text]);
  }

  /**
   * Handle the end of the document: ends the walker and records any errors it
   * returns.
   */
  onend() {
    const result = this.walker.end();
    if (result !== false) {
      this.errors.push(...result);
    }
  }
}
exports.Validator = Validator;
// A stand-in validator that accepts everything: every handler ignores its
// arguments and does nothing.
class NullValidator {
  /** Ignore a start tag. */
  onopentag() {
    // Deliberately empty.
  }

  /** Ignore an end tag. */
  onclosetag() {
    // Deliberately empty.
  }

  /** Ignore text. */
  ontext() {
    // Deliberately empty.
  }

  /** Ignore the end of the document. */
  onend() {
    // Deliberately empty.
  }
}
/**
 * A simple parser used for loading a XML document into memory, as a tree of
 * [[Element]] and [[Text]] nodes.
 *
 * Only elements in the Relax NG namespace are recorded in the tree. Elements in
 * the annotation and XHTML namespaces are treated as documentation: their text
 * is collected and attached to a neighboring node. Everything else is dropped.
 * All events are passed to the validator, whether they are recorded or not.
 */
class BasicParser {
  /**
   * @param saxesParser The parser whose events are handled. Handlers for
   * ``opentag``, ``closetag``, ``text`` and ``end`` are registered on it.
   *
   * @param validator The validator to which all events are forwarded. By
   * default, a validator that does nothing.
   */
  constructor(saxesParser, validator = new NullValidator()) {
    this.saxesParser = saxesParser;
    this.validator = validator;
    // The number of currently open elements that are being skipped. While it
    // is non-zero, nothing is recorded.
    this.drop = 0;
    // Whether we are currently inside a documentation element.
    this.isAnnotation = false;
    saxesParser.on("opentag", this.onopentag.bind(this));
    saxesParser.on("closetag", this.onclosetag.bind(this));
    saxesParser.on("text", this.ontext.bind(this));
    saxesParser.on("end", this.onend.bind(this));
    this.stack = [{
      // We cheat. The node field of the top level stack item won't ever be
      // accessed.
      node: undefined,
      children: [],
    }];
    this.docStack = [];
  }

  /**
   * The root of the parsed XML.
   */
  get root() {
    return this.stack[0].children.find(isElement);
  }

  /**
   * Handle a start tag.
   *
   * @param node The tag node produced by the SAX parser.
   */
  onopentag(node) {
    // We have to validate the node even if we are not going to record it,
    // because RelaxNG does not allow foreign nodes everywhere.
    this.validator.onopentag(node);
    const isDocumentation = node.uri === util_1.ANNOS_URI ||
      node.uri === util_1.XHTML_URI;
    // We skip:
    //
    // - everything inside an element which is already being skipped,
    //
    // - every element nested in a documentation element, whatever its
    //   namespace: recording a Relax NG element found there would push it on
    //   the element stack and its end tag would be mistaken for the end of the
    //   documentation element,
    //
    // - foreign elements.
    if (this.drop !== 0 || this.isAnnotation ||
        (!isDocumentation && node.uri !== util_1.RELAXNG_URI)) {
      this.drop++;
      return;
    }
    if (isDocumentation) {
      this.isAnnotation = true;
      this.docStack.push({
        node,
        text: "",
      });
      return;
    }
    this.stack.push({
      node,
      children: [],
    });
  }

  /**
   * Handle an end tag.
   *
   * @param node The tag node produced by the SAX parser.
   */
  onclosetag(node) {
    // We have to validate the node even if we are not going to record it,
    // because RelaxNG does not allow foreign nodes everywhere.
    this.validator.onclosetag(node);
    if (this.drop !== 0) {
      this.drop--;
      return;
    }
    if (this.isAnnotation) {
      if (this.docStack.length === 1) {
        // Documentation is recorded only when it comes before any child
        // element of the element being built, or right after a ``value``
        // element. Otherwise the text is discarded.
        const topNode = this.stack[this.stack.length - 1];
        const doc = this.docStack[this.docStack.length - 1].text;
        const childrenEls = topNode.children.filter(child => child.kind === "element");
        if (childrenEls.length === 0) {
          // Assign doc to topNode if it doesn't yet have children.
          topNode.documentation = doc;
        }
        else {
          const lastChild = childrenEls[childrenEls.length - 1];
          if (lastChild.local === "value") {
            // Assign to the last child element if it's a value.
            lastChild.documentation = doc;
          }
        }
      }
      this.docStack.pop();
      if (this.docStack.length === 0) {
        this.isAnnotation = false;
      }
    }
    else {
      // The element is complete: build it and add it to its parent.
      const { node: topNode, children, documentation } = this.stack.pop();
      this.stack[this.stack.length - 1].children
        .push(Element.fromSax(topNode, children, documentation));
    }
  }

  /**
   * Handle text. Text inside a documentation element is added to the
   * documentation being collected. Other text becomes a [[Text]] child of the
   * element being built. Text inside skipped elements is ignored.
   *
   * @param text The text.
   */
  ontext(text) {
    this.validator.ontext(text);
    if (this.drop !== 0) {
      return;
    }
    if (this.isAnnotation) {
      this.docStack[this.docStack.length - 1].text += text;
    }
    else {
      this.stack[this.stack.length - 1].children.push(new Text(text));
    }
  }

  /**
   * Handle the end of the document.
   */
  onend() {
    this.validator.onend();
  }
}
exports.BasicParser = BasicParser;
/**
 * A parser dedicated to reading simplified Relax NG schemas.
 *
 * In a Relax NG schema, text nodes made only of white space carry no
 * information, except in ``param`` and ``value`` elements, where they may be
 * significant. This parser discards white-space-only text everywhere but in
 * those two elements, which simplifies the code that later processes the tree.
 *
 * This parser rejects elements which are not in the Relax NG namespace.
 */
class ConversionParser extends BasicParser {
  /**
   * Handle a start tag.
   *
   * @param node The tag node produced by the SAX parser.
   *
   * @throws {Error} If the element is not in the Relax NG namespace.
   */
  onopentag(node) {
    const { uri } = node;
    // tslint:disable-next-line: no-http-string
    const inRelaxNG = uri === "http://relaxng.org/ns/structure/1.0";
    if (!inRelaxNG) {
      throw new Error(`node in unexpected namespace: ${uri}`);
    }
    super.onopentag(node);
  }

  /**
   * Handle text, discarding what is not significant.
   *
   * @param text The text.
   */
  ontext(text) {
    const depth = this.stack.length;
    // Text appearing before or after the top level element is ignored, and so
    // is text inside skipped elements.
    if (depth <= 1 || this.drop !== 0) {
      return;
    }
    // The parser does not allow non-RNG nodes, so checking the local name is
    // enough; the namespace does not need to be checked.
    const { local } = this.stack[depth - 1].node;
    const whitespaceMatters = local === "param" || local === "value";
    if (!whitespaceMatters && text.trim() === "") {
      return;
    }
    super.ontext(text);
  }
}
/**
 * Parse a simplified Relax NG schema into a tree of elements.
 *
 * @param fileName The file name given to the SAX parser.
 *
 * @param simplifiedSchema The text of the schema.
 *
 * @returns The root of the parsed tree.
 */
function parseSimplifiedSchema(fileName, simplifiedSchema) {
  const saxesOptions = {
    xmlns: true,
    position: false,
    fileName,
  };
  const convParser = new ConversionParser(new saxes_1.SaxesParser(saxesOptions));
  const { saxesParser } = convParser;
  saxesParser.write(simplifiedSchema).close();
  return convParser.root;
}
// Sentinel exception thrown from a SAX handler to make the saxes parser stop
// before it has read the whole document.
class Found extends Error {
  constructor() {
    super();
    // Delegate the prototype fix-up to the shared helper.
    const { fixPrototype } = tools_1;
    fixPrototype(this, Found);
  }
}
/**
 * A parser that stops, by throwing [[Found]], as soon as it sees a Relax NG
 * ``include`` or ``externalRef`` element.
 */
class IncludeParser {
  /**
   * @param saxesParser The parser whose ``opentag`` events are inspected.
   */
  constructor(saxesParser) {
    this.saxesParser = saxesParser;
    const handler = this.onopentag.bind(this);
    saxesParser.on("opentag", handler);
  }

  /**
   * Handle a start tag.
   *
   * @param node The tag node produced by the SAX parser.
   *
   * @throws {Found} If the element is a Relax NG ``include`` or
   * ``externalRef``.
   */
  onopentag(node) {
    // tslint:disable-next-line:no-http-string
    if (node.uri !== "http://relaxng.org/ns/structure/1.0") {
      return;
    }
    switch (node.local) {
      case "include":
      case "externalRef":
        throw new Found(); // Stop early.
      default:
    }
  }
}
/**
 * Tell whether an RNG file pulls in another file, which it can do by using
 * ``include`` or ``externalRef``.
 *
 * @param rng The text of the RNG file to check.
 *
 * @returns ``true`` if the file depends on another file, ``false`` if not.
 *
 * @throws Any error raised by the parser, other than the one used internally
 * to stop the parsing early.
 */
function dependsOnExternalFile(rng) {
  const sax = new saxes_1.SaxesParser({ xmlns: true, position: false });
  const { saxesParser } = new IncludeParser(sax);
  try {
    saxesParser.write(rng).close();
    // The whole document was read without meeting a dependency.
    return false;
  }
  catch (ex) {
    if (ex instanceof Found) {
      return true;
    }
    throw ex;
  }
}
// LocalWords: MPL NG param RNG
//# sourceMappingURL=parser.js.map