UNPKG

salve-annos

Version:

A fork of Salve with support for documentation. Salve is a JavaScript library that implements a validator able to validate an XML document on the basis of a subset of Relax NG.

464 lines 20 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.GrammarWalker = exports.Grammar = void 0;
/**
 * Pattern and walker for RNG's ``grammar`` elements.
 * @author Louis-Dominique Dubeau
 * @license MPL 2.0
 * @copyright Mangalam Research Center for Buddhist Languages
 */
const errors_1 = require("../errors");
const name_patterns_1 = require("../name_patterns");
const set_1 = require("../set");
const base_1 = require("./base");
/**
 * Grammar object. Users of this library normally do not create objects of this
 * class themselves but rely on the conversion facilities of salve to create
 * these objects.
 */
class Grammar extends base_1.BasePattern {
    /**
     * @param xmlPath This is a string which uniquely identifies the
     * element from the simplified RNG tree. Used in debugging.
     *
     * @param start The start pattern of this grammar.
     *
     * @param definitions An array which contain all definitions specified in this
     * grammar.
     *
     * @throws {Error} When any definition in the original
     * schema refers to a schema entity which is not defined in the schema.
     */
    constructor(xmlPath, start, definitions) {
        super(xmlPath);
        this.xmlPath = xmlPath;
        this.start = start;
        this._namespaces = new Set();
        const mapInit = [];
        if (definitions !== undefined) {
            for (const def of definitions) {
                mapInit.push([def.name, def]);
            }
        }
        this.definitions = new Map(mapInit);
        this._prepare(this.definitions, this._namespaces);
    }
    /**
     * Adds a definition.
     *
     * The definition is only registered in the definition map; this method does
     * not call ``_prepare`` on it.
     *
     * @param d The definition to add.
     */
    add(d) {
        this.definitions.set(d.name, d);
        // The element-definition index is memoised by the ``elementDefinitions``
        // getter. It must be rebuilt after the set of definitions changes,
        // otherwise the getter (and ``whollyContextIndependent``) would keep
        // answering from the stale index.
        this._elementDefinitions = undefined;
    }
    /**
     * An index of the element patterns of all definitions, keyed by the string
     * form of the element name. Each value is the array of element patterns
     * sharing that name. The index is computed on first access and cached.
     */
    get elementDefinitions() {
        const ret = this._elementDefinitions;
        if (ret !== undefined) {
            return ret;
        }
        // A prototype-less object is used so that element names cannot collide
        // with properties inherited from ``Object.prototype``.
        const newDef = this._elementDefinitions = Object.create(null);
        for (const def of this.definitions.values()) {
            const el = def.pat;
            const key = el.name.toString();
            if (newDef[key] === undefined) {
                newDef[key] = [el];
            }
            else {
                newDef[key].push(el);
            }
        }
        return newDef;
    }
    /**
     * @returns ``true`` if the schema is wholly context independent. This means
     * that each element in the schema can be validated purely on the basis of
     * knowing its expanded name. ``false`` otherwise.
     */
    whollyContextIndependent() {
        const defs = this.elementDefinitions;
        for (const v in defs) {
            if (defs[v].length > 1) {
                return false;
            }
        }
        return true;
    }
    /**
     * @returns An array of all namespaces used in the schema. The array may
     * contain two special values: ``*`` indicates that there was an ``anyName``
     * element in the schema and thus that it is probably possible to insert more
     * than the namespaces listed in the array, ``::except`` indicates that an
     * ``except`` element is affecting what namespaces are acceptable to the
     * schema.
     */
    getNamespaces() {
        return Array.from(this._namespaces);
    }
    /**
     * Prepares the start pattern and then every definition of this grammar.
     *
     * @param definitions The definition map passed down to each pattern.
     *
     * @param namespaces The set passed down to each pattern.
     */
    _prepare(definitions, namespaces) {
        this.start._prepare(definitions, namespaces);
        for (const d of this.definitions.values()) {
            d._prepare(definitions, namespaces);
        }
    }
    /**
     * Creates a new walker to walk this pattern.
     *
     * @param nameResolver The name resolver handed to the new walker.
     *
     * @param idCheck Whether the walker should report duplicate ``ID`` values.
     * Defaults to ``true``.
     *
     * @returns A walker.
     */
    newWalker(nameResolver, idCheck = true) {
        // tslint:disable-next-line:no-use-before-declare
        return GrammarWalker.make(this, nameResolver, idCheck);
    }
}
exports.Grammar = Grammar;
/**
 * Stand-in walker pushed on the stack by [[GrammarWalker]] for an element that
 * could not be matched or inferred. It accepts every event except those that
 * start a new element.
 */
class MisplacedElementWalker {
    constructor() {
        this.canEnd = true;
        this.canEndAttribute = true;
    }
    /**
     * @param name The event name.
     *
     * @param params The event parameters. Unused.
     *
     * @returns A non-matching result for ``enterStartTag`` and
     * ``startTagAndAttributes``, a matching result for anything else.
     */
    fireEvent(name, params) {
        // The strategy here is to accept everything except for elements. The lack
        // of match that occurs on enterStartTag and startTagAndAttributes is
        // handled elsewhere.
        switch (name) {
            case "enterStartTag":
            case "startTagAndAttributes":
                return new base_1.InternalFireEventResult(false);
            default:
                return new base_1.InternalFireEventResult(true);
        }
    }
    /** @returns Always ``false``: this walker never reports an error on end. */
    end() {
        return false;
    }
    /** @returns Always an empty set. */
    possible() {
        return new Set();
    }
    /** @returns A fresh walker of the same class; there is no state to copy. */
    clone() {
        return new this.constructor();
    }
}
/**
 * Walker for [[Grammar]].
 */
class GrammarWalker {
    /**
     * @param el The grammar being walked.
     *
     * @param nameResolver The name resolver passed to the element walkers.
     *
     * @param elementWalkerStack A stack of arrays of walkers. The last array
     * holds the walkers that currently receive events.
     *
     * @param misplacedDepth How many [[MisplacedElementWalker]] levels are
     * currently open.
     *
     * @param _swallowAttributeValue Whether the next event is expected to be
     * the value of an attribute whose name was already rejected.
     *
     * @param suspendedWs Whitespace-only text held back until the next event,
     * or ``undefined``.
     *
     * @param ignoreNextWs Whether suspended whitespace must be dropped rather
     * than fired on the next ``endTag``.
     *
     * @param idCheck Whether duplicate ``ID`` attribute values are reported.
     *
     * @param idStack The set of ``ID`` values seen so far, or ``undefined`` if
     * none was seen yet.
     */
    constructor(el, nameResolver, elementWalkerStack, misplacedDepth, _swallowAttributeValue, suspendedWs, ignoreNextWs, idCheck, idStack) {
        this.el = el;
        this.nameResolver = nameResolver;
        this.elementWalkerStack = elementWalkerStack;
        this.misplacedDepth = misplacedDepth;
        this._swallowAttributeValue = _swallowAttributeValue;
        this.suspendedWs = suspendedWs;
        this.ignoreNextWs = ignoreNextWs;
        this.idCheck = idCheck;
        this.idStack = idStack;
    }
    /**
     * Creates a walker positioned at the start pattern of a grammar.
     *
     * @param el The grammar to walk.
     *
     * @param nameResolver The name resolver to use.
     *
     * @param idCheck Whether duplicate ``ID`` values are reported.
     *
     * @returns The new walker.
     */
    static make(el, nameResolver, idCheck) {
        return new GrammarWalker(el, nameResolver, [[el.start.newWalker()]], 0, false, undefined, false, idCheck, undefined);
    }
    /**
     * @returns An independent copy of this walker. The name resolver, every
     * walker on the stack and the set of seen IDs are copied, so that events
     * fired on the copy do not alter this walker and vice versa.
     */
    clone() {
        return new GrammarWalker(this.el, this.nameResolver.clone(), this.elementWalkerStack
            .map(walkers => walkers.map(x => x.clone())), this.misplacedDepth, this._swallowAttributeValue, this.suspendedWs, this.ignoreNextWs, this.idCheck, 
        // The set is mutated in place by fireEvent. Sharing it would let IDs
        // recorded by one walker trigger "already declared" errors in the other.
        this.idStack == null ? undefined : new Set(this.idStack));
    }
    /**
     * On a [[GrammarWalker]] this method cannot return ``undefined``. An
     * undefined value would mean nothing matched, which is a validation error.
     *
     * @param name The event name.
     *
     * @param params The event parameters.
     *
     * @returns ``false`` if there is no error or an array of errors.
     *
     * @throws {Error} When trying to process an event type unknown to salve.
     */
    // Whitespaces are problematic. On the one hand, if an element may contain
    // only other elements and no text, then XML allows putting whitespace
    // between the elements. This whitespace must not cause a validation
    // error. When mixed content is possible, everywhere where text is allowed,
    // a text of length 0 is possible. (``<text/>`` does not allow specifying a
    // pattern or minimum length. And Relax NG constraints do not allow having
    // an element whose content is a mixture of ``element`` and ``data`` and
    // ``value`` that would constrain specific text patterns between the
    // elements.) We can satisfy all situations by dropping text events that
    // contain only whitespace.
    fireEvent(name, params) {
        // The only case where we'd want to pass a node consisting entirely of
        // whitespace is to satisfy a data or value pattern because they can require
        // a sequence of whitespaces.
        let wsMatch = true;
        switch (name) {
            case "text": {
                // Earlier versions of salve processed text events ahead of this switch
                // block, but we moved it here to improve performance. There's no issue
                // with having a case for text here because salve disallows firing more
                // than one text event in sequence.
                const text = params[0];
                // Process whitespace nodes
                if (!/\S/.test(text)) {
                    if (text === "") {
                        throw new Error("firing empty text events makes no sense");
                    }
                    // We don't check the old value of suspendedWs because salve does not
                    // allow two text events in a row. So we should never have to
                    // concatenate values.
                    this.suspendedWs = text;
                    return false;
                }
                break;
            }
            case "endTag":
                if (!this.ignoreNextWs && this.suspendedWs !== undefined) {
                    wsMatch = this._fireSuspendedWsOnCurrentWalkers();
                }
                this.ignoreNextWs = true;
                break;
            default:
                this.ignoreNextWs = false;
        }
        // Absorb the whitespace: poof, gone!
        this.suspendedWs = undefined;
        // This would happen if the user puts an attribute on a tag that does not
        // allow one. Instead of generating errors for both the attribute name
        // and value, we generate an error for the name and ignore the value.
        if (this._swallowAttributeValue) {
            // Swallow only one event.
            this._swallowAttributeValue = false;
            return name === "attributeValue" ? false :
                [new errors_1.ValidationError("attribute value required")];
        }
        const ret = this._fireOnCurrentWalkers(name, params);
        // Check ID
        if (this.idCheck && name === "attributeValue") {
            const { datatype } = ret;
            if (datatype != null && datatype.name === "ID") {
                const id = params[0];
                if (this.idStack == null) {
                    // First ID seen by this walker.
                    this.idStack = new Set([id]);
                }
                else if (this.idStack.has(id)) {
                    return [new errors_1.ValidationError(`ID "${id}" has already been declared.`)];
                }
                else {
                    this.idStack.add(id);
                }
            }
        }
        else if (name === "endTag") {
            // We do not need to end the walkers because the fireEvent handler
            // for elements calls end when it sees an "endTag" event.
            // We do not reduce the stack to nothing.
            if (this.elementWalkerStack.length > 1) {
                this.elementWalkerStack.pop();
            }
            if (this.misplacedDepth > 0) {
                this.misplacedDepth--;
            }
        }
        if (ret.matched) {
            const { refs } = ret;
            if (refs !== undefined && refs.length !== 0) {
                this._processRefs(name, refs, params);
                return false;
            }
            // There may still have been a problem with the whitespace.
            return wsMatch ? false :
                [new errors_1.ValidationError("text not allowed here")];
        }
        return ret.errors !== undefined ? ret.errors :
            this.diagnose(name, params, wsMatch);
    }
    /**
     * Produces the errors for an event that no current walker matched and for
     * which the walkers reported no error of their own. For element-start
     * events, it also pushes a walker for the errant element so that validation
     * can continue.
     *
     * @param name The event name.
     *
     * @param params The event parameters.
     *
     * @param wsMatch Whether the suspended whitespace, if any, was accepted.
     *
     * @returns ``false`` or an array of errors.
     *
     * @throws {Error} On ``leaveStartTag`` or an unknown event name, and when an
     * inferred element rejects its own initial event.
     */
    diagnose(name, params, wsMatch) {
        switch (name) {
            case "enterStartTag":
            case "startTagAndAttributes": {
                // Once in dumb mode, we remain in dumb mode.
                if (this.misplacedDepth > 0) {
                    this.misplacedDepth++;
                    this.elementWalkerStack.push([new MisplacedElementWalker()]);
                    return wsMatch ? false :
                        [new errors_1.ValidationError("text not allowed here")];
                }
                const elName = new name_patterns_1.Name(params[0], params[1]);
                // Try to infer what element is meant by this errant tag. If we can't
                // find a candidate, then fall back to a dumb mode.
                const candidates = this.el.elementDefinitions[elName.toString()];
                if (candidates !== undefined && candidates.length === 1) {
                    const newWalker = candidates[0].newWalker(elName);
                    this.elementWalkerStack.push([newWalker]);
                    if (name === "startTagAndAttributes") {
                        if (!newWalker.initWithAttributes(params, this.nameResolver).matched) {
                            throw new Error("internal error: the inferred element " +
                                "does not accept its initial event");
                        }
                    }
                }
                else {
                    // Dumb mode...
                    this.misplacedDepth++;
                    this.elementWalkerStack.push([new MisplacedElementWalker()]);
                }
                return [new errors_1.ElementNameError(name === "enterStartTag" ?
                        "tag not allowed here" :
                        "tag not allowed here with these attributes", elName)];
            }
            case "endTag":
                return [new errors_1.ElementNameError("unexpected end tag", new name_patterns_1.Name(params[0], params[1]))];
            case "attributeName":
                // The value event that follows must not produce a second error.
                this._swallowAttributeValue = true;
                return [new errors_1.AttributeNameError("attribute not allowed here", new name_patterns_1.Name(params[0], params[1]))];
            case "attributeNameAndValue":
                return [new errors_1.AttributeNameError("attribute not allowed here", new name_patterns_1.Name(params[0], params[1]))];
            case "attributeValue":
                return [new errors_1.ValidationError("unexpected attributeValue event; it is likely that fireEvent is incorrectly called")];
            case "text":
                return [new errors_1.ValidationError("text not allowed here")];
            case "leaveStartTag":
            // If MisplacedElementWalker did not exist then we would get here if a
            // file being validated contains a tag which is not allowed. But it
            // exists, so we cannot get here. If we do end up here, then there is
            // an internal error somewhere.
            /* falls through */
            default:
                throw new Error(`unexpected event type in GrammarWalker's fireEvent: ${name}`);
        }
    }
    // A text event either matches or does not match. It does not generate by
    // itself an error. So we do not track errors in this specialized function,
    // nor do we track references.
    /**
     * Fires the suspended whitespace as a ``text`` event on the current walkers.
     *
     * @returns ``true`` if at least one walker matched, ``false`` otherwise.
     */
    _fireSuspendedWsOnCurrentWalkers() {
        const { elementWalkerStack } = this;
        const last = elementWalkerStack.length - 1;
        const walkers = elementWalkerStack[last];
        // Checking whether walkers.length === 0 would not be a particularly useful
        // optimization, as we don't let that happen.
        // This optimization for the single walker case is significant.
        if (walkers.length === 1) {
            return walkers[0].fireEvent("text", [this.suspendedWs], this.nameResolver).matched;
        }
        const params = [this.suspendedWs];
        const remainingWalkers = [];
        for (const walker of walkers) {
            const result = walker.fireEvent("text", params, this.nameResolver);
            // Keep only the walkers that report a match.
            if (result.matched) {
                remainingWalkers.push(walker);
            }
        }
        // We don't remove all walkers. If some walkers were successful and some
        // were not, then we just keep the successful ones. But removing all walkers
        // at once prevents us from giving useful error messages.
        if (remainingWalkers.length !== 0) {
            elementWalkerStack[last] = remainingWalkers;
            return true;
        }
        return false;
    }
    /**
     * Fires an event on the walkers at the top of the stack.
     *
     * @param name The event name.
     *
     * @param params The event parameters.
     *
     * @returns The single walker's own result when there is only one walker.
     * Otherwise a combined result: matched, with the references of all matching
     * walkers, if any walker matched; unmatched, with the collected errors, if
     * none did.
     */
    _fireOnCurrentWalkers(name, params) {
        const { elementWalkerStack } = this;
        const last = elementWalkerStack.length - 1;
        const walkers = elementWalkerStack[last];
        // Checking whether walkers.length === 0 would not be a particularly useful
        // optimization, as we don't let that happen.
        // This optimization for the single walker case is significant.
        if (walkers.length === 1) {
            return walkers[0].fireEvent(name, params, this.nameResolver);
        }
        const errors = [];
        const refs = [];
        const remainingWalkers = [];
        for (const walker of walkers) {
            const result = walker.fireEvent(name, params, this.nameResolver);
            // Keep only the walkers that report a match.
            if (result.matched) {
                remainingWalkers.push(walker);
                if (result.refs !== undefined) {
                    refs.push(...result.refs);
                }
            }
            // There's no point in recording errors if we're going to toss them
            // anyway.
            else if ((remainingWalkers.length === 0) &&
                (result.errors !== undefined)) {
                errors.push(...result.errors);
            }
        }
        // We don't remove all walkers. If some walkers were successful and some
        // were not, then we just keep the successful ones. But removing all walkers
        // at once prevents us from giving useful error messages.
        if (remainingWalkers.length !== 0) {
            elementWalkerStack[last] = remainingWalkers;
            // If some of the walkers matched, we ignore the errors from the other
            // walkers.
            return new base_1.InternalFireEventResult(true, undefined, refs.length !== 0 ? refs : undefined);
        }
        return new base_1.InternalFireEventResult(false, errors.length !== 0 ? errors : undefined);
    }
    /**
     * Pushes on the stack one new element walker per reference returned by a
     * matching element-start event.
     *
     * @param name The event name.
     *
     * @param refs The references returned by the walkers that matched.
     *
     * @param params The event parameters; the first two are used to build the
     * element name.
     *
     * @throws {Error} When a new walker rejects a ``startTagAndAttributes``
     * event.
     */
    _processRefs(name, refs, params) {
        const newWalkers = [];
        const boundName = new name_patterns_1.Name(params[0], params[1]);
        if (name === "startTagAndAttributes") {
            for (const item of refs) {
                const walker = item.element.newWalker(boundName);
                // If we get anything else than false here, the internal logic is
                // wrong.
                if (!walker.initWithAttributes(params, this.nameResolver).matched) {
                    throw new Error("error or failed to match on a new element walker: the internal logic is incorrect");
                }
                newWalkers.push(walker);
            }
        }
        else {
            for (const item of refs) {
                newWalkers.push(item.element.newWalker(boundName));
            }
        }
        this.elementWalkerStack.push(newWalkers);
    }
    /**
     * @returns Whether the walk can end here: the stack holds only its bottom
     * entry and the first walker of that entry can end.
     */
    canEnd() {
        const top = this.elementWalkerStack[this.elementWalkerStack.length - 1];
        return this.elementWalkerStack.length === 1 &&
            top.length > 0 && top[0].canEnd;
    }
    /**
     * Ends every walker on the stack, from the top entry down to the bottom.
     *
     * @returns ``false`` if no walker reported an error, otherwise the
     * concatenation of all reported errors.
     *
     * @throws {Error} When the walker stack is empty.
     */
    end() {
        if (this.elementWalkerStack.length < 1) {
            throw new Error("stack underflow");
        }
        let finalResult = [];
        for (let ix = this.elementWalkerStack.length - 1; ix >= 0; --ix) {
            const stackElement = this.elementWalkerStack[ix];
            for (const walker of stackElement) {
                const result = walker.end();
                if (result) {
                    finalResult = finalResult.concat(result);
                }
            }
        }
        return finalResult.length !== 0 ? finalResult : false;
    }
    /**
     * @returns The set of events that the current walkers report as possible.
     */
    possible() {
        let possible = new Set();
        for (const walker of this.elementWalkerStack[this.elementWalkerStack.length - 1]) {
            (0, set_1.union)(possible, walker.possible());
        }
        // If we have any attributeValue possible, then the only possible
        // events are attributeValue events.
        if (possible.size !== 0) {
            const valueEvs = (0, set_1.filter)(possible, ({ name }) => name === "attributeValue");
            if (valueEvs.size !== 0) {
                possible = valueEvs;
            }
        }
        return possible;
    }
}
exports.GrammarWalker = GrammarWalker;
//  LocalWords:  RNG's MPL unresolvable runtime RNG NG firstName enterContext
//  LocalWords:  leaveContext definePrefix whitespace enterStartTag endTag
//  LocalWords:  fireEvent attributeValue attributeName leaveStartTag
//  LocalWords:  misplacedElements ElementNameError GrammarWalker's
//  LocalWords:  suppressAttributes GrammarWalker
//# sourceMappingURL=grammar.js.map