bitmark-grammar
Version:
192 lines (191 loc) • 7.14 kB
JavaScript
/*!
* Copyright 2016 The ANTLR Project. All rights reserved.
* Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
*/
// ConvertTo-TS run at 2016-10-04T11:26:46.4373888-07:00
import { CharStreams } from "../../CharStreams";
import { CommonTokenStream } from "../../CommonTokenStream";
import { LexerNoViableAltException } from "../../LexerNoViableAltException";
import { ParserRuleContext } from "../../ParserRuleContext";
import { Token } from "../../Token";
import { XPathLexer } from "./XPathLexer";
import { XPathLexerErrorListener } from "./XPathLexerErrorListener";
import { XPathRuleAnywhereElement } from "./XPathRuleAnywhereElement";
import { XPathRuleElement } from "./XPathRuleElement";
import { XPathTokenAnywhereElement } from "./XPathTokenAnywhereElement";
import { XPathTokenElement } from "./XPathTokenElement";
import { XPathWildcardAnywhereElement } from "./XPathWildcardAnywhereElement";
import { XPathWildcardElement } from "./XPathWildcardElement";
/**
* Represent a subset of XPath XML path syntax for use in identifying nodes in
* parse trees.
*
* Split path into words and separators `/` and `//` via ANTLR
* itself then walk path elements from left to right. At each separator-word
* pair, find set of nodes. Next stage uses those as work list.
*
* The basic interface is
* {@link XPath#findAll ParseTree.findAll}`(tree, pathString, parser)`.
* But that is just shorthand for:
*
* ```
* let p = new XPath(parser, pathString);
* return p.evaluate(tree);
* ```
*
* See `TestXPath` for descriptions. In short, this
* allows operators:
*
* | | |
* | --- | --- |
* | `/` | root |
* | `//` | anywhere |
* | `!` | invert; this much appear directly after root or anywhere operator |
*
* and path elements:
*
* | | |
* | --- | --- |
* | `ID` | token name |
* | `'string'` | any string literal token from the grammar |
* | `expr` | rule name |
* | `*` | wildcard matching any node |
*
* Whitespace is not allowed.
*/
export class XPath {
constructor(parser, path) {
this.parser = parser;
this.path = path;
this.elements = this.split(path);
// console.log(this.elements.toString());
}
// TODO: check for invalid token/rule names, bad syntax
split(path) {
let lexer = new XPathLexer(CharStreams.fromString(path));
lexer.recover = (e) => { throw e; };
lexer.removeErrorListeners();
lexer.addErrorListener(new XPathLexerErrorListener());
let tokenStream = new CommonTokenStream(lexer);
try {
tokenStream.fill();
}
catch (e) {
if (e instanceof LexerNoViableAltException) {
let pos = lexer.charPositionInLine;
let msg = "Invalid tokens or characters at index " + pos + " in path '" + path + "' -- " + e.message;
throw new RangeError(msg);
}
throw e;
}
let tokens = tokenStream.getTokens();
// console.log("path=" + path + "=>" + tokens);
let elements = [];
let n = tokens.length;
let i = 0;
loop: while (i < n) {
let el = tokens[i];
let next;
switch (el.type) {
case XPathLexer.ROOT:
case XPathLexer.ANYWHERE:
let anywhere = el.type === XPathLexer.ANYWHERE;
i++;
next = tokens[i];
let invert = next.type === XPathLexer.BANG;
if (invert) {
i++;
next = tokens[i];
}
let pathElement = this.getXPathElement(next, anywhere);
pathElement.invert = invert;
elements.push(pathElement);
i++;
break;
case XPathLexer.TOKEN_REF:
case XPathLexer.RULE_REF:
case XPathLexer.WILDCARD:
elements.push(this.getXPathElement(el, false));
i++;
break;
case Token.EOF:
break loop;
default:
throw new Error("Unknowth path element " + el);
}
}
return elements;
}
/**
* Convert word like `*` or `ID` or `expr` to a path
* element. `anywhere` is `true` if `//` precedes the
* word.
*/
getXPathElement(wordToken, anywhere) {
if (wordToken.type === Token.EOF) {
throw new Error("Missing path element at end of path");
}
let word = wordToken.text;
if (word == null) {
throw new Error("Expected wordToken to have text content.");
}
let ttype = this.parser.getTokenType(word);
let ruleIndex = this.parser.getRuleIndex(word);
switch (wordToken.type) {
case XPathLexer.WILDCARD:
return anywhere ?
new XPathWildcardAnywhereElement() :
new XPathWildcardElement();
case XPathLexer.TOKEN_REF:
case XPathLexer.STRING:
if (ttype === Token.INVALID_TYPE) {
throw new Error(word + " at index " +
wordToken.startIndex +
" isn't a valid token name");
}
return anywhere ?
new XPathTokenAnywhereElement(word, ttype) :
new XPathTokenElement(word, ttype);
default:
if (ruleIndex === -1) {
throw new Error(word + " at index " +
wordToken.startIndex +
" isn't a valid rule name");
}
return anywhere ?
new XPathRuleAnywhereElement(word, ruleIndex) :
new XPathRuleElement(word, ruleIndex);
}
}
static findAll(tree, xpath, parser) {
let p = new XPath(parser, xpath);
return p.evaluate(tree);
}
/**
* Return a list of all nodes starting at `t` as root that satisfy the
* path. The root `/` is relative to the node passed to {@link evaluate}.
*/
evaluate(t) {
let dummyRoot = new ParserRuleContext();
dummyRoot.addChild(t);
let work = new Set([dummyRoot]);
let i = 0;
while (i < this.elements.length) {
let next = new Set();
for (let node of work) {
if (node.childCount > 0) {
// only try to match next element if it has children
// e.g., //func/*/stat might have a token node for which
// we can't go looking for stat nodes.
let matching = this.elements[i].evaluate(node);
matching.forEach(next.add, next);
}
}
i++;
work = next;
}
return work;
}
}
XPath.WILDCARD = "*"; // word not operator/separator
XPath.NOT = "!"; // word for invert operator