@bokeh/bokehjs
Version:
Interactive, novel data visualization
596 lines • 21.3 kB
JavaScript
// Based on https://ericsmekens.github.io/jsep/.
import { dict } from "../core/util/object";
const TAB_CODE = 9;
const LF_CODE = 10;
const CR_CODE = 13;
const SPACE_CODE = 32;
const PERIOD_CODE = 46; // "."
const COMMA_CODE = 44; // ","
const SQUOTE_CODE = 39; // single quote
const DQUOTE_CODE = 34; // double quotes
const OPAREN_CODE = 40; // (
const CPAREN_CODE = 41; // )
const OBRACK_CODE = 91; // [
const CBRACK_CODE = 93; // ]
// const QUMARK_CODE = 63 // ?
const SEMCOL_CODE = 59; // ;
// const COLON_CODE = 58 // :
// Node Types
// ----------
// This is the full set of types that any JSEP node can be.
// Store them here to save space when minified
export const COMPOUND = Symbol("Compound");
export const LITERAL = Symbol("Literal");
export const IDENT = Symbol("Identifier");
export const MEMBER = Symbol("MemberExpression");
export const INDEX = Symbol("IndexExpression");
export const CALL = Symbol("CallExpression");
export const UNARY = Symbol("UnaryExpression");
export const BINARY = Symbol("BinaryExpression");
export const SEQUENCE = Symbol("SequenceExpression");
export const ARRAY = Symbol("ArrayExpression");
export const FAILURE = Symbol("Failure");
// Operations
// ----------
// Use a quickly-accessible map to store all of the unary operators
// Values are set to `1` (it really doesn't matter)
const unary_ops = dict({
"-": 1,
"!": 1,
"~": 1,
"+": 1,
});
// Also use a map for the binary operations but set their values to their
// binary precedence for quick reference (higher number = higher precedence)
// see [Order of operations](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence)
const binary_ops = dict({
"||": 1,
"&&": 2,
"|": 3,
"^": 4,
"&": 5,
"==": 6, "!=": 6,
"<": 7, ">": 7, "<=": 7, ">=": 7,
"<<": 8, ">>": 8,
"+": 9, "-": 9,
"*": 10, "/": 10, "%": 10,
"**": 11,
});
// Additional valid identifier chars, apart from a-z, A-Z and 0-9 (except on the starting char)
const additional_identifier_chars = new Set(["$", "_"]);
// Literals
// ----------
// Store the values to return for the various literals we may encounter
const literals = dict({
true: true,
false: false,
null: null,
});
function max_key_len(obj) {
return Math.max(0, ...[...obj.keys()].map((k) => k.length));
}
const max_unop_len = max_key_len(unary_ops);
const max_binop_len = max_key_len(binary_ops);
function binary_precedence(op_val) {
return binary_ops.get(op_val) ?? 0;
}
function is_decimal_digit(ch) {
return ch >= 48 && ch <= 57; // 0...9
}
function is_identifier_start(ch) {
return (ch >= 65 && ch <= 90) || // A...Z
(ch >= 97 && ch <= 122) || // a...z
(ch >= 128 && !binary_ops.has(String.fromCharCode(ch))) || // any non-ASCII that is not an operator
(additional_identifier_chars.has(String.fromCharCode(ch))); // additional characters
}
function is_identifier_part(ch) {
return is_identifier_start(ch) || is_decimal_digit(ch);
}
class ParseError extends Error {
static __name__ = "ParseError";
}
export class Parser {
expr;
static __name__ = "Parser";
// `index` stores the character number we are currently at
// All of the gobbles below will modify `index` as we move along
index = 0;
constructor(expr) {
this.expr = expr;
}
get char() {
return this.expr.charAt(this.index);
}
get code() {
return this.expr.charCodeAt(this.index);
}
/**
* throw error at index of the expression
*/
error(message) {
throw new ParseError(`${message} at character ${this.index}`);
}
/**
* Push `index` up to the next non-space character
*/
gobbleSpaces() {
let ch = this.code;
while (ch == SPACE_CODE || ch == TAB_CODE || ch == LF_CODE || ch == CR_CODE) {
ch = this.expr.charCodeAt(++this.index);
}
}
/**
* Top-level method to parse all expressions and returns compound or single node
*/
parse() {
try {
const nodes = this.gobbleExpressions(undefined);
// If there's only one expression just try returning the expression
const node = nodes.length == 1 ? nodes[0] : { type: COMPOUND, body: nodes };
return node;
}
catch (error) {
if (error instanceof ParseError) {
return { type: FAILURE, message: error.message };
}
else {
throw error;
}
}
}
/**
* top-level parser (but can be reused within as well)
*/
gobbleExpressions(until) {
const nodes = [];
while (this.index < this.expr.length) {
const ch_i = this.code;
// Expressions can be separated by semicolons, commas, or just inferred without any
// separators
if (ch_i == SEMCOL_CODE || ch_i == COMMA_CODE) {
this.index++; // ignore separators
}
else {
// Try to gobble each expression individually
const node = this.gobbleExpression();
if (node != false) {
nodes.push(node);
// If we weren't able to find a binary expression and are out of room, then
// the expression passed in probably has too much
}
else if (this.index < this.expr.length) {
if (ch_i == until) {
break;
}
this.error(`Unexpected '${this.char}'`);
}
}
}
return nodes;
}
/**
* The main parsing function.
*/
gobbleExpression() {
const node = this.gobbleBinaryExpression();
this.gobbleSpaces();
return node;
}
/**
* Search for the operation portion of the string (e.g. `+`, `===`)
* Start by taking the longest possible binary operations (3 characters: `===`, `!==`, `>>>`)
* and move down from 3 to 2 to 1 character until a matching binary operation is found
* then, return that binary operation
*/
gobbleBinaryOp() {
this.gobbleSpaces();
let to_check = this.expr.substring(this.index, this.index + max_binop_len);
let tc_len = to_check.length;
while (tc_len > 0) {
// Don't accept a binary op when it is an identifier.
// Binary ops that start with a identifier-valid character must be followed
// by a non identifier-part valid character
if (binary_ops.has(to_check) && (!is_identifier_start(this.code) ||
(this.index + to_check.length < this.expr.length && !is_identifier_part(this.expr.charCodeAt(this.index + to_check.length))))) {
this.index += tc_len;
return to_check;
}
to_check = to_check.substring(0, --tc_len);
}
return false;
}
/**
* This function is responsible for gobbling an individual expression,
* e.g. `1`, `1+2`, `a+(b*2)-Math.sqrt(2)`
*/
gobbleBinaryExpression() {
// First, try to get the leftmost thing
// Then, check to see if there's a binary operator operating on that leftmost thing
// Don't gobbleBinaryOp without a left-hand-side
const left = this.gobbleToken();
if (left == false) {
return left;
}
let biop = this.gobbleBinaryOp();
// If there wasn't a binary operator, just return the leftmost node
if (biop == false) {
return left;
}
let biop_info = { value: biop, prec: binary_precedence(biop) };
const right = this.gobbleToken();
if (right == false) {
this.error(`Expected expression after ${biop}`);
}
const stack = [left, biop_info, right];
// Properly deal with precedence using [recursive descent](http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm)
let cur_biop;
while ((biop = this.gobbleBinaryOp()) != false) {
const prec = binary_precedence(biop);
if (prec == 0) {
this.index -= biop.length;
break;
}
biop_info = { value: biop, prec };
cur_biop = biop;
// Reduce: make a binary expression from the three topmost entries.
while ((stack.length > 2) && (prec <= stack[stack.length - 2].prec)) {
const right = stack.pop();
const biop = stack.pop().value;
const left = stack.pop();
const node = {
type: BINARY,
operator: biop,
left,
right,
};
stack.push(node);
}
const node = this.gobbleToken();
if (node == false) {
this.error(`Expected expression after ${cur_biop}`);
}
stack.push(biop_info, node);
}
let i = stack.length - 1;
let node = stack[i];
while (i > 1) {
node = {
type: BINARY,
operator: stack[i - 1].value,
left: stack[i - 2],
right: node,
};
i -= 2;
}
return node;
}
/**
* An individual part of a binary expression:
* e.g. `foo.bar(baz)`, `1`, `"abc"`, `(a % 2)` (because it's in parenthesis)
*/
gobbleToken() {
this.gobbleSpaces();
const ch = this.code;
if (is_decimal_digit(ch) || ch == PERIOD_CODE) {
// Char code 46 is a dot `.` which can start off a numeric literal
return this.gobbleNumericLiteral();
}
let node = false;
if (ch == SQUOTE_CODE || ch == DQUOTE_CODE) {
// Single or double quotes
node = this.gobbleStringLiteral();
}
else if (ch == OBRACK_CODE) {
node = this.gobbleArray();
}
else {
let to_check = this.expr.substring(this.index, this.index + max_unop_len);
let tc_len = to_check.length;
while (tc_len > 0) {
// Don't accept an unary op when it is an identifier.
// Unary ops that start with a identifier-valid character must be followed
// by a non identifier-part valid character
if (unary_ops.has(to_check) && (!is_identifier_start(this.code) ||
(this.index + to_check.length < this.expr.length && !is_identifier_part(this.expr.charCodeAt(this.index + to_check.length))))) {
this.index += tc_len;
const argument = this.gobbleToken();
if (argument == false) {
this.error("missing unaryOp argument");
}
return {
type: UNARY,
operator: to_check,
argument,
prefix: true,
};
}
to_check = to_check.substring(0, --tc_len);
}
if (is_identifier_start(ch)) {
node = this.gobbleIdentifier();
const value = literals.get(node.name);
if (value !== undefined) {
node = {
type: LITERAL,
value,
};
}
}
else if (ch == OPAREN_CODE) { // open parenthesis
node = this.gobbleGroup();
}
}
if (node == false) {
return false;
}
node = this.gobbleTokenProperty(node);
return node;
}
/**
* Gobble properties of of identifiers/strings/arrays/groups.
* e.g. `foo`, `bar.baz`, `foo['bar'].baz`
* It also gobbles function calls:
* e.g. `Math.acos(obj.angle)`
*/
gobbleTokenProperty(node) {
this.gobbleSpaces();
let ch = this.code;
while (ch == PERIOD_CODE || ch == OBRACK_CODE || ch == OPAREN_CODE) {
this.index++;
if (ch == PERIOD_CODE) {
this.gobbleSpaces();
node = {
type: MEMBER,
object: node,
member: this.gobbleIdentifier(),
};
}
else if (ch == OBRACK_CODE) {
const expr = this.gobbleExpression();
if (expr == false) {
this.error("Expected an expression");
}
node = {
type: INDEX,
object: node,
index: expr,
};
this.gobbleSpaces();
ch = this.code;
if (ch !== CBRACK_CODE) {
this.error("Unclosed [");
}
this.index++;
}
else { // ch == OPAREN_CODE
// A function call is being made; gobble all the arguments
node = {
type: CALL,
args: this.gobbleArguments(CPAREN_CODE),
callee: node,
};
}
this.gobbleSpaces();
ch = this.code;
}
return node;
}
/**
* Parse simple numeric literals: `12`, `3.4`, `.5`. Do this by using a string to
* keep track of everything in the numeric literal and then calling `parseFloat` on that string
*/
gobbleNumericLiteral() {
let number = "";
while (is_decimal_digit(this.code)) {
number += this.expr.charAt(this.index++);
}
if (this.code == PERIOD_CODE) { // can start with a decimal marker
number += this.expr.charAt(this.index++);
while (is_decimal_digit(this.code)) {
number += this.expr.charAt(this.index++);
}
}
let ch = this.char;
if (ch == "e" || ch == "E") { // exponent marker
number += this.expr.charAt(this.index++);
ch = this.char;
if (ch == "+" || ch == "-") { // exponent sign
number += this.expr.charAt(this.index++);
}
while (is_decimal_digit(this.code)) { // exponent itself
number += this.expr.charAt(this.index++);
}
if (!is_decimal_digit(this.expr.charCodeAt(this.index - 1))) {
this.error(`Expected exponent (${number + this.char})`);
}
}
const code = this.code;
// Check to make sure this isn't a variable name that start with a number (123abc)
if (is_identifier_start(code)) {
this.error(`Variable names cannot start with a number (${number + this.char})`);
}
else if (code == PERIOD_CODE || (number.length == 1 && number.charCodeAt(0) == PERIOD_CODE)) {
this.error("Unexpected ','");
}
return {
type: LITERAL,
value: parseFloat(number),
};
}
/**
* Parses a string literal, staring with single or double quotes with basic support for escape codes
* e.g. `"hello world"`, `'this is\nJSEP'`
*/
gobbleStringLiteral() {
const quote = this.expr.charAt(this.index++);
let str = "";
let closed = false;
while (this.index < this.expr.length) {
let ch = this.expr.charAt(this.index++);
if (ch == quote) {
closed = true;
break;
}
else if (ch == "\\") {
// Check for all of the common escape codes
ch = this.expr.charAt(this.index++);
switch (ch) {
case "n":
str += "\n";
break;
case "r":
str += "\r";
break;
case "t":
str += "\t";
break;
case "b":
str += "\b";
break;
case "f":
str += "\f";
break;
case "v":
str += "\x0B";
break;
default: str += ch;
}
}
else {
str += ch;
}
}
if (!closed) {
this.error(`Unclosed quote after "${str}"`);
}
return {
type: LITERAL,
value: str,
};
}
/**
* Gobbles only identifiers
* e.g.: `foo`, `_value`, `$x1`
* Also, this function checks if that identifier is a literal:
* (e.g. `true`, `false`, `null`) or `this`
*/
gobbleIdentifier() {
let ch = this.code;
const start = this.index;
if (is_identifier_start(ch)) {
this.index++;
}
else {
this.error(`Unexpected '${this.char}'`);
}
while (this.index < this.expr.length) {
ch = this.code;
if (is_identifier_part(ch)) {
this.index++;
}
else {
break;
}
}
return {
type: IDENT,
name: this.expr.slice(start, this.index),
};
}
/**
* Gobbles a list of arguments within the context of a function call
* or array literal. This function also assumes that the opening character
* `(` or `[` has already been gobbled, and gobbles expressions and commas
* until the terminator character `)` or `]` is encountered.
* e.g. `foo(bar, baz)`, `my_func()`, or `[bar, baz]`
*/
gobbleArguments(termination) {
const args = [];
let closed = false;
let separator_count = 0;
while (this.index < this.expr.length) {
this.gobbleSpaces();
const ch_i = this.code;
if (ch_i == termination) { // done parsing
closed = true;
this.index++;
if (termination == CPAREN_CODE && separator_count != 0 && separator_count >= args.length) {
this.error(`Unexpected token '${String.fromCharCode(termination)}'`);
}
break;
}
else if (ch_i == COMMA_CODE) { // between expressions
this.index++;
separator_count++;
if (separator_count !== args.length) { // missing argument
if (termination == CPAREN_CODE) {
this.error("Unexpected token ','");
}
else if (termination == CBRACK_CODE) {
for (let arg = args.length; arg < separator_count; arg++) {
this.error("Expected an expression");
}
}
}
}
else if (args.length !== separator_count && separator_count !== 0) {
// NOTE: `&& separator_count !== 0` allows for either all commas, or all spaces as arguments
this.error("Expected comma");
}
else {
const node = this.gobbleExpression();
if (node == false || node.type == COMPOUND) {
this.error("Expected comma");
}
args.push(node);
}
}
if (!closed) {
this.error(`Expected ${String.fromCharCode(termination)}`);
}
return args;
}
/**
* Responsible for parsing a group of things within parentheses `()`
* that have no identifier in front (so not a function call)
* This function assumes that it needs to gobble the opening parenthesis
* and then tries to gobble everything within that parenthesis, assuming
* that the next thing it should see is the close parenthesis. If not,
* then the expression probably doesn't have a `)`
*/
gobbleGroup() {
this.index++;
const nodes = this.gobbleExpressions(CPAREN_CODE);
if (this.code == CPAREN_CODE) {
this.index++;
if (nodes.length == 1) {
return nodes[0];
}
else if (nodes.length == 0) {
return false;
}
else {
return {
type: SEQUENCE,
expressions: nodes,
};
}
}
else {
this.error("Unclosed (");
}
}
/**
* Responsible for parsing Array literals `[1, 2, 3]`
* This function assumes that it needs to gobble the opening bracket
* and then tries to gobble the expressions as arguments.
*/
gobbleArray() {
this.index++;
return {
type: ARRAY,
elements: this.gobbleArguments(CBRACK_CODE),
};
}
}
//# sourceMappingURL=parser.js.map