arquero
Version:
Query processing and transformation of array-backed data tables.
407 lines (362 loc) • 12.6 kB
JavaScript
import { parse } from 'acorn';
import {
ArrayPattern,
Constant,
Function,
Identifier,
Literal,
MemberExpression,
ObjectPattern,
Op,
Parameter,
Property
} from './ast/constants.js';
import { is, isFunctionExpression } from './ast/util.js';
import { walk } from './ast/walk.js';
import { constants } from './constants.js';
import { rewrite } from './rewrite.js';
import { ROW_OBJECT, rowObjectExpression } from './row-object.js';
import {
getAggregate, getWindow,
hasAggregate, hasFunction, hasWindow
} from '../op/index.js';
import { error } from '../util/error.js';
import { isArray } from '../util/is-array.js';
import { isNumber } from '../util/is-number.js';
import { toString } from '../util/to-string.js';
// Options handed to acorn's parse() when parsing expression code.
const PARSER_OPT = { ecmaVersion: 11 };
// Identifiers registered in scope by parseExpression when the expression
// is not written as a function: the parameter object, the row of a
// single-table expression, and the two rows of a join expression.
const DEFAULT_PARAM_ID = '$';
const DEFAULT_TUPLE_ID = 'd';
const DEFAULT_TUPLE_ID1 = 'd1';
const DEFAULT_TUPLE_ID2 = 'd2';
// Builds a visitor that reports "<msg> not allowed" for the visited node.
const NO = msg => (node, ctx) => ctx.error(node, msg + ' not allowed');
const ERROR_AGGREGATE = NO('Aggregate function');
const ERROR_WINDOW = NO('Window function');
// Error messages passed to ctx.error by the visitors and helpers below.
const ERROR_ARGUMENT = 'Invalid argument';
const ERROR_COLUMN = 'Invalid column reference';
const ERROR_AGGRONLY = ERROR_COLUMN + ' (must be input to an aggregate function)';
const ERROR_FUNCTION = 'Invalid function call';
const ERROR_MEMBER = 'Invalid member expression';
const ERROR_OP_PARAMETER = 'Invalid operator parameter';
const ERROR_PARAM = 'Invalid param reference';
const ERROR_VARIABLE = 'Invalid variable reference';
const ERROR_VARIABLE_OP = 'Variable not accessible in operator call';
const ERROR_DECLARATION = 'Unsupported variable declaration';
const ERROR_DESTRUCTURE = 'Unsupported destructuring pattern';
// Fragments combined into the explanatory notes appended to some errors.
const ERROR_CLOSURE = 'Table expressions do not support closures';
const ERROR_ESCAPE = 'Use aq.escape(fn) to use a function as-is (including closures)';
const ERROR_USE_PARAMS = 'use table.params({ name: value }) to define dynamic parameters';
const ERROR_ADD_FUNCTION = 'use aq.addFunction(name, fn) to add new op functions';
// Notes passed as the third argument of ctx.error for unresolved
// variables and unknown function calls, respectively.
const ERROR_VARIABLE_NOTE = `\nNote: ${ERROR_CLOSURE}. ${ERROR_ESCAPE}, or ${ERROR_USE_PARAMS}.`;
const ERROR_FUNCTION_NOTE = `\nNote: ${ERROR_CLOSURE}. ${ERROR_ESCAPE}, or ${ERROR_ADD_FUNCTION}.`;
// Reported when the row-object function is used without ctx.table.
const ERROR_ROW_OBJECT = `The ${ROW_OBJECT} method is not valid in multi-table expressions.`;
/**
 * Parse an expression specification, rewrite column references and
 * function calls in its AST, and return the rewritten root node.
 * Per-expression parsing state on the context is reset on every call.
 * @param {object} ctx The parsing context; mutated in place.
 * @param {*} spec The expression specification handed to parseAST.
 * @return {object} The root AST node stored on ctx.root.
 */
export function parseExpression(ctx, spec) {
  // parse before touching ctx, so a parse failure leaves it unchanged
  const ast = parseAST(spec);

  Object.assign(ctx, {
    root: ast,
    spec,
    tuple: null,
    tuple1: null,
    tuple2: null,
    $param: null,
    $op: 0,
    scope: new Set(),
    paramsRef: new Map(),
    columnRef: new Map()
  });

  let body = ast;
  if (isFunctionExpression(ast)) {
    // identifiers come from the function's own parameter list
    parseFunction(ast, ctx);
    body = ast.body;
  } else {
    // no function definition: fall back to the default identifiers
    const defaults = ctx.join
      ? {
          tuple1: DEFAULT_TUPLE_ID1,
          tuple2: DEFAULT_TUPLE_ID2,
          $param: DEFAULT_PARAM_ID
        }
      : { tuple: DEFAULT_TUPLE_ID, $param: DEFAULT_PARAM_ID };
    for (const [ref, id] of Object.entries(defaults)) {
      ctx[ref] = id;
      ctx.scope.add(id);
    }
  }

  // rewrite column references & function calls
  walk(body, ctx, visitors);
  return ctx.root;
}
/**
 * Parse an expression specification into an AST node.
 * Field references are turned into accessor code via fieldRef, arrays
 * are serialized with toString, and any other input is used as-is.
 * The code is parsed as the right-hand side of an assignment.
 * Any failure is reported as an "Expression parse error".
 * @param {*} expr The expression specification.
 * @return {object} The AST node for the expression.
 */
function parseAST(expr) {
  try {
    let code = expr;
    if (expr.field) {
      code = fieldRef(expr);
    } else if (isArray(expr)) {
      code = toString(expr);
    }
    const source = `expr=(${code})`;
    // @ts-ignore
    return parse(source, PARSER_OPT).body[0].expression.right;
  } catch {
    error(`Expression parse error: ${expr+''}`);
  }
}
/**
 * Generate accessor function code for a field reference.
 * @param {object} expr A field reference; its string form is the column
 *  name and a truthy `table` property selects the two-argument form.
 * @return {string} Arrow function source code that reads the column.
 */
function fieldRef(expr) {
  const col = JSON.stringify(expr + '');
  if (expr.table) {
    // read from the second argument
    return `(a,b)=>b[${col}]`;
  }
  return `d=>d[${col}]`;
}
/**
 * Visitors applied by walk() to the body of a table expression.
 * Unsupported syntax is rejected, and references to columns, parameters,
 * constants and functions are rewritten in place. The CallExpression and
 * MemberExpression visitors return false after rewriting a whole subtree
 * (presumably telling walk() not to descend into it; confirm in walk.js).
 */
const visitors = {
  FunctionDeclaration: NO('Function definitions'),
  ForStatement: NO('For loops'),
  ForOfStatement: NO('For-of loops'),
  ForInStatement: NO('For-in loops'),
  WhileStatement: NO('While loops'),
  DoWhileStatement: NO('Do-while loops'),
  AwaitExpression: NO('Await expressions'),
  ArrowFunctionExpression: NO('Function definitions'),
  AssignmentExpression: NO('Assignments'),
  FunctionExpression: NO('Function definitions'),
  NewExpression: NO('Use of "new"'),
  UpdateExpression: NO('Update expressions'),

  // register locally declared variable names in scope
  VariableDeclarator(node, ctx) {
    handleDeclaration(node.id, ctx);
  },

  Identifier(node, ctx, parent) {
    if (handleIdentifier(node, ctx, parent) && !ctx.scope.has(node.name)) {
      // handle identifier passed responsibility here
      // raise error if identifier not defined in scope
      ctx.error(node, ERROR_VARIABLE, ERROR_VARIABLE_NOTE);
    }
  },

  CallExpression(node, ctx) {
    const name = functionName(node.callee);
    const def = getAggregate(name) || getWindow(name);

    // parse operator and rewrite invocation
    if (def) {
      if ((ctx.join || ctx.aggregate === false) && hasAggregate(name)) {
        ERROR_AGGREGATE(node, ctx);
      }
      if ((ctx.join || ctx.window === false) && hasWindow(name)) {
        ERROR_WINDOW(node, ctx);
      }

      // flag that we are inside an operator call while handling arguments
      ctx.$op = 1;
      if (ctx.ast) {
        updateFunctionNode(node, name, ctx);
        node.arguments.forEach(arg => walk(arg, ctx, opVisitors));
      } else {
        const op = ctx.op(parseOperator(ctx, def, name, node.arguments));
        Object.assign(node, { type: Op, name: op.id });
      }
      ctx.$op = 0;
      return false;
    } else if (hasFunction(name)) {
      updateFunctionNode(node, name, ctx);
    } else {
      ctx.error(node, ERROR_FUNCTION, ERROR_FUNCTION_NOTE);
    }
  },

  MemberExpression(node, ctx, parent) {
    const { object, property } = node;

    // bail if left head is not an identifier
    // in this case we will recurse and handle it later
    if (!is(Identifier, object)) return;
    const { name } = object;

    // allow use of Math prefix to access constant values
    if (isMath(node) && is(Identifier, property)
        && Object.hasOwn(constants, property.name)) {
      updateConstantNode(node, property.name);
      return;
    }

    const index = name === ctx.tuple ? 0
      : name === ctx.tuple1 ? 1
      : name === ctx.tuple2 ? 2
      : -1;

    if (index >= 0) {
      // replace member expression with column ref
      return spliceMember(node, index, ctx, checkColumn, parent);
    } else if (name === ctx.$param) {
      // replace member expression with param ref
      return spliceMember(node, index, ctx, checkParam);
    } else if (ctx.paramsRef.has(name)) {
      // head is an alias of a destructured parameter: rewrite only the
      // head identifier so the property access on the parameter value is
      // kept (rewriting the whole member expression would drop it)
      updateParameterNode(object, ctx.paramsRef.get(name));
    } else if (ctx.columnRef.has(name)) {
      updateColumnNode(object, name, ctx, node);
    } else if (Object.hasOwn(ctx.params, name)) {
      updateParameterNode(object, name);
    }
  }
};
/**
 * Replace a member expression on a row or parameter object with a
 * direct reference, delegating validation and rewriting to `check`.
 * @param {object} node The member expression node.
 * @param {number} index The table index forwarded to `check`.
 * @param {object} ctx The parsing context.
 * @param {Function} check Callback invoked as
 *  check(node, name, index, ctx, parent).
 * @param {object} [parent] The parent AST node, if any.
 * @return {boolean} Always false.
 */
function spliceMember(node, index, ctx, check, parent) {
  const key = node.property;
  let name;

  if (node.computed && is(Literal, key)) {
    name = key.value;
  } else if (node.computed) {
    try {
      // visit subtree to ensure nodes are rewritten as needed
      // then compile the code to compute the property name
      walk(key, ctx, visitors, node);
      name = ctx.param(key);
    } catch {
      ctx.error(node, ERROR_MEMBER);
    }
  } else {
    name = key.name;
  }

  check(node, name, index, ctx, parent);
  return false;
}
/**
 * Visitors used for the arguments of an aggregate or window operator
 * call. They extend the standard visitors, but disallow variable
 * declarations, unresolved identifiers, and any call that is not a
 * registered function.
 */
const opVisitors = {
  ...visitors,

  VariableDeclarator: NO('Variable declaration in operator call'),

  Identifier: (node, ctx, parent) => {
    // identifiers not resolved by handleIdentifier are never accessible here
    const unresolved = handleIdentifier(node, ctx, parent);
    if (unresolved) {
      ctx.error(node, ERROR_VARIABLE_OP);
    }
  },

  CallExpression: (node, ctx) => {
    const name = functionName(node.callee);
    if (!hasFunction(name)) {
      ctx.error(node, ERROR_FUNCTION, ERROR_FUNCTION_NOTE);
      return;
    }
    // rewrite if built-in function
    updateFunctionNode(node, name, ctx);
  }
};
/**
 * Process the parameter list of a function expression and make the
 * function body the new root of the parsed expression.
 * Join expressions accept (row1, row2, params); all other expressions
 * accept (row, params). Extra parameters are ignored.
 * @param {object} node The function expression AST node.
 * @param {object} ctx The parsing context; mutated in place.
 */
function parseFunction(node, ctx) {
  if (node.generator) NO('Generator functions')(node, ctx);
  if (node.async) NO('Async functions')(node, ctx);

  // alias recorders for destructured column and parameter names
  const columnAlias = index => (name, key) => {
    ctx.columnRef.set(name, [key, index]);
  };
  const paramAlias = (name, key) => {
    ctx.paramsRef.set(name, key);
  };

  // positional meaning of each function parameter
  const slots = ctx.join
    ? [
        ['tuple1', columnAlias(1)],
        ['tuple2', columnAlias(2)],
        ['$param', paramAlias]
      ]
    : [
        ['tuple', columnAlias(0)],
        ['$param', paramAlias]
      ];

  const { params } = node;
  slots.slice(0, params.length).forEach(([refName, alias], i) => {
    parseRef(ctx, params[i], refName, alias);
  });

  ctx.root = node.body;
}
/**
 * Process a single function parameter of a table expression.
 * An identifier parameter is added to scope and recorded on the context
 * under `refName`. An object destructuring pattern registers each bound
 * name through `alias(localName, key)`. Other parameter forms are left
 * untouched.
 * @param {object} ctx The parsing context; mutated in place.
 * @param {object} node The parameter AST node.
 * @param {string} refName The context property that stores the name of
 *  an identifier parameter.
 * @param {Function} alias Callback recording a destructured alias.
 */
function parseRef(ctx, node, refName, alias) {
  if (is(Identifier, node)) {
    ctx.scope.add(node.name);
    ctx[refName] = node.name;
  } else if (is(ObjectPattern, node)) {
    node.properties.forEach(p => {
      // A computed identifier key (`{ [x]: v }`) names a variable, not a
      // property, so it must not be read as the literal key "x". It falls
      // through to the invalid-argument error, as do keyless rest elements.
      const key = is(Identifier, p.key) && !p.computed ? p.key.name
        : is(Literal, p.key) ? p.key.value
        : ctx.error(p, ERROR_ARGUMENT);
      // only plain identifier bindings are supported (no defaults/nesting)
      if (!is(Identifier, p.value)) {
        ctx.error(p.value, ERROR_DESTRUCTURE);
      }
      alias(p.value.name, key);
    });
  }
}
/**
 * Build an operator specification from the arguments of an aggregate or
 * window function call. Leading arguments are compiled as field inputs
 * and following arguments as parameters, with counts taken from
 * def.param; any further argument is reported as an error.
 * @param {object} ctx The parsing context.
 * @param {object} def The operator definition; def.param holds the
 *  [field count, parameter count] pair.
 * @param {string} name The operator name.
 * @param {object[]} args The call argument AST nodes.
 * @return {object} The operator spec: name, fields, params, plus any
 *  properties of ctx.spec.window.
 */
function parseOperator(ctx, def, name, args) {
  const [fieldCount = 0, paramCount = 0] = [
    def.param[0] || 0,
    def.param[1] || 0
  ];
  const paramEnd = fieldCount + paramCount;
  const fields = [];
  const params = [];

  args.forEach((arg, i) => {
    if (i >= paramEnd) {
      ctx.error(arg, ERROR_OP_PARAMETER);
      return;
    }
    walk(arg, ctx, opVisitors);
    if (i < fieldCount) {
      fields.push(ctx.field(arg));
    } else {
      params.push(ctx.param(arg));
    }
  });

  const windowOptions = ctx.spec.window || {};
  return { name, fields, params, ...windowOptions };
}
/**
 * Extract the function name from the callee of a call expression.
 * @param {object} node The callee AST node.
 * @return {string|null} The identifier name, the property name of a
 *  member expression (mapped through rewriteMath for Math members),
 *  or null for any other kind of callee.
 */
function functionName(node) {
  if (is(Identifier, node)) {
    return node.name;
  }
  if (!is(MemberExpression, node)) {
    return null;
  }
  const method = node.property.name;
  return isMath(node) ? rewriteMath(method) : method;
}
/**
 * Test if a member expression has the identifier `Math` as its object.
 * @param {object} node The member expression AST node.
 * @return {boolean} Truthy if the object is the identifier `Math`.
 */
function isMath(node) {
  const head = node.object;
  return is(Identifier, head) && head.name === 'Math';
}
/**
 * Map a Math method name to the function name used in its place.
 * @param {string} name The Math method name.
 * @return {string} 'greatest' for 'max', 'least' for 'min', and the
 *  unchanged name otherwise.
 */
function rewriteMath(name) {
  switch (name) {
    case 'max':
      return 'greatest';
    case 'min':
      return 'least';
    default:
      return name;
  }
}
/**
 * Resolve an identifier against the known references and rewrite it in
 * place when it matches. Member property names and object property keys
 * are skipped. Lookup order: destructured parameter aliases,
 * destructured column aliases, named parameters, constants.
 * @param {object} node The identifier AST node.
 * @param {object} ctx The parsing context.
 * @param {object} [parent] The parent AST node, if any.
 * @return {true|undefined} True if the identifier was not resolved and
 *  the caller must handle it; undefined otherwise.
 */
function handleIdentifier(node, ctx, parent) {
  const id = node.name;

  // do nothing: check head node, not nested properties
  if (is(MemberExpression, parent) && parent.property === node) return;

  // do nothing: identifiers allowed in object expressions
  if (is(Property, parent) && parent.key === node) return;

  if (ctx.paramsRef.has(id)) {
    updateParameterNode(node, ctx.paramsRef.get(id));
    return;
  }
  if (ctx.columnRef.has(id)) {
    updateColumnNode(node, id, ctx, parent);
    return;
  }
  if (Object.hasOwn(ctx.params, id)) {
    updateParameterNode(node, id);
    return;
  }
  if (Object.hasOwn(constants, id)) {
    updateConstantNode(node, id);
    return;
  }

  // unresolved: pass responsibility back to the caller
  return true;
}
/**
 * Validate a column reference and rewrite the node as a column access.
 * @param {object} node The AST node to rewrite.
 * @param {string} name The column name.
 * @param {number} index The table index: 0 for ctx.table, 1 or 2 for
 *  the corresponding entry of ctx.join, negative for no table.
 * @param {object} ctx The parsing context.
 * @param {object} [parent] The parent AST node, if any.
 */
function checkColumn(node, name, index, ctx, parent) {
  // locate the backing table, if we have one
  let table = null;
  if (index === 0) {
    table = ctx.table;
  } else if (index > 0) {
    table = ctx.join[index - 1];
  }

  // check column existence if we have a backing table
  const col = table ? table.column(name) : table;
  if (table && !col) {
    ctx.error(node, ERROR_COLUMN);
  }

  // check if column reference is valid in current context
  const outsideOperator = !ctx.$op;
  if (ctx.aggronly && outsideOperator) {
    ctx.error(node, ERROR_AGGRONLY);
  }

  // rewrite ast node as a column access
  rewrite(node, name, index, col, parent);
}
/**
 * Rewrite a node that refers to a destructured column alias.
 * @param {object} node The AST node to rewrite.
 * @param {string} key The alias used to look up the [name, index] pair
 *  in ctx.columnRef.
 * @param {object} ctx The parsing context.
 * @param {object} [parent] The parent AST node, if any.
 */
function updateColumnNode(node, key, ctx, parent) {
  const ref = ctx.columnRef.get(key);
  const columnName = ref[0];
  const tableIndex = ref[1];
  checkColumn(node, columnName, tableIndex, ctx, parent);
}
/**
 * Validate a parameter reference and rewrite the node as a parameter.
 * The name is only checked when the context provides a params object.
 * @param {object} node The AST node to rewrite.
 * @param {string} name The parameter name.
 * @param {number} index Unused; keeps the signature aligned with
 *  checkColumn.
 * @param {object} ctx The parsing context.
 */
function checkParam(node, name, index, ctx) {
  const { params } = ctx;
  const isKnown = !params || Object.hasOwn(params, name);
  if (!isKnown) {
    ctx.error(node, ERROR_PARAM);
  }
  updateParameterNode(node, name);
}
/**
 * Turn an AST node into a parameter reference, in place.
 * @param {object} node The AST node to rewrite.
 * @param {string} name The parameter name.
 */
function updateParameterNode(node, name) {
  Object.assign(node, { type: Parameter, name });
}
/**
 * Turn an AST node into a constant reference, in place. The raw value
 * is looked up in the constants table.
 * @param {object} node The AST node to rewrite.
 * @param {string} name The constant name.
 */
function updateConstantNode(node, name) {
  const raw = constants[name];
  Object.assign(node, { type: Constant, name, raw });
}
/**
 * Rewrite a function call node, in place. Calls to the row-object
 * function are expanded via rowObjectExpression over either the listed
 * columns or all columns of ctx.table; for any other function only the
 * callee is replaced with a Function reference.
 * @param {object} node The call expression AST node.
 * @param {string} name The function name.
 * @param {object} ctx The parsing context.
 */
function updateFunctionNode(node, name, ctx) {
  if (name !== ROW_OBJECT) {
    node.callee = { type: Function, name };
    return;
  }

  const table = ctx.table;
  if (!table) ctx.error(node, ERROR_ROW_OBJECT);

  // resolve one call argument to an existing column name
  const toColumnName = arg => {
    const ref = ctx.param(arg);
    // numeric arguments are column indices
    const column = isNumber(ref) ? table.columnName(ref) : ref;
    if (!table.column(column)) ctx.error(arg, ERROR_COLUMN);
    return column;
  };

  const args = node.arguments;
  const columns = args.length
    ? args.map(arg => toColumnName(arg))
    : table.columnNames();

  rowObjectExpression(node, table, columns);
}
/**
 * Register the names bound by a variable declaration pattern in scope.
 * Identifiers and (nested) array/object destructuring patterns are
 * supported; every other pattern form is reported as an unsupported
 * declaration.
 * @param {object} node The declaration target: the `id` of a variable
 *  declarator, or a pattern nested inside one.
 * @param {object} ctx The parsing context; ctx.scope is updated.
 */
function handleDeclaration(node, ctx) {
  if (is(Identifier, node)) {
    ctx.scope.add(node.name);
  } else if (is(ArrayPattern, node)) {
    node.elements.forEach(elm => {
      if (elm == null) {
        // a hole (`[, b]`) is a null element with no node of its own,
        // so report the enclosing pattern instead
        ctx.error(node, ERROR_DECLARATION);
      } else {
        handleDeclaration(elm, ctx);
      }
    });
  } else if (is(ObjectPattern, node)) {
    // rest elements (`{ ...r }`) have no `value`; pass the element itself
    // so the error below is reported against a real node
    node.properties.forEach(prop => handleDeclaration(prop.value ?? prop, ctx));
  } else {
    // `node` is already the offending pattern (it has no `id` property)
    ctx.error(node, ERROR_DECLARATION);
  }
}